mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
anthropic captioning
This commit is contained in:
@ -1654,12 +1654,12 @@
|
|||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="range-block" data-source="openai,openrouter,makersuite,custom">
|
<div class="range-block" data-source="openai,openrouter,makersuite,claude,custom">
|
||||||
<label for="openai_image_inlining" class="checkbox_label flexWrap widthFreeExpand">
|
<label for="openai_image_inlining" class="checkbox_label flexWrap widthFreeExpand">
|
||||||
<input id="openai_image_inlining" type="checkbox" />
|
<input id="openai_image_inlining" type="checkbox" />
|
||||||
<span data-i18n="Send inline images">Send inline images</span>
|
<span data-i18n="Send inline images">Send inline images</span>
|
||||||
<div id="image_inlining_hint" class="flexBasis100p toggle-description justifyLeft">
|
<div id="image_inlining_hint" class="flexBasis100p toggle-description justifyLeft">
|
||||||
Sends images in prompts if the model supports it (e.g. GPT-4V or Llava 13B).
|
Sends images in prompts if the model supports it (e.g. GPT-4V, Claude 3 or Llava 13B).
|
||||||
Use the <code><i class="fa-solid fa-paperclip"></i></code> action on any message or the
|
Use the <code><i class="fa-solid fa-paperclip"></i></code> action on any message or the
|
||||||
<code><i class="fa-solid fa-wand-magic-sparkles"></i></code> menu to attach an image file to the chat.
|
<code><i class="fa-solid fa-wand-magic-sparkles"></i></code> menu to attach an image file to the chat.
|
||||||
</div>
|
</div>
|
||||||
|
@ -284,6 +284,7 @@ jQuery(function () {
|
|||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openai' && (secret_state[SECRET_KEYS.OPENAI] || extension_settings.caption.allow_reverse_proxy)) ||
|
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openai' && (secret_state[SECRET_KEYS.OPENAI] || extension_settings.caption.allow_reverse_proxy)) ||
|
||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openrouter' && secret_state[SECRET_KEYS.OPENROUTER]) ||
|
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openrouter' && secret_state[SECRET_KEYS.OPENROUTER]) ||
|
||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'google' && secret_state[SECRET_KEYS.MAKERSUITE]) ||
|
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'google' && secret_state[SECRET_KEYS.MAKERSUITE]) ||
|
||||||
|
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'anthropic' && secret_state[SECRET_KEYS.CLAUDE]) ||
|
||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ollama' && textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) ||
|
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ollama' && textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) ||
|
||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'llamacpp' && textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]) ||
|
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'llamacpp' && textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]) ||
|
||||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ooba' && textgenerationwebui_settings.server_urls[textgen_types.OOBA]) ||
|
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ooba' && textgenerationwebui_settings.server_urls[textgen_types.OOBA]) ||
|
||||||
@ -343,7 +344,7 @@ jQuery(function () {
|
|||||||
<label for="caption_source">Source</label>
|
<label for="caption_source">Source</label>
|
||||||
<select id="caption_source" class="text_pole">
|
<select id="caption_source" class="text_pole">
|
||||||
<option value="local">Local</option>
|
<option value="local">Local</option>
|
||||||
<option value="multimodal">Multimodal (OpenAI / llama / Google)</option>
|
<option value="multimodal">Multimodal (OpenAI / Anthropic / llama / Google)</option>
|
||||||
<option value="extras">Extras</option>
|
<option value="extras">Extras</option>
|
||||||
<option value="horde">Horde</option>
|
<option value="horde">Horde</option>
|
||||||
</select>
|
</select>
|
||||||
@ -355,6 +356,7 @@ jQuery(function () {
|
|||||||
<option value="ooba">Text Generation WebUI (oobabooga)</option>
|
<option value="ooba">Text Generation WebUI (oobabooga)</option>
|
||||||
<option value="ollama">Ollama</option>
|
<option value="ollama">Ollama</option>
|
||||||
<option value="openai">OpenAI</option>
|
<option value="openai">OpenAI</option>
|
||||||
|
<option value="anthropic">Anthropic</option>
|
||||||
<option value="openrouter">OpenRouter</option>
|
<option value="openrouter">OpenRouter</option>
|
||||||
<option value="google">Google MakerSuite</option>
|
<option value="google">Google MakerSuite</option>
|
||||||
<option value="custom">Custom (OpenAI-compatible)</option>
|
<option value="custom">Custom (OpenAI-compatible)</option>
|
||||||
@ -364,6 +366,8 @@ jQuery(function () {
|
|||||||
<label for="caption_multimodal_model">Model</label>
|
<label for="caption_multimodal_model">Model</label>
|
||||||
<select id="caption_multimodal_model" class="flex1 text_pole">
|
<select id="caption_multimodal_model" class="flex1 text_pole">
|
||||||
<option data-type="openai" value="gpt-4-vision-preview">gpt-4-vision-preview</option>
|
<option data-type="openai" value="gpt-4-vision-preview">gpt-4-vision-preview</option>
|
||||||
|
<option data-type="anthropic" value="claude-3-opus-20240229">claude-3-opus-20240229</option>
|
||||||
|
<option data-type="anthropic" value="claude-3-sonnet-20240229">claude-3-sonnet-20240229</option>
|
||||||
<option data-type="google" value="gemini-pro-vision">gemini-pro-vision</option>
|
<option data-type="google" value="gemini-pro-vision">gemini-pro-vision</option>
|
||||||
<option data-type="openrouter" value="openai/gpt-4-vision-preview">openai/gpt-4-vision-preview</option>
|
<option data-type="openrouter" value="openai/gpt-4-vision-preview">openai/gpt-4-vision-preview</option>
|
||||||
<option data-type="openrouter" value="haotian-liu/llava-13b">haotian-liu/llava-13b</option>
|
<option data-type="openrouter" value="haotian-liu/llava-13b">haotian-liu/llava-13b</option>
|
||||||
|
@ -23,6 +23,7 @@ export async function getMultimodalCaption(base64Img, prompt) {
|
|||||||
// OpenRouter has a payload limit of ~2MB. Google is 4MB, but we love democracy.
|
// OpenRouter has a payload limit of ~2MB. Google is 4MB, but we love democracy.
|
||||||
// Ooba requires all images to be JPEGs.
|
// Ooba requires all images to be JPEGs.
|
||||||
const isGoogle = extension_settings.caption.multimodal_api === 'google';
|
const isGoogle = extension_settings.caption.multimodal_api === 'google';
|
||||||
|
const isClaude = extension_settings.caption.multimodal_api === 'anthropic';
|
||||||
const isOllama = extension_settings.caption.multimodal_api === 'ollama';
|
const isOllama = extension_settings.caption.multimodal_api === 'ollama';
|
||||||
const isLlamaCpp = extension_settings.caption.multimodal_api === 'llamacpp';
|
const isLlamaCpp = extension_settings.caption.multimodal_api === 'llamacpp';
|
||||||
const isCustom = extension_settings.caption.multimodal_api === 'custom';
|
const isCustom = extension_settings.caption.multimodal_api === 'custom';
|
||||||
@ -39,7 +40,7 @@ export async function getMultimodalCaption(base64Img, prompt) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const useReverseProxy =
|
const useReverseProxy =
|
||||||
extension_settings.caption.multimodal_api === 'openai'
|
(extension_settings.caption.multimodal_api === 'openai' || extension_settings.caption.multimodal_api === 'anthropic')
|
||||||
&& extension_settings.caption.allow_reverse_proxy
|
&& extension_settings.caption.allow_reverse_proxy
|
||||||
&& oai_settings.reverse_proxy
|
&& oai_settings.reverse_proxy
|
||||||
&& isValidUrl(oai_settings.reverse_proxy);
|
&& isValidUrl(oai_settings.reverse_proxy);
|
||||||
@ -87,6 +88,8 @@ export async function getMultimodalCaption(base64Img, prompt) {
|
|||||||
switch (extension_settings.caption.multimodal_api) {
|
switch (extension_settings.caption.multimodal_api) {
|
||||||
case 'google':
|
case 'google':
|
||||||
return '/api/google/caption-image';
|
return '/api/google/caption-image';
|
||||||
|
case 'anthropic':
|
||||||
|
return '/api/anthropic/caption-image';
|
||||||
case 'llamacpp':
|
case 'llamacpp':
|
||||||
return '/api/backends/text-completions/llamacpp/caption-image';
|
return '/api/backends/text-completions/llamacpp/caption-image';
|
||||||
case 'ollama':
|
case 'ollama':
|
||||||
|
@ -3836,6 +3836,7 @@ export function isImageInliningSupported() {
|
|||||||
|
|
||||||
const gpt4v = 'gpt-4-vision';
|
const gpt4v = 'gpt-4-vision';
|
||||||
const geminiProV = 'gemini-pro-vision';
|
const geminiProV = 'gemini-pro-vision';
|
||||||
|
const claude = 'claude-3';
|
||||||
const llava = 'llava';
|
const llava = 'llava';
|
||||||
|
|
||||||
if (!oai_settings.image_inlining) {
|
if (!oai_settings.image_inlining) {
|
||||||
@ -3847,6 +3848,8 @@ export function isImageInliningSupported() {
|
|||||||
return oai_settings.openai_model.includes(gpt4v);
|
return oai_settings.openai_model.includes(gpt4v);
|
||||||
case chat_completion_sources.MAKERSUITE:
|
case chat_completion_sources.MAKERSUITE:
|
||||||
return oai_settings.google_model.includes(geminiProV);
|
return oai_settings.google_model.includes(geminiProV);
|
||||||
|
case chat_completion_sources.CLAUDE:
|
||||||
|
return oai_settings.claude_model.includes(claude);
|
||||||
case chat_completion_sources.OPENROUTER:
|
case chat_completion_sources.OPENROUTER:
|
||||||
return !oai_settings.openrouter_force_instruct && (oai_settings.openrouter_model.includes(gpt4v) || oai_settings.openrouter_model.includes(llava));
|
return !oai_settings.openrouter_force_instruct && (oai_settings.openrouter_model.includes(gpt4v) || oai_settings.openrouter_model.includes(llava));
|
||||||
case chat_completion_sources.CUSTOM:
|
case chat_completion_sources.CUSTOM:
|
||||||
|
@ -505,6 +505,9 @@ app.use('/api/openai', require('./src/endpoints/openai').router);
|
|||||||
//Google API
|
//Google API
|
||||||
app.use('/api/google', require('./src/endpoints/google').router);
|
app.use('/api/google', require('./src/endpoints/google').router);
|
||||||
|
|
||||||
|
//Anthropic API
|
||||||
|
app.use('/api/anthropic', require('./src/endpoints/anthropic').router);
|
||||||
|
|
||||||
// Tokenizers
|
// Tokenizers
|
||||||
app.use('/api/tokenizers', require('./src/endpoints/tokenizers').router);
|
app.use('/api/tokenizers', require('./src/endpoints/tokenizers').router);
|
||||||
|
|
||||||
|
66
src/endpoints/anthropic.js
Normal file
66
src/endpoints/anthropic.js
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
const { readSecret, SECRET_KEYS } = require('./secrets');
|
||||||
|
const fetch = require('node-fetch').default;
|
||||||
|
const express = require('express');
|
||||||
|
const { jsonParser } = require('../express-common');
|
||||||
|
|
||||||
|
const router = express.Router();
|
||||||
|
|
||||||
|
/**
 * POST /caption-image
 *
 * Sends an image together with a text prompt to the Anthropic Messages API and
 * replies with `{ caption }`, where the caption is the text of the first content
 * block in the API response.
 *
 * Request body fields read by this handler:
 *  - image:  data URL string of the form `data:<mime type>;base64,<data>`
 *  - model:  model name, forwarded unchanged
 *  - prompt: text sent alongside the image
 *
 * Responses:
 *  - 200 `{ caption }` on success
 *  - 400 `{ error: true }` when the image is not a usable data URL or no API key is stored
 *  - upstream status with `{ error: true }` when the Anthropic API call is not OK
 *  - 500 when the API response has no caption text or an unexpected error is thrown
 */
router.post('/caption-image', jsonParser, async (request, response) => {
    try {
        const image = request.body.image;

        if (typeof image !== 'string') {
            console.log('Claude captioning request has no image data URL');
            return response.status(400).send({ error: true });
        }

        // "data:image/jpeg;base64,AAAA" -> mime type "image/jpeg", payload "AAAA"
        const mimeType = image.split(';')[0].split(':')[1];
        const base64Data = image.split(',')[1];

        if (!mimeType || !base64Data) {
            console.log('Claude captioning request has a malformed image data URL');
            return response.status(400).send({ error: true });
        }

        const apiKey = readSecret(SECRET_KEYS.CLAUDE);

        // Fail early instead of sending a request that cannot be authenticated.
        if (!apiKey) {
            console.log('No Claude API key found');
            return response.status(400).send({ error: true });
        }

        const url = 'https://api.anthropic.com/v1/messages';
        const body = {
            model: request.body.model,
            messages: [
                {
                    'role': 'user', 'content': [
                        {
                            'type': 'image',
                            'source': {
                                'type': 'base64',
                                'media_type': mimeType,
                                'data': base64Data,
                            },
                        },
                        { 'type': 'text', 'text': request.body.prompt },
                    ],
                },
            ],
            max_tokens: 800,
        };

        console.log('Multimodal captioning request', body);

        const result = await fetch(url, {
            body: JSON.stringify(body),
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'anthropic-version': '2023-06-01',
                'x-api-key': apiKey,
            },
            // node-fetch option: 0 disables the request timeout.
            timeout: 0,
        });

        if (!result.ok) {
            // Plain logging on purpose: no `color` helper is imported in this module,
            // and referencing one here would throw and hide the upstream status.
            const errorText = await result.text();
            console.log(`Claude API returned error: ${result.status} ${result.statusText}`, errorText);
            return response.status(result.status).send({ error: true });
        }

        const generateResponseJson = await result.json();
        console.log('Claude response:', generateResponseJson);

        // Guard against a missing or empty content array before reading the first block.
        const content = generateResponseJson ? generateResponseJson.content : undefined;
        const firstBlock = Array.isArray(content) ? content[0] : undefined;
        const caption = firstBlock ? firstBlock.text : undefined;

        if (!caption) {
            return response.status(500).send('No caption found');
        }

        return response.json({ caption });
    } catch (error) {
        console.error(error);
        return response.status(500).send('Internal server error');
    }
});
|
||||||
|
|
||||||
|
module.exports = { router };
|
Reference in New Issue
Block a user