Add native GPT-4V image inlining

This commit is contained in:
Cohee 2023-11-12 00:09:48 +02:00
parent 5d34c8aef5
commit 2c4f53e7b5
6 changed files with 229 additions and 17 deletions

View File

@ -229,6 +229,10 @@
display: flex;
}
/* Flex-basis utility classes: make a flex child span the full row or half of it. */
.flexBasis100p {
    flex-basis: 100%;
}

.flexBasis50p {
    flex-basis: 50%;
}
@ -263,6 +267,10 @@
flex-shrink: 1
}
/* Flex wrapping utility classes. */
.flexWrap {
    flex-wrap: wrap;
}

.flexnowrap {
    flex-wrap: nowrap;
}

View File

@ -358,3 +358,11 @@ body.expandMessageActions .mes .mes_buttons .extraMesButtons {
body.expandMessageActions .mes .mes_buttons .extraMesButtonsHint {
display: none !important;
}
/* Show the usage hint only while the "Send inline images" checkbox is ticked;
   the hint is a following sibling of the checkbox input, hence the ~ combinator. */
#openai_image_inlining:not(:checked) ~ #image_inlining_hint {
    display: none;
}

#openai_image_inlining:checked ~ #image_inlining_hint {
    display: block;
}

View File

@ -1723,6 +1723,7 @@
</optgroup>
<optgroup label="GPT-4">
<option value="gpt-4">gpt-4</option>
<option value="gpt-4-vision-preview">gpt-4-vision-preview</option>
<option value="gpt-4-1106-preview">gpt-4-1106-preview</option>
<option value="gpt-4-0613">gpt-4-0613</option>
<option value="gpt-4-0314">gpt-4-0314</option>
@ -1745,6 +1746,17 @@
<input id="openai_show_external_models" type="checkbox" />
<span data-i18n="Show External models (provided by API)">Show "External" models (provided by API)</span>
</label>
<label for="openai_image_inlining" class="checkbox_label flexWrap">
<input id="openai_image_inlining" type="checkbox" />
<span data-i18n="Send inline images">Send inline images (only GPT-4V model)</span>
<div id="image_inlining_hint" class="flexBasis100p">
<small>
Natively replaces captioning if the model supports it.<br>
Use the <code><i class="fa-solid fa-image"></i></code> action on any message or the
<code><i class="fa-solid fa-wand-magic-sparkles"></i></code> menu to attach an image to the chat.
</small>
</div>
</label>
</div>
</form>
<form id="claude_form" data-source="claude" action="javascript:void(null);" method="post" enctype="multipart/form-data">
@ -4057,6 +4069,7 @@
<div title="Prompt" class="mes_prompt fa-solid fa-square-poll-horizontal " data-i18n="[title]Prompt"></div>
<div title="Exclude message from prompts" class="mes_hide fa-solid fa-eye" data-i18n="[title]Exclude message from prompts"></div>
<div title="Include message in prompts" class="mes_unhide fa-solid fa-eye-slash" data-i18n="[title]Include message in prompts"></div>
<div title="Embed image" class="mes_embed fa-solid fa-image" data-i18n="[title]Embed image"></div>
<div title="Create bookmark" class="mes_create_bookmark fa-regular fa-solid fa-book-bookmark" data-i18n="[title]Create Bookmark"></div>
<div title="Create branch" class="mes_create_branch fa-regular fa-code-branch" data-i18n="[title]Create Branch"></div>
<div title="Copy" class="mes_copy fa-solid fa-copy " data-i18n="[title]Copy"></div>

View File

@ -3409,7 +3409,7 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject,
generate_data = getNovelGenerationData(finalPrompt, presetSettings, maxLength, isImpersonate, cfgValues);
}
else if (main_api == 'openai') {
let [prompt, counts] = prepareOpenAIMessages({
let [prompt, counts] = await prepareOpenAIMessages({
name2: name2,
charDescription: description,
charPersonality: personality,

View File

@ -1,8 +1,9 @@
import { getBase64Async, saveBase64AsFile } from "../../utils.js";
import { getContext, getApiUrl, doExtrasFetch, extension_settings, modules } from "../../extensions.js";
import { callPopup, getRequestHeaders, saveSettingsDebounced, substituteParams } from "../../../script.js";
import { appendImageToMessage, callPopup, getRequestHeaders, saveSettingsDebounced, substituteParams } from "../../../script.js";
import { getMessageTimeStamp } from "../../RossAscends-mods.js";
import { SECRET_KEYS, secret_state } from "../../secrets.js";
import { isImageInliningSupported } from "../../openai.js";
export { MODULE_NAME };
const MODULE_NAME = 'caption';
@ -223,6 +224,83 @@ function onRefineModeInput() {
saveSettingsDebounced();
}
/**
 * Handles a file chosen via the hidden embed input: converts it to base64,
 * saves it as a chat image file, pushes a new user message carrying the image,
 * and triggers a 'caption' generation so the model can react to it.
 * @param {Event} e - 'change' event from the file input.
 * @returns {Promise<void>}
 */
async function sendEmbeddedImage(e) {
    const file = e.target.files[0];

    // Bail out silently if the picker was cancelled or produced no usable file.
    if (!file || !(file instanceof File)) {
        return;
    }

    try {
        const context = getContext();
        const fileData = await getBase64Async(file);
        // Data URL shape: "data:image/png;base64,...." — extract format and payload.
        const base64Format = fileData.split(',')[0].split(';')[0].split('/')[1];
        const base64Data = fileData.split(',')[1];
        const caption = await callPopup('<h3>Enter a comment or question (optional)</h3>', 'input', 'What is this?', { okButton: 'Send', rows: 2 });
        const imagePath = await saveBase64AsFile(base64Data, context.name2, '', base64Format);
        const message = {
            name: context.name1,
            is_user: true,
            send_date: getMessageTimeStamp(),
            mes: caption || `[${context.name1} sends ${context.name2} a picture]`,
            extra: {
                image: imagePath,
                // Render the image inline only when the user typed a caption.
                inline_image: !!caption,
                title: caption || '',
            },
        };
        context.chat.push(message);
        context.addOneMessage(message);
        await context.generate('caption');
    }
    catch (error) {
        // Log on the error channel so failures are visible in the console.
        console.error(error);
    }
    finally {
        // Reset the input so selecting the same file again re-fires 'change'.
        e.target.form.reset();
        setImageIcon();
    }
}
/**
 * Click handler for the per-message "Embed image" (.mes_embed) button.
 * Opens the hidden file picker and, once a file is chosen, attaches the image
 * to the clicked message and persists the chat.
 * `this` is the clicked element (bound by the delegated jQuery handler).
 */
function onImageEmbedClicked() {
    const context = getContext();
    const messageElement = $(this).closest('.mes');
    const messageId = messageElement.attr('mesid');
    const message = context.chat[messageId];

    if (!message) {
        console.warn('Failed to find message with id', messageId);
        return;
    }

    // Rebind so the picked file lands on THIS message, then open the picker.
    $('#embed_img_file')
        .off('change')
        .on('change', parseAndUploadEmbed)
        .trigger('click');

    /**
     * Converts the chosen file to base64, saves it, and updates the message.
     * @param {Event} e - 'change' event from the hidden file input.
     */
    async function parseAndUploadEmbed(e) {
        const file = e.target.files[0];

        if (!file || !(file instanceof File)) {
            return;
        }

        try {
            const fileData = await getBase64Async(file);
            const base64Data = fileData.split(',')[1];
            const base64Format = fileData.split(',')[0].split(';')[0].split('/')[1];
            const imagePath = await saveBase64AsFile(base64Data, context.name2, '', base64Format);

            if (!message.extra) {
                message.extra = {};
            }

            message.extra.image = imagePath;
            message.extra.inline_image = true;
            message.extra.title = '';
            appendImageToMessage(message, messageElement);
            await context.saveChat();
        }
        catch (error) {
            // Without this, a failed read/save becomes an unhandled promise rejection.
            console.error('Failed to embed image', error);
        }
    }
}
jQuery(function () {
function addSendPictureButton() {
const sendButton = $(`
@ -234,6 +312,12 @@ jQuery(function () {
$('#extensionsMenu').prepend(sendButton);
$(sendButton).hide();
$(sendButton).on('click', () => {
if (isImageInliningSupported()) {
console.log('Native image inlining is supported. Skipping captioning.');
$('#embed_img_file').off('change').on('change', sendEmbeddedImage).trigger('click');
return;
}
const hasCaptionModule =
(modules.includes('caption') && extension_settings.caption.source === 'extras') ||
(extension_settings.caption.source === 'openai' && secret_state[SECRET_KEYS.OPENAI]) ||
@ -249,10 +333,12 @@ jQuery(function () {
});
}
function addPictureSendForm() {
const inputHtml = `<input id="img_file" type="file" accept="image/*">`;
const inputHtml = `<input id="img_file" type="file" hidden accept="image/*">`;
const embedInputHtml = `<input id="embed_img_file" type="file" hidden accept="image/*">`;
const imgForm = document.createElement('form');
imgForm.id = 'img_form';
$(imgForm).append(inputHtml);
$(imgForm).append(embedInputHtml);
$(imgForm).hide();
$('#form_sheld').append(imgForm);
$('#img_file').on('change', onSelectImage);
@ -312,5 +398,6 @@ jQuery(function () {
extension_settings.caption.template = String($('#caption_template').val());
saveSettingsDebounced();
});
$(document).on('click', '.mes_embed', onImageEmbedClicked);
setInterval(moduleWorker, UPDATE_INTERVAL);
});

View File

@ -54,7 +54,9 @@ import {
import {
delay,
download,
getBase64Async,
getFileText, getSortableDelay,
isDataURL,
parseJsonFile,
resetScrollHeight,
stringFormat,
@ -70,7 +72,6 @@ export {
setOpenAIMessages,
setOpenAIMessageExamples,
setupChatCompletionPromptManager,
prepareOpenAIMessages,
sendOpenAIRequest,
getChatCompletionModel,
TokenHandler,
@ -221,6 +222,7 @@ const default_settings = {
exclude_assistant: false,
use_alt_scale: false,
squash_system_messages: false,
image_inlining: false,
};
const oai_settings = {
@ -267,6 +269,7 @@ const oai_settings = {
exclude_assistant: false,
use_alt_scale: false,
squash_system_messages: false,
image_inlining: false,
};
let openai_setting_names;
@ -409,7 +412,8 @@ function setOpenAIMessages(chat) {
// Apply the "wrap in quotes" option
if (role == 'user' && oai_settings.wrap_in_quotes) content = `"${content}"`;
const name = chat[j]['name'];
openai_msgs[i] = { "role": role, "content": content, name: name };
const image = chat[j]?.extra?.image;
openai_msgs[i] = { "role": role, "content": content, name: name, "image": image };
j++;
}
}
@ -592,7 +596,7 @@ export function isOpenRouterWithInstruct() {
* @param type
* @param cyclePrompt
*/
function populateChatHistory(prompts, chatCompletion, type = null, cyclePrompt = null) {
async function populateChatHistory(prompts, chatCompletion, type = null, cyclePrompt = null) {
chatCompletion.add(new MessageCollection('chatHistory'), prompts.index('chatHistory'));
let names = (selected_group && groups.find(x => x.id === selected_group)?.members.map(member => characters.find(c => c.avatar === member)?.name).filter(Boolean).join(', ')) || '';
@ -629,8 +633,13 @@ function populateChatHistory(prompts, chatCompletion, type = null, cyclePrompt =
chatCompletion.insert(message, 'chatHistory');
}
const imageInlining = isImageInliningSupported();
// Insert chat messages as long as there is budget available
[...openai_msgs].reverse().every((chatPrompt, index) => {
const chatPool = [...openai_msgs].reverse();
for (let index = 0; index < chatPool.length; index++) {
const chatPrompt = chatPool[index];
// We do not want to mutate the prompt
const prompt = new Prompt(chatPrompt);
prompt.identifier = `chatHistory-${openai_msgs.length - index}`;
@ -641,10 +650,16 @@ function populateChatHistory(prompts, chatCompletion, type = null, cyclePrompt =
chatMessage.setName(messageName);
}
if (chatCompletion.canAfford(chatMessage)) chatCompletion.insertAtStart(chatMessage, 'chatHistory');
else return false;
return true;
});
if (imageInlining && chatPrompt.image) {
await chatMessage.addImage(chatPrompt.image);
}
if (chatCompletion.canAfford(chatMessage)) {
chatCompletion.insertAtStart(chatMessage, 'chatHistory');
} else {
break;
}
}
// Insert and free new chat
chatCompletion.freeBudget(newChatMessage);
@ -724,7 +739,7 @@ function getPromptPosition(position) {
* @param {string} options.quietPrompt - Instruction prompt for extras
* @param {string} options.type - The type of the chat, can be 'impersonate'.
*/
function populateChatCompletion(prompts, chatCompletion, { bias, quietPrompt, type, cyclePrompt } = {}) {
async function populateChatCompletion(prompts, chatCompletion, { bias, quietPrompt, type, cyclePrompt } = {}) {
// Helper function for preparing a prompt, that already exists within the prompt collection, for completion
const addToChatCompletion = (source, target = null) => {
// We need the prompts array to determine a position for the source.
@ -825,9 +840,9 @@ function populateChatCompletion(prompts, chatCompletion, { bias, quietPrompt, ty
// Decide whether dialogue examples should always be added
if (power_user.pin_examples) {
populateDialogueExamples(prompts, chatCompletion);
populateChatHistory(prompts, chatCompletion, type, cyclePrompt);
await populateChatHistory(prompts, chatCompletion, type, cyclePrompt);
} else {
populateChatHistory(prompts, chatCompletion, type, cyclePrompt);
await populateChatHistory(prompts, chatCompletion, type, cyclePrompt);
populateDialogueExamples(prompts, chatCompletion);
}
@ -969,7 +984,7 @@ function preparePromptsForChatCompletion({ Scenario, charPersonality, name2, wor
* @param dryRun - Whether this is a live call or not.
* @returns {(*[]|boolean)[]} An array where the first element is the prepared chat and the second element is a boolean flag.
*/
function prepareOpenAIMessages({
export async function prepareOpenAIMessages({
name2,
charDescription,
charPersonality,
@ -1012,7 +1027,7 @@ function prepareOpenAIMessages({
});
// Fill the chat completion with as much context as the budget allows
populateChatCompletion(prompts, chatCompletion, { bias, quietPrompt, type, cyclePrompt });
await populateChatCompletion(prompts, chatCompletion, { bias, quietPrompt, type, cyclePrompt });
} catch (error) {
if (error instanceof TokenBudgetExceededError) {
toastr.error('An error occurred while counting tokens: Token budget exceeded.')
@ -1372,6 +1387,16 @@ async function sendOpenAIRequest(type, openai_msgs_tosend, signal) {
"stop": getCustomStoppingStrings(openai_max_stop_strings),
};
// Empty array will produce a validation error
if (!Array.isArray(generate_data.stop) || !generate_data.stop.length) {
delete generate_data.stop;
}
// Vision models don't support logit bias
if (isImageInliningSupported()) {
delete generate_data.logit_bias;
}
// Proxy is only supported for Claude and OpenAI
if (oai_settings.reverse_proxy && [chat_completion_sources.CLAUDE, chat_completion_sources.OPENAI].includes(oai_settings.chat_completion_source)) {
validateReverseProxy();
@ -1640,7 +1665,18 @@ class InvalidCharacterNameError extends Error {
* Used for creating, managing, and interacting with a specific message object.
*/
class Message {
tokens; identifier; role; content; name;
static tokensPerImage = 85;
/** @type {number} */
tokens;
/** @type {string} */
identifier;
/** @type {string} */
role;
/** @type {string|any[]} */
content;
/** @type {string} */
name;
/**
* @constructor
@ -1665,6 +1701,30 @@ class Message {
this.tokens = tokenHandler.count({ role: this.role, content: this.content, name: this.name });
}
async addImage(image) {
const textContent = this.content;
const isDataUrl = isDataURL(image);
if (!isDataUrl) {
try {
const response = await fetch(image, { method: 'GET', cache: 'force-cache' });
if (!response.ok) throw new Error('Failed to fetch image');
const blob = await response.blob();
image = await getBase64Async(blob);
} catch (error) {
console.error('Image adding skipped', error);
return;
}
}
this.content = [
{ type: "text", text: textContent },
{ type: "image_url", image_url: { "url": image, "detail": "low" } },
];
this.tokens += Message.tokensPerImage;
}
/**
* Create a new Message instance from a prompt.
* @static
@ -2148,6 +2208,7 @@ function loadOpenAISettings(data, settings) {
oai_settings.show_external_models = settings.show_external_models ?? default_settings.show_external_models;
oai_settings.proxy_password = settings.proxy_password ?? default_settings.proxy_password;
oai_settings.assistant_prefill = settings.assistant_prefill ?? default_settings.assistant_prefill;
oai_settings.image_inlining = settings.image_inlining ?? default_settings.image_inlining;
oai_settings.prompts = settings.prompts ?? default_settings.prompts;
oai_settings.prompt_order = settings.prompt_order ?? default_settings.prompt_order;
@ -2168,6 +2229,7 @@ function loadOpenAISettings(data, settings) {
$('#api_url_scale').val(oai_settings.api_url_scale);
$('#openai_proxy_password').val(oai_settings.proxy_password);
$('#claude_assistant_prefill').val(oai_settings.assistant_prefill);
$('#openai_image_inlining').prop('checked', oai_settings.image_inlining);
$('#model_openai_select').val(oai_settings.openai_model);
$(`#model_openai_select option[value="${oai_settings.openai_model}"`).attr('selected', true);
@ -2388,6 +2450,7 @@ async function saveOpenAIPreset(name, settings, triggerUi = true) {
exclude_assistant: settings.exclude_assistant,
use_alt_scale: settings.use_alt_scale,
squash_system_messages: settings.squash_system_messages,
image_inlining: settings.image_inlining,
};
const savePresetSettings = await fetch(`/api/presets/save-openai?name=${name}`, {
@ -2741,6 +2804,7 @@ function onSettingsPresetChange() {
exclude_assistant: ['#exclude_assistant', 'exclude_assistant', true],
use_alt_scale: ['#use_alt_scale', 'use_alt_scale', true],
squash_system_messages: ['#squash_system_messages', 'squash_system_messages', true],
image_inlining: ['#openai_image_inlining', 'image_inlining', true],
};
const presetName = $('#settings_preset_openai').find(":selected").text();
@ -2785,6 +2849,9 @@ function getMaxContextOpenAI(value) {
else if (value.includes('gpt-4-1106')) {
return max_128k;
}
else if (value.includes('gpt-4-vision')) {
return max_128k;
}
else if (value.includes('gpt-3.5-turbo-1106')) {
return max_16k;
}
@ -2831,6 +2898,9 @@ function getMaxContextWindowAI(value) {
else if (value.includes('gpt-4-1106')) {
return max_128k;
}
else if (value.includes('gpt-4-vision')) {
return max_128k;
}
else if (value.includes('gpt-4-32k')) {
return max_32k;
}
@ -3217,6 +3287,27 @@ function updateScaleForm() {
}
}
/**
* Check if the model supports image inlining
* @returns {boolean} True if the model supports image inlining
*/
/**
 * Check if the current model supports image inlining.
 * @returns {boolean} True if the model supports image inlining
 */
export function isImageInliningSupported() {
    // The feature must be enabled by the user first.
    if (!oai_settings.image_inlining) {
        return false;
    }

    const visionModelId = 'gpt-4-vision';
    const source = oai_settings.chat_completion_source;

    if (source === chat_completion_sources.OPENAI) {
        return oai_settings.openai_model.includes(visionModelId);
    }

    if (source === chat_completion_sources.OPENROUTER) {
        return oai_settings.openrouter_model.includes(visionModelId);
    }

    // Other chat completion backends have no vision-capable models here.
    return false;
}
$(document).ready(async function () {
$('#test_api_button').on('click', testApiConnection);
@ -3463,6 +3554,11 @@ $(document).ready(async function () {
saveSettingsDebounced();
});
$('#openai_image_inlining').on('input', function () {
oai_settings.image_inlining = !!$(this).prop('checked');
saveSettingsDebounced();
});
$(document).on('input', '#openai_settings .autoSetHeight', function () {
resetScrollHeight($(this));
});