Add a function tool for image generation

This commit is contained in:
Cohee 2024-10-04 13:04:19 +00:00
parent cc3cc58a06
commit c853547b11
2 changed files with 58 additions and 0 deletions

View File

@ -32,6 +32,7 @@ import { debounce_timeout } from '../../constants.js';
import { SlashCommandEnumValue } from '../../slash-commands/SlashCommandEnumValue.js';
import { POPUP_RESULT, POPUP_TYPE, Popup, callGenericPopup } from '../../popup.js';
import { commonEnumProviders } from '../../slash-commands/SlashCommandCommonEnumsProvider.js';
import { ToolManager } from '../../tool-calling.js';
export { MODULE_NAME };
const MODULE_NAME = 'sd';
@ -62,6 +63,7 @@ const initiators = {
interactive: 'interactive',
wand: 'wand',
swipe: 'swipe',
tool: 'tool',
};
const generationMode = {
@ -226,6 +228,7 @@ const defaultSettings = {
multimodal_captioning: false,
snap: false,
free_extend: false,
function_tool: false,
prompts: promptTemplates,
@ -291,6 +294,10 @@ const defaultSettings = {
const writePromptFieldsDebounced = debounce(writePromptFields, debounce_timeout.relaxed);
function processTriggers(chat, _, abort) {
if (extension_settings.sd.function_tool && ToolManager.isToolCallingSupported()) {
return;
}
if (!extension_settings.sd.interactive_mode) {
return;
}
@ -447,6 +454,7 @@ async function loadSettings() {
$('#sd_interactive_visible').prop('checked', extension_settings.sd.interactive_visible);
$('#sd_stability_style_preset').val(extension_settings.sd.stability_style_preset);
$('#sd_huggingface_model_id').val(extension_settings.sd.huggingface_model_id);
$('#sd_function_tool').prop('checked', extension_settings.sd.function_tool);
for (const style of extension_settings.sd.styles) {
const option = document.createElement('option');
@ -461,6 +469,7 @@ async function loadSettings() {
toggleSourceControls();
addPromptTemplates();
registerFunctionTool();
await loadSettingOptions();
}
@ -910,6 +919,12 @@ async function onSourceChange() {
await loadSettingOptions();
}
/**
 * Input handler for the "function tool" checkbox.
 * Copies the checkbox state (read from `this`) into the extension settings,
 * schedules a settings save and then re-runs the tool registration so it
 * matches the new setting.
 * @this {HTMLElement} The checkbox element the handler is bound to.
 */
function onFunctionToolInput() {
    const isChecked = Boolean($(this).prop('checked'));
    extension_settings.sd.function_tool = isChecked;
    saveSettingsDebounced();
    registerFunctionTool();
}
async function onOpenAiStyleSelect() {
extension_settings.sd.openai_style = String($('#sd_openai_style').find(':selected').val());
saveSettingsDebounced();
@ -3822,6 +3837,44 @@ function applyCommandArguments(args) {
return currentSettings;
}
/**
 * Registers or unregisters the 'GenerateImage' function tool with the ToolManager,
 * depending on the `function_tool` extension setting.
 *
 * When the setting is off, the tool is unregistered and the result of
 * `ToolManager.unregisterFunctionTool` is returned. Otherwise the tool is
 * registered with a JSON schema that requires a single string `prompt` parameter.
 *
 * The registered action throws an Error when image generation is not in a valid
 * state, when no arguments are passed, or when the prompt is missing; otherwise
 * it returns the result of `generatePicture` invoked with the `tool` initiator.
 * @returns {*} Result of the unregister call when the tool is disabled, otherwise undefined.
 */
function registerFunctionTool() {
    if (!extension_settings.sd.function_tool) {
        return ToolManager.unregisterFunctionTool('GenerateImage');
    }

    ToolManager.registerFunctionTool({
        name: 'GenerateImage',
        displayName: 'Generate Image',
        description: [
            'Generate an image from a given text prompt.',
            'Use when a user asks for an image, a selfie, to picture a scene, etc.',
        ].join(' '),
        parameters: Object.freeze({
            $schema: 'http://json-schema.org/draft-04/schema#',
            type: 'object',
            properties: {
                prompt: {
                    type: 'string',
                    // JSON Schema requires "description" to be a string, so the
                    // sentences are joined here rather than passed as an array.
                    description: [
                        'The text prompt used to generate the image.',
                        'Must represent an exhaustive description of the desired image that will allow an artist or a photographer to perfectly recreate it.',
                    ].join(' '),
                },
            },
            required: [
                'prompt',
            ],
        }),
        action: async (args) => {
            if (!isValidState()) throw new Error('Image generation is not configured.');
            if (!args) throw new Error('Missing arguments');
            if (!args.prompt) throw new Error('Missing prompt');
            return generatePicture(initiators.tool, {}, args.prompt);
        },
        formatMessage: () => 'Generating an image...',
    });
}
jQuery(async () => {
await addSDGenButtons();
@ -4175,6 +4228,7 @@ jQuery(async () => {
$('#sd_stability_key').on('click', onStabilityKeyClick);
$('#sd_stability_style_preset').on('change', onStabilityStylePresetChange);
$('#sd_huggingface_model_id').on('input', onHFModelInput);
$('#sd_function_tool').on('input', onFunctionToolInput);
if (!CSS.supports('field-sizing', 'content')) {
$('.sd_settings .inline-drawer-toggle').on('click', function () {

View File

@ -18,6 +18,10 @@
<input id="sd_interactive_mode" type="checkbox" />
<span data-i18n="sd_interactive_mode_txt">Interactive mode</span>
</label>
<label for="sd_function_tool" class="checkbox_label" data-i18n="[title]sd_function_tool" title="Use the function tool to automatically detect intents to generate images.">
<input id="sd_function_tool" type="checkbox" />
<span data-i18n="sd_function_tool_txt">Enable function tool</span>
</label>
<label for="sd_multimodal_captioning" class="checkbox_label" data-i18n="[title]sd_multimodal_captioning" title="Use multimodal captioning to generate prompts for user and character portraits based on their avatars.">
<input id="sd_multimodal_captioning" type="checkbox" />
<span data-i18n="sd_multimodal_captioning_txt">Use multimodal captioning for portraits</span>