diff --git a/public/scripts/extensions/stable-diffusion/index.js b/public/scripts/extensions/stable-diffusion/index.js
index 31e05c321..ad12381a7 100644
--- a/public/scripts/extensions/stable-diffusion/index.js
+++ b/public/scripts/extensions/stable-diffusion/index.js
@@ -34,39 +34,40 @@ const generationMode = {
}
const triggerWords = {
- [generationMode.CHARACTER]: ['yourself', 'you', 'bot', 'AI', 'character'],
- [generationMode.USER]: ['me', 'user', 'myself'],
- [generationMode.SCENARIO]: ['scenario', 'world', 'surroundings', 'scenery'],
- [generationMode.NOW]: ['now', 'last'],
- [generationMode.FACE]: ['selfie', 'face'],
+ [generationMode.CHARACTER]: ['you'],
+ [generationMode.USER]: ['me'],
+ [generationMode.SCENARIO]: ['scene'],
+ [generationMode.NOW]: ['last'],
+ [generationMode.FACE]: ['face'],
}
const quietPrompts = {
//face-specific prompt
- [generationMode.FACE]: "[In the next reponse I want you to provide only a detailed comma-delimited list of keywords and phrases which describe {{char}}. The list must include all of the following items in this order: species and race, gender, age, facial features and expresisons, occupation, hair and hair accessories (if any), what they are wearing on their upper body (if anything). Do not describe anything below their neck. Do not include descriptions of non-visual qualities such as personality, movements, scents, mental traits, or anything which could not be seen in a still photograph. Do not write in full sentences. Prefix your description with the phrase 'close up facial portrait:']",
+ [generationMode.FACE]: "[In the next response I want you to provide only a detailed comma-delimited list of keywords and phrases which describe {{char}}. The list must include all of the following items in this order: name, species and race, gender, age, facial features and expressions, occupation, hair and hair accessories (if any), what they are wearing on their upper body (if anything). Do not describe anything below their neck. Do not include descriptions of non-visual qualities such as personality, movements, scents, mental traits, or anything which could not be seen in a still photograph. Do not write in full sentences. Prefix your description with the phrase 'close up facial portrait:']",
//prompt for only the last message
[generationMode.NOW]: "[Pause your roleplay and provide a brief description of the last chat message. Focus on visual details, clothing, actions. Ignore the emotions and thoughts of {{char}} and {{user}} as well as any spoken dialog. Do not roleplay as {{char}} while writing this description. Do not continue the roleplay story.]",
- [generationMode.CHARACTER]: "[In the next reponse I want you to provide only a detailed comma-delimited list of keywords and phrases which describe {{char}}. The list must include all of the following items in this order: species and race, gender, age, clothing, occupation, physical features and appearances. Do not include descriptions of non-visual qualities such as personality, movements, scents, mental traits, or anything which could not be seen in a still photograph. Do not write in full sentences. Prefix your description with the phrase 'full body portrait:']",
+ [generationMode.CHARACTER]: "[In the next response I want you to provide only a detailed comma-delimited list of keywords and phrases which describe {{char}}. The list must include all of the following items in this order: name, species and race, gender, age, clothing, occupation, physical features and appearances. Do not include descriptions of non-visual qualities such as personality, movements, scents, mental traits, or anything which could not be seen in a still photograph. Do not write in full sentences. Prefix your description with the phrase 'full body portrait:']",
/*OLD: [generationMode.CHARACTER]: "Pause your roleplay and provide comma-delimited list of phrases and keywords which describe {{char}}'s physical appearance and clothing. Ignore {{char}}'s personality traits, and chat history when crafting this description. End your response once the comma-delimited list is complete. Do not roleplay when writing this description, and do not attempt to continue the story.", */
[generationMode.USER]: "[Pause your roleplay and provide a detailed description of {{user}}'s appearance from the perspective of {{char}} in the form of a comma-delimited list of keywords and phrases. Ignore the rest of the story when crafting this description. Do not roleplay as {{char}} when writing this description, and do not attempt to continue the story.]",
[generationMode.SCENARIO]: "[Pause your roleplay and provide a detailed description for all of the following: a brief recap of recent events in the story, {{char}}'s appearance, and {{char}}'s surroundings. Do not roleplay while writing this description.]",
- [generationMode.FREE]: "[Pause your roleplay and provide ONLY echo this string back to me verbatim: {0}. Do not write anything after the string. Do not roleplay at all in your response.]",
+ [generationMode.FREE]: "[Pause your roleplay and ONLY echo this string back to me verbatim: {0}. Do not write anything after the string. Do not roleplay at all in your response.]",
}
const helpString = [
- `${m('what')} – requests an SD generation. Supported "what" arguments:`,
+ `${m('(argument)')} – requests SD to make an image. Supported arguments:`,
'\n',
- `${m(j(triggerWords[generationMode.CHARACTER]))} – AI character image `,
- `${m(j(triggerWords[generationMode.USER]))} – user character image `,
- `${m(j(triggerWords[generationMode.SCENARIO]))} – world scenario image `,
- `${m(j(triggerWords[generationMode.FACE]))} – character face-up selfie image `,
+ `${m(j(triggerWords[generationMode.CHARACTER]))} – AI character full body selfie `,
+ `${m(j(triggerWords[generationMode.FACE]))} – AI character face-only selfie `,
+ `${m(j(triggerWords[generationMode.USER]))} – user character full body selfie `,
+ `${m(j(triggerWords[generationMode.SCENARIO]))} – visual recap of the whole chat scenario `,
`${m(j(triggerWords[generationMode.NOW]))} – visual recap of the last chat message `,
' ',
- `Anything else would trigger a "free mode" with AI describing whatever you prompted.`,
+ `Anything else would trigger a "free mode" to make SD generate whatever you prompted.
+ example: '/sd apple tree' would generate a picture of an apple tree.`,
].join('\n');
const defaultSettings = {
@@ -236,9 +237,17 @@ function getQuietPrompt(mode, trigger) {
function processReply(str) {
str = str.replaceAll('"', '')
str = str.replaceAll('“', '')
- str = str.replaceAll('\n', ' ')
+ str = str.replaceAll('\n', ', ')
+ str = str.replace(/[^a-zA-Z0-9,:]+/g, ' ') // Replace everything except alphanumeric characters and commas with spaces
+ str = str.replace(/\s+/g, ' '); // Collapse multiple whitespaces into one
str = str.trim();
+ str = str
+ .split(',') // list split by commas
+ .map(x => x.trim()) // trim each entry
+ .filter(x => x) // remove empty entries
+ .join(', '); // join it back with proper spacing
+
return str;
}
@@ -258,7 +267,7 @@ async function generatePicture(_, trigger) {
const prompt = processReply(await new Promise(
async function promptPromise(resolve, reject) {
try {
- await context.generate('quiet', { resolve, reject, quiet_prompt });
+ await context.generate('quiet', { resolve, reject, quiet_prompt, force_name2: true, });
}
catch {
reject();
@@ -268,6 +277,8 @@ async function generatePicture(_, trigger) {
context.deactivateSendButtons();
hideSwipeButtons();
+ console.log('Processed Stable Diffusion prompt:', prompt);
+
const url = new URL(getApiUrl());
url.pathname = '/api/image';
const result = await fetch(url, {
@@ -294,7 +305,7 @@ async function generatePicture(_, trigger) {
sendMessage(prompt, base64Image);
}
} catch (err) {
- console.error(err);
+ console.trace(err);
throw new Error('SD prompt text generation failed.')
}
finally {
@@ -325,7 +336,7 @@ async function sendMessage(prompt, image) {
function addSDGenButtons() {
const buttonHtml = `
-
+
diff --git a/public/scripts/extensions/tts/elevenlabs.js b/public/scripts/extensions/tts/elevenlabs.js
index 0429f5ea8..65c3cf302 100644
--- a/public/scripts/extensions/tts/elevenlabs.js
+++ b/public/scripts/extensions/tts/elevenlabs.js
@@ -7,6 +7,7 @@ class ElevenLabsTtsProvider {
settings
voices = []
+ separator = ' ... ... ... '
get settings() {
return this.settings
diff --git a/public/scripts/extensions/tts/index.js b/public/scripts/extensions/tts/index.js
index 2f9f3a4fb..4eeded46c 100644
--- a/public/scripts/extensions/tts/index.js
+++ b/public/scripts/extensions/tts/index.js
@@ -48,10 +48,8 @@ async function moduleWorker() {
return;
}
- // Chat/character/group changed
+ // Chat changed
if (
- (context.groupId && lastGroupId !== context.groupId) ||
- context.characterId !== lastCharacterId ||
context.chatId !== lastChatId
) {
currentMessageNumber = context.chat.length ? context.chat.length : 0
@@ -75,6 +73,7 @@ async function moduleWorker() {
// We're currently swiping or streaming. Don't generate voice
if (
message.mes === '...' ||
+ message.mes === '' ||
(context.streamingProcessor && !context.streamingProcessor.isFinished)
) {
return
@@ -164,7 +163,7 @@ function onAudioControlClicked() {
function addAudioControl() {
$('#send_but_sheld').prepend('
')
- $('#send_but_sheld').on('click', onAudioControlClicked)
+ $('#tts_media_control').attr('title', 'TTS play/pause').on('click', onAudioControlClicked)
audioControl = document.getElementById('tts_media_control')
updateUiAudioPlayState()
}
@@ -181,7 +180,7 @@ function completeCurrentAudioJob() {
*/
async function addAudioJob(response) {
const audioData = await response.blob()
-    if (!audioData.type in ['audio/mpeg', 'audio/wav']) {
+    // BUG in original: `!audioData.type in [...]` parses as `(!audioData.type) in [...]`,
+    // which asks whether the boolean is an array *index* — the MIME check never fires.
+    // Use Array.includes on the actual content type instead.
+    if (!['audio/mpeg', 'audio/wav', 'audio/x-wav', 'audio/wave'].includes(audioData.type)) {
throw `TTS received HTTP response with invalid data format. Expecting audio/mpeg, got ${audioData.type}`
}
audioJobQueue.push(audioData)
@@ -240,12 +239,26 @@ async function processTtsQueue() {
console.debug('New message found, running TTS')
currentTtsJob = ttsJobQueue.shift()
- const text = extension_settings.tts.narrate_dialogues_only
- ? currentTtsJob.mes.replace(/\*[^\*]*?(\*|$)/g, '') // remove asterisks content
- : currentTtsJob.mes.replaceAll('*', '') // remove just the asterisks
+ let text = extension_settings.tts.narrate_dialogues_only
+ ? currentTtsJob.mes.replace(/\*[^\*]*?(\*|$)/g, '').trim() // remove asterisks content
+ : currentTtsJob.mes.replaceAll('*', '').trim() // remove just the asterisks
+
+ if (extension_settings.tts.narrate_quoted_only) {
+ const special_quotes = /[“”]/g; // Extend this regex to include other special quotes
+ text = text.replace(special_quotes, '"');
+ const matches = text.match(/".*?"/g); // Matches text inside double quotes, non-greedily
+ const partJoiner = (ttsProvider?.separator || ' ... ');
+ text = matches ? matches.join(partJoiner) : text;
+ }
+ console.log(`TTS: ${text}`)
const char = currentTtsJob.name
try {
+ if (!text) {
+ console.warn('Got empty text in TTS queue job.');
+ return;
+ }
+
if (!voiceMap[char]) {
throw `${char} not in voicemap. Configure character in extension settings voice map`
}
@@ -282,6 +295,7 @@ function loadSettings() {
extension_settings.tts.enabled
)
$('#tts_narrate_dialogues').prop('checked', extension_settings.tts.narrate_dialogues_only)
+ $('#tts_narrate_quoted').prop('checked', extension_settings.tts.narrate_quoted_only)
}
const defaultSettings = {
@@ -374,6 +388,13 @@ function onNarrateDialoguesClick() {
saveSettingsDebounced()
}
+
+function onNarrateQuotedClick() {
+ extension_settings.tts.narrate_quoted_only = $('#tts_narrate_quoted').prop('checked');
+ saveSettingsDebounced()
+}
+
+
//##############//
// TTS Provider //
//##############//
@@ -453,6 +474,10 @@ $(document).ready(function () {
Narrate dialogues only
+
+
+ Narrate quoted only
+