TTS: add slash command to speak
This commit is contained in:
parent
ca1c0e35b1
commit
f2a96427a8
|
@ -6697,6 +6697,7 @@ window["SillyTavern"].getContext = function () {
|
|||
deactivateSendButtons,
|
||||
saveReply,
|
||||
registerSlashCommand: registerSlashCommand,
|
||||
executeSlashCommands: executeSlashCommands,
|
||||
registerHelper: registerExtensionHelper,
|
||||
registedDebugFunction: registerDebugFunction,
|
||||
renderExtensionTemplate: renderExtensionTemplate,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
import { callPopup, cancelTtsPlay, eventSource, event_types, saveSettingsDebounced } from '../../../script.js'
|
||||
import { callPopup, cancelTtsPlay, eventSource, event_types, name2, saveSettingsDebounced } from '../../../script.js'
|
||||
import { ModuleWorkerWrapper, doExtrasFetch, extension_settings, getApiUrl, getContext, modules } from '../../extensions.js'
|
||||
import { escapeRegex, getStringHash } from '../../utils.js'
|
||||
import { EdgeTtsProvider } from './edge.js'
|
||||
|
@ -8,6 +8,7 @@ import { CoquiTtsProvider } from './coqui.js'
|
|||
import { SystemTtsProvider } from './system.js'
|
||||
import { NovelTtsProvider } from './novel.js'
|
||||
import { power_user } from '../../power-user.js'
|
||||
import { registerSlashCommand } from '../../slash-commands.js'
|
||||
export { talkingAnimation };
|
||||
|
||||
const UPDATE_INTERVAL = 1000
|
||||
|
@ -91,6 +92,36 @@ async function onNarrateOneMessage() {
|
|||
moduleWorker();
|
||||
}
|
||||
|
||||
async function onNarrateText(args, text) {
|
||||
if (!text) {
|
||||
return;
|
||||
}
|
||||
|
||||
audioElement.src = '/sounds/silence.mp3';
|
||||
|
||||
// To load all characters in the voice map, set unrestricted to true
|
||||
await initVoiceMap(true);
|
||||
|
||||
const baseName = args?.voice || name2;
|
||||
const name = (baseName === 'SillyTavern System' ? DEFAULT_VOICE_MARKER : baseName) || DEFAULT_VOICE_MARKER;
|
||||
|
||||
const voiceMapEntry = voiceMap[name] === DEFAULT_VOICE_MARKER
|
||||
? voiceMap[DEFAULT_VOICE_MARKER]
|
||||
: voiceMap[name];
|
||||
|
||||
if (!voiceMapEntry || voiceMapEntry === DISABLED_VOICE_MARKER) {
|
||||
toastr.info(`Specified voice for ${name} was not found. Check the TTS extension settings.`);
|
||||
return;
|
||||
}
|
||||
|
||||
resetTtsPlayback()
|
||||
ttsJobQueue.push({ mes: text, name: name });
|
||||
await moduleWorker();
|
||||
|
||||
// Return back to the chat voices
|
||||
await initVoiceMap(false);
|
||||
}
|
||||
|
||||
async function moduleWorker() {
|
||||
// Primarily determining when to add new chat to the TTS queue
|
||||
const enabled = $('#tts_enabled').is(':checked')
|
||||
|
@ -636,10 +667,22 @@ async function onChatChanged() {
|
|||
await initVoiceMap()
|
||||
}
|
||||
|
||||
function getCharacters(){
|
||||
/**
|
||||
* Get characters in current chat
|
||||
* @param {boolean} unrestricted - If true, will include all characters in voiceMapEntries, even if they are not in the current chat.
|
||||
* @returns {string[]} - Array of character names
|
||||
*/
|
||||
function getCharacters(unrestricted) {
|
||||
const context = getContext()
|
||||
|
||||
if (unrestricted) {
|
||||
const names = context.characters.map(char => char.name);
|
||||
names.unshift(DEFAULT_VOICE_MARKER);
|
||||
return names;
|
||||
}
|
||||
|
||||
let characters = []
|
||||
if (context.groupId === null){
|
||||
if (context.groupId === null) {
|
||||
// Single char chat
|
||||
characters.push(DEFAULT_VOICE_MARKER)
|
||||
characters.push(context.name1)
|
||||
|
@ -651,7 +694,7 @@ function getCharacters(){
|
|||
const group = context.groups.find(group => context.groupId == group.id)
|
||||
for (let member of group.members) {
|
||||
// Remove suffix
|
||||
if (member.endsWith('.png')){
|
||||
if (member.endsWith('.png')) {
|
||||
member = member.slice(0, -4)
|
||||
}
|
||||
characters.push(member)
|
||||
|
@ -661,15 +704,15 @@ function getCharacters(){
|
|||
}
|
||||
|
||||
function sanitizeId(input) {
|
||||
// Remove any non-alphanumeric characters except underscore (_) and hyphen (-)
|
||||
let sanitized = input.replace(/[^a-zA-Z0-9-_]/g, '');
|
||||
// Remove any non-alphanumeric characters except underscore (_) and hyphen (-)
|
||||
let sanitized = input.replace(/[^a-zA-Z0-9-_]/g, '');
|
||||
|
||||
// Ensure first character is always a letter
|
||||
if (!/^[a-zA-Z]/.test(sanitized)) {
|
||||
sanitized = 'element_' + sanitized;
|
||||
}
|
||||
// Ensure first character is always a letter
|
||||
if (!/^[a-zA-Z]/.test(sanitized)) {
|
||||
sanitized = 'element_' + sanitized;
|
||||
}
|
||||
|
||||
return sanitized;
|
||||
return sanitized;
|
||||
}
|
||||
|
||||
function parseVoiceMap(voiceMapString) {
|
||||
|
@ -691,13 +734,13 @@ function parseVoiceMap(voiceMapString) {
|
|||
*/
|
||||
function updateVoiceMap() {
|
||||
const tempVoiceMap = {}
|
||||
for (const voice of voiceMapEntries){
|
||||
if (voice.voiceId === null){
|
||||
for (const voice of voiceMapEntries) {
|
||||
if (voice.voiceId === null) {
|
||||
continue
|
||||
}
|
||||
tempVoiceMap[voice.name] = voice.voiceId
|
||||
}
|
||||
if (Object.keys(tempVoiceMap).length !== 0){
|
||||
if (Object.keys(tempVoiceMap).length !== 0) {
|
||||
voiceMap = tempVoiceMap
|
||||
console.log(`Voicemap updated to ${JSON.stringify(voiceMap)}`)
|
||||
}
|
||||
|
@ -712,13 +755,13 @@ class VoiceMapEntry {
|
|||
name
|
||||
voiceId
|
||||
selectElement
|
||||
constructor (name, voiceId=DEFAULT_VOICE_MARKER) {
|
||||
constructor(name, voiceId = DEFAULT_VOICE_MARKER) {
|
||||
this.name = name
|
||||
this.voiceId = voiceId
|
||||
this.selectElement = null
|
||||
}
|
||||
|
||||
addUI(voiceIds){
|
||||
addUI(voiceIds) {
|
||||
let sanitizedName = sanitizeId(this.name)
|
||||
let defaultOption = this.name === DEFAULT_VOICE_MARKER ?
|
||||
`<option>${DISABLED_VOICE_MARKER}</option>` :
|
||||
|
@ -734,7 +777,7 @@ class VoiceMapEntry {
|
|||
$('#tts_voicemap_block').append(template)
|
||||
|
||||
// Populate voice ID select list
|
||||
for (const voiceId of voiceIds){
|
||||
for (const voiceId of voiceIds) {
|
||||
const option = document.createElement('option');
|
||||
option.innerText = voiceId.name;
|
||||
option.value = voiceId.name;
|
||||
|
@ -754,12 +797,12 @@ class VoiceMapEntry {
|
|||
|
||||
/**
|
||||
* Init voiceMapEntries for character select list.
|
||||
*
|
||||
* @param {boolean} unrestricted - If true, will include all characters in voiceMapEntries, even if they are not in the current chat.
|
||||
*/
|
||||
export async function initVoiceMap(){
|
||||
export async function initVoiceMap(unrestricted = false) {
|
||||
// Gate initialization if not enabled or TTS Provider not ready. Prevents error popups.
|
||||
const enabled = $('#tts_enabled').is(':checked')
|
||||
if (!enabled){
|
||||
if (!enabled) {
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -779,16 +822,16 @@ export async function initVoiceMap(){
|
|||
voiceMapEntries = []
|
||||
|
||||
// Get characters in current chat
|
||||
const characters = getCharacters()
|
||||
const characters = getCharacters(unrestricted);
|
||||
|
||||
// Get saved voicemap from provider settings, handling new and old representations
|
||||
let voiceMapFromSettings = {}
|
||||
if ("voiceMap" in extension_settings.tts[ttsProviderName]) {
|
||||
// Handle previous representation
|
||||
if (typeof extension_settings.tts[ttsProviderName].voiceMap === "string"){
|
||||
if (typeof extension_settings.tts[ttsProviderName].voiceMap === "string") {
|
||||
voiceMapFromSettings = parseVoiceMap(extension_settings.tts[ttsProviderName].voiceMap)
|
||||
// Handle new representation
|
||||
} else if (typeof extension_settings.tts[ttsProviderName].voiceMap === "object"){
|
||||
// Handle new representation
|
||||
} else if (typeof extension_settings.tts[ttsProviderName].voiceMap === "object") {
|
||||
voiceMapFromSettings = extension_settings.tts[ttsProviderName].voiceMap
|
||||
}
|
||||
}
|
||||
|
@ -803,13 +846,13 @@ export async function initVoiceMap(){
|
|||
}
|
||||
|
||||
// Build UI using VoiceMapEntry objects
|
||||
for (const character of characters){
|
||||
if (character === "SillyTavern System"){
|
||||
for (const character of characters) {
|
||||
if (character === "SillyTavern System") {
|
||||
continue
|
||||
}
|
||||
// Check provider settings for voiceIds
|
||||
let voiceId
|
||||
if (character in voiceMapFromSettings){
|
||||
if (character in voiceMapFromSettings) {
|
||||
voiceId = voiceMapFromSettings[character]
|
||||
} else if (character === DEFAULT_VOICE_MARKER) {
|
||||
voiceId = DISABLED_VOICE_MARKER
|
||||
|
@ -904,4 +947,5 @@ $(document).ready(function () {
|
|||
eventSource.on(event_types.MESSAGE_SWIPED, resetTtsPlayback);
|
||||
eventSource.on(event_types.CHAT_CHANGED, onChatChanged)
|
||||
eventSource.on(event_types.GROUP_UPDATED, onChatChanged)
|
||||
registerSlashCommand('speak', onNarrateText, ['narrate', 'tts'], `<span class="monospace">(text)</span> – narrate any text using currently selected character's voice. Use voice="Character Name" argument to set other voice from the voice map, example: <tt>/speak voice="Donald Duck" Quack!</tt>`, true, true);
|
||||
})
|
||||
|
|
|
@ -146,8 +146,8 @@ class SystemTtsProvider {
|
|||
$('#system_tts_pitch').val(this.settings.pitch || this.defaultSettings.pitch);
|
||||
|
||||
// Trigger updates
|
||||
$('#system_tts_rate').on("input", () =>{this.onSettingsChange()})
|
||||
$('#system_tts_rate').on("input", () => {this.onSettingsChange()})
|
||||
$('#system_tts_rate').on("input", () => { this.onSettingsChange() })
|
||||
$('#system_tts_rate').on("input", () => { this.onSettingsChange() })
|
||||
|
||||
$('#system_tts_pitch_output').text(this.settings.pitch);
|
||||
$('#system_tts_rate_output').text(this.settings.rate);
|
||||
|
@ -155,7 +155,7 @@ class SystemTtsProvider {
|
|||
}
|
||||
|
||||
// Perform a simple readiness check by trying to fetch voiceIds
|
||||
async checkReady(){
|
||||
async checkReady() {
|
||||
await this.fetchTtsVoiceObjects()
|
||||
}
|
||||
|
||||
|
@ -171,10 +171,16 @@ class SystemTtsProvider {
|
|||
return [];
|
||||
}
|
||||
|
||||
return speechSynthesis
|
||||
.getVoices()
|
||||
.sort((a, b) => a.lang.localeCompare(b.lang) || a.name.localeCompare(b.name))
|
||||
.map(x => ({ name: x.name, voice_id: x.voiceURI, preview_url: false, lang: x.lang }));
|
||||
return new Promise((resolve) => {
|
||||
setTimeout(() => {
|
||||
const voices = speechSynthesis
|
||||
.getVoices()
|
||||
.sort((a, b) => a.lang.localeCompare(b.lang) || a.name.localeCompare(b.name))
|
||||
.map(x => ({ name: x.name, voice_id: x.voiceURI, preview_url: false, lang: x.lang }));
|
||||
|
||||
resolve(voices);
|
||||
}, 1);
|
||||
});
|
||||
}
|
||||
|
||||
previewTtsVoice(voiceId) {
|
||||
|
|
|
@ -82,7 +82,8 @@ class SlashCommandParser {
|
|||
if (equalsIndex !== -1) {
|
||||
const key = arg.substring(0, equalsIndex);
|
||||
const value = arg.substring(equalsIndex + 1);
|
||||
argObj[key] = value;
|
||||
// Replace "wrapping quotes" used for escaping spaces
|
||||
argObj[key] = value.replace(/(^")|("$)/g, '');
|
||||
}
|
||||
else {
|
||||
break;
|
||||
|
|
Loading…
Reference in New Issue