diff --git a/default/content/presets/instruct/Llama 3 Instruct.json b/default/content/presets/instruct/Llama 3 Instruct.json index 111501287..0a84b852d 100644 --- a/default/content/presets/instruct/Llama 3 Instruct.json +++ b/default/content/presets/instruct/Llama 3 Instruct.json @@ -18,7 +18,7 @@ "input_suffix": "<|eot_id|>", "system_suffix": "<|eot_id|>", "user_alignment_message": "", - "system_same_as_user": false, + "system_same_as_user": true, "last_system_sequence": "", "name": "Llama 3 Instruct" } diff --git a/package-lock.json b/package-lock.json index 04b122d5f..93c8ed8ee 100644 --- a/package-lock.json +++ b/package-lock.json @@ -27,6 +27,7 @@ "form-data": "^4.0.0", "google-translate-api-browser": "^3.0.1", "gpt3-tokenizer": "^1.1.5", + "he": "^1.2.0", "helmet": "^7.1.0", "ip-matching": "^2.1.2", "ipaddr.js": "^2.0.1", @@ -2800,6 +2801,14 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/he": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz", + "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", + "bin": { + "he": "bin/he" + } + }, "node_modules/helmet": { "version": "7.1.0", "resolved": "https://registry.npmjs.org/helmet/-/helmet-7.1.0.tgz", diff --git a/package.json b/package.json index 8f84c95b9..c40d267e4 100644 --- a/package.json +++ b/package.json @@ -17,6 +17,7 @@ "form-data": "^4.0.0", "google-translate-api-browser": "^3.0.1", "gpt3-tokenizer": "^1.1.5", + "he": "^1.2.0", "helmet": "^7.1.0", "ip-matching": "^2.1.2", "ipaddr.js": "^2.0.1", diff --git a/public/css/st-tailwind.css b/public/css/st-tailwind.css index ecc55c074..44664ddfd 100644 --- a/public/css/st-tailwind.css +++ b/public/css/st-tailwind.css @@ -86,6 +86,10 @@ margin: 5px; } +.marginLeft5 { + margin-left: 5px; +} + .overflowYAuto { overflow-y: auto; } @@ -257,6 +261,10 @@ flex-basis: 48% } +.flexBasis30p { + flex-basis: 30%; +} + .flex-container { display: flex; gap: 5px; @@ -555,4 +563,4 @@ textarea:disabled { height: 30px; text-align: center; padding: 5px; -} +} \ No newline at end of file diff --git a/public/index.html b/public/index.html index 594f946ab..44c2254c4 100644 --- a/public/index.html +++ b/public/index.html @@ -141,10 +141,10 @@
- -
+
@@ -163,10 +163,10 @@
- -
+
@@ -184,7 +184,7 @@ -
+
@@ -200,7 +200,7 @@
-
+
@@ -321,7 +321,7 @@
- Repetition Penalty Range + Rep Pen Range
@@ -373,7 +373,7 @@
- Tail Free Sampling + TFS
@@ -788,7 +788,7 @@
-
+
Temperature
@@ -796,7 +796,7 @@
-
+
Top K
@@ -804,7 +804,7 @@
-
+
Top P
@@ -812,7 +812,7 @@
-
+
Typical P
@@ -820,7 +820,7 @@
-
+
Min P
@@ -828,7 +828,7 @@
-
+
Top A
@@ -836,29 +836,29 @@
-
+
- Tail Free Sampling + TFS
-
+
Repetition Penalty
-
+
- Repetition Penalty Range + Rep Pen Range
-
+
Repetition Penalty Slope @@ -892,7 +892,7 @@

-
+
-
+
Seed @@ -1185,7 +1185,7 @@
-
+
Temperature
@@ -1193,7 +1193,7 @@
-
+
Top K
@@ -1201,7 +1201,7 @@
-
+
Top P
@@ -1209,7 +1209,7 @@
-
+
Typical P
@@ -1217,7 +1217,7 @@
-
+
Min P
@@ -1225,7 +1225,7 @@
-
+
Top A
@@ -1233,15 +1233,15 @@
-
+
- Tail Free Sampling + TFS
-
+
Epsilon Cutoff
@@ -1249,7 +1249,7 @@
-
+
Eta Cutoff
@@ -1257,42 +1257,42 @@
-
+
Repetition Penalty
-
- Repetition Penalty Range +
+ Rep Pen Range
-
+
Encoder Penalty
-
+
Frequency Penalty
-
+
Presence Penalty
-
+
No Repeat Ngram Size
-
+
Min Length
-
+
Maximum tokens/second @@ -3544,7 +3544,7 @@ - @@ -4488,9 +4488,7 @@
@@ -6099,4 +6092,4 @@ - + \ No newline at end of file diff --git a/public/script.js b/public/script.js index a8c539e73..9355ef831 100644 --- a/public/script.js +++ b/public/script.js @@ -4132,8 +4132,12 @@ async function Generate(type, { automatic_trigger, force_name2, quiet_prompt, qu // regenerate with character speech reenforced // to make sure we leave on swipe type while also adding the name2 appendage await delay(1000); + // A message was already deleted on regeneration, so instead treat is as a normal gen + if (type === 'regenerate') { + type = 'normal'; + } // The first await is for waiting for the generate to start. The second one is waiting for it to finish - const result = await await Generate(type, { automatic_trigger, force_name2: true, quiet_prompt, skipWIAN, force_chid, maxLoops: maxLoops - 1 }); + const result = await await Generate(type, { automatic_trigger, force_name2: true, quiet_prompt, quietToLoud, skipWIAN, force_chid, signal, quietImage, quietName, maxLoops: maxLoops - 1 }); return result; } @@ -6769,8 +6773,9 @@ function select_rm_info(type, charId, previousCharId = null) { importFlashTimeout = setTimeout(function () { if (type === 'char_import' || type === 'char_create') { // Find the page at which the character is located + const avatarFileName = `${charId}.png`; const charData = getEntitiesList({ doFilter: true }); - const charIndex = charData.findIndex((x) => x?.item?.avatar?.startsWith(charId)); + const charIndex = charData.findIndex((x) => x?.item?.avatar?.startsWith(avatarFileName)); if (charIndex === -1) { console.log(`Could not find character ${charId} in the list`); @@ -6780,7 +6785,7 @@ function select_rm_info(type, charId, previousCharId = null) { try { const perPage = Number(localStorage.getItem('Characters_PerPage')) || per_page_default; const page = Math.floor(charIndex / perPage) + 1; - const selector = `#rm_print_characters_block [title^="${charId}"]`; + const selector = `#rm_print_characters_block [title*="${avatarFileName}"]`; $('#rm_print_characters_pagination').pagination('go', page); waitUntilCondition(() => document.querySelector(selector) !== null).then(() => { diff --git a/public/scripts/PromptManager.js b/public/scripts/PromptManager.js index 4ab734156..308015326 100644 --- a/public/scripts/PromptManager.js +++ b/public/scripts/PromptManager.js @@ -1398,7 +1398,8 @@ class PromptManager { `; const rangeBlockDiv = promptManagerDiv.querySelector('.range-block'); - rangeBlockDiv.insertAdjacentHTML('beforeend', footerHtml); + const headerDiv = promptManagerDiv.querySelector('.completion_prompt_manager_header'); + headerDiv.insertAdjacentHTML('afterend', footerHtml); rangeBlockDiv.querySelector('#prompt-manager-reset-character').addEventListener('click', this.handleCharacterReset); const footerDiv = rangeBlockDiv.querySelector(`.${this.configuration.prefix}prompt_manager_footer`); @@ -1427,7 +1428,12 @@ class PromptManager { rangeBlockDiv.insertAdjacentHTML('beforeend', exportPopup); - let exportPopper = Popper.createPopper( + // Destroy previous popper instance if it exists + if (this.exportPopper) { + this.exportPopper.destroy(); + } + + this.exportPopper = Popper.createPopper( document.getElementById('prompt-manager-export'), document.getElementById('prompt-manager-export-format-popup'), { placement: 'bottom' }, @@ -1440,7 +1446,7 @@ class PromptManager { if (show) popup.removeAttribute('data-show'); else popup.setAttribute('data-show', ''); - exportPopper.update(); + this.exportPopper.update(); }; footerDiv.querySelector('#prompt-manager-import').addEventListener('click', this.handleImport); diff --git a/public/scripts/RossAscends-mods.js b/public/scripts/RossAscends-mods.js index 8504c65ee..7c12d5763 100644 --- a/public/scripts/RossAscends-mods.js +++ b/public/scripts/RossAscends-mods.js @@ -1202,7 +1202,7 @@ export function initRossMods() { if (event.ctrlKey && /^[1-9]$/.test(event.key)) { // This will eventually be to trigger quick replies - event.preventDefault(); + // event.preventDefault(); console.log('Ctrl +' + event.key + ' pressed!'); } } diff --git a/public/scripts/chats.js b/public/scripts/chats.js index 79293cbc7..a4082f6f0 100644 --- a/public/scripts/chats.js +++ b/public/scripts/chats.js @@ -32,6 +32,7 @@ import { getStringHash, humanFileSize, saveBase64AsFile, + extractTextFromOffice, } from './utils.js'; import { extension_settings, renderExtensionTemplateAsync, saveMetadataDebounced } from './extensions.js'; import { POPUP_RESULT, POPUP_TYPE, callGenericPopup } from './popup.js'; @@ -46,6 +47,12 @@ import { ScraperManager } from './scrapers.js'; * @property {string} [text] File text */ +/** + * @typedef {function} ConverterFunction + * @param {File} file File object + * @returns {Promise} Converted file text + */ + const fileSizeLimit = 1024 * 1024 * 10; // 10 MB const ATTACHMENT_SOURCE = { GLOBAL: 'global', @@ -53,20 +60,60 @@ const ATTACHMENT_SOURCE = { CHARACTER: 'character', }; +/** + * @type {Record} File converters + */ const converters = { 'application/pdf': extractTextFromPDF, 'text/html': extractTextFromHTML, 'text/markdown': extractTextFromMarkdown, 'application/epub+zip': extractTextFromEpub, + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': extractTextFromOffice, + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': extractTextFromOffice, + 'application/vnd.openxmlformats-officedocument.presentationml.presentation': extractTextFromOffice, + 'application/vnd.oasis.opendocument.text': extractTextFromOffice, + 'application/vnd.oasis.opendocument.presentation': extractTextFromOffice, + 'application/vnd.oasis.opendocument.spreadsheet': extractTextFromOffice, }; +/** + * Finds a matching key in the converters object. + * @param {string} type MIME type + * @returns {string} Matching key + */ +function findConverterKey(type) { + return Object.keys(converters).find((key) => { + // Match exact type + if (type === key) { + return true; + } + + // Match wildcards + if (key.endsWith('*')) { + return type.startsWith(key.substring(0, key.length - 1)); + } + + return false; + }); +} + /** * Determines if the file type has a converter function. * @param {string} type MIME type * @returns {boolean} True if the file type is convertible, false otherwise. */ function isConvertible(type) { - return Object.keys(converters).includes(type); + return Boolean(findConverterKey(type)); +} + +/** + * Gets the converter function for a file type. + * @param {string} type MIME type + * @returns {ConverterFunction} Converter function + */ +function getConverter(type) { + const key = findConverterKey(type); + return key && converters[key]; } /** @@ -152,7 +199,7 @@ export async function populateFileAttachment(message, inputId = 'file_form_input if (isConvertible(file.type)) { try { - const converter = converters[file.type]; + const converter = getConverter(file.type); const fileText = await converter(file); base64Data = window.btoa(unescape(encodeURIComponent(fileText))); } catch (error) { @@ -584,18 +631,59 @@ async function openFilePopup(attachment) { callGenericPopup(modalTemplate, POPUP_TYPE.TEXT, '', { wide: true, large: true }); } +/** + * Edit a file attachment in a notepad-like modal. + * @param {FileAttachment} attachment Attachment to edit + * @param {string} source Attachment source + * @param {function} callback Callback function + */ +async function editAttachment(attachment, source, callback) { + const originalFileText = attachment.text || (await getFileAttachment(attachment.url)); + const template = $(await renderExtensionTemplateAsync('attachments', 'notepad')); + + let editedFileText = originalFileText; + template.find('[name="notepadFileContent"]').val(editedFileText).on('input', function () { + editedFileText = String($(this).val()); + }); + + let editedFileName = attachment.name; + template.find('[name="notepadFileName"]').val(editedFileName).on('input', function () { + editedFileName = String($(this).val()); + }); + + const result = await callGenericPopup(template, POPUP_TYPE.CONFIRM, '', { wide: true, large: true, okButton: 'Save', cancelButton: 'Cancel' }); + + if (result !== POPUP_RESULT.AFFIRMATIVE) { + return; + } + + if (editedFileText === originalFileText && editedFileName === attachment.name) { + return; + } + + const nullCallback = () => { }; + await deleteAttachment(attachment, source, nullCallback, false); + const file = new File([editedFileText], editedFileName, { type: 'text/plain' }); + await uploadFileAttachmentToServer(file, source); + + callback(); +} + /** * Deletes an attachment from the server and the chat. * @param {FileAttachment} attachment Attachment to delete * @param {string} source Source of the attachment * @param {function} callback Callback function + * @param {boolean} [confirm=true] If true, show a confirmation dialog * @returns {Promise} A promise that resolves when the attachment is deleted. */ -async function deleteAttachment(attachment, source, callback) { - const confirm = await callGenericPopup('Are you sure you want to delete this attachment?', POPUP_TYPE.CONFIRM); +async function deleteAttachment(attachment, source, callback, confirm = true) { + if (confirm) { + const result = await callGenericPopup('Are you sure you want to delete this attachment?', POPUP_TYPE.CONFIRM); - if (confirm !== POPUP_RESULT.AFFIRMATIVE) { - return; + if (result !== POPUP_RESULT.AFFIRMATIVE) { + return; + } } ensureAttachmentsExist(); @@ -672,6 +760,7 @@ async function openAttachmentManager() { attachmentTemplate.find('.attachmentListItemSize').text(humanFileSize(attachment.size)); attachmentTemplate.find('.attachmentListItemCreated').text(new Date(attachment.created).toLocaleString()); attachmentTemplate.find('.viewAttachmentButton').on('click', () => openFilePopup(attachment)); + attachmentTemplate.find('.editAttachmentButton').on('click', () => editAttachment(attachment, source, renderAttachments)); attachmentTemplate.find('.deleteAttachmentButton').on('click', () => deleteAttachment(attachment, source, renderAttachments)); template.find(sources[source]).append(attachmentTemplate); } @@ -748,7 +837,7 @@ async function openAttachmentManager() { } async function renderAttachments() { - /** @type {FileAttachment[]} */ + /** @type {FileAttachment[]} */ const globalAttachments = extension_settings.attachments ?? []; /** @type {FileAttachment[]} */ const chatAttachments = chat_metadata.attachments ?? []; @@ -842,7 +931,7 @@ async function runScraper(scraperId, target, callback) { * @param {string} target Target for the attachment * @returns */ -async function uploadFileAttachmentToServer(file, target) { +export async function uploadFileAttachmentToServer(file, target) { const isValid = await validateFile(file); if (!isValid) { @@ -855,7 +944,7 @@ async function uploadFileAttachmentToServer(file, target) { if (isConvertible(file.type)) { try { - const converter = converters[file.type]; + const converter = getConverter(file.type); const fileText = await converter(file); base64Data = window.btoa(unescape(encodeURIComponent(fileText))); } catch (error) { @@ -932,6 +1021,44 @@ export function getDataBankAttachments() { return [...globalAttachments, ...chatAttachments, ...characterAttachments]; } +/** + * Gets all attachments for a specific source. + * @param {string} source Attachment source + * @returns {FileAttachment[]} List of attachments + */ +export function getDataBankAttachmentsForSource(source) { + ensureAttachmentsExist(); + + switch (source) { + case ATTACHMENT_SOURCE.GLOBAL: + return extension_settings.attachments ?? []; + case ATTACHMENT_SOURCE.CHAT: + return chat_metadata.attachments ?? []; + case ATTACHMENT_SOURCE.CHARACTER: + return extension_settings.character_attachments?.[characters[this_chid]?.avatar] ?? []; + } +} + +/** + * Registers a file converter function. + * @param {string} mimeType MIME type + * @param {ConverterFunction} converter Function to convert file + * @returns {void} + */ +export function registerFileConverter(mimeType, converter) { + if (typeof mimeType !== 'string' || typeof converter !== 'function') { + console.error('Invalid converter registration'); + return; + } + + if (Object.keys(converters).includes(mimeType)) { + console.error('Converter already registered'); + return; + } + + converters[mimeType] = converter; +} + jQuery(function () { $(document).on('click', '.mes_hide', async function () { const messageBlock = $(this).closest('.mes'); diff --git a/public/scripts/extensions/attachments/manager.html b/public/scripts/extensions/attachments/manager.html index e18407771..ecfbe2fbb 100644 --- a/public/scripts/extensions/attachments/manager.html +++ b/public/scripts/extensions/attachments/manager.html @@ -102,6 +102,7 @@
+
diff --git a/public/scripts/extensions/attachments/notepad.html b/public/scripts/extensions/attachments/notepad.html new file mode 100644 index 000000000..899d4dfbb --- /dev/null +++ b/public/scripts/extensions/attachments/notepad.html @@ -0,0 +1,10 @@ +
+ + + + File Content + + +
diff --git a/public/scripts/extensions/attachments/youtube-scrape.html b/public/scripts/extensions/attachments/youtube-scrape.html new file mode 100644 index 000000000..298b08159 --- /dev/null +++ b/public/scripts/extensions/attachments/youtube-scrape.html @@ -0,0 +1,20 @@ +
+ + Enter a video URL or ID to download its transcript. + +
+ Examples: +
+
    +
  • https://www.youtube.com/watch?v=jV1vkHv4zq8
  • +
  • https://youtu.be/nlLhw1mtCFA
  • +
  • TDpxx5UqrVU
  • +
+ + + +
diff --git a/public/scripts/extensions/stable-diffusion/index.js b/public/scripts/extensions/stable-diffusion/index.js index 6618b66d4..d7b0cfc0b 100644 --- a/public/scripts/extensions/stable-diffusion/index.js +++ b/public/scripts/extensions/stable-diffusion/index.js @@ -37,6 +37,8 @@ const p = a => `

${a}

`; const MODULE_NAME = 'sd'; const UPDATE_INTERVAL = 1000; +// This is a 1x1 transparent PNG +const PNG_PIXEL = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII='; const sources = { extras: 'extras', @@ -2650,6 +2652,8 @@ async function generateComfyImage(prompt, negativePrompt) { const avatarBlob = await response.blob(); const avatarBase64 = await getBase64Async(avatarBlob); workflow = workflow.replace('"%user_avatar%"', JSON.stringify(avatarBase64)); + } else { + workflow = workflow.replace('"%user_avatar%"', JSON.stringify(PNG_PIXEL)); } } if (/%char_avatar%/gi.test(workflow)) { @@ -2658,6 +2662,8 @@ async function generateComfyImage(prompt, negativePrompt) { const avatarBlob = await response.blob(); const avatarBase64 = await getBase64Async(avatarBlob); workflow = workflow.replace('"%char_avatar%"', JSON.stringify(avatarBase64)); + } else { + workflow = workflow.replace('"%char_avatar%"', JSON.stringify(PNG_PIXEL)); } } console.log(`{ diff --git a/public/scripts/extensions/tts/index.js b/public/scripts/extensions/tts/index.js index f00641082..6203bd213 100644 --- a/public/scripts/extensions/tts/index.js +++ b/public/scripts/extensions/tts/index.js @@ -1,4 +1,4 @@ -import { callPopup, cancelTtsPlay, eventSource, event_types, name2, saveSettingsDebounced } from '../../../script.js'; +import { callPopup, cancelTtsPlay, eventSource, event_types, name2, saveSettingsDebounced, substituteParams } from '../../../script.js'; import { ModuleWorkerWrapper, doExtrasFetch, extension_settings, getApiUrl, getContext, modules } from '../../extensions.js'; import { delay, escapeRegex, getBase64Async, getStringHash, onlyUnique } from '../../utils.js'; import { EdgeTtsProvider } from './edge.js'; @@ -425,6 +425,9 @@ async function processTtsQueue() { currentTtsJob = ttsJobQueue.shift(); let text = extension_settings.tts.narrate_translated_only ? (currentTtsJob?.extra?.display_text || currentTtsJob.mes) : currentTtsJob.mes; + // Substitute macros + text = substituteParams(text); + if (extension_settings.tts.skip_codeblocks) { text = text.replace(/^\s{4}.*$/gm, '').trim(); text = text.replace(/```.*?```/gs, '').trim(); diff --git a/public/scripts/extensions/vectors/index.js b/public/scripts/extensions/vectors/index.js index d228326f1..6ff5c6af5 100644 --- a/public/scripts/extensions/vectors/index.js +++ b/public/scripts/extensions/vectors/index.js @@ -53,6 +53,7 @@ const settings = { // For files enabled_files: false, + translate_files: false, size_threshold: 10, chunk_size: 5000, chunk_count: 2, @@ -437,6 +438,12 @@ async function retrieveFileChunks(queryText, collectionId) { */ async function vectorizeFile(fileText, fileName, collectionId, chunkSize) { try { + if (settings.translate_files && typeof window['translate'] === 'function') { + console.log(`Vectors: Translating file ${fileName} to English...`); + const translatedText = await window['translate'](fileText, 'en'); + fileText = translatedText; + } + const toast = toastr.info('Vectorization may take some time, please wait...', `Ingesting file ${fileName}`); const chunks = splitRecursive(fileText, chunkSize); console.debug(`Vectors: Split file ${fileName} into ${chunks.length} chunks`, chunks); @@ -1121,6 +1128,12 @@ jQuery(async () => { saveSettingsDebounced(); }); + $('#vectors_translate_files').prop('checked', settings.translate_files).on('input', () => { + settings.translate_files = !!$('#vectors_translate_files').prop('checked'); + Object.assign(extension_settings.vectors, settings); + saveSettingsDebounced(); + }); + const validSecret = !!secret_state[SECRET_KEYS.NOMICAI]; const placeholder = validSecret ? '✔️ Key saved' : '❌ Missing key'; $('#api_key_nomicai').attr('placeholder', placeholder); diff --git a/public/scripts/extensions/vectors/settings.html b/public/scripts/extensions/vectors/settings.html index cdb91981b..02499d120 100644 --- a/public/scripts/extensions/vectors/settings.html +++ b/public/scripts/extensions/vectors/settings.html @@ -107,6 +107,13 @@
+
Message attachments
diff --git a/public/scripts/group-chats.js b/public/scripts/group-chats.js index 3ce9f0d29..2396116d8 100644 --- a/public/scripts/group-chats.js +++ b/public/scripts/group-chats.js @@ -188,9 +188,7 @@ export async function getGroupChat(groupId, reload = false) { if (Array.isArray(data) && data.length) { data[0].is_group = true; - for (let key of data) { - chat.push(key); - } + chat.splice(0, chat.length, ...data); await printMessages(); } else { sendSystemMessage(system_message_types.GROUP, '', { isSmallSys: true }); diff --git a/public/scripts/macros.js b/public/scripts/macros.js index 74c89b715..05f3f7b66 100644 --- a/public/scripts/macros.js +++ b/public/scripts/macros.js @@ -1,4 +1,4 @@ -import { chat, chat_metadata, main_api, getMaxContextSize, getCurrentChatId } from '../script.js'; +import { chat, chat_metadata, main_api, getMaxContextSize, getCurrentChatId, substituteParams } from '../script.js'; import { timestampToMoment, isDigitsOnly, getStringHash } from './utils.js'; import { textgenerationwebui_banned_in_macros } from './textgen-settings.js'; import { replaceInstructMacros } from './instruct-mode.js'; @@ -6,6 +6,12 @@ import { replaceVariableMacros } from './variables.js'; // Register any macro that you want to leave in the compiled story string Handlebars.registerHelper('trim', () => '{{trim}}'); +// Catch-all helper for any macro that is not defined for story strings +Handlebars.registerHelper('helperMissing', function () { + const options = arguments[arguments.length - 1]; + const macroName = options.name; + return substituteParams(`{{${macroName}}}`); +}); /** * Gets a hashed id of the current chat from the metadata. diff --git a/public/scripts/scrapers.js b/public/scripts/scrapers.js index 5ac3c5dc5..10529ee89 100644 --- a/public/scripts/scrapers.js +++ b/public/scripts/scrapers.js @@ -77,6 +77,52 @@ export class ScraperManager { } } +/** + * Create a text file from a string. + * @implements {Scraper} + */ +class Notepad { + constructor() { + this.id = 'text'; + this.name = 'Notepad'; + this.description = 'Create a text file from scratch.'; + this.iconClass = 'fa-solid fa-note-sticky'; + } + + /** + * Check if the scraper is available. + * @returns {Promise} + */ + async isAvailable() { + return true; + } + + /** + * Create a text file from a string. + * @returns {Promise} File attachments scraped from the text + */ + async scrape() { + const template = $(await renderExtensionTemplateAsync('attachments', 'notepad', {})); + let fileName = `Untitled - ${new Date().toLocaleString()}`; + let text = ''; + template.find('input[name="notepadFileName"]').val(fileName).on('input', function () { + fileName = String($(this).val()).trim(); + }); + template.find('textarea[name="notepadFileContent"]').on('input', function () { + text = String($(this).val()); + }); + + const result = await callGenericPopup(template, POPUP_TYPE.CONFIRM, '', { wide: true, large: true, okButton: 'Save', cancelButton: 'Cancel' }); + + if (!result || text === '') { + return; + } + + const file = new File([text], `Notepad - ${fileName}.txt`, { type: 'text/plain' }); + return [file]; + } +} + /** * Scrape data from a webpage. * @implements {Scraper} @@ -93,8 +139,8 @@ class WebScraper { * Check if the scraper is available. * @returns {Promise} */ - isAvailable() { - return Promise.resolve(true); + async isAvailable() { + return true; } /** @@ -167,8 +213,8 @@ class FileScraper { * Check if the scraper is available. * @returns {Promise} */ - isAvailable() { - return Promise.resolve(true); + async isAvailable() { + return true; } /** @@ -179,7 +225,7 @@ class FileScraper { return new Promise(resolve => { const fileInput = document.createElement('input'); fileInput.type = 'file'; - fileInput.accept = '.txt, .md, .pdf, .html, .htm, .epub'; + fileInput.accept = '*/*'; fileInput.multiple = true; fileInput.onchange = () => resolve(Array.from(fileInput.files)); fileInput.click(); @@ -199,6 +245,10 @@ class FandomScraper { this.iconClass = 'fa-solid fa-fire'; } + /** + * Check if the scraper is available. + * @returns {Promise} + */ async isAvailable() { try { const result = await fetch('/api/plugins/fandom/probe', { @@ -289,6 +339,78 @@ class FandomScraper { } } +/** + * Scrape transcript from a YouTube video. + * @implements {Scraper} + */ +class YouTubeScraper { + constructor() { + this.id = 'youtube'; + this.name = 'YouTube'; + this.description = 'Download a transcript from a YouTube video.'; + this.iconClass = 'fa-solid fa-closed-captioning'; + } + + /** + * Check if the scraper is available. + * @returns {Promise} + */ + async isAvailable() { + return true; + } + + /** + * Parse the ID of a YouTube video from a URL. + * @param {string} url URL of the YouTube video + * @returns {string} ID of the YouTube video + */ + parseId(url){ + const regex = /^.*(?:(?:youtu\.be\/|v\/|vi\/|u\/\w\/|embed\/|shorts\/)|(?:(?:watch)?\?v(?:i)?=|&v(?:i)?=))([^#&?]*).*/; + const match = url.match(regex); + return (match?.length && match[1] ? match[1] : url); + } + + /** + * Scrape transcript from a YouTube video. + * @returns {Promise} File attachments scraped from the YouTube video + */ + async scrape() { + let lang = ''; + const template = $(await renderExtensionTemplateAsync('attachments', 'youtube-scrape', {})); + const videoUrl = await callGenericPopup(template, POPUP_TYPE.INPUT, '', { wide: false, large: false, okButton: 'Scrape', cancelButton: 'Cancel', rows: 2 }); + + template.find('input[name="youtubeLanguageCode"]').on('input', function () { + lang = String($(this).val()).trim(); + }); + + if (!videoUrl) { + return; + } + + const id = this.parseId(String(videoUrl).trim()); + const toast = toastr.info('Working, please wait...'); + + const result = await fetch('/api/serpapi/transcript', { + method: 'POST', + headers: getRequestHeaders(), + body: JSON.stringify({ id, lang }), + }); + + if (!result.ok) { + const error = await result.text(); + throw new Error(error); + } + + const transcript = await result.text(); + toastr.clear(toast); + + const file = new File([transcript], `YouTube - ${id} - ${Date.now()}.txt`, { type: 'text/plain' }); + return [file]; + } +} + ScraperManager.registerDataBankScraper(new FileScraper()); +ScraperManager.registerDataBankScraper(new Notepad()); ScraperManager.registerDataBankScraper(new WebScraper()); ScraperManager.registerDataBankScraper(new FandomScraper()); +ScraperManager.registerDataBankScraper(new YouTubeScraper()); diff --git a/public/scripts/utils.js b/public/scripts/utils.js index fd2508b8c..37f40ad66 100644 --- a/public/scripts/utils.js +++ b/public/scripts/utils.js @@ -1185,16 +1185,23 @@ export function uuidv4() { } function postProcessText(text, collapse = true) { + // Remove carriage returns + text = text.replace(/\r/g, ''); + // Replace tabs with spaces + text = text.replace(/\t/g, ' '); + // Normalize unicode spaces + text = text.replace(/\u00A0/g, ' '); // Collapse multiple newlines into one if (collapse) { text = collapseNewlines(text); // Trim leading and trailing whitespace, and remove empty lines text = text.split('\n').map(l => l.trim()).filter(Boolean).join('\n'); + } else { + // Replace more than 4 newlines with 4 newlines + text = text.replace(/\n{4,}/g, '\n\n\n\n'); + // Trim lines that contain nothing but whitespace + text = text.split('\n').map(l => /^\s+$/.test(l) ? '' : l).join('\n'); } - // Remove carriage returns - text = text.replace(/\r/g, ''); - // Normalize unicode spaces - text = text.replace(/\u00A0/g, ' '); // Collapse multiple spaces into one (except for newlines) text = text.replace(/ {2,}/g, ' '); // Remove leading and trailing spaces @@ -1348,6 +1355,47 @@ export async function extractTextFromEpub(blob) { return postProcessText(text.join('\n'), false); } +/** + * Extracts text from an Office document using the server plugin. + * @param {File} blob File to extract text from + * @returns {Promise} A promise that resolves to the extracted text. + */ +export async function extractTextFromOffice(blob) { + async function checkPluginAvailability() { + try { + const result = await fetch('/api/plugins/office/probe', { + method: 'POST', + headers: getRequestHeaders(), + }); + + return result.ok; + } catch (error) { + return false; + } + } + + const isPluginAvailable = await checkPluginAvailability(); + + if (!isPluginAvailable) { + throw new Error('Importing Office documents requires a server plugin. Please refer to the documentation for more information.'); + } + + const base64 = await getBase64Async(blob); + + const response = await fetch('/api/plugins/office/parse', { + method: 'POST', + headers: getRequestHeaders(), + body: JSON.stringify({ data: base64 }), + }); + + if (!response.ok) { + throw new Error('Failed to parse the Office document'); + } + + const data = await response.text(); + return postProcessText(data, false); +} + /** * Sets a value in an object by a path. * @param {object} obj Object to set value in diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js index 06969e60b..2b28fd1b5 100644 --- a/src/endpoints/backends/chat-completions.js +++ b/src/endpoints/backends/chat-completions.js @@ -899,7 +899,7 @@ router.post('/generate', jsonParser, function (request, response) { } } else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.PERPLEXITY) { apiUrl = API_PERPLEXITY; - apiKey = readSecret(SECRET_KEYS.PERPLEXITY); + apiKey = readSecret(request.user.directories, SECRET_KEYS.PERPLEXITY); headers = {}; bodyParams = {}; request.body.messages = postProcessPrompt(request.body.messages, 'claude', request.body.char_name, request.body.user_name); diff --git a/src/endpoints/serpapi.js b/src/endpoints/serpapi.js index faae11750..15d7d0e3c 100644 --- a/src/endpoints/serpapi.js +++ b/src/endpoints/serpapi.js @@ -48,6 +48,92 @@ router.post('/search', jsonParser, async (request, response) => { } }); +/** + * Get the transcript of a YouTube video + * @copyright https://github.com/Kakulukian/youtube-transcript (MIT License) + */ +router.post('/transcript', jsonParser, async (request, response) => { + try { + const he = require('he'); + const RE_XML_TRANSCRIPT = /([^<]*)<\/text>/g; + const id = request.body.id; + const lang = request.body.lang; + + if (!id) { + console.log('Id is required for /transcript'); + return response.sendStatus(400); + } + + const videoPageResponse = await fetch(`https://www.youtube.com/watch?v=${id}`, { + headers: { + ...(lang && { 'Accept-Language': lang }), + 'User-Agent': visitHeaders['User-Agent'], + }, + }); + + const videoPageBody = await videoPageResponse.text(); + const splittedHTML = videoPageBody.split('"captions":'); + + if (splittedHTML.length <= 1) { + if (videoPageBody.includes('class="g-recaptcha"')) { + throw new Error('Too many requests'); + } + if (!videoPageBody.includes('"playabilityStatus":')) { + throw new Error('Video is not available'); + } + throw new Error('Transcript not available'); + } + + const captions = (() => { + try { + return JSON.parse(splittedHTML[1].split(',"videoDetails')[0].replace('\n', '')); + } catch (e) { + return undefined; + } + })()?.['playerCaptionsTracklistRenderer']; + + if (!captions) { + throw new Error('Transcript disabled'); + } + + if (!('captionTracks' in captions)) { + throw new Error('Transcript not available'); + } + + if (lang && !captions.captionTracks.some(track => track.languageCode === lang)) { + throw new Error('Transcript not available in this language'); + } + + const transcriptURL = (lang ? captions.captionTracks.find(track => track.languageCode === lang) : captions.captionTracks[0]).baseUrl; + const transcriptResponse = await fetch(transcriptURL, { + headers: { + ...(lang && { 'Accept-Language': lang }), + 'User-Agent': visitHeaders['User-Agent'], + }, + }); + + if (!transcriptResponse.ok) { + throw new Error('Transcript request failed'); + } + + const transcriptBody = await transcriptResponse.text(); + const results = [...transcriptBody.matchAll(RE_XML_TRANSCRIPT)]; + const transcript = results.map((result) => ({ + text: result[3], + duration: parseFloat(result[2]), + offset: parseFloat(result[1]), + lang: lang ?? captions.captionTracks[0].languageCode, + })); + // The text is double-encoded + const transcriptText = transcript.map((line) => he.decode(he.decode(line.text))).join(' '); + + return response.send(transcriptText); + } catch (error) { + console.log(error); + return response.sendStatus(500); + } +}); + router.post('/visit', jsonParser, async (request, response) => { try { const url = request.body.url;