From 299749a4e79ccb310e62a10275b719093e1013b1 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Tue, 12 Dec 2023 01:08:47 +0200 Subject: [PATCH] Add prerequisites for websearch extension --- public/script.js | 16 ++++---------- public/scripts/chats.js | 19 +++++++++++++++++ public/scripts/extensions.js | 2 +- public/scripts/utils.js | 6 ++++-- src/endpoints/serpapi.js | 41 ++++++++++++++++++++++++++++++++++++ 5 files changed, 69 insertions(+), 15 deletions(-) diff --git a/public/script.js b/public/script.js index fe3513a2a..780e28dbd 100644 --- a/public/script.js +++ b/public/script.js @@ -190,7 +190,7 @@ import { getBackgrounds, initBackgrounds } from './scripts/backgrounds.js'; import { hideLoader, showLoader } from './scripts/loader.js'; import { BulkEditOverlay, CharacterContextMenu } from './scripts/BulkEditOverlay.js'; import { loadMancerModels } from './scripts/mancer-settings.js'; -import { getFileAttachment, hasPendingFileAttachment, populateFileAttachment } from './scripts/chats.js'; +import { appendFileContent, hasPendingFileAttachment, populateFileAttachment } from './scripts/chats.js'; import { replaceVariableMacros } from './scripts/variables.js'; //exporting functions and vars for mods @@ -3098,26 +3098,18 @@ async function Generate(type, { automatic_trigger, force_name2, quiet_prompt, qu coreChat.pop(); } - coreChat = await Promise.all(coreChat.map(async (chatItem) => { + coreChat = await Promise.all(coreChat.map(async (chatItem, index) => { let message = chatItem.mes; let regexType = chatItem.is_user ? 
regex_placement.USER_INPUT : regex_placement.AI_OUTPUT; let options = { isPrompt: true }; let regexedMessage = getRegexedString(message, regexType, options); - - if (chatItem.extra?.file) { - const fileText = chatItem.extra.file.text || (await getFileAttachment(chatItem.extra.file.url)); - - if (fileText) { - const fileWrapped = `\`\`\`\n${fileText}\n\`\`\`\n\n`; - chatItem.extra.fileLength = fileWrapped.length; - regexedMessage = fileWrapped + regexedMessage; - } - } + regexedMessage = await appendFileContent(chatItem, regexedMessage); return { ...chatItem, mes: regexedMessage, + index, }; })); diff --git a/public/scripts/chats.js b/public/scripts/chats.js index cd8a8677e..176d4d6a0 100644 --- a/public/scripts/chats.js +++ b/public/scripts/chats.js @@ -341,6 +341,25 @@ function embedMessageFile(messageId, messageBlock) { } } +/** + * Appends file content to the message text. + * @param {object} message Message object + * @param {string} messageText Message text + * @returns {Promise<string>} Message text with file content appended. 
+ */ +export async function appendFileContent(message, messageText) { + if (message.extra?.file) { + const fileText = message.extra.file.text || (await getFileAttachment(message.extra.file.url)); + + if (fileText) { + const fileWrapped = `\`\`\`\n${fileText}\n\`\`\`\n\n`; + message.extra.fileLength = fileWrapped.length; + messageText = fileWrapped + messageText; + } + } + return messageText; +} + jQuery(function () { $(document).on('click', '.mes_hide', async function () { const messageBlock = $(this).closest('.mes'); diff --git a/public/scripts/extensions.js b/public/scripts/extensions.js index 14a5f286e..3ce240060 100644 --- a/public/scripts/extensions.js +++ b/public/scripts/extensions.js @@ -879,7 +879,7 @@ async function runGenerationInterceptors(chat, contextSize) { exitImmediately = immediately; }; - for (const manifest of Object.values(manifests)) { + for (const manifest of Object.values(manifests).sort((a, b) => a.loading_order - b.loading_order)) { const interceptorKey = manifest.generate_interceptor; if (typeof window[interceptorKey] === 'function') { try { diff --git a/public/scripts/utils.js b/public/scripts/utils.js index 7699263ba..6ea0508d3 100644 --- a/public/scripts/utils.js +++ b/public/scripts/utils.js @@ -1143,11 +1143,13 @@ export async function extractTextFromPDF(blob) { * @param {Blob} blob HTML content blob * @returns {Promise<string>} A promise that resolves to the parsed text. 
*/ -export async function extractTextFromHTML(blob) { +export async function extractTextFromHTML(blob, textSelector = 'body') { const html = await blob.text(); const domParser = new DOMParser(); const document = domParser.parseFromString(DOMPurify.sanitize(html), 'text/html'); - const text = postProcessText(document.body.textContent); + const elements = document.querySelectorAll(textSelector); + const rawText = Array.from(elements).map(e => e.textContent).join('\n'); + const text = postProcessText(rawText); return text; } diff --git a/src/endpoints/serpapi.js b/src/endpoints/serpapi.js index e41cb543e..3ba6a134d 100644 --- a/src/endpoints/serpapi.js +++ b/src/endpoints/serpapi.js @@ -5,6 +5,23 @@ const { jsonParser } = require('../express-common'); const router = express.Router(); +// Cosplay as Firefox +const visitHeaders = { + 'Accept': '*/*', + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:120.0) Gecko/20100101 Firefox/120.0', + 'Accept-Language': 'en-US,en;q=0.5', + 'Accept-Encoding': 'gzip, deflate, br', + 'Connection': 'keep-alive', + 'Cache-Control': 'no-cache', + 'Pragma': 'no-cache', + 'TE': 'trailers', + 'DNT': '1', + 'Sec-Fetch-Dest': 'document', + 'Sec-Fetch-Mode': 'navigate', + 'Sec-Fetch-Site': 'none', + 'Sec-Fetch-User': '?1', +}; + router.post('/search', jsonParser, async (request, response) => { try { const key = readSecret(SECRET_KEYS.SERPAPI); @@ -31,4 +48,28 @@ router.post('/search', jsonParser, async (request, response) => { } }); +router.post('/visit', jsonParser, async (request, response) => { + try { + const url = request.body.url; + + if (!url) { + console.log('No url provided for /visit'); + return response.sendStatus(400); + } + + const result = await fetch(url, { headers: visitHeaders }); + + if (!result.ok) { + console.log(`Visit failed ${result.status} ${result.statusText}`); + return response.sendStatus(500); + } + + const text = await result.text(); + return response.send(text); + } catch (error) { + 
console.log(error); + return response.sendStatus(500); + } +}); + module.exports = { router };