2024-04-10 00:00:12 +02:00
import {
eventSource ,
event _types ,
extension _prompt _types ,
2024-04-18 22:57:27 +03:00
extension _prompt _roles ,
2024-04-10 00:00:12 +02:00
getCurrentChatId ,
getRequestHeaders ,
is _send _press ,
saveSettingsDebounced ,
setExtensionPrompt ,
substituteParams ,
generateRaw ,
2024-06-15 01:40:16 +03:00
substituteParamsExtended ,
2024-04-10 00:00:12 +02:00
} from '../../../script.js' ;
2024-04-16 22:53:58 +02:00
import {
ModuleWorkerWrapper ,
extension _settings ,
getContext ,
modules ,
renderExtensionTemplateAsync ,
doExtrasFetch , getApiUrl ,
} from '../../extensions.js' ;
2024-06-22 17:41:02 +03:00
import { collapseNewlines , registerDebugFunction } from '../../power-user.js' ;
2024-03-12 20:10:25 +01:00
import { SECRET _KEYS , secret _state , writeSecret } from '../../secrets.js' ;
2024-05-30 14:49:57 +03:00
import { getDataBankAttachments , getDataBankAttachmentsForSource , getFileAttachment } from '../../chats.js' ;
2024-06-06 21:45:47 +03:00
import { debounce , getStringHash as calculateHash , waitUntilCondition , onlyUnique , splitRecursive , trimToStartSentence , trimToEndSentence } from '../../utils.js' ;
2024-04-28 19:47:53 +03:00
import { debounce _timeout } from '../../constants.js' ;
2024-04-23 03:09:52 +03:00
import { getSortedEntries } from '../../world-info.js' ;
2024-05-28 22:54:50 +03:00
import { textgen _types , textgenerationwebui _settings } from '../../textgen-settings.js' ;
2024-05-30 14:49:57 +03:00
import { SlashCommandParser } from '../../slash-commands/SlashCommandParser.js' ;
import { SlashCommand } from '../../slash-commands/SlashCommand.js' ;
import { ARGUMENT _TYPE , SlashCommandArgument , SlashCommandNamedArgument } from '../../slash-commands/SlashCommandArgument.js' ;
2024-08-13 20:50:41 +03:00
import { callGenericPopup , POPUP _RESULT , POPUP _TYPE } from '../../popup.js' ;
2024-08-13 19:34:11 +03:00
import { generateWebLlmChatPrompt , isWebLlmSupported } from '../shared.js' ;
/**
 * @typedef {object} HashedMessage
 * @property {string} text - The hashed message text
 */

// Internal name of this extension module
const MODULE_NAME = 'vectors';

// Extension prompt injection tags: chat memories and Data Bank chunks
export const EXTENSION_PROMPT_TAG = '3_vectors';
export const EXTENSION_PROMPT_TAG_DB = '4_vectors_data_bank';
2023-09-08 00:28:06 +03:00
// Default extension settings. Persisted values are merged over these defaults.
const settings = {
    // For both
    source: 'transformers',
    include_wi: false,
    togetherai_model: 'togethercomputer/m2-bert-80M-32k-retrieval',
    openai_model: 'text-embedding-ada-002',
    cohere_model: 'embed-english-v3.0',
    ollama_model: 'mxbai-embed-large',
    ollama_keep: false,
    vllm_model: '',
    summarize: false,
    summarize_sent: false,
    summary_source: 'main',
    summary_prompt: 'Ignore previous instructions. Summarize the most important parts of the message. Limit yourself to 250 words or less. Your response should include nothing but the summary.',
    force_chunk_delimiter: '',

    // For chats
    enabled_chats: false,
    template: 'Past events:\n{{text}}',
    depth: 2,
    position: extension_prompt_types.IN_PROMPT,
    protect: 5, // number of most recent messages never moved into the memory prompt
    insert: 3,
    query: 2, // number of recent messages used as the query text
    message_chunk_size: 400,
    score_threshold: 0.25,

    // For files
    enabled_files: false,
    translate_files: false,
    size_threshold: 10, // in kilobytes; smaller files are left inline
    chunk_size: 5000,
    chunk_count: 2,
    overlap_percent: 0, // chunk overlap, in percent of chunk size

    // For Data Bank
    size_threshold_db: 5, // in kilobytes
    chunk_size_db: 2500,
    chunk_count_db: 5,
    overlap_percent_db: 0,
    file_template_db: 'Related information:\n{{text}}',
    file_position_db: extension_prompt_types.IN_PROMPT,
    file_depth_db: 4,
    file_depth_role_db: extension_prompt_roles.SYSTEM,

    // For World Info
    enabled_world_info: false,
    enabled_for_all: false,
    max_entries: 5,
};

// Background worker that keeps the vector index in sync with the chat
const moduleWorker = new ModuleWorkerWrapper(synchronizeChat);
2024-04-19 00:16:23 +03:00
/**
 * Builds the vector collection ID for a file embedded in the chat.
 * @param {string} fileUrl URL of the file
 * @returns {string} Collection ID
 */
function getFileCollectionId(fileUrl) {
    const urlHash = getStringHash(fileUrl);
    return `file_${urlHash}`;
}
2023-09-08 15:25:10 +03:00
/**
 * Vectorizes the entire current chat in batches, updating the progress UI as it goes.
 * Aborts if message generation starts or the active chat changes mid-run.
 * @returns {Promise<void>}
 */
async function onVectorizeAllClick() {
    try {
        if (!settings.enabled_chats) {
            return;
        }

        const chatId = getCurrentChatId();

        if (!chatId) {
            toastr.info('No chat selected', 'Vectorization aborted');
            return;
        }

        const batchSize = 5;
        const elapsedLog = [];
        let finished = false;
        $('#vectorize_progress').show();
        $('#vectorize_progress_percent').text('0');
        $('#vectorize_progress_eta').text('...');

        while (!finished) {
            // Generation and vectorization share resources; bail out if a generation started
            if (is_send_press) {
                toastr.info('Message generation is in progress.', 'Vectorization aborted');
                throw new Error('Message generation is in progress.');
            }

            const startTime = Date.now();
            const remaining = await synchronizeChat(batchSize);
            const elapsed = Date.now() - startTime;
            elapsedLog.push(elapsed);
            finished = remaining <= 0;

            const total = getContext().chat.length;
            const processed = total - remaining;
            const processedPercent = Math.round((processed / total) * 100); // percentage of the work done
            const lastElapsed = elapsedLog.slice(-5); // last 5 elapsed times
            const averageElapsed = lastElapsed.reduce((a, b) => a + b, 0) / lastElapsed.length; // average time needed to process one batch
            const pace = averageElapsed / batchSize; // time needed to process one item
            const remainingTime = Math.round(pace * remaining / 1000); // ETA in seconds

            $('#vectorize_progress_percent').text(processedPercent);
            $('#vectorize_progress_eta').text(remainingTime);

            // Abort if the user switched chats while we were processing
            if (chatId !== getCurrentChatId()) {
                throw new Error('Chat changed');
            }
        }
    } catch (error) {
        console.error('Vectors: Failed to vectorize all', error);
    } finally {
        // Always hide the progress bar, whether we finished or aborted
        $('#vectorize_progress').hide();
    }
}
2023-09-10 00:15:02 +03:00
let syncBlocked = false ;
2024-06-16 02:16:27 +03:00
/**
 * Gets the chunk delimiters for splitting text.
 * @returns {string[]} Array of chunk delimiters
 */
function getChunkDelimiters() {
    const baseDelimiters = ['\n\n', '\n', ' ', ''];

    // A user-forced delimiter takes priority over the defaults
    return settings.force_chunk_delimiter
        ? [settings.force_chunk_delimiter, ...baseDelimiters]
        : baseDelimiters;
}
2023-12-31 04:00:04 +02:00
/**
 * Splits messages into chunks before inserting them into the vector index.
 * @param {object[]} items Array of vector items
 * @returns {object[]} Array of vector items (possibly chunked)
 */
function splitByChunks(items) {
    // Chunking disabled: pass items through unchanged
    if (settings.message_chunk_size <= 0) {
        return items;
    }

    return items.flatMap((item) =>
        splitRecursive(item.text, settings.message_chunk_size, getChunkDelimiters())
            .map((chunk) => ({ ...item, text: chunk })),
    );
}
2024-08-13 19:34:11 +03:00
/**
 * Summarizes messages using the Extras API method.
 * Messages are summarized in place; a failed request leaves the original text untouched.
 * @param {HashedMessage[]} hashedMessages Array of hashed messages
 * @returns {Promise<HashedMessage[]>} Summarized messages
 */
async function summarizeExtra(hashedMessages) {
    for (const element of hashedMessages) {
        try {
            const url = new URL(getApiUrl());
            url.pathname = '/api/summarize';

            const apiResult = await doExtrasFetch(url, {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'Bypass-Tunnel-Reminder': 'bypass',
                },
                body: JSON.stringify({
                    text: element.text,
                    params: {},
                }),
            });

            if (apiResult.ok) {
                const data = await apiResult.json();
                element.text = data.summary;
            }
        }
        catch (error) {
            // Best-effort: keep the original text if the Extras call fails
            console.log(error);
        }
    }

    return hashedMessages;
}
2024-08-13 19:34:11 +03:00
/**
 * Summarizes messages using the main API method.
 * Each message text is replaced in place with its summary.
 * @param {HashedMessage[]} hashedMessages Array of hashed messages
 * @returns {Promise<HashedMessage[]>} Summarized messages
 */
async function summarizeMain(hashedMessages) {
    for (const message of hashedMessages) {
        message.text = await generateRaw(message.text, '', false, false, settings.summary_prompt);
    }

    return hashedMessages;
}
2024-08-13 19:34:11 +03:00
/**
 * Summarizes messages using WebLLM.
 * No-op (returns input unchanged) when WebLLM is unavailable.
 * @param {HashedMessage[]} hashedMessages Array of hashed messages
 * @returns {Promise<HashedMessage[]>} Summarized messages
 */
async function summarizeWebLLM(hashedMessages) {
    if (!isWebLlmSupported()) {
        console.warn('Vectors: WebLLM is not supported');
        return hashedMessages;
    }

    for (const message of hashedMessages) {
        const prompt = [
            { role: 'system', content: settings.summary_prompt },
            { role: 'user', content: message.text },
        ];
        message.text = await generateWebLlmChatPrompt(prompt);
    }

    return hashedMessages;
}
/**
 * Summarizes messages using the chosen method.
 * @param {HashedMessage[]} hashedMessages Array of hashed messages
 * @param {string} endpoint Type of endpoint to use ('main', 'extras', or 'webllm')
 * @returns {Promise<HashedMessage[]>} Summarized messages
 */
async function summarize(hashedMessages, endpoint = 'main') {
    switch (endpoint) {
        case 'main':
            return await summarizeMain(hashedMessages);
        case 'extras':
            return await summarizeExtra(hashedMessages);
        case 'webllm':
            return await summarizeWebLLM(hashedMessages);
        default:
            console.error('Unsupported endpoint', endpoint);
            // Fall back to the unsummarized messages instead of implicitly
            // returning undefined, which would crash callers that assign the
            // result back and then call array methods on it.
            return hashedMessages;
    }
}
2023-09-09 15:12:54 +03:00
/**
 * Synchronizes the current chat with the vector index: inserts up to batchSize
 * new messages and deletes hashes that no longer exist in the chat.
 * @param {number} batchSize Maximum number of new items to process in this run
 * @returns {Promise<number>} Number of items still left to process (may be negative
 * when fewer than batchSize items remained); -1 when sync is disabled or blocked
 */
async function synchronizeChat(batchSize = 5) {
    if (!settings.enabled_chats) {
        return -1;
    }

    // Wait (up to 1s) for any concurrent sync or message generation to finish
    try {
        await waitUntilCondition(() => !syncBlocked && !is_send_press, 1000);
    } catch {
        console.log('Vectors: Synchronization blocked by another process');
        return -1;
    }

    try {
        syncBlocked = true;
        const context = getContext();
        const chatId = getCurrentChatId();

        if (!chatId || !Array.isArray(context.chat)) {
            console.debug('Vectors: No chat selected');
            return -1;
        }

        // Hash every non-system message; the hash identifies a message in the collection
        let hashedMessages = context.chat.filter(x => !x.is_system).map(x => ({ text: String(substituteParams(x.mes)), hash: getStringHash(substituteParams(x.mes)), index: context.chat.indexOf(x) }));
        const hashesInCollection = await getSavedHashes(chatId);

        if (settings.summarize) {
            hashedMessages = await summarize(hashedMessages, settings.summary_source);
        }

        // Diff chat contents against the stored collection
        const newVectorItems = hashedMessages.filter(x => !hashesInCollection.includes(x.hash));
        const deletedHashes = hashesInCollection.filter(x => !hashedMessages.some(y => y.hash === x));

        if (newVectorItems.length > 0) {
            // Only a single batch is inserted per call; the caller loops until done
            const chunkedBatch = splitByChunks(newVectorItems.slice(0, batchSize));

            console.log(`Vectors: Found ${newVectorItems.length} new items. Processing ${batchSize}...`);
            await insertVectorItems(chatId, chunkedBatch);
        }

        if (deletedHashes.length > 0) {
            await deleteVectorItems(chatId, deletedHashes);
            console.log(`Vectors: Deleted ${deletedHashes.length} old hashes`);
        }

        return newVectorItems.length - batchSize;
    } catch (error) {
        /**
         * Gets the error message for a given cause
         * @param {string} cause Error cause key
         * @returns {string} Error message
         */
        function getErrorMessage(cause) {
            switch (cause) {
                case 'api_key_missing':
                    return 'API key missing. Save it in the "API Connections" panel.';
                case 'api_url_missing':
                    return 'API URL missing. Save it in the "API Connections" panel.';
                case 'api_model_missing':
                    return 'Vectorization Source Model is required, but not set.';
                case 'extras_module_missing':
                    return 'Extras API must provide an "embeddings" module.';
                default:
                    return 'Check server console for more details';
            }
        }

        console.error('Vectors: Failed to synchronize chat', error);

        // Map well-known error causes to user-friendly toasts
        const message = getErrorMessage(error.cause);
        toastr.error(message, 'Vectorization failed', { preventDuplicates: true });

        return -1;
    } finally {
        // Always release the lock, even on failure
        syncBlocked = false;
    }
}
2024-04-23 16:15:54 +03:00
/**
 * @type {Map<string, number>} Memoized string-to-hash values
 */
const hashCache = new Map();

/**
 * Gets the hash value for a given string, memoizing results.
 * @param {string} str Input string
 * @returns {number} Hash value
 */
function getStringHash(str) {
    // Serve previously computed hashes from the cache
    if (hashCache.has(str)) {
        return hashCache.get(str);
    }

    // Compute once and memoize for future lookups
    const hashValue = calculateHash(str);
    hashCache.set(str, hashValue);

    return hashValue;
}
2023-11-30 00:01:59 +02:00
/**
 * Retrieves files from the chat and inserts them into the vector index.
 * Also ingests Data Bank attachments and injects their relevant chunks into the prompt.
 * Large in-chat files are replaced in place by their most relevant retrieved chunks.
 * @param {object[]} chat Array of chat messages
 * @returns {Promise<void>}
 */
async function processFiles(chat) {
    try {
        if (!settings.enabled_files) {
            return;
        }

        const dataBankCollectionIds = await ingestDataBankAttachments();

        if (dataBankCollectionIds.length) {
            const queryText = await getQueryText(chat, 'file');
            await injectDataBankChunks(queryText, dataBankCollectionIds);
        }

        for (const message of chat) {
            // Message has no file
            if (!message?.extra?.file) {
                continue;
            }

            // Trim file inserted by the script
            const fileText = String(message.mes)
                .substring(0, message.extra.fileLength).trim();

            // Convert kilobytes to string length
            const thresholdLength = settings.size_threshold * 1024;

            // File is too small to bother vectorizing; keep it inline
            if (fileText.length < thresholdLength) {
                continue;
            }

            // Strip the inline file text from the message
            message.mes = message.mes.substring(message.extra.fileLength);

            const fileName = message.extra.file.name;
            const fileUrl = message.extra.file.url;
            const collectionId = getFileCollectionId(fileUrl);
            const hashesInCollection = await getSavedHashes(collectionId);

            // Vectorize only if the file is not in the collection yet
            if (!hashesInCollection.length) {
                await vectorizeFile(fileText, fileName, collectionId, settings.chunk_size, settings.overlap_percent);
            }

            // Replace the full file text with the most relevant retrieved chunks
            const queryText = await getQueryText(chat, 'file');
            const fileChunks = await retrieveFileChunks(queryText, collectionId);

            message.mes = `${fileChunks}\n\n${message.mes}`;
        }
    } catch (error) {
        console.error('Vectors: Failed to retrieve files', error);
    }
}
2024-05-30 14:49:57 +03:00
/**
 * Ensures that data bank attachments are ingested and inserted into the vector index.
 * @param {string} [source] Optional source filter for data bank attachments.
 * @returns {Promise<string[]>} Collection IDs
 */
async function ingestDataBankAttachments(source) {
    // Exclude disabled files
    const dataBank = source ? getDataBankAttachmentsForSource(source, false) : getDataBankAttachments(false);
    const dataBankCollectionIds = [];

    for (const file of dataBank) {
        const collectionId = getFileCollectionId(file.url);
        const hashesInCollection = await getSavedHashes(collectionId);
        dataBankCollectionIds.push(collectionId);

        // File is already in the collection
        if (hashesInCollection.length) {
            continue;
        }

        // Download and process the file
        const fileText = await getFileAttachment(file.url);
        console.log(`Vectors: Retrieved file ${file.name} from Data Bank`);
        // Convert kilobytes to string length
        const thresholdLength = settings.size_threshold_db * 1024;
        // Use chunk size from settings if file is larger than threshold;
        // NOTE(review): -1 appears to mean "do not chunk" — confirm against vectorizeFile/splitRecursive
        const chunkSize = file.size > thresholdLength ? settings.chunk_size_db : -1;
        await vectorizeFile(fileText, file.name, collectionId, chunkSize, settings.overlap_percent_db);
    }

    return dataBankCollectionIds;
}
2024-04-17 02:09:22 +03:00
/**
 * Inserts file chunks from the Data Bank into the prompt.
 * @param {string} queryText Text to query
 * @param {string[]} collectionIds File collection IDs
 * @returns {Promise<void>}
 */
async function injectDataBankChunks(queryText, collectionIds) {
    try {
        const queryResults = await queryMultipleCollections(collectionIds, queryText, settings.chunk_count_db, settings.score_threshold);
        console.debug(`Vectors: Retrieved ${collectionIds.length} Data Bank collections`, queryResults);

        // Gather unique chunk texts per collection, ordered by their original index
        const textParts = [];
        for (const collectionId in queryResults) {
            console.debug(`Vectors: Processing Data Bank collection ${collectionId}`, queryResults[collectionId]);
            const chunkTexts = queryResults[collectionId].metadata?.filter(x => x.text)?.sort((a, b) => a.index - b.index)?.map(x => x.text)?.filter(onlyUnique) || [];
            textParts.push(chunkTexts.join('\n') + '\n\n');
        }
        const textResult = textParts.join('');

        if (!textResult) {
            console.debug('Vectors: No Data Bank chunks found');
            return;
        }

        const insertedText = substituteParamsExtended(settings.file_template_db, { text: textResult });
        setExtensionPrompt(EXTENSION_PROMPT_TAG_DB, insertedText, settings.file_position_db, settings.file_depth_db, settings.include_wi, settings.file_depth_role_db);
    } catch (error) {
        console.error('Vectors: Failed to insert Data Bank chunks', error);
    }
}
2023-11-30 00:01:59 +02:00
/**
 * Retrieves file chunks from the vector index and joins them into a single text.
 * @param {string} queryText Text to query
 * @param {string} collectionId File collection ID
 * @returns {Promise<string>} Retrieved file text
 */
async function retrieveFileChunks(queryText, collectionId) {
    console.debug(`Vectors: Retrieving file chunks for collection ${collectionId}`, queryText);
    const queryResults = await queryCollection(collectionId, queryText, settings.chunk_count);
    console.debug(`Vectors: Retrieved ${queryResults.hashes.length} file chunks for collection ${collectionId}`, queryResults);

    // Keep unique non-empty chunks, ordered by their position in the original file
    const orderedTexts = queryResults.metadata
        .filter(x => x.text)
        .sort((a, b) => a.index - b.index)
        .map(x => x.text)
        .filter(onlyUnique);

    return orderedTexts.join('\n');
}
/**
 * Vectorizes a file and inserts it into the vector index.
 * @param {string} fileText File text
 * @param {string} fileName File name
 * @param {string} collectionId File collection ID
 * @param {number} chunkSize Chunk size
 * @param {number} overlapPercent Overlap size (in %)
 * @returns {Promise<boolean>} True if successful, false if not
 */
async function vectorizeFile(fileText, fileName, collectionId, chunkSize, overlapPercent) {
    try {
        // Optionally translate to English before embedding (when the translate extension is present)
        if (settings.translate_files && typeof window['translate'] === 'function') {
            console.log(`Vectors: Translating file ${fileName} to English...`);
            const translatedText = await window['translate'](fileText, 'en');
            fileText = translatedText;
        }

        const toast = toastr.info('Vectorization may take some time, please wait...', `Ingesting file ${fileName}`);
        const overlapSize = Math.round(chunkSize * overlapPercent / 100);
        const delimiters = getChunkDelimiters();
        // Overlap should not be included in chunk size. It will be later compensated by overlapChunks
        chunkSize = overlapSize > 0 ? (chunkSize - overlapSize) : chunkSize;
        const chunks = splitRecursive(fileText, chunkSize, delimiters).map((x, y, z) => overlapSize > 0 ? overlapChunks(x, y, z, overlapSize) : x);
        console.debug(`Vectors: Split file ${fileName} into ${chunks.length} chunks with ${overlapPercent}% overlap`, chunks);

        const items = chunks.map((chunk, index) => ({ hash: getStringHash(chunk), text: chunk, index: index }));
        await insertVectorItems(collectionId, items);

        toastr.clear(toast);
        console.log(`Vectors: Inserted ${chunks.length} vector items for file ${fileName} into ${collectionId}`);
        return true;
    } catch (error) {
        toastr.error(String(error), 'Failed to vectorize file', { preventDuplicates: true });
        console.error('Vectors: Failed to vectorize file', error);
        return false;
    }
}
2023-09-08 00:28:06 +03:00
/**
 * Removes the most relevant messages from the chat and displays them in the extension prompt
 * @param {object[]} chat Array of chat messages (mutated in place)
 */
async function rearrangeChat(chat) {
    try {
        // Clear the extension prompt
        setExtensionPrompt(EXTENSION_PROMPT_TAG, '', settings.position, settings.depth, settings.include_wi);
        setExtensionPrompt(EXTENSION_PROMPT_TAG_DB, '', settings.file_position_db, settings.file_depth_db, settings.include_wi, settings.file_depth_role_db);

        if (settings.enabled_files) {
            await processFiles(chat);
        }

        if (settings.enabled_world_info) {
            await activateWorldInfo(chat);
        }

        if (!settings.enabled_chats) {
            return;
        }

        const chatId = getCurrentChatId();

        if (!chatId || !Array.isArray(chat)) {
            console.debug('Vectors: No chat selected');
            return;
        }

        if (chat.length < settings.protect) {
            console.debug(`Vectors: Not enough messages to rearrange (less than ${settings.protect})`);
            return;
        }

        const queryText = await getQueryText(chat, 'chat');

        if (queryText.length === 0) {
            console.debug('Vectors: No text to query');
            return;
        }

        // Get the most relevant messages, excluding the last few
        const queryResults = await queryCollection(chatId, queryText, settings.insert);
        const queryHashes = queryResults.hashes.filter(onlyUnique);
        const queriedMessages = [];
        const insertedHashes = new Set();
        const retainMessages = chat.slice(-settings.protect);

        for (const message of chat) {
            if (retainMessages.includes(message) || !message.mes) {
                continue;
            }
            const hash = getStringHash(substituteParams(message.mes));
            if (queryHashes.includes(hash) && !insertedHashes.has(hash)) {
                queriedMessages.push(message);
                insertedHashes.add(hash);
            }
        }

        // Rearrange queried messages to match query order
        // Order is reversed because more relevant are at the lower indices
        queriedMessages.sort((a, b) => queryHashes.indexOf(getStringHash(substituteParams(b.mes))) - queryHashes.indexOf(getStringHash(substituteParams(a.mes))));

        // Remove queried messages from the original chat array.
        // Iterate backwards by index: splicing inside a forward for...of loop
        // shifts the array under the iterator, which would skip the element
        // following each removal and leave consecutive relevant messages
        // duplicated in both the chat and the extension prompt.
        for (let i = chat.length - 1; i >= 0; i--) {
            if (queriedMessages.includes(chat[i])) {
                chat.splice(i, 1);
            }
        }

        if (queriedMessages.length === 0) {
            console.debug('Vectors: No relevant messages found');
            return;
        }

        // Format queried messages into a single string
        const insertedText = getPromptText(queriedMessages);
        setExtensionPrompt(EXTENSION_PROMPT_TAG, insertedText, settings.position, settings.depth, settings.include_wi);
    } catch (error) {
        toastr.error('Generation interceptor aborted. Check browser console for more details.', 'Vector Storage');
        console.error('Vectors: Failed to rearrange chat', error);
    }
}
2023-09-09 21:26:04 +03:00
/**
 * Formats queried messages into the memory prompt template.
 * @param {any[]} queriedMessages
 * @returns {string}
 */
function getPromptText(queriedMessages) {
    const formattedMessages = queriedMessages
        .map(x => collapseNewlines(`${x.name}: ${x.mes}`).trim())
        .join('\n\n');
    console.log('Vectors: relevant past messages found.\n', formattedMessages);
    return substituteParamsExtended(settings.template, { text: formattedMessages });
}
2024-06-06 21:45:47 +03:00
/**
 * Modifies text chunks to include overlap with adjacent chunks.
 * @param {string} chunk Current item
 * @param {number} index Current index
 * @param {string[]} chunks List of chunks
 * @param {number} overlapSize Size of the overlap
 * @returns {string} Overlapped chunks, with overlap trimmed to sentence boundaries
 */
function overlapChunks(chunk, index, chunks, overlapSize) {
    const halfOverlap = Math.floor(overlapSize / 2);
    const prevChunk = chunks[index - 1];
    const nextChunk = chunks[index + 1];

    // Borrow half the overlap from each neighbor, trimmed to sentence boundaries
    const prevOverlap = trimToStartSentence(prevChunk?.substring(prevChunk.length - halfOverlap)) || '';
    const nextOverlap = trimToEndSentence(nextChunk?.substring(0, halfOverlap)) || '';

    return [prevOverlap, chunk, nextOverlap].filter(x => x).join(' ');
}
2023-09-08 00:28:06 +03:00
// Expose the interceptor so the core generation pipeline can invoke it by name
window['vectors_rearrangeChat'] = rearrangeChat;

// Debounced worker update triggered by chat events
const onChatEvent = debounce(async () => await moduleWorker.update(), debounce_timeout.relaxed);
2023-09-08 00:28:06 +03:00
2023-09-08 01:26:26 +03:00
/**
 * Gets the text to query from the chat
 * @param {object[]} chat Chat messages
 * @param {'file'|'chat'|'world-info'} initiator Initiator of the query
 * @returns {Promise<string>} Text to query
 */
async function getQueryText(chat, initiator) {
    let hashedMessages = chat.map(x => ({ text: String(substituteParams(x.mes)) }));

    // Optionally summarize the query text when querying for chat memories
    const shouldSummarize = initiator === 'chat' && settings.enabled_chats && settings.summarize && settings.summarize_sent;
    if (shouldSummarize) {
        hashedMessages = await summarize(hashedMessages, settings.summary_source);
    }

    // Walk from newest to oldest, collecting up to settings.query non-empty texts
    const collected = [];
    for (const message of hashedMessages.slice().reverse()) {
        if (message.text) {
            collected.push(message.text);
        }
        if (collected.length === settings.query) {
            break;
        }
    }

    const queryText = collected.map(text => text + '\n').join('');
    return collapseNewlines(queryText).trim();
}
/**
 * Gets the saved hashes for a collection
 * @param {string} collectionId
 * @returns {Promise<number[]>} Saved hashes
 * @throws {Error} When the server responds with a non-OK status
 */
async function getSavedHashes(collectionId) {
    const response = await fetch('/api/vector/list', {
        method: 'POST',
        headers: getVectorHeaders(),
        body: JSON.stringify({
            collectionId,
            source: settings.source,
        }),
    });

    if (!response.ok) {
        throw new Error(`Failed to get saved hashes for collection ${collectionId}`);
    }

    return await response.json();
}
2024-03-12 21:41:30 +02:00
/**
 * Builds the request headers for vector API calls, adding the
 * source-specific model/URL/key headers on top of the standard ones.
 * @returns {object} Headers object
 */
function getVectorHeaders() {
    const headers = getRequestHeaders();

    switch (settings.source) {
        case 'extras':
            headers['X-Extras-Url'] = extension_settings.apiUrl;
            headers['X-Extras-Key'] = extension_settings.apiKey;
            break;
        case 'togetherai':
            headers['X-Togetherai-Model'] = extension_settings.vectors.togetherai_model;
            break;
        case 'openai':
            headers['X-OpenAI-Model'] = extension_settings.vectors.openai_model;
            break;
        case 'cohere':
            headers['X-Cohere-Model'] = extension_settings.vectors.cohere_model;
            break;
        case 'ollama':
            headers['X-Ollama-Model'] = extension_settings.vectors.ollama_model;
            headers['X-Ollama-URL'] = textgenerationwebui_settings.server_urls[textgen_types.OLLAMA];
            headers['X-Ollama-Keep'] = !!extension_settings.vectors.ollama_keep;
            break;
        case 'llamacpp':
            headers['X-LlamaCpp-URL'] = textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP];
            break;
        case 'vllm':
            headers['X-Vllm-URL'] = textgenerationwebui_settings.server_urls[textgen_types.VLLM];
            headers['X-Vllm-Model'] = extension_settings.vectors.vllm_model;
            break;
        default:
            // Sources like 'transformers' need no extra headers
            break;
    }

    return headers;
}
2023-09-08 00:28:06 +03:00
/**
 * Inserts vector items into a collection
 * @param {string} collectionId - The collection to insert into
 * @param {{ hash: number, text: string }[]} items - The items to insert
 * @returns {Promise<void>}
 */
async function insertVectorItems(collectionId, items) {
    // Fail early if the configured source is missing a key, URL, model or module
    throwIfSourceInvalid();

    const response = await fetch('/api/vector/insert', {
        method: 'POST',
        headers: getVectorHeaders(),
        body: JSON.stringify({
            collectionId: collectionId,
            items: items,
            source: settings.source,
        }),
    });

    if (!response.ok) {
        throw new Error(`Failed to insert vector items for collection ${collectionId}`);
    }
}
2024-05-28 22:54:50 +03:00
/**
 * Throws an error if the source is invalid (missing API key or URL, or missing module)
 */
function throwIfSourceInvalid() {
    // Sources that require an API key in the secret store
    const requiredKeys = {
        openai: SECRET_KEYS.OPENAI,
        palm: SECRET_KEYS.MAKERSUITE,
        mistral: SECRET_KEYS.MISTRALAI,
        togetherai: SECRET_KEYS.TOGETHERAI,
        nomicai: SECRET_KEYS.NOMICAI,
        cohere: SECRET_KEYS.COHERE,
    };

    // Sources that require a server URL in the textgen settings
    const requiredUrls = {
        ollama: textgen_types.OLLAMA,
        vllm: textgen_types.VLLM,
        llamacpp: textgen_types.LLAMACPP,
    };

    // Sources that require an explicit model name
    const requiredModels = {
        ollama: settings.ollama_model,
        vllm: settings.vllm_model,
    };

    if (Object.hasOwn(requiredKeys, settings.source) && !secret_state[requiredKeys[settings.source]]) {
        throw new Error('Vectors: API key missing', { cause: 'api_key_missing' });
    }

    if (Object.hasOwn(requiredUrls, settings.source) && !textgenerationwebui_settings.server_urls[requiredUrls[settings.source]]) {
        throw new Error('Vectors: API URL missing', { cause: 'api_url_missing' });
    }

    if (Object.hasOwn(requiredModels, settings.source) && !requiredModels[settings.source]) {
        throw new Error('Vectors: API model missing', { cause: 'api_model_missing' });
    }

    if (settings.source === 'extras' && !modules.includes('embeddings')) {
        throw new Error('Vectors: Embeddings module missing', { cause: 'extras_module_missing' });
    }
}
2023-09-08 00:28:06 +03:00
/**
 * Deletes vector items from a collection
 * @param {string} collectionId - The collection to delete from
 * @param {number[]} hashes - The hashes of the items to delete
 * @returns {Promise<void>}
 */
async function deleteVectorItems(collectionId, hashes) {
    const requestBody = JSON.stringify({
        collectionId: collectionId,
        hashes: hashes,
        source: settings.source,
    });

    const response = await fetch('/api/vector/delete', {
        method: 'POST',
        headers: getVectorHeaders(),
        body: requestBody,
    });

    if (!response.ok) {
        throw new Error(`Failed to delete vector items for collection ${collectionId}`);
    }
}
/**
 * Queries a collection for the most relevant items.
 * @param {string} collectionId - The collection to query
 * @param {string} searchText - The text to query
 * @param {number} topK - The number of results to return
 * @returns {Promise<{ hashes: number[], metadata: object[] }>} - Hashes of the results
 */
async function queryCollection(collectionId, searchText, topK) {
    const response = await fetch('/api/vector/query', {
        method: 'POST',
        headers: getVectorHeaders(),
        body: JSON.stringify({
            collectionId: collectionId,
            searchText: searchText,
            topK: topK,
            source: settings.source,
            threshold: settings.score_threshold,
        }),
    });

    if (!response.ok) {
        throw new Error(`Failed to query collection ${collectionId}`);
    }

    return await response.json();
}
2024-04-17 02:09:22 +03:00
/**
 * Queries multiple collections for a given text.
 * @param {string[]} collectionIds - Collection IDs to query
 * @param {string} searchText - Text to query
 * @param {number} topK - Number of results to return
 * @param {number} threshold - Score threshold; falls back to the configured one when nullish
 * @returns {Promise<Record<string, { hashes: number[], metadata: object[] }>>} - Results mapped to collection IDs
 */
async function queryMultipleCollections(collectionIds, searchText, topK, threshold) {
    const requestBody = JSON.stringify({
        collectionIds: collectionIds,
        searchText: searchText,
        topK: topK,
        source: settings.source,
        threshold: threshold ?? settings.score_threshold,
    });

    const response = await fetch('/api/vector/query-multi', {
        method: 'POST',
        headers: getVectorHeaders(),
        body: requestBody,
    });

    if (!response.ok) {
        throw new Error('Failed to query multiple collections');
    }

    return await response.json();
}
2024-04-18 23:07:16 +03:00
/**
 * Purges the vector index for a file.
 * @param {string} fileUrl File URL to purge
 */
async function purgeFileVectorIndex(fileUrl) {
    try {
        // File vectors are only maintained when the files feature is enabled
        if (!settings.enabled_files) {
            return;
        }

        console.log(`Vectors: Purging file vector index for ${fileUrl}`);
        const collectionId = getFileCollectionId(fileUrl);

        const response = await fetch('/api/vector/purge', {
            method: 'POST',
            headers: getVectorHeaders(),
            body: JSON.stringify({ collectionId: collectionId }),
        });

        if (!response.ok) {
            throw new Error(`Could not delete vector index for collection ${collectionId}`);
        }

        console.log(`Vectors: Purged vector index for collection ${collectionId}`);
    } catch (error) {
        // Best-effort cleanup: log and continue
        console.error('Vectors: Failed to purge file', error);
    }
}
2024-02-23 22:37:00 +02:00
/**
 * Purges the vector index for a collection.
 * @param {string} collectionId Collection ID to purge
 * @returns <Promise<boolean>> True if deleted, false if not
 */
async function purgeVectorIndex(collectionId) {
    try {
        // Nothing to purge when chat vectorization is disabled
        if (!settings.enabled_chats) {
            return true;
        }

        const response = await fetch('/api/vector/purge', {
            method: 'POST',
            headers: getVectorHeaders(),
            body: JSON.stringify({ collectionId: collectionId }),
        });

        if (!response.ok) {
            throw new Error(`Could not delete vector index for collection ${collectionId}`);
        }

        console.log(`Vectors: Purged vector index for collection ${collectionId}`);
        return true;
    } catch (error) {
        console.error('Vectors: Failed to purge', error);
        return false;
    }
}
2024-06-22 17:41:02 +03:00
/**
 * Purges all vector indexes.
 */
async function purgeAllVectorIndexes() {
    try {
        const response = await fetch('/api/vector/purge-all', {
            method: 'POST',
            headers: getVectorHeaders(),
        });

        if (!response.ok) {
            throw new Error('Failed to purge all vector indexes');
        }

        console.log('Vectors: Purged all vector indexes');
        toastr.success('All vector indexes purged', 'Purge successful');
    } catch (error) {
        console.error('Vectors: Failed to purge all', error);
        toastr.error('Failed to purge all vector indexes', 'Purge failed');
    }
}
2023-11-30 00:01:59 +02:00
/**
 * Shows/hides the settings panels that depend on the enabled features
 * and the currently selected embedding source.
 */
function toggleSettings() {
    // Feature-gated sections
    $('#vectors_files_settings').toggle(!!settings.enabled_files);
    $('#vectors_chats_settings').toggle(!!settings.enabled_chats);
    $('#vectors_world_info_settings').toggle(!!settings.enabled_world_info);

    // Source-specific controls: each is visible only for its own source
    const sourceControls = {
        '#together_vectorsModel': 'togetherai',
        '#openai_vectorsModel': 'openai',
        '#cohere_vectorsModel': 'cohere',
        '#ollama_vectorsModel': 'ollama',
        '#llamacpp_vectorsModel': 'llamacpp',
        '#vllm_vectorsModel': 'vllm',
        '#nomicai_apiKey': 'nomicai',
    };

    for (const [selector, source] of Object.entries(sourceControls)) {
        $(selector).toggle(settings.source === source);
    }
}
2023-12-31 04:00:04 +02:00
/**
 * Handles the purge button: deletes the vector index for the current chat.
 */
async function onPurgeClick() {
    const chatId = getCurrentChatId();

    if (!chatId) {
        toastr.info('No chat selected', 'Purge aborted');
        return;
    }

    const purged = await purgeVectorIndex(chatId);

    if (purged) {
        toastr.success('Vector index purged', 'Purge successful');
    } else {
        toastr.error('Failed to purge vector index', 'Purge failed');
    }
}
/**
 * Shows vectorization stats for the current chat and marks
 * already-vectorized messages in the UI.
 */
async function onViewStatsClick() {
    const chatId = getCurrentChatId();

    if (!chatId) {
        toastr.info('No chat selected');
        return;
    }

    const hashesInCollection = await getSavedHashes(chatId);
    const totalHashes = hashesInCollection.length;
    const uniqueHashes = hashesInCollection.filter(onlyUnique).length;

    toastr.info(`Total hashes: <b>${totalHashes}</b><br>
    Unique hashes: <b>${uniqueHashes}</b><br><br>
    I'll mark collected messages with a green circle.`,
    `Stats for chat ${chatId}`,
    { timeOut: 10000, escapeHtml: false },
    );

    // Flag every message whose hash is present in the collection
    const chat = getContext().chat;
    for (const [index, message] of chat.entries()) {
        if (hashesInCollection.includes(getStringHash(substituteParams(message.mes)))) {
            $(`.mes[mesid="${index}"]`).addClass('vectorized');
        }
    }
}
2024-04-19 00:16:23 +03:00
/**
 * Vectorizes every file attachment (Data Bank and chat files) that does
 * not already have a vector collection.
 */
async function onVectorizeAllFilesClick() {
    try {
        const dataBank = getDataBankAttachments();
        const chatAttachments = getContext().chat.filter(x => x.extra?.file).map(x => x.extra.file);
        const allFiles = [...dataBank, ...chatAttachments];

        /**
         * Gets the chunk size for a file attachment.
         * @param file {import('../../chats.js').FileAttachment} File attachment
         * @returns {number} Chunk size for the file (-1 means "do not chunk")
         */
        function getChunkSize(file) {
            if (chatAttachments.includes(file)) {
                // Convert kilobytes to string length
                const thresholdLength = settings.size_threshold * 1024;
                return file.size > thresholdLength ? settings.chunk_size : -1;
            }

            if (dataBank.includes(file)) {
                // Convert kilobytes to string length
                const thresholdLength = settings.size_threshold_db * 1024;
                // Use chunk size from settings if file is larger than threshold
                return file.size > thresholdLength ? settings.chunk_size_db : -1;
            }

            return -1;
        }

        /**
         * Gets the overlap percent for a file attachment.
         * @param file {import('../../chats.js').FileAttachment} File attachment
         * @returns {number} Overlap percent for the file
         */
        function getOverlapPercent(file) {
            if (chatAttachments.includes(file)) {
                return settings.overlap_percent;
            }

            if (dataBank.includes(file)) {
                return settings.overlap_percent_db;
            }

            return 0;
        }

        let allSuccess = true;

        for (const file of allFiles) {
            const text = await getFileAttachment(file.url);
            const collectionId = getFileCollectionId(file.url);
            const hashes = await getSavedHashes(collectionId);

            // Skip files that already have a populated collection
            if (hashes.length) {
                console.log(`Vectors: File ${file.name} is already vectorized`);
                continue;
            }

            const result = await vectorizeFile(text, file.name, collectionId, getChunkSize(file), getOverlapPercent(file));

            if (!result) {
                allSuccess = false;
            }
        }

        if (allSuccess) {
            toastr.success('All files vectorized', 'Vectorization successful');
        } else {
            toastr.warning('Some files failed to vectorize. Check browser console for more details.', 'Vector Storage');
        }
    } catch (error) {
        console.error('Vectors: Failed to vectorize all files', error);
        toastr.error('Failed to vectorize all files', 'Vectorization failed');
    }
}
/**
 * Purges the vector indexes of all file attachments (Data Bank and chat files).
 */
async function onPurgeFilesClick() {
    try {
        const dataBank = getDataBankAttachments();
        const chatAttachments = getContext().chat.filter(x => x.extra?.file).map(x => x.extra.file);

        for (const file of [...dataBank, ...chatAttachments]) {
            await purgeFileVectorIndex(file.url);
        }

        toastr.success('All files purged', 'Purge successful');
    } catch (error) {
        console.error('Vectors: Failed to purge all files', error);
        toastr.error('Failed to purge all files', 'Purge failed');
    }
}
2024-04-23 03:09:52 +03:00
/**
 * Synchronizes vectorized World Info entries with the backend and
 * force-activates the entries that match the current chat query text.
 * @param {object[]} chat Chat messages used to build the query text
 * @returns {Promise<void>}
 */
async function activateWorldInfo(chat) {
    if (!settings.enabled_world_info) {
        console.debug('Vectors: Disabled for World Info');
        return;
    }

    const entries = await getSortedEntries();

    if (!Array.isArray(entries) || entries.length === 0) {
        console.debug('Vectors: No WI entries found');
        return;
    }

    // Group entries by "world" field
    const groupedEntries = {};
    // Cache each entry's content hash so it is computed exactly once,
    // instead of re-hashing inside every filter/map below.
    const entryHashes = new Map();

    for (const entry of entries) {
        // Skip orphaned entries. Is it even possible?
        if (!entry.world) {
            console.debug('Vectors: Skipped orphaned WI entry', entry);
            continue;
        }

        // Skip disabled entries
        if (entry.disable) {
            console.debug('Vectors: Skipped disabled WI entry', entry);
            continue;
        }

        // Skip entries without content
        if (!entry.content) {
            console.debug('Vectors: Skipped WI entry without content', entry);
            continue;
        }

        // Skip non-vectorized entries
        if (!entry.vectorized && !settings.enabled_for_all) {
            console.debug('Vectors: Skipped non-vectorized WI entry', entry);
            continue;
        }

        if (!Object.hasOwn(groupedEntries, entry.world)) {
            groupedEntries[entry.world] = [];
        }

        groupedEntries[entry.world].push(entry);
        entryHashes.set(entry, getStringHash(entry.content));
    }

    const collectionIds = [];

    if (Object.keys(groupedEntries).length === 0) {
        console.debug('Vectors: No WI entries to synchronize');
        return;
    }

    // Synchronize collections
    for (const world in groupedEntries) {
        const collectionId = `world_${getStringHash(world)}`;
        const hashesInCollection = await getSavedHashes(collectionId);
        // Set membership avoids a linear scan per saved hash
        const worldHashes = new Set(groupedEntries[world].map(x => entryHashes.get(x)));
        const newEntries = groupedEntries[world].filter(x => !hashesInCollection.includes(entryHashes.get(x)));
        const deletedHashes = hashesInCollection.filter(x => !worldHashes.has(x));

        if (newEntries.length > 0) {
            console.log(`Vectors: Found ${newEntries.length} new WI entries for world ${world}`);
            await insertVectorItems(collectionId, newEntries.map(x => ({ hash: entryHashes.get(x), text: x.content, index: x.uid })));
        }

        if (deletedHashes.length > 0) {
            console.log(`Vectors: Deleted ${deletedHashes.length} old hashes for world ${world}`);
            await deleteVectorItems(collectionId, deletedHashes);
        }

        collectionIds.push(collectionId);
    }

    // Perform a multi-query
    const queryText = await getQueryText(chat, 'world-info');

    if (queryText.length === 0) {
        console.debug('Vectors: No text to query for WI');
        return;
    }

    const queryResults = await queryMultipleCollections(collectionIds, queryText, settings.max_entries, settings.score_threshold);
    const activatedHashes = Object.values(queryResults).flatMap(x => x.hashes).filter(onlyUnique);
    const activatedEntries = [];

    // Activate entries found in the query results. Entries skipped during
    // grouping have no cached hash, so fall back to hashing their content
    // (matches the original behavior of hashing every entry here).
    for (const entry of entries) {
        const hash = entryHashes.get(entry) ?? getStringHash(entry.content);
        if (activatedHashes.includes(hash)) {
            activatedEntries.push(entry);
        }
    }

    if (activatedEntries.length === 0) {
        console.debug('Vectors: No activated WI entries found');
        return;
    }

    console.log(`Vectors: Activated ${activatedEntries.length} WI entries`, activatedEntries);
    await eventSource.emit(event_types.WORLDINFO_FORCE_ACTIVATE, activatedEntries);
}
2023-09-08 00:28:06 +03:00
jQuery ( async ( ) => {
if ( ! extension _settings . vectors ) {
extension _settings . vectors = settings ;
}
2023-11-30 00:01:59 +02:00
// Migrate from old settings
if ( settings [ 'enabled' ] ) {
settings . enabled _chats = true ;
}
2023-09-08 00:28:06 +03:00
Object . assign ( settings , extension _settings . vectors ) ;
2024-03-01 23:52:49 +01:00
2023-09-17 14:09:24 +03:00
// Migrate from TensorFlow to Transformers
settings . source = settings . source !== 'local' ? settings . source : 'transformers' ;
2024-04-11 23:38:44 +03:00
const template = await renderExtensionTemplateAsync ( MODULE _NAME , 'settings' ) ;
2024-06-24 22:15:08 +03:00
$ ( '#vectors_container' ) . append ( template ) ;
2023-11-30 00:01:59 +02:00
$ ( '#vectors_enabled_chats' ) . prop ( 'checked' , settings . enabled _chats ) . on ( 'input' , ( ) => {
settings . enabled _chats = $ ( '#vectors_enabled_chats' ) . prop ( 'checked' ) ;
2023-09-08 00:28:06 +03:00
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
2023-11-30 00:01:59 +02:00
toggleSettings ( ) ;
} ) ;
$ ( '#vectors_enabled_files' ) . prop ( 'checked' , settings . enabled _files ) . on ( 'input' , ( ) => {
settings . enabled _files = $ ( '#vectors_enabled_files' ) . prop ( 'checked' ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
toggleSettings ( ) ;
2023-09-08 00:28:06 +03:00
} ) ;
2023-09-08 13:57:27 +03:00
$ ( '#vectors_source' ) . val ( settings . source ) . on ( 'change' , ( ) => {
settings . source = String ( $ ( '#vectors_source' ) . val ( ) ) ;
2024-03-01 23:52:49 +01:00
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
2024-03-02 23:16:18 +02:00
toggleSettings ( ) ;
2024-03-01 23:52:49 +01:00
} ) ;
2024-08-13 20:50:41 +03:00
$ ( '#api_key_nomicai' ) . on ( 'click' , async ( ) => {
const popupText = 'NomicAI API Key:' ;
const key = await callGenericPopup ( popupText , POPUP _TYPE . INPUT , '' , {
customButtons : [ {
text : 'Remove Key' ,
appendAtEnd : true ,
result : POPUP _RESULT . NEGATIVE ,
action : async ( ) => {
await writeSecret ( SECRET _KEYS . NOMICAI , '' ) ;
toastr . success ( 'API Key removed' ) ;
$ ( '#api_key_nomicai' ) . toggleClass ( 'success' , ! ! secret _state [ SECRET _KEYS . NOMICAI ] ) ;
saveSettingsDebounced ( ) ;
} ,
} ] ,
} ) ;
if ( ! key ) {
return ;
2024-03-12 20:10:25 +01:00
}
2024-08-13 20:50:41 +03:00
await writeSecret ( SECRET _KEYS . NOMICAI , String ( key ) ) ;
$ ( '#api_key_nomicai' ) . toggleClass ( 'success' , ! ! secret _state [ SECRET _KEYS . NOMICAI ] ) ;
toastr . success ( 'API Key saved' ) ;
2024-03-12 20:10:25 +01:00
saveSettingsDebounced ( ) ;
} ) ;
2024-03-02 23:16:18 +02:00
$ ( '#vectors_togetherai_model' ) . val ( settings . togetherai _model ) . on ( 'change' , ( ) => {
settings . togetherai _model = String ( $ ( '#vectors_togetherai_model' ) . val ( ) ) ;
2023-09-08 13:57:27 +03:00
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
2024-03-12 21:41:30 +02:00
$ ( '#vectors_openai_model' ) . val ( settings . openai _model ) . on ( 'change' , ( ) => {
settings . openai _model = String ( $ ( '#vectors_openai_model' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
2024-04-19 00:07:12 +03:00
$ ( '#vectors_cohere_model' ) . val ( settings . cohere _model ) . on ( 'change' , ( ) => {
settings . cohere _model = String ( $ ( '#vectors_cohere_model' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
2024-05-28 22:54:50 +03:00
$ ( '#vectors_ollama_model' ) . val ( settings . ollama _model ) . on ( 'input' , ( ) => {
settings . ollama _model = String ( $ ( '#vectors_ollama_model' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
2024-06-09 01:03:22 +03:00
$ ( '#vectors_vllm_model' ) . val ( settings . vllm _model ) . on ( 'input' , ( ) => {
settings . vllm _model = String ( $ ( '#vectors_vllm_model' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
2024-05-28 22:54:50 +03:00
$ ( '#vectors_ollama_keep' ) . prop ( 'checked' , settings . ollama _keep ) . on ( 'input' , ( ) => {
settings . ollama _keep = $ ( '#vectors_ollama_keep' ) . prop ( 'checked' ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
2023-09-09 21:26:04 +03:00
$ ( '#vectors_template' ) . val ( settings . template ) . on ( 'input' , ( ) => {
settings . template = String ( $ ( '#vectors_template' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
$ ( '#vectors_depth' ) . val ( settings . depth ) . on ( 'input' , ( ) => {
settings . depth = Number ( $ ( '#vectors_depth' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
$ ( '#vectors_protect' ) . val ( settings . protect ) . on ( 'input' , ( ) => {
settings . protect = Number ( $ ( '#vectors_protect' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
$ ( '#vectors_insert' ) . val ( settings . insert ) . on ( 'input' , ( ) => {
settings . insert = Number ( $ ( '#vectors_insert' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
$ ( '#vectors_query' ) . val ( settings . query ) . on ( 'input' , ( ) => {
settings . query = Number ( $ ( '#vectors_query' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
$ ( ` input[name="vectors_position"][value=" ${ settings . position } "] ` ) . prop ( 'checked' , true ) ;
$ ( 'input[name="vectors_position"]' ) . on ( 'change' , ( ) => {
settings . position = Number ( $ ( 'input[name="vectors_position"]:checked' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
2023-09-08 15:25:10 +03:00
$ ( '#vectors_vectorize_all' ) . on ( 'click' , onVectorizeAllClick ) ;
2023-12-31 04:00:04 +02:00
$ ( '#vectors_purge' ) . on ( 'click' , onPurgeClick ) ;
$ ( '#vectors_view_stats' ) . on ( 'click' , onViewStatsClick ) ;
2024-04-19 00:16:23 +03:00
$ ( '#vectors_files_vectorize_all' ) . on ( 'click' , onVectorizeAllFilesClick ) ;
$ ( '#vectors_files_purge' ) . on ( 'click' , onPurgeFilesClick ) ;
2023-09-08 00:28:06 +03:00
2023-11-30 00:01:59 +02:00
$ ( '#vectors_size_threshold' ) . val ( settings . size _threshold ) . on ( 'input' , ( ) => {
settings . size _threshold = Number ( $ ( '#vectors_size_threshold' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
$ ( '#vectors_chunk_size' ) . val ( settings . chunk _size ) . on ( 'input' , ( ) => {
settings . chunk _size = Number ( $ ( '#vectors_chunk_size' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
$ ( '#vectors_chunk_count' ) . val ( settings . chunk _count ) . on ( 'input' , ( ) => {
settings . chunk _count = Number ( $ ( '#vectors_chunk_count' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
2023-12-11 22:47:26 +02:00
$ ( '#vectors_include_wi' ) . prop ( 'checked' , settings . include _wi ) . on ( 'input' , ( ) => {
settings . include _wi = ! ! $ ( '#vectors_include_wi' ) . prop ( 'checked' ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
2024-04-10 00:13:27 +02:00
$ ( '#vectors_summarize' ) . prop ( 'checked' , settings . summarize ) . on ( 'input' , ( ) => {
settings . summarize = ! ! $ ( '#vectors_summarize' ) . prop ( 'checked' ) ;
Object . assign ( extension _settings . vectors , settings ) ;
2024-04-16 21:31:51 +02:00
saveSettingsDebounced ( ) ;
} ) ;
2024-04-16 22:53:58 +02:00
$ ( '#vectors_summarize_user' ) . prop ( 'checked' , settings . summarize _sent ) . on ( 'input' , ( ) => {
settings . summarize _sent = ! ! $ ( '#vectors_summarize_user' ) . prop ( 'checked' ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
$ ( '#vectors_summary_source' ) . val ( settings . summary _source ) . on ( 'change' , ( ) => {
2024-04-16 21:31:51 +02:00
settings . summary _source = String ( $ ( '#vectors_summary_source' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
2024-04-16 22:53:58 +02:00
saveSettingsDebounced ( ) ;
} ) ;
$ ( '#vectors_summary_prompt' ) . val ( settings . summary _prompt ) . on ( 'input' , ( ) => {
settings . summary _prompt = String ( $ ( '#vectors_summary_prompt' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
2023-12-11 22:47:26 +02:00
saveSettingsDebounced ( ) ;
} ) ;
2023-12-31 04:00:04 +02:00
$ ( '#vectors_message_chunk_size' ) . val ( settings . message _chunk _size ) . on ( 'input' , ( ) => {
settings . message _chunk _size = Number ( $ ( '#vectors_message_chunk_size' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
2024-04-17 02:09:22 +03:00
$ ( '#vectors_size_threshold_db' ) . val ( settings . size _threshold _db ) . on ( 'input' , ( ) => {
settings . size _threshold _db = Number ( $ ( '#vectors_size_threshold_db' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
$ ( '#vectors_chunk_size_db' ) . val ( settings . chunk _size _db ) . on ( 'input' , ( ) => {
settings . chunk _size _db = Number ( $ ( '#vectors_chunk_size_db' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
$ ( '#vectors_chunk_count_db' ) . val ( settings . chunk _count _db ) . on ( 'input' , ( ) => {
settings . chunk _count _db = Number ( $ ( '#vectors_chunk_count_db' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
2024-06-06 21:45:47 +03:00
$ ( '#vectors_overlap_percent' ) . val ( settings . overlap _percent ) . on ( 'input' , ( ) => {
settings . overlap _percent = Number ( $ ( '#vectors_overlap_percent' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
$ ( '#vectors_overlap_percent_db' ) . val ( settings . overlap _percent _db ) . on ( 'input' , ( ) => {
settings . overlap _percent _db = Number ( $ ( '#vectors_overlap_percent_db' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
2024-04-17 02:09:22 +03:00
$ ( '#vectors_file_template_db' ) . val ( settings . file _template _db ) . on ( 'input' , ( ) => {
settings . file _template _db = String ( $ ( '#vectors_file_template_db' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
$ ( ` input[name="vectors_file_position_db"][value=" ${ settings . file _position _db } "] ` ) . prop ( 'checked' , true ) ;
$ ( 'input[name="vectors_file_position_db"]' ) . on ( 'change' , ( ) => {
settings . file _position _db = Number ( $ ( 'input[name="vectors_file_position_db"]:checked' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
$ ( '#vectors_file_depth_db' ) . val ( settings . file _depth _db ) . on ( 'input' , ( ) => {
settings . file _depth _db = Number ( $ ( '#vectors_file_depth_db' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
$ ( '#vectors_file_depth_role_db' ) . val ( settings . file _depth _role _db ) . on ( 'input' , ( ) => {
settings . file _depth _role _db = Number ( $ ( '#vectors_file_depth_role_db' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
2024-04-21 03:24:01 +03:00
$ ( '#vectors_translate_files' ) . prop ( 'checked' , settings . translate _files ) . on ( 'input' , ( ) => {
settings . translate _files = ! ! $ ( '#vectors_translate_files' ) . prop ( 'checked' ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
2024-04-23 03:09:52 +03:00
$ ( '#vectors_enabled_world_info' ) . prop ( 'checked' , settings . enabled _world _info ) . on ( 'input' , ( ) => {
settings . enabled _world _info = ! ! $ ( '#vectors_enabled_world_info' ) . prop ( 'checked' ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
toggleSettings ( ) ;
} ) ;
$ ( '#vectors_enabled_for_all' ) . prop ( 'checked' , settings . enabled _for _all ) . on ( 'input' , ( ) => {
settings . enabled _for _all = ! ! $ ( '#vectors_enabled_for_all' ) . prop ( 'checked' ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
$ ( '#vectors_max_entries' ) . val ( settings . max _entries ) . on ( 'input' , ( ) => {
settings . max _entries = Number ( $ ( '#vectors_max_entries' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
2024-05-23 17:28:43 +03:00
$ ( '#vectors_score_threshold' ) . val ( settings . score _threshold ) . on ( 'input' , ( ) => {
settings . score _threshold = Number ( $ ( '#vectors_score_threshold' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
2024-06-16 02:16:27 +03:00
$ ( '#vectors_force_chunk_delimiter' ) . prop ( 'checked' , settings . force _chunk _delimiter ) . on ( 'input' , ( ) => {
settings . force _chunk _delimiter = String ( $ ( '#vectors_force_chunk_delimiter' ) . val ( ) ) ;
Object . assign ( extension _settings . vectors , settings ) ;
saveSettingsDebounced ( ) ;
} ) ;
2024-06-22 16:38:00 +03:00
// Reuse the existing "download model" dialog to pull an Ollama model,
// pre-filling the dialog input with the currently configured model name.
$('#vectors_ollama_pull').on('click', (event) => {
    event.preventDefault();
    const currentModel = extension_settings.vectors.ollama_model || '';
    $('#ollama_download_model').trigger('click');
    $('#dialogue_popup_input').val(currentModel);
});
2024-08-13 20:50:41 +03:00
// Show a success badge on the NomicAI key field when a secret is stored,
// then sync the visibility of source-dependent settings sections.
const hasNomicKey = !!secret_state[SECRET_KEYS.NOMICAI];
$('#api_key_nomicai').toggleClass('success', hasNomicKey);

toggleSettings();
2023-09-08 00:28:06 +03:00
// Any message mutation in the current chat triggers incremental re-vectorization.
const chatEvents = [
    event_types.MESSAGE_DELETED,
    event_types.MESSAGE_EDITED,
    event_types.MESSAGE_SENT,
    event_types.MESSAGE_RECEIVED,
    event_types.MESSAGE_SWIPED,
];
for (const chatEvent of chatEvents) {
    eventSource.on(chatEvent, onChatEvent);
}

// Deleting a chat (solo or group) drops its vector collection entirely.
eventSource.on(event_types.CHAT_DELETED, purgeVectorIndex);
eventSource.on(event_types.GROUP_CHAT_DELETED, purgeVectorIndex);

// Removing a file attachment purges that file's own vector index.
eventSource.on(event_types.FILE_ATTACHMENT_DELETED, purgeFileVectorIndex);
2024-05-30 14:49:57 +03:00
// /db-ingest — force vectorization of every Data Bank attachment.
SlashCommandParser.addCommandObject(SlashCommand.fromProps({
    name: 'db-ingest',
    aliases: ['databank-ingest', 'data-bank-ingest'],
    helpString: 'Force the ingestion of all Data Bank attachments.',
    callback: async function () {
        await ingestDataBankAttachments();
        return '';
    },
}));
// /db-purge — delete the vector index of every Data Bank attachment.
SlashCommandParser.addCommandObject(SlashCommand.fromProps({
    name: 'db-purge',
    aliases: ['databank-purge', 'data-bank-purge'],
    helpString: 'Purge the vector index for all Data Bank attachments.',
    callback: async function () {
        const attachments = getDataBankAttachments();
        // Purge sequentially; each purge is an awaited server call.
        for (const attachment of attachments) {
            await purgeFileVectorIndex(attachment.url);
        }
        return '';
    },
}));
// /db-search — vector-similarity search over Data Bank attachments;
// returns a JSON array of the URLs of the best-matching files.
SlashCommandParser.addCommandObject(SlashCommand.fromProps({
    name: 'db-search',
    callback: async (args, query) => {
        // NaN (e.g. an empty/invalid argument) means "no threshold" (null);
        // otherwise the value is clamped into the [0, 1] range.
        const rawThreshold = Number(args?.threshold ?? settings.score_threshold);
        const threshold = Number.isNaN(rawThreshold) ? null : Math.min(1, Math.max(0, rawThreshold));

        const source = String(args?.source ?? '');
        const attachments = source ? getDataBankAttachmentsForSource(source, false) : getDataBankAttachments(false);
        const collectionIds = await ingestDataBankAttachments(String(source));
        const queryResults = await queryMultipleCollections(collectionIds, String(query), settings.chunk_count_db, threshold);

        // Translate matched collection IDs back into the attachment URLs they came from.
        const urls = [];
        for (const collectionId of Object.keys(queryResults)) {
            const attachment = attachments.find(x => getFileCollectionId(x.url) === collectionId);
            if (attachment) {
                urls.push(attachment.url);
            }
        }

        return JSON.stringify(urls);
    },
    aliases: ['databank-search', 'data-bank-search'],
    helpString: 'Search the Data Bank for a specific query using vector similarity. Returns a list of file URLs with the most relevant content.',
    namedArgumentList: [
        new SlashCommandNamedArgument('threshold', 'Threshold for the similarity score in the [0, 1] range. Uses the global config value if not set.', ARGUMENT_TYPE.NUMBER, false, false, ''),
        new SlashCommandNamedArgument('source', 'Optional filter for the attachments by source.', ARGUMENT_TYPE.STRING, false, false, '', ['global', 'character', 'chat']),
    ],
    unnamedArgumentList: [
        new SlashCommandArgument('Query to search by.', ARGUMENT_TYPE.STRING, true, false),
    ],
    returns: ARGUMENT_TYPE.LIST,
}));
2024-06-22 17:41:02 +03:00
// Debug-menu entry: wipe every stored vector collection after confirmation.
registerDebugFunction('purge-everything', 'Purge all vector indices', 'Obliterate all stored vectors for all sources. No mercy.', async () => {
    const confirmed = confirm('Are you sure?');
    if (!confirmed) {
        return;
    }
    await purgeAllVectorIndexes();
});
2023-09-08 00:28:06 +03:00
} ) ;