2023-12-02 19:04:51 +01:00
import { substituteParams } from '../../../script.js' ;
import { extension _settings } from '../../extensions.js' ;
2023-07-20 19:32:15 +02:00
export {
regex _placement ,
getRegexedString ,
2023-12-02 21:06:57 +01:00
runRegexScript ,
2023-12-02 20:11:06 +01:00
} ;
2023-07-20 19:32:15 +02:00
2024-01-03 18:53:24 +01:00
/**
 * Identifies the kind of text a regex script is applied to.
 * A script lists the placements it applies to; `getRegexedString` only runs
 * scripts whose list includes the placement it was called with.
 * @enum {number} Where the regex script should be applied
 */
const regex_placement = {
    /**
     * @deprecated MD Display is deprecated. Do not use.
     */
    MD_DISPLAY: 0,
    USER_INPUT: 1,
    AI_OUTPUT: 2,
    SLASH_COMMAND: 3,
};
2023-07-20 19:32:15 +02:00
2024-01-03 18:53:24 +01:00
/**
 * Selects how the matched text is inserted into the replacement string.
 * `substituteRegexParams` compares its `replaceStrategy` option against `OVERLAY`.
 * @enum {number} How the regex script should replace the matched string
 */
const regex_replace_strategy = {
    REPLACE: 0,
    OVERLAY: 1,
};
2023-07-20 19:32:15 +02:00
2024-01-03 18:53:24 +01:00
/**
 * Instantiates a regular expression from a string.
 *
 * The input may be written either as a bare pattern (`foo.*bar`) or in
 * literal notation with optional flags (`/foo.*bar/gi`).
 *
 * @param {string} input The input string.
 * @returns {RegExp|undefined} The regular expression instance, or `undefined`
 * when the input cannot be parsed or compiled (any thrown error is swallowed).
 * @copyright Originally from: https://github.com/IonicaBizau/regex-parser.js/blob/master/lib/index.js
 */
function regexFromString(input) {
    try {
        // Split the input into: optional leading slash, pattern body, trailing flags.
        // A non-matching input yields `null`, and the property access below then
        // throws, which lands in the catch block.
        const parts = input.match(/(\/?)(.+)\1([a-z]*)/i);
        const pattern = parts[2];
        const flags = parts[3];

        // Unknown or duplicated flag letters: treat the whole input as the pattern
        if (flags && !/^(?!.*?(.).*?\1)[gmixXsuUAJ]+$/.test(flags)) {
            return RegExp(input);
        }

        // Create the regular expression; this throws for an invalid pattern or
        // for flags the engine does not support
        return new RegExp(pattern, flags);
    } catch {
        return undefined;
    }
}
2024-01-03 18:53:24 +01:00
/**
 * Parent function to fetch a regexed version of a raw string.
 *
 * Every script in `extension_settings.regex` is applied in order (each one
 * receives the output of the previous one) when both of these hold:
 *  - the script is applicable to the current context (see the condition below), and
 *  - the script's `placement` list includes the requested placement.
 *
 * The input is returned unchanged when the 'regex' extension is listed in
 * `extension_settings.disabledExtensions`, when the string is empty, or when
 * no placement is given.
 *
 * @param {string} rawString The raw string to be regexed
 * @param {regex_placement} placement The placement of the string
 * @param {RegexParams} params The parameters to use for the regex script
 * @returns {string} The regexed string; an empty string if `rawString` is not a string
 * @typedef {{characterOverride?: string, isMarkdown?: boolean, isPrompt?: boolean}} RegexParams The parameters to use for the regex script
 */
function getRegexedString(rawString, placement, { characterOverride, isMarkdown, isPrompt } = {}) {
    // Anything other than a string cannot be processed
    if (typeof rawString !== 'string') {
        console.warn('getRegexedString: rawString is not a string. Returning empty string.');
        return '';
    }

    if (extension_settings.disabledExtensions.includes('regex') || !rawString || placement === undefined) {
        return rawString;
    }

    let finalString = rawString;
    extension_settings.regex.forEach((script) => {
        const appliesToContext =
            // Script applies to Markdown and input is Markdown
            (script.markdownOnly && isMarkdown) ||
            // Script applies to Generate and input is Generate
            (script.promptOnly && isPrompt) ||
            // Script applies to all cases when neither "only" flag is set, but it is
            // skipped for `isMarkdown`, as the source (chat history) should already
            // have been changed beforehand
            (!script.markdownOnly && !script.promptOnly && !isMarkdown);

        if (appliesToContext && script.placement.includes(placement)) {
            finalString = runRegexScript(script, finalString, { characterOverride });
        }
    });

    return finalString;
}
2024-01-03 11:28:56 +01:00
/**
 * Runs the provided regex script on the given string.
 *
 * The script's `replaceString` may reference the full match as `{{match}}` or
 * `$0`, and capture groups as `$1`, `$2`, ... (multi-digit indices included).
 * Each inserted match or group is first passed through `filterString` with the
 * script's `trimStrings`. A reference to a group that does not exist or did
 * not participate in the match is replaced by an empty string. The assembled
 * replacement is finally passed through `substituteParams`.
 *
 * The input is returned unchanged when the script is missing, disabled, has
 * no `findRegex`, when `rawString` is empty, or when `findRegex` cannot be
 * turned into a regular expression.
 *
 * @param {object} regexScript The regex script to run
 * @param {string} rawString The string to run the regex script on
 * @param {RegexScriptParams} params The parameters to use for the regex script
 * @returns {string} The new string
 * @typedef {{characterOverride?: string}} RegexScriptParams The parameters to use for the regex script
 */
function runRegexScript(regexScript, rawString, { characterOverride } = {}) {
    if (!regexScript || regexScript.disabled || !regexScript.findRegex || !rawString) {
        return rawString;
    }

    const findPattern = regexScript.substituteRegex
        ? substituteParams(regexScript.findRegex)
        : regexScript.findRegex;
    const findRegex = regexFromString(findPattern);

    // The find pattern could not be compiled. Return the input untouched.
    if (!findRegex) {
        return rawString;
    }

    // Identical for every match, so it is computed once. A missing replace
    // string is treated as empty (the match is removed).
    const replaceTemplate = (regexScript.replaceString ?? '').replace(/{{match}}/gi, '$0');

    // Run replacement. Currently does not support the Overlay strategy
    return rawString.replace(findRegex, (...args) => {
        // The callback receives (match, p1..pN, offset, input[, groups]).
        // Captures are strings or undefined, so the first number is the offset;
        // everything before it is the full match followed by the capture groups.
        const offsetIndex = args.findIndex((arg) => typeof arg === 'number');
        const captures = args.slice(0, offsetIndex);

        const replaceWithGroups = replaceTemplate.replaceAll(/\$(\d+)/g, (_, num) => {
            // Get the full match or a capture group
            const group = captures[Number(num)];

            // No such group, or it did not match - return the empty string
            if (!group) {
                return '';
            }

            // Remove trim strings from the match
            // TODO: Handle overlay here
            return filterString(group, regexScript.trimStrings, { characterOverride });
        });

        // Substitute at the end
        return substituteParams(replaceWithGroups);
    });
}
2024-01-03 18:53:24 +01:00
/**
 * Filters anything to trim from the regex match.
 *
 * Each trim string is passed through `substituteParams` (with the optional
 * character override) and every occurrence of the result is then removed
 * from the string. Trim strings are applied in order.
 *
 * @param {string} rawString The raw string to filter
 * @param {string[]} trimStrings The strings to trim; when this is not an array
 * the input is returned unchanged
 * @param {RegexScriptParams} params The parameters to use for the regex filter
 * @returns {string} The filtered string
 */
function filterString(rawString, trimStrings, { characterOverride } = {}) {
    // A script without a trim list has nothing to remove
    if (!Array.isArray(trimStrings)) {
        return rawString;
    }

    return trimStrings.reduce((filtered, trimString) => {
        const subTrimString = substituteParams(trimString, undefined, characterOverride);
        return filtered.replaceAll(subTrimString, '');
    }, rawString);
}
2024-01-03 18:53:24 +01:00
/**
 * Removes consecutive occurrences of `token` from one edge of `text`.
 * Works on the result of `text.split(token)`: empty pieces at the chosen edge
 * are dropped and the remainder is re-joined with `token`.
 * @param {string} text The text to strip
 * @param {string} token The repeated token to remove
 * @param {boolean} fromEnd Strip from the end instead of the start
 * @returns {string} The stripped text
 */
function stripRepeatedEdge(text, token, fromEnd) {
    const pieces = text.split(token);

    if (fromEnd) {
        let keep = pieces.length;
        while (keep > 0 && pieces[keep - 1].length === 0) {
            keep--;
        }
        return pieces.slice(0, keep).join(token);
    }

    let skip = 0;
    while (skip < pieces.length && pieces[skip].length === 0) {
        skip++;
    }
    return pieces.slice(skip).join(token);
}

/**
 * Substitutes regex-specific and normal parameters.
 *
 * `rawString` is first passed through `substituteParams`; the first
 * `{{match}}` in the result is then replaced by the regex match, inserted
 * literally. With the OVERLAY strategy, the text before and after
 * `{{match}}` (each cleaned with `spliceSymbols`) is first stripped from the
 * start and end of the match, so it is not duplicated in the output.
 *
 * @param {string} rawString The replacement template
 * @param {string} regexMatch The text matched by the regex
 * @param {RegexSubstituteParams} params The parameters to use for the regex substitution
 * @returns {string} The substituted string
 * @typedef {{characterOverride?: string, replaceStrategy?: number}} RegexSubstituteParams The parameters to use for the regex substitution
 */
function substituteRegexParams(rawString, regexMatch, { characterOverride, replaceStrategy } = {}) {
    const finalString = substituteParams(rawString, undefined, characterOverride);
    let overlaidMatch = regexMatch;

    if (replaceStrategy === regex_replace_strategy.OVERLAY) {
        const [prefix, suffix] = finalString.split('{{match}}');

        // There's a prefix: remove all its occurrences from the start of the match
        if (prefix) {
            overlaidMatch = stripRepeatedEdge(overlaidMatch, spliceSymbols(prefix, false), false);
        }

        // There's a suffix: remove all its occurrences from the end of the match
        if (suffix) {
            overlaidMatch = stripRepeatedEdge(overlaidMatch, spliceSymbols(suffix, true), true);
        }
    }

    // Only one match is replaced. This is by design.
    // Replacer functions are used so that `$&`, `$$` and similar sequences inside
    // the matched text are inserted as-is instead of being interpreted.
    return finalString.replace('{{match}}', () => overlaidMatch)
        || finalString.replace('{{match}}', () => regexMatch);
}
2024-01-03 18:53:24 +01:00
/**
 * Strips a run of "symbol" characters from one end of a string.
 *
 * A character is stripped when it is NOT a word character (`\w`) and NOT one
 * of `. , ? ' !` - for example whitespace, quotes or asterisks. Stripping
 * stops at the first character that is kept. Only one side is processed:
 * the start for a prefix, the end for a suffix.
 *
 * Matching is done per UTF-16 code unit, so characters outside the Basic
 * Multilingual Plane (e.g. emoji) are removed whole instead of being cut in half.
 *
 * @param {string} rawString The raw string to splice
 * @param {boolean} isSuffix String is a suffix (strip from the end) rather than
 * a prefix (strip from the start)
 * @returns {string} The spliced string
 */
function spliceSymbols(rawString, isSuffix) {
    return isSuffix
        ? rawString.replace(/[^\w.,?'!]+$/, '')
        : rawString.replace(/^[^\w.,?'!]+/, '');
}