tooot/src/modules/autolinker/matcher/email-matcher.js

312 lines
13 KiB
JavaScript

import * as tslib_1 from "tslib";
import { Matcher } from "./matcher";
import { alphaNumericAndMarksCharsStr, domainNameCharRegex } from "../regex-lib";
import { EmailMatch } from "../match/email-match";
import { throwUnhandledCaseError } from '../utils';
import { tldRegex } from "./tld-regex";
// For debugging: search for other "For debugging" lines
// import CliTable from 'cli-table';
// RegExp objects shared by every EmailMatcher instance. They are created once,
// at module load, so that repeated `Autolinker.link()` calls - each of which
// instantiates a new EmailMatcher - do not re-create them every time (which is
// very expensive - see https://github.com/gregjacobs/Autolinker.js/issues/314).
// Each one is described where it is assigned to its instance property.
var localPartCharRegex = new RegExp("[".concat(alphaNumericAndMarksCharsStr, "!#$%&'*+/=?^_`{|}~-]"));
var strictTldRegex = new RegExp("^".concat(tldRegex.source, "$"));
/**
 * @class Autolinker.matcher.Email
 * @extends Autolinker.matcher.Matcher
 *
 * Matcher which finds email addresses in an input string. The text is walked
 * one character at a time through a small state machine; an optional
 * 'mailto:' prefix is recognized and kept as part of the matched text.
 *
 * See this class's superclass ({@link Autolinker.matcher.Matcher}) for more details.
 */
var EmailMatcher = /** @class */ (function (_super) {
    tslib_1.__extends(EmailMatcher, _super);
    function EmailMatcher() {
        var instance = _super !== null && _super.apply(this, arguments) || this;
        /**
         * Matches a single character that may appear in the "local" part of an
         * email address, i.e. the "name" part of "name@site.com"
         */
        instance.localPartCharRegex = localPartCharRegex;
        /**
         * TLD regex anchored at both the beginning and the end, so that the
         * whole tested string has to be a valid TLD
         */
        instance.strictTldRegex = strictTldRegex;
        return instance;
    }
    /**
     * @inheritdoc
     */
    EmailMatcher.prototype.parseMatches = function (text) {
        // States of the parser
        var NON_EMAIL_MATCH = 0;
        var MAILTO = 1;
        var LOCAL_PART = 2;
        var LOCAL_PART_DOT = 3;
        var AT_SIGN = 4;
        var DOMAIN_CHAR = 5;
        var DOMAIN_HYPHEN = 6;
        var DOMAIN_DOT = 7;

        var tagBuilder = this.tagBuilder;
        var localCharRe = this.localPartCharRegex;
        var tldRe = this.strictTldRegex;
        var found = [];
        var textLength = text.length;
        // Shared placeholder used whenever no email address is being read
        var emptyMatch = new CurrentEmailMatch();

        // For each character of the 'mailto:' prefix, the character that has
        // to follow it
        var mailtoNextChar = {
            'm': 'a',
            'a': 'i',
            'i': 'l',
            'l': 't',
            't': 'o',
            'o': ':',
        };

        // Handler for each state, indexed by the state's numeric value
        var handlers = [
            onNonEmailMatch,
            onMailto,
            onLocalPart,
            onLocalPartDot,
            onAtSign,
            onDomainChar,
            onDomainHyphen,
            onDomainDot
        ];

        var pos = 0;
        var state = NON_EMAIL_MATCH;
        var pending = emptyMatch;

        // For debugging: search for other "For debugging" lines
        // const table = new CliTable( {
        //     head: [ 'pos', 'char', 'state', 'pending.idx', 'pending.hasDomainDot' ]
        // } );
        for (; pos < textLength; pos++) {
            var ch = text.charAt(pos);
            var handler = handlers[state];
            if (handler) {
                handler(ch);
            }
            else {
                throwUnhandledCaseError(state);
            }
            // For debugging: search for other "For debugging" lines
            // table.push( [ pos, ch, state, pending.idx, pending.hasDomainDot ] );
        }

        // The input may end in the middle of a valid email address
        finishMatch();
        // For debugging: search for other "For debugging" lines
        // console.log( '\n' + table.toString() );
        return found;

        // Not currently inside an email address: look for a character that
        // could start one
        function onNonEmailMatch(ch) {
            if (ch === 'm') {
                // might be the start of a 'mailto:' prefix
                startMatch(MAILTO);
                return;
            }
            if (localCharRe.test(ch)) {
                startMatch(LOCAL_PART);
            }
            // anything else cannot start an email address, keep scanning
        }

        // Reading what may be a 'mailto:' prefix
        function onMailto(ch) {
            var prev = text.charAt(pos - 1);
            if (prev === ':') {
                // The complete 'mailto:' prefix has been read
                if (localCharRe.test(ch)) {
                    state = LOCAL_PART;
                    pending = new CurrentEmailMatch(tslib_1.__assign({}, pending, { hasMailtoPrefix: true }));
                }
                else {
                    // 'mailto:' was not followed by anything meaningful (for
                    // example a space, or an '@' which formed 'mailto:@')
                    resetState();
                }
                return;
            }
            if (mailtoNextChar[prev] === ch) {
                // Still following the 'mailto:' prefix, stay in this state
                return;
            }
            // We were reading a prefix of 'mailto:' but the current character
            // does not continue it; treat what was read so far as the
            // beginning of a local part
            if (localCharRe.test(ch)) {
                state = LOCAL_PART;
            }
            else if (ch === '.') {
                state = LOCAL_PART_DOT;
            }
            else if (ch === '@') {
                state = AT_SIGN;
            }
            else {
                // not an email address character
                resetState();
            }
        }

        // Inside the "local part" of an email address (as opposed to the
        // "domain part")
        function onLocalPart(ch) {
            if (ch === '.') {
                state = LOCAL_PART_DOT;
                return;
            }
            if (ch === '@') {
                state = AT_SIGN;
                return;
            }
            if (!localCharRe.test(ch)) {
                // not an email address character
                resetState();
            }
            // otherwise stay in the local part
        }

        // A '.' was just read inside the local part
        function onLocalPartDot(ch) {
            // A second '.' in a row, an '@' directly after the dot, or any
            // character that is not valid in a local part means that this is
            // not an email address
            if (ch !== '.' && ch !== '@' && localCharRe.test(ch)) {
                state = LOCAL_PART;
            }
            else {
                resetState();
            }
        }

        // The '@' was just read: a domain name character has to follow
        function onAtSign(ch) {
            if (domainNameCharRegex.test(ch)) {
                state = DOMAIN_CHAR;
            }
            else {
                resetState();
            }
        }

        // Inside the domain part, the previous character was a regular domain
        // name character
        function onDomainChar(ch) {
            if (ch === '.') {
                state = DOMAIN_DOT;
                return;
            }
            if (ch === '-') {
                state = DOMAIN_HYPHEN;
                return;
            }
            if (!domainNameCharRegex.test(ch)) {
                // The address ended here; it is a match if the criteria have
                // been met
                finishMatch();
            }
            // otherwise stay in the DomainChar state
        }

        // Inside the domain part, the previous character was a '-'
        function onDomainHyphen(ch) {
            // Two hyphens ("--") and hyphen+dot ("-.") are not valid, and
            // neither is any character which is not a domain name character
            if (ch !== '-' && ch !== '.' && domainNameCharRegex.test(ch)) {
                state = DOMAIN_CHAR;
            }
            else {
                finishMatch();
            }
        }

        // Inside the domain part, the previous character was a '.'
        function onDomainDot(ch) {
            // Two dots ("..") and dot+hyphen (".-") are not valid, and neither
            // is any character which is not a domain name character
            if (ch !== '.' && ch !== '-' && domainNameCharRegex.test(ch)) {
                state = DOMAIN_CHAR;
                // A '.' followed by a valid domain character means the domain
                // part is valid, so there is at least a partial match (more
                // characters of the address may still follow)
                pending = new CurrentEmailMatch(tslib_1.__assign({}, pending, { hasDomainDot: true }));
            }
            else {
                finishMatch();
            }
        }

        // Starts tracking a possible email address beginning at the current
        // position
        function startMatch(nextState) {
            state = nextState;
            pending = new CurrentEmailMatch({ idx: pos });
        }

        // Drops whatever was being tracked and goes back to scanning
        function resetState() {
            state = NON_EMAIL_MATCH;
            pending = emptyMatch;
        }

        /**
         * Determines if the given email address has a valid TLD or not
         * @param {string} emailAddress - email address
         * @return {Boolean} - true if the email has a valid TLD, false otherwise
         */
        function hasValidTld(emailAddress) {
            // everything after the last '.', or the whole string without one
            var tld = emailAddress.slice(emailAddress.lastIndexOf('.') + 1);
            return tldRe.test(tld.toLowerCase());
        }

        /*
         * Records the tracked email address as an EmailMatch if it is valid,
         * then resets the state so that another address can be read.
         */
        function finishMatch() {
            // at least one dot is needed in the domain for a valid address
            if (pending.hasDomainDot) {
                var matchedText = text.slice(pending.idx, pos);
                // A trailing '.' or '-' is a valid domain name character, but
                // only belongs to the address when something follows it, so
                // strip it off now
                var lastChar = matchedText.charAt(matchedText.length - 1);
                if (lastChar === '-' || lastChar === '.') {
                    matchedText = matchedText.slice(0, -1);
                }
                var emailAddress = matchedText;
                if (pending.hasMailtoPrefix) {
                    emailAddress = matchedText.slice('mailto:'.length);
                }
                // only addresses with a valid TLD are reported
                if (hasValidTld(emailAddress)) {
                    found.push(new EmailMatch({
                        tagBuilder: tagBuilder,
                        matchedText: matchedText,
                        offset: pending.idx,
                        email: emailAddress
                    }));
                }
            }
            resetState();
        }
    };
    return EmailMatcher;
}(Matcher));
export { EmailMatcher };
/**
 * Snapshot of the email address currently being read by the parser.
 *
 * Instances are never modified after construction in this block; callers
 * build a new one from a config object instead.
 *
 * cfg.idx             - index in the text where the match starts (-1 when not given)
 * cfg.hasMailtoPrefix - truthy if the match starts with a 'mailto:' prefix
 * cfg.hasDomainDot    - truthy once a '.' followed by a domain character was read
 */
var CurrentEmailMatch = /** @class */ (function () {
    function CurrentEmailMatch(cfg) {
        var options = cfg === void 0 ? {} : cfg;
        this.idx = options.idx === undefined ? -1 : options.idx;
        this.hasMailtoPrefix = Boolean(options.hasMailtoPrefix);
        this.hasDomainDot = Boolean(options.hasDomainDot);
    }
    return CurrentEmailMatch;
}());
//# sourceMappingURL=email-matcher.js.map