mirror of
https://github.com/usememos/memos.git
synced 2025-03-25 15:10:17 +01:00
184 lines
4.3 KiB
Go
184 lines
4.3 KiB
Go
// Package tokenizer splits text into a flat sequence of tokens (single-character
// symbols, runs of digits and runs of other text) and provides helpers for
// searching, splitting and re-joining token slices.
package tokenizer
|
|
|
|
// TokenType identifies the kind of a Token. It is declared as an alias of
// string, so token types and plain strings can be used interchangeably.
type TokenType = string
|
|
|
|
// Special character tokens.
|
|
const (
|
|
Underscore TokenType = "_"
|
|
Asterisk TokenType = "*"
|
|
PoundSign TokenType = "#"
|
|
Backtick TokenType = "`"
|
|
LeftSquareBracket TokenType = "["
|
|
RightSquareBracket TokenType = "]"
|
|
LeftParenthesis TokenType = "("
|
|
RightParenthesis TokenType = ")"
|
|
ExclamationMark TokenType = "!"
|
|
QuestionMark TokenType = "?"
|
|
Tilde TokenType = "~"
|
|
Hyphen TokenType = "-"
|
|
PlusSign TokenType = "+"
|
|
Dot TokenType = "."
|
|
LessThan TokenType = "<"
|
|
GreaterThan TokenType = ">"
|
|
DollarSign TokenType = "$"
|
|
EqualSign TokenType = "="
|
|
Pipe TokenType = "|"
|
|
Colon TokenType = ":"
|
|
Caret TokenType = "^"
|
|
Backslash TokenType = "\\"
|
|
Newline TokenType = "\n"
|
|
Space TokenType = " "
|
|
)
|
|
|
|
// Text based tokens.
|
|
const (
|
|
Number TokenType = "number"
|
|
Text TokenType = ""
|
|
)
|
|
|
|
type Token struct {
|
|
Type TokenType
|
|
Value string
|
|
}
|
|
|
|
func NewToken(tp, text string) *Token {
|
|
return &Token{
|
|
Type: tp,
|
|
Value: text,
|
|
}
|
|
}
|
|
|
|
func Tokenize(text string) []*Token {
|
|
tokens := []*Token{}
|
|
for _, c := range text {
|
|
switch c {
|
|
case '_':
|
|
tokens = append(tokens, NewToken(Underscore, "_"))
|
|
case '*':
|
|
tokens = append(tokens, NewToken(Asterisk, "*"))
|
|
case '#':
|
|
tokens = append(tokens, NewToken(PoundSign, "#"))
|
|
case '`':
|
|
tokens = append(tokens, NewToken(Backtick, "`"))
|
|
case '[':
|
|
tokens = append(tokens, NewToken(LeftSquareBracket, "["))
|
|
case ']':
|
|
tokens = append(tokens, NewToken(RightSquareBracket, "]"))
|
|
case '(':
|
|
tokens = append(tokens, NewToken(LeftParenthesis, "("))
|
|
case ')':
|
|
tokens = append(tokens, NewToken(RightParenthesis, ")"))
|
|
case '!':
|
|
tokens = append(tokens, NewToken(ExclamationMark, "!"))
|
|
case '?':
|
|
tokens = append(tokens, NewToken(QuestionMark, "?"))
|
|
case '~':
|
|
tokens = append(tokens, NewToken(Tilde, "~"))
|
|
case '-':
|
|
tokens = append(tokens, NewToken(Hyphen, "-"))
|
|
case '<':
|
|
tokens = append(tokens, NewToken(LessThan, "<"))
|
|
case '>':
|
|
tokens = append(tokens, NewToken(GreaterThan, ">"))
|
|
case '+':
|
|
tokens = append(tokens, NewToken(PlusSign, "+"))
|
|
case '.':
|
|
tokens = append(tokens, NewToken(Dot, "."))
|
|
case '$':
|
|
tokens = append(tokens, NewToken(DollarSign, "$"))
|
|
case '=':
|
|
tokens = append(tokens, NewToken(EqualSign, "="))
|
|
case '|':
|
|
tokens = append(tokens, NewToken(Pipe, "|"))
|
|
case ':':
|
|
tokens = append(tokens, NewToken(Colon, ":"))
|
|
case '^':
|
|
tokens = append(tokens, NewToken(Caret, "^"))
|
|
case '\\':
|
|
tokens = append(tokens, NewToken(Backslash, `\`))
|
|
case '\n':
|
|
tokens = append(tokens, NewToken(Newline, "\n"))
|
|
case ' ':
|
|
tokens = append(tokens, NewToken(Space, " "))
|
|
default:
|
|
var prevToken *Token
|
|
if len(tokens) > 0 {
|
|
prevToken = tokens[len(tokens)-1]
|
|
}
|
|
|
|
isNumber := c >= '0' && c <= '9'
|
|
if prevToken != nil {
|
|
if (prevToken.Type == Text && !isNumber) || (prevToken.Type == Number && isNumber) {
|
|
prevToken.Value += string(c)
|
|
continue
|
|
}
|
|
}
|
|
|
|
if isNumber {
|
|
tokens = append(tokens, NewToken(Number, string(c)))
|
|
} else {
|
|
tokens = append(tokens, NewToken(Text, string(c)))
|
|
}
|
|
}
|
|
}
|
|
return tokens
|
|
}
|
|
|
|
func (t *Token) String() string {
|
|
return t.Value
|
|
}
|
|
|
|
func Stringify(tokens []*Token) string {
|
|
text := ""
|
|
for _, token := range tokens {
|
|
text += token.String()
|
|
}
|
|
return text
|
|
}
|
|
|
|
func Split(tokens []*Token, delimiter TokenType) [][]*Token {
|
|
if len(tokens) == 0 {
|
|
return [][]*Token{}
|
|
}
|
|
|
|
result := make([][]*Token, 0)
|
|
current := make([]*Token, 0)
|
|
for _, token := range tokens {
|
|
if token.Type == delimiter {
|
|
result = append(result, current)
|
|
current = make([]*Token, 0)
|
|
} else {
|
|
current = append(current, token)
|
|
}
|
|
}
|
|
result = append(result, current)
|
|
return result
|
|
}
|
|
|
|
func Find(tokens []*Token, target TokenType) int {
|
|
for i, token := range tokens {
|
|
if token.Type == target {
|
|
return i
|
|
}
|
|
}
|
|
return -1
|
|
}
|
|
|
|
func FindUnescaped(tokens []*Token, target TokenType) int {
|
|
for i, token := range tokens {
|
|
if token.Type == target && (i == 0 || (i > 0 && tokens[i-1].Type != Backslash)) {
|
|
return i
|
|
}
|
|
}
|
|
return -1
|
|
}
|
|
|
|
func GetFirstLine(tokens []*Token) []*Token {
|
|
for i, token := range tokens {
|
|
if token.Type == Newline {
|
|
return tokens[:i]
|
|
}
|
|
}
|
|
return tokens
|
|
}
|