refactor: markdown parser matchers

Steven 2024-01-23 21:23:40 +08:00
parent bf905bba86
commit d165d87288
55 changed files with 454 additions and 894 deletions
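The heart of this refactor shows up in parser.go further down: the old two-step Match/Parse pair on every matcher is collapsed into a single Match that returns the parsed node together with the number of tokens it consumed. A minimal sketch of the resulting contract, with the interface name and signature taken from the diff (the comment wording is mine, not the file's):

// Unified matcher contract after this refactor: a parser returns
// (node, size > 0) on success, or (nil, 0) when it does not match,
// and the caller consumes exactly `size` tokens.
type BaseParser interface {
	Match(tokens []*tokenizer.Token) (ast.Node, int)
}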

View File

@ -79,7 +79,7 @@ func (n *BaseNode) SetNextSibling(node Node) {
func IsBlockNode(node Node) bool { func IsBlockNode(node Node) bool {
switch node.Type() { switch node.Type() {
case ParagraphNode, CodeBlockNode, HeadingNode, HorizontalRuleNode, BlockquoteNode, OrderedListNode, UnorderedListNode, TaskListNode, MathBlockNode: case ParagraphNode, CodeBlockNode, HeadingNode, HorizontalRuleNode, BlockquoteNode, OrderedListNode, UnorderedListNode, TaskListNode, MathBlockNode, TableNode, EmbeddedContentNode:
return true return true
default: default:
return false return false

View File

@ -1,7 +1,6 @@
package parser package parser
import ( import (
"errors"
"net/url" "net/url"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
@ -14,56 +13,31 @@ func NewAutoLinkParser() *AutoLinkParser {
return &AutoLinkParser{} return &AutoLinkParser{}
} }
func (*AutoLinkParser) Match(tokens []*tokenizer.Token) (int, bool) { func (*AutoLinkParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 3 { if len(tokens) < 3 {
return 0, false return nil, 0
} }
hasAngleBrackets := false matchedTokens := tokenizer.GetFirstLine(tokens)
if tokens[0].Type == tokenizer.LessThan { urlStr, isRawText := "", true
hasAngleBrackets = true if matchedTokens[0].Type == tokenizer.LessThan {
} greaterThanIndex := tokenizer.FindUnescaped(matchedTokens, tokenizer.GreaterThan)
if greaterThanIndex < 0 {
contentTokens := []*tokenizer.Token{} return nil, 0
for _, token := range tokens {
if token.Type == tokenizer.Newline || token.Type == tokenizer.Space {
break
} }
contentTokens = append(contentTokens, token) matchedTokens = matchedTokens[:greaterThanIndex+1]
if hasAngleBrackets && token.Type == tokenizer.GreaterThan { urlStr = tokenizer.Stringify(matchedTokens[1 : len(matchedTokens)-1])
break
}
}
if hasAngleBrackets && contentTokens[len(contentTokens)-1].Type != tokenizer.GreaterThan {
return 0, false
}
content := tokenizer.Stringify(contentTokens)
if !hasAngleBrackets {
u, err := url.Parse(content)
if err != nil || u.Scheme == "" || u.Host == "" {
return 0, false
}
}
return len(contentTokens), true
}
func (p *AutoLinkParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
url := tokenizer.Stringify(tokens[:size])
isRawText := true
if tokens[0].Type == tokenizer.LessThan && tokens[size-1].Type == tokenizer.GreaterThan {
isRawText = false isRawText = false
url = tokenizer.Stringify(tokens[1 : size-1]) } else {
u, err := url.Parse(tokenizer.Stringify(matchedTokens))
if err != nil || u.Scheme == "" || u.Host == "" {
return nil, 0
}
urlStr = tokenizer.Stringify(matchedTokens)
} }
return &ast.AutoLink{ return &ast.AutoLink{
URL: url, URL: urlStr,
IsRawText: isRawText, IsRawText: isRawText,
}, nil }, len(matchedTokens)
} }
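A rough usage sketch of the new AutoLink signature as exercised by the updated test in the next file; the sample URL and variable handling are illustrative only:

tokens := tokenizer.Tokenize("<https://example.com> trailing text")
node, size := NewAutoLinkParser().Match(tokens)
if node != nil {
	link := node.(*ast.AutoLink) // URL is the text between the angle brackets, IsRawText is false
	_ = link
	tokens = tokens[size:] // callers advance by the reported size instead of calling Parse
}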

View File

@ -36,7 +36,7 @@ func TestAutoLinkParser(t *testing.T) {
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewAutoLinkParser().Parse(tokens) node, _ := NewAutoLinkParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.link}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.link}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,40 +11,26 @@ func NewBlockquoteParser() *BlockquoteParser {
return &BlockquoteParser{} return &BlockquoteParser{}
} }
func (*BlockquoteParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *BlockquoteParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 3 { matchedTokens := tokenizer.GetFirstLine(tokens)
return 0, false if len(matchedTokens) < 3 {
return nil, 0
} }
if tokens[0].Type != tokenizer.GreaterThan || tokens[1].Type != tokenizer.Space { if matchedTokens[0].Type != tokenizer.GreaterThan || matchedTokens[1].Type != tokenizer.Space {
return 0, false return nil, 0
} }
contentTokens := []*tokenizer.Token{} contentTokens := matchedTokens[2:]
for _, token := range tokens[2:] { children, err := ParseInlineWithParsers(contentTokens, []InlineParser{NewLinkParser(), NewTextParser()})
if token.Type == tokenizer.Newline {
break
}
contentTokens = append(contentTokens, token)
}
if len(contentTokens) == 0 {
return 0, false
}
return len(contentTokens) + 2, true
}
func (p *BlockquoteParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
contentTokens := tokens[2:size]
children, err := ParseBlockWithParsers(contentTokens, []BlockParser{NewParagraphParser(), NewLineBreakParser()})
if err != nil { if err != nil {
return nil, err return nil, 0
} }
return &ast.Blockquote{ return &ast.Blockquote{
Children: children, Children: []ast.Node{
}, nil &ast.Paragraph{
Children: children,
},
},
}, len(matchedTokens)
} }
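Blockquote content is now parsed inline and wrapped in a single Paragraph child rather than re-run through the block parsers. A hedged sketch of what Match produces for a one-line quote (input text illustrative):

tokens := tokenizer.Tokenize("> Hello world")
node, size := NewBlockquoteParser().Match(tokens)
// On success node is an *ast.Blockquote whose only child is an *ast.Paragraph
// holding the inline nodes parsed from "Hello world"; size covers the first line.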

View File

@ -15,6 +15,10 @@ func TestBlockquoteParser(t *testing.T) {
text string text string
blockquote ast.Node blockquote ast.Node
}{ }{
{
text: ">Hello world",
blockquote: nil,
},
{ {
text: "> Hello world", text: "> Hello world",
blockquote: &ast.Blockquote{ blockquote: &ast.Blockquote{
@ -57,15 +61,11 @@ func TestBlockquoteParser(t *testing.T) {
}, },
}, },
}, },
{
text: ">Hello\nworld",
blockquote: nil,
},
} }
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewBlockquoteParser().Parse(tokens) node, _ := NewBlockquoteParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.blockquote}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.blockquote}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,52 +11,44 @@ func NewBoldParser() InlineParser {
return &BoldParser{} return &BoldParser{}
} }
func (*BoldParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *BoldParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 5 { matchedTokens := tokenizer.GetFirstLine(tokens)
return 0, false if len(matchedTokens) < 5 {
return nil, 0
} }
prefixTokens := tokens[:2] prefixTokens := matchedTokens[:2]
if prefixTokens[0].Type != prefixTokens[1].Type { if prefixTokens[0].Type != prefixTokens[1].Type {
return 0, false return nil, 0
} }
prefixTokenType := prefixTokens[0].Type prefixTokenType := prefixTokens[0].Type
if prefixTokenType != tokenizer.Asterisk && prefixTokenType != tokenizer.Underscore { if prefixTokenType != tokenizer.Asterisk && prefixTokenType != tokenizer.Underscore {
return 0, false return nil, 0
} }
cursor, matched := 2, false cursor, matched := 2, false
for ; cursor < len(tokens)-1; cursor++ { for ; cursor < len(matchedTokens)-1; cursor++ {
token, nextToken := tokens[cursor], tokens[cursor+1] token, nextToken := matchedTokens[cursor], matchedTokens[cursor+1]
if token.Type == tokenizer.Newline || nextToken.Type == tokenizer.Newline { if token.Type == tokenizer.Newline || nextToken.Type == tokenizer.Newline {
return 0, false return nil, 0
} }
if token.Type == prefixTokenType && nextToken.Type == prefixTokenType { if token.Type == prefixTokenType && nextToken.Type == prefixTokenType {
matchedTokens = matchedTokens[:cursor+2]
matched = true matched = true
break break
} }
} }
if !matched { if !matched {
return 0, false return nil, 0
} }
return cursor + 2, true size := len(matchedTokens)
} children, err := ParseInlineWithParsers(matchedTokens[2:size-2], []InlineParser{NewLinkParser(), NewTextParser()})
func (p *BoldParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
prefixTokenType := tokens[0].Type
contentTokens := tokens[2 : size-2]
children, err := ParseInlineWithParsers(contentTokens, []InlineParser{NewLinkParser(), NewTextParser()})
if err != nil { if err != nil {
return nil, err return nil, 0
} }
return &ast.Bold{ return &ast.Bold{
Symbol: prefixTokenType, Symbol: prefixTokenType,
Children: children, Children: children,
}, nil }, size
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,48 +11,39 @@ func NewBoldItalicParser() InlineParser {
return &BoldItalicParser{} return &BoldItalicParser{}
} }
func (*BoldItalicParser) Match(tokens []*tokenizer.Token) (int, bool) { func (*BoldItalicParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 7 { matchedTokens := tokenizer.GetFirstLine(tokens)
return 0, false if len(matchedTokens) < 7 {
return nil, 0
} }
prefixTokens := matchedTokens[:3]
prefixTokens := tokens[:3]
if prefixTokens[0].Type != prefixTokens[1].Type || prefixTokens[0].Type != prefixTokens[2].Type || prefixTokens[1].Type != prefixTokens[2].Type { if prefixTokens[0].Type != prefixTokens[1].Type || prefixTokens[0].Type != prefixTokens[2].Type || prefixTokens[1].Type != prefixTokens[2].Type {
return 0, false return nil, 0
} }
prefixTokenType := prefixTokens[0].Type prefixTokenType := prefixTokens[0].Type
if prefixTokenType != tokenizer.Asterisk && prefixTokenType != tokenizer.Underscore { if prefixTokenType != tokenizer.Asterisk && prefixTokenType != tokenizer.Underscore {
return 0, false return nil, 0
} }
cursor, matched := 3, false cursor, matched := 3, false
for ; cursor < len(tokens)-2; cursor++ { for ; cursor < len(matchedTokens)-2; cursor++ {
token, nextToken, endToken := tokens[cursor], tokens[cursor+1], tokens[cursor+2] token, nextToken, endToken := matchedTokens[cursor], matchedTokens[cursor+1], matchedTokens[cursor+2]
if token.Type == tokenizer.Newline || nextToken.Type == tokenizer.Newline || endToken.Type == tokenizer.Newline { if token.Type == tokenizer.Newline || nextToken.Type == tokenizer.Newline || endToken.Type == tokenizer.Newline {
return 0, false return nil, 0
} }
if token.Type == prefixTokenType && nextToken.Type == prefixTokenType && endToken.Type == prefixTokenType { if token.Type == prefixTokenType && nextToken.Type == prefixTokenType && endToken.Type == prefixTokenType {
matchedTokens = matchedTokens[:cursor+3]
matched = true matched = true
break break
} }
} }
if !matched { if !matched {
return 0, false return nil, 0
} }
return cursor + 3, true size := len(matchedTokens)
}
func (p *BoldItalicParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
prefixTokenType := tokens[0].Type
contentTokens := tokens[3 : size-3]
return &ast.BoldItalic{ return &ast.BoldItalic{
Symbol: prefixTokenType, Symbol: prefixTokenType,
Content: tokenizer.Stringify(contentTokens), Content: tokenizer.Stringify(matchedTokens[3 : size-3]),
}, nil }, len(matchedTokens)
} }

View File

@ -19,6 +19,14 @@ func TestBoldItalicParser(t *testing.T) {
text: "*Hello world!", text: "*Hello world!",
boldItalic: nil, boldItalic: nil,
}, },
{
text: "*** Hello * *",
boldItalic: nil,
},
{
text: "*** Hello **",
boldItalic: nil,
},
{ {
text: "***Hello***", text: "***Hello***",
boldItalic: &ast.BoldItalic{ boldItalic: &ast.BoldItalic{
@ -33,19 +41,11 @@ func TestBoldItalicParser(t *testing.T) {
Content: " Hello ", Content: " Hello ",
}, },
}, },
{
text: "*** Hello * *",
boldItalic: nil,
},
{
text: "*** Hello **",
boldItalic: nil,
},
} }
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewBoldItalicParser().Parse(tokens) node, _ := NewBoldItalicParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.boldItalic}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.boldItalic}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -53,7 +53,7 @@ func TestBoldParser(t *testing.T) {
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewBoldParser().Parse(tokens) node, _ := NewBoldParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.bold}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.bold}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,39 +11,20 @@ func NewCodeParser() *CodeParser {
return &CodeParser{} return &CodeParser{}
} }
func (*CodeParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *CodeParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 3 { matchedTokens := tokenizer.GetFirstLine(tokens)
return 0, false if len(matchedTokens) < 3 {
return nil, 0
} }
if tokens[0].Type != tokenizer.Backtick { if matchedTokens[0].Type != tokenizer.Backtick {
return 0, false return nil, 0
} }
nextBacktickIndex := tokenizer.FindUnescaped(matchedTokens[1:], tokenizer.Backtick)
contentTokens, matched := []*tokenizer.Token{}, false if nextBacktickIndex < 0 {
for _, token := range tokens[1:] { return nil, 0
if token.Type == tokenizer.Newline {
return 0, false
}
if token.Type == tokenizer.Backtick {
matched = true
break
}
contentTokens = append(contentTokens, token)
} }
if !matched || len(contentTokens) == 0 { matchedTokens = matchedTokens[:1+nextBacktickIndex+1]
return 0, false
}
return len(contentTokens) + 2, true
}
func (p *CodeParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
contentTokens := tokens[1 : size-1]
return &ast.Code{ return &ast.Code{
Content: tokenizer.Stringify(contentTokens), Content: tokenizer.Stringify(matchedTokens[1 : len(matchedTokens)-1]),
}, nil }, len(matchedTokens)
} }
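Several matchers now hand the closing-delimiter search to tokenizer.FindUnescaped instead of scanning token by token. A sketch of the inline code matcher under the new helper (the input string is illustrative):

tokens := tokenizer.Tokenize("`fmt.Println` rest of line")
node, size := NewCodeParser().Match(tokens)
// The closing backtick is looked up on the first line only; node is an *ast.Code
// with Content "fmt.Println", and size spans both backticks and the text between them.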

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -16,16 +14,16 @@ func NewCodeBlockParser() *CodeBlockParser {
return &CodeBlockParser{} return &CodeBlockParser{}
} }
func (*CodeBlockParser) Match(tokens []*tokenizer.Token) (int, bool) { func (*CodeBlockParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 9 { if len(tokens) < 9 {
return 0, false return nil, 0
} }
if tokens[0].Type != tokenizer.Backtick || tokens[1].Type != tokenizer.Backtick || tokens[2].Type != tokenizer.Backtick { if tokens[0].Type != tokenizer.Backtick || tokens[1].Type != tokenizer.Backtick || tokens[2].Type != tokenizer.Backtick {
return 0, false return nil, 0
} }
if tokens[3].Type != tokenizer.Newline && tokens[4].Type != tokenizer.Newline { if tokens[3].Type != tokenizer.Newline && tokens[4].Type != tokenizer.Newline {
return 0, false return nil, 0
} }
cursor := 4 cursor := 4
if tokens[3].Type != tokenizer.Newline { if tokens[3].Type != tokenizer.Newline {
@ -47,20 +45,11 @@ func (*CodeBlockParser) Match(tokens []*tokenizer.Token) (int, bool) {
} }
} }
if !matched { if !matched {
return 0, false return nil, 0
}
return cursor, true
}
func (p *CodeBlockParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
} }
languageToken := tokens[3] languageToken := tokens[3]
contentStart, contentEnd := 5, size-4 contentStart, contentEnd := 5, cursor-4
if languageToken.Type == tokenizer.Newline { if languageToken.Type == tokenizer.Newline {
languageToken = nil languageToken = nil
contentStart = 4 contentStart = 4
@ -72,5 +61,5 @@ func (p *CodeBlockParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
if languageToken != nil { if languageToken != nil {
codeBlock.Language = languageToken.String() codeBlock.Language = languageToken.String()
} }
return codeBlock, nil return codeBlock, cursor
} }

View File

@ -59,7 +59,7 @@ func TestCodeBlockParser(t *testing.T) {
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewCodeBlockParser().Parse(tokens) node, _ := NewCodeBlockParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.codeBlock}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.codeBlock}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -33,7 +33,7 @@ func TestCodeParser(t *testing.T) {
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewCodeParser().Parse(tokens) node, _ := NewCodeParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.code}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.code}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,50 +11,33 @@ func NewEmbeddedContentParser() *EmbeddedContentParser {
return &EmbeddedContentParser{} return &EmbeddedContentParser{}
} }
func (*EmbeddedContentParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *EmbeddedContentParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
lines := tokenizer.Split(tokens, tokenizer.Newline) matchedTokens := tokenizer.GetFirstLine(tokens)
if len(lines) < 1 { if len(matchedTokens) < 5 {
return 0, false return nil, 0
} }
firstLine := lines[0] if matchedTokens[0].Type != tokenizer.ExclamationMark || matchedTokens[1].Type != tokenizer.LeftSquareBracket || matchedTokens[2].Type != tokenizer.LeftSquareBracket {
if len(firstLine) < 5 { return nil, 0
return 0, false
}
if firstLine[0].Type != tokenizer.ExclamationMark || firstLine[1].Type != tokenizer.LeftSquareBracket || firstLine[2].Type != tokenizer.LeftSquareBracket {
return 0, false
} }
matched := false matched := false
for index, token := range firstLine[:len(firstLine)-1] { for index, token := range matchedTokens[:len(matchedTokens)-1] {
if token.Type == tokenizer.RightSquareBracket && firstLine[index+1].Type == tokenizer.RightSquareBracket && index+1 == len(firstLine)-1 { if token.Type == tokenizer.RightSquareBracket && matchedTokens[index+1].Type == tokenizer.RightSquareBracket && index+1 == len(matchedTokens)-1 {
matched = true matched = true
break break
} }
} }
if !matched { if !matched {
return 0, false return nil, 0
} }
return len(firstLine), true contentTokens := matchedTokens[3 : len(matchedTokens)-2]
} resourceName, params := tokenizer.Stringify(contentTokens), ""
questionMarkIndex := tokenizer.FindUnescaped(contentTokens, tokenizer.QuestionMark)
func (p *EmbeddedContentParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) { if questionMarkIndex > 0 {
size, ok := p.Match(tokens) resourceName, params = tokenizer.Stringify(contentTokens[:questionMarkIndex]), tokenizer.Stringify(contentTokens[questionMarkIndex+1:])
if size == 0 || !ok {
return nil, errors.New("not matched")
} }
contentTokens := tokens[3 : size-2]
resourceName, params := "", ""
paramsIndex, ok := tokenizer.Find(contentTokens, tokenizer.QuestionMark)
if ok && paramsIndex > 0 {
resourceName = tokenizer.Stringify(contentTokens[:paramsIndex])
params = tokenizer.Stringify(contentTokens[paramsIndex+1:])
} else {
resourceName = tokenizer.Stringify(contentTokens)
}
return &ast.EmbeddedContent{ return &ast.EmbeddedContent{
ResourceName: resourceName, ResourceName: resourceName,
Params: params, Params: params,
}, nil }, len(matchedTokens)
} }
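Embedded content likewise splits the resource name from its query parameters with FindUnescaped. A sketch, with the resource name and parameter string as illustrative values:

tokens := tokenizer.Tokenize("![[resources/101?align=center]]")
node, size := NewEmbeddedContentParser().Match(tokens)
// node is an *ast.EmbeddedContent with ResourceName "resources/101" and
// Params "align=center"; size is the full ![[...]] span on the first line.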

View File

@ -59,7 +59,7 @@ func TestEmbeddedContentParser(t *testing.T) {
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewEmbeddedContentParser().Parse(tokens) node, _ := NewEmbeddedContentParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.embeddedContent}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.embeddedContent}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,29 +11,17 @@ func NewEscapingCharacterParser() *EscapingCharacterParser {
return &EscapingCharacterParser{} return &EscapingCharacterParser{}
} }
func (*EscapingCharacterParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *EscapingCharacterParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) == 0 { if len(tokens) < 2 {
return 0, false return nil, 0
} }
if tokens[0].Type != tokenizer.Backslash { if tokens[0].Type != tokenizer.Backslash {
return 0, false return nil, 0
}
if len(tokens) == 1 {
return 0, false
} }
if tokens[1].Type == tokenizer.Newline || tokens[1].Type == tokenizer.Space || tokens[1].Type == tokenizer.Text || tokens[1].Type == tokenizer.Number { if tokens[1].Type == tokenizer.Newline || tokens[1].Type == tokenizer.Space || tokens[1].Type == tokenizer.Text || tokens[1].Type == tokenizer.Number {
return 0, false return nil, 0
} }
return 2, true
}
func (p *EscapingCharacterParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
return &ast.EscapingCharacter{ return &ast.EscapingCharacter{
Symbol: tokens[1].Value, Symbol: tokens[1].Value,
}, nil }, 2
} }

View File

@ -25,7 +25,7 @@ func TestEscapingCharacterParser(t *testing.T) {
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewEscapingCharacterParser().Parse(tokens) node, _ := NewEscapingCharacterParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.node}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.node}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,61 +11,34 @@ func NewHeadingParser() *HeadingParser {
return &HeadingParser{} return &HeadingParser{}
} }
func (*HeadingParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *HeadingParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
level := 0 matchedTokens := tokenizer.GetFirstLine(tokens)
for _, token := range tokens { spaceIndex := tokenizer.FindUnescaped(matchedTokens, tokenizer.Space)
if token.Type == tokenizer.PoundSign { if spaceIndex < 0 {
level++ return nil, 0
} else { }
break
for _, token := range matchedTokens[:spaceIndex] {
if token.Type != tokenizer.PoundSign {
return nil, 0
} }
} }
if len(tokens) <= level+1 { level := spaceIndex
return 0, false
}
if tokens[level].Type != tokenizer.Space {
return 0, false
}
if level == 0 || level > 6 { if level == 0 || level > 6 {
return 0, false return nil, 0
} }
contentTokens := []*tokenizer.Token{} contentTokens := matchedTokens[level+1:]
for _, token := range tokens[level+1:] {
if token.Type == tokenizer.Newline {
break
}
contentTokens = append(contentTokens, token)
}
if len(contentTokens) == 0 { if len(contentTokens) == 0 {
return 0, false return nil, 0
} }
return len(contentTokens) + level + 1, true
}
func (p *HeadingParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
level := 0
for _, token := range tokens {
if token.Type == tokenizer.PoundSign {
level++
} else {
break
}
}
contentTokens := tokens[level+1 : size]
children, err := ParseInline(contentTokens) children, err := ParseInline(contentTokens)
if err != nil { if err != nil {
return nil, err return nil, 0
} }
return &ast.Heading{ return &ast.Heading{
Level: level, Level: level,
Children: children, Children: children,
}, nil }, len(contentTokens) + level + 1
} }
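The heading matcher now derives the level from the position of the first unescaped space instead of re-counting pound signs in a separate Parse step. A short sketch (input illustrative):

tokens := tokenizer.Tokenize("## Section title")
node, size := NewHeadingParser().Match(tokens)
// node is an *ast.Heading with Level 2 and the inline children parsed from
// "Section title"; size covers the pound signs, the space and the content.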

View File

@ -80,7 +80,7 @@ Hello World`,
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewHeadingParser().Parse(tokens) node, _ := NewHeadingParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.heading}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.heading}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,46 +11,34 @@ func NewHighlightParser() InlineParser {
return &HighlightParser{} return &HighlightParser{}
} }
func (*HighlightParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *HighlightParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 5 { matchedToken := tokenizer.GetFirstLine(tokens)
return 0, false if len(matchedToken) < 5 {
return nil, 0
} }
prefixTokens := tokens[:2] prefixTokens := matchedToken[:2]
if prefixTokens[0].Type != prefixTokens[1].Type { if prefixTokens[0].Type != prefixTokens[1].Type {
return 0, false return nil, 0
} }
prefixTokenType := prefixTokens[0].Type prefixTokenType := prefixTokens[0].Type
if prefixTokenType != tokenizer.EqualSign { if prefixTokenType != tokenizer.EqualSign {
return 0, false return nil, 0
} }
cursor, matched := 2, false cursor, matched := 2, false
for ; cursor < len(tokens)-1; cursor++ { for ; cursor < len(matchedToken)-1; cursor++ {
token, nextToken := tokens[cursor], tokens[cursor+1] token, nextToken := matchedToken[cursor], matchedToken[cursor+1]
if token.Type == tokenizer.Newline || nextToken.Type == tokenizer.Newline {
return 0, false
}
if token.Type == prefixTokenType && nextToken.Type == prefixTokenType { if token.Type == prefixTokenType && nextToken.Type == prefixTokenType {
matched = true matched = true
break break
} }
} }
if !matched { if !matched {
return 0, false return nil, 0
} }
return cursor + 2, true
}
func (p *HighlightParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
contentTokens := tokens[2 : size-2]
return &ast.Highlight{ return &ast.Highlight{
Content: tokenizer.Stringify(contentTokens), Content: tokenizer.Stringify(matchedToken[2:cursor]),
}, nil }, cursor + 1
} }

View File

@ -35,7 +35,7 @@ func TestHighlightParser(t *testing.T) {
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewHighlightParser().Parse(tokens) node, _ := NewHighlightParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.bold}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.bold}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,29 +11,21 @@ func NewHorizontalRuleParser() *HorizontalRuleParser {
return &HorizontalRuleParser{} return &HorizontalRuleParser{}
} }
func (*HorizontalRuleParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *HorizontalRuleParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 3 { matchedTokens := tokenizer.GetFirstLine(tokens)
return 0, false if len(matchedTokens) < 3 {
return nil, 0
} }
if tokens[0].Type != tokens[1].Type || tokens[0].Type != tokens[2].Type || tokens[1].Type != tokens[2].Type { if len(matchedTokens) > 3 && matchedTokens[3].Type != tokenizer.Newline {
return 0, false return nil, 0
} }
if tokens[0].Type != tokenizer.Hyphen && tokens[0].Type != tokenizer.Underscore && tokens[0].Type != tokenizer.Asterisk { if matchedTokens[0].Type != matchedTokens[1].Type || matchedTokens[0].Type != matchedTokens[2].Type || matchedTokens[1].Type != matchedTokens[2].Type {
return 0, false return nil, 0
} }
if len(tokens) > 3 && tokens[3].Type != tokenizer.Newline { if matchedTokens[0].Type != tokenizer.Hyphen && matchedTokens[0].Type != tokenizer.Underscore && matchedTokens[0].Type != tokenizer.Asterisk {
return 0, false return nil, 0
} }
return 3, true
}
func (p *HorizontalRuleParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
return &ast.HorizontalRule{ return &ast.HorizontalRule{
Symbol: tokens[0].Type, Symbol: matchedTokens[0].Type,
}, nil }, 3
} }

View File

@ -51,7 +51,7 @@ func TestHorizontalRuleParser(t *testing.T) {
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewHorizontalRuleParser().Parse(tokens) node, _ := NewHorizontalRuleParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.horizontalRule}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.horizontalRule}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,34 +11,33 @@ func NewImageParser() *ImageParser {
return &ImageParser{} return &ImageParser{}
} }
func (*ImageParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *ImageParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 5 { matchedTokens := tokenizer.GetFirstLine(tokens)
return 0, false if len(matchedTokens) < 5 {
return nil, 0
} }
if tokens[0].Type != tokenizer.ExclamationMark { if matchedTokens[0].Type != tokenizer.ExclamationMark {
return 0, false return nil, 0
} }
if tokens[1].Type != tokenizer.LeftSquareBracket { if matchedTokens[1].Type != tokenizer.LeftSquareBracket {
return 0, false return nil, 0
} }
cursor, altText := 2, "" cursor, altTokens := 2, []*tokenizer.Token{}
for ; cursor < len(tokens)-2; cursor++ { for ; cursor < len(matchedTokens)-2; cursor++ {
if tokens[cursor].Type == tokenizer.Newline { if matchedTokens[cursor].Type == tokenizer.RightSquareBracket {
return 0, false
}
if tokens[cursor].Type == tokenizer.RightSquareBracket {
break break
} }
altText += tokens[cursor].Value altTokens = append(altTokens, matchedTokens[cursor])
} }
if tokens[cursor+1].Type != tokenizer.LeftParenthesis { if matchedTokens[cursor+1].Type != tokenizer.LeftParenthesis {
return 0, false return nil, 0
} }
cursor += 2 cursor += 2
contentTokens, matched := []*tokenizer.Token{}, false contentTokens, matched := []*tokenizer.Token{}, false
for _, token := range tokens[cursor:] { for _, token := range matchedTokens[cursor:] {
if token.Type == tokenizer.Newline || token.Type == tokenizer.Space { if token.Type == tokenizer.Space {
return 0, false return nil, 0
} }
if token.Type == tokenizer.RightParenthesis { if token.Type == tokenizer.RightParenthesis {
matched = true matched = true
@ -49,27 +46,11 @@ func (*ImageParser) Match(tokens []*tokenizer.Token) (int, bool) {
contentTokens = append(contentTokens, token) contentTokens = append(contentTokens, token)
} }
if !matched || len(contentTokens) == 0 { if !matched || len(contentTokens) == 0 {
return 0, false return nil, 0
}
return cursor + len(contentTokens) + 1, true
}
func (p *ImageParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
} }
altTextTokens := []*tokenizer.Token{}
for _, token := range tokens[2:] {
if token.Type == tokenizer.RightSquareBracket {
break
}
altTextTokens = append(altTextTokens, token)
}
contentTokens := tokens[2+len(altTextTokens)+2 : size-1]
return &ast.Image{ return &ast.Image{
AltText: tokenizer.Stringify(altTextTokens), AltText: tokenizer.Stringify(altTokens),
URL: tokenizer.Stringify(contentTokens), URL: tokenizer.Stringify(contentTokens),
}, nil }, 0
} }

View File

@ -40,7 +40,7 @@ func TestImageParser(t *testing.T) {
} }
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewImageParser().Parse(tokens) node, _ := NewImageParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.image}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.image}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -15,22 +13,20 @@ func NewItalicParser() *ItalicParser {
return &ItalicParser{} return &ItalicParser{}
} }
func (*ItalicParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *ItalicParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 3 { matchedTokens := tokenizer.GetFirstLine(tokens)
return 0, false if len(matchedTokens) < 3 {
return nil, 0
} }
prefixTokens := tokens[:1] prefixTokens := matchedTokens[:1]
if prefixTokens[0].Type != tokenizer.Asterisk && prefixTokens[0].Type != tokenizer.Underscore { if prefixTokens[0].Type != tokenizer.Asterisk && prefixTokens[0].Type != tokenizer.Underscore {
return 0, false return nil, 0
} }
prefixTokenType := prefixTokens[0].Type prefixTokenType := prefixTokens[0].Type
contentTokens := []*tokenizer.Token{} contentTokens := []*tokenizer.Token{}
matched := false matched := false
for _, token := range tokens[1:] { for _, token := range matchedTokens[1:] {
if token.Type == tokenizer.Newline {
return 0, false
}
if token.Type == prefixTokenType { if token.Type == prefixTokenType {
matched = true matched = true
break break
@ -38,22 +34,11 @@ func (*ItalicParser) Match(tokens []*tokenizer.Token) (int, bool) {
contentTokens = append(contentTokens, token) contentTokens = append(contentTokens, token)
} }
if !matched || len(contentTokens) == 0 { if !matched || len(contentTokens) == 0 {
return 0, false return nil, 0
} }
return len(contentTokens) + 2, true
}
func (p *ItalicParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
prefixTokenType := tokens[0].Type
contentTokens := tokens[1 : size-1]
return &ast.Italic{ return &ast.Italic{
Symbol: prefixTokenType, Symbol: prefixTokenType,
Content: tokenizer.Stringify(contentTokens), Content: tokenizer.Stringify(contentTokens),
}, nil }, len(contentTokens) + 2
} }

View File

@ -44,7 +44,7 @@ func TestItalicParser(t *testing.T) {
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewItalicParser().Parse(tokens) node, _ := NewItalicParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.italic}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.italic}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,21 +11,12 @@ func NewLineBreakParser() *LineBreakParser {
return &LineBreakParser{} return &LineBreakParser{}
} }
func (*LineBreakParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *LineBreakParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) == 0 { if len(tokens) == 0 {
return 0, false return nil, 0
} }
if tokens[0].Type != tokenizer.Newline { if tokens[0].Type != tokenizer.Newline {
return 0, false return nil, 0
} }
return 1, true return &ast.LineBreak{}, 1
}
func (p *LineBreakParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
return &ast.LineBreak{}, nil
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,62 +11,44 @@ func NewLinkParser() *LinkParser {
return &LinkParser{} return &LinkParser{}
} }
func (*LinkParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *LinkParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 5 { matchedTokens := tokenizer.GetFirstLine(tokens)
return 0, false if len(matchedTokens) < 5 {
return nil, 0
} }
if tokens[0].Type != tokenizer.LeftSquareBracket { if matchedTokens[0].Type != tokenizer.LeftSquareBracket {
return 0, false return nil, 0
} }
textTokens := []*tokenizer.Token{} textTokens := []*tokenizer.Token{}
for _, token := range tokens[1:] { for _, token := range matchedTokens[1:] {
if token.Type == tokenizer.Newline {
return 0, false
}
if token.Type == tokenizer.RightSquareBracket { if token.Type == tokenizer.RightSquareBracket {
break break
} }
textTokens = append(textTokens, token) textTokens = append(textTokens, token)
} }
if len(textTokens)+4 >= len(tokens) { if len(textTokens)+4 >= len(matchedTokens) {
return 0, false return nil, 0
} }
if tokens[2+len(textTokens)].Type != tokenizer.LeftParenthesis { if matchedTokens[2+len(textTokens)].Type != tokenizer.LeftParenthesis {
return 0, false return nil, 0
} }
urlTokens := []*tokenizer.Token{} urlTokens := []*tokenizer.Token{}
for _, token := range tokens[3+len(textTokens):] { for _, token := range matchedTokens[3+len(textTokens):] {
if token.Type == tokenizer.Newline || token.Type == tokenizer.Space { if token.Type == tokenizer.Space {
return 0, false return nil, 0
} }
if token.Type == tokenizer.RightParenthesis { if token.Type == tokenizer.RightParenthesis {
break break
} }
urlTokens = append(urlTokens, token) urlTokens = append(urlTokens, token)
} }
if 4+len(urlTokens)+len(textTokens) > len(tokens) { if 4+len(urlTokens)+len(textTokens) > len(matchedTokens) {
return 0, false return nil, 0
} }
return 4 + len(urlTokens) + len(textTokens), true
}
func (p *LinkParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
textTokens := []*tokenizer.Token{}
for _, token := range tokens[1:] {
if token.Type == tokenizer.RightSquareBracket {
break
}
textTokens = append(textTokens, token)
}
urlTokens := tokens[2+len(textTokens)+1 : size-1]
return &ast.Link{ return &ast.Link{
Text: tokenizer.Stringify(textTokens), Text: tokenizer.Stringify(textTokens),
URL: tokenizer.Stringify(urlTokens), URL: tokenizer.Stringify(urlTokens),
}, nil }, 4 + len(urlTokens) + len(textTokens)
} }

View File

@ -47,7 +47,7 @@ func TestLinkParser(t *testing.T) {
} }
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewLinkParser().Parse(tokens) node, _ := NewLinkParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.link}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.link}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,44 +11,29 @@ func NewMathParser() *MathParser {
return &MathParser{} return &MathParser{}
} }
func (*MathParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *MathParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 3 { matchedTokens := tokenizer.GetFirstLine(tokens)
return 0, false if len(matchedTokens) < 3 {
return nil, 0
} }
if tokens[0].Type != tokenizer.DollarSign { if matchedTokens[0].Type != tokenizer.DollarSign {
return 0, false return nil, 0
} }
contentTokens := []*tokenizer.Token{} contentTokens := []*tokenizer.Token{}
for _, token := range tokens[1:] { matched := false
if token.Type == tokenizer.Newline { for _, token := range matchedTokens[1:] {
return 0, false
}
if token.Type == tokenizer.DollarSign { if token.Type == tokenizer.DollarSign {
matched = true
break break
} }
contentTokens = append(contentTokens, token) contentTokens = append(contentTokens, token)
} }
if len(contentTokens) == 0 { if !matched || len(contentTokens) == 0 {
return 0, false return nil, 0
} }
if len(contentTokens)+2 > len(tokens) {
return 0, false
}
if tokens[len(contentTokens)+1].Type != tokenizer.DollarSign {
return 0, false
}
return len(contentTokens) + 2, true
}
func (p *MathParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
return &ast.Math{ return &ast.Math{
Content: tokenizer.Stringify(tokens[1 : size-1]), Content: tokenizer.Stringify(contentTokens),
}, nil }, len(contentTokens) + 2
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,44 +11,40 @@ func NewMathBlockParser() *MathBlockParser {
return &MathBlockParser{} return &MathBlockParser{}
} }
func (*MathBlockParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *MathBlockParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 7 { rows := tokenizer.Split(tokens, tokenizer.Newline)
return 0, false if len(rows) < 3 {
return nil, 0
}
firstRow := rows[0]
if len(firstRow) != 2 {
return nil, 0
}
if firstRow[0].Type != tokenizer.DollarSign || firstRow[1].Type != tokenizer.DollarSign {
return nil, 0
} }
if tokens[0].Type != tokenizer.DollarSign || tokens[1].Type != tokenizer.DollarSign || tokens[2].Type != tokenizer.Newline { contentRows := [][]*tokenizer.Token{}
return 0, false
}
cursor := 3
matched := false matched := false
for ; cursor < len(tokens)-2; cursor++ { for _, row := range rows[1:] {
if tokens[cursor].Type == tokenizer.Newline && tokens[cursor+1].Type == tokenizer.DollarSign && tokens[cursor+2].Type == tokenizer.DollarSign { if len(row) == 2 && row[0].Type == tokenizer.DollarSign && row[1].Type == tokenizer.DollarSign {
if cursor+2 == len(tokens)-1 { matched = true
cursor += 3 break
matched = true
break
} else if tokens[cursor+3].Type == tokenizer.Newline {
cursor += 3
matched = true
break
}
} }
contentRows = append(contentRows, row)
} }
if !matched { if !matched {
return 0, false return nil, 0
} }
return cursor, true contentTokens := []*tokenizer.Token{}
} for _, row := range contentRows {
contentTokens = append(contentTokens, row...)
func (p *MathBlockParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) { contentTokens = append(contentTokens, &tokenizer.Token{
size, ok := p.Match(tokens) Type: tokenizer.Newline,
if size == 0 || !ok { })
return nil, errors.New("not matched")
} }
return &ast.MathBlock{ return &ast.MathBlock{
Content: tokenizer.Stringify(tokens[3 : size-3]), Content: tokenizer.Stringify(contentTokens),
}, nil }, 3 + len(contentTokens) + 2
} }
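The math block matcher now works on whole rows via tokenizer.Split rather than walking a cursor through the token stream. A rough sketch of the expected behaviour, assuming a simple two-fence input (the handling of the synthetic newline tokens is an assumption):

tokens := tokenizer.Tokenize("$$\na + b\n$$")
node, size := NewMathBlockParser().Match(tokens)
// node is an *ast.MathBlock whose Content is the inner rows rejoined with newlines;
// size accounts for the opening $$ fence, the content rows and the closing fence.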

View File

@ -30,7 +30,7 @@ func TestMathBlockParser(t *testing.T) {
} }
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewMathBlockParser().Parse(tokens) node, _ := NewMathBlockParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.link}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.link}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -24,7 +24,7 @@ func TestMathParser(t *testing.T) {
} }
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewMathParser().Parse(tokens) node, _ := NewMathParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.link}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.link}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,61 +11,37 @@ func NewOrderedListParser() *OrderedListParser {
return &OrderedListParser{} return &OrderedListParser{}
} }
func (*OrderedListParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *OrderedListParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 4 { matchedTokens := tokenizer.GetFirstLine(tokens)
return 0, false
}
indent := 0 indent := 0
for _, token := range tokens { for _, token := range matchedTokens {
if token.Type == tokenizer.Space { if token.Type == tokenizer.Space {
indent++ indent++
} else { } else {
break break
} }
} }
if len(matchedTokens) < indent+3 {
return nil, 0
}
corsor := indent corsor := indent
if tokens[corsor].Type != tokenizer.Number || tokens[corsor+1].Type != tokenizer.Dot || tokens[corsor+2].Type != tokenizer.Space { if matchedTokens[corsor].Type != tokenizer.Number || matchedTokens[corsor+1].Type != tokenizer.Dot || matchedTokens[corsor+2].Type != tokenizer.Space {
return 0, false return nil, 0
}
contentTokens := []*tokenizer.Token{}
for _, token := range tokens[corsor+3:] {
if token.Type == tokenizer.Newline {
break
}
contentTokens = append(contentTokens, token)
} }
contentTokens := matchedTokens[corsor+3:]
if len(contentTokens) == 0 { if len(contentTokens) == 0 {
return 0, false return nil, 0
} }
return indent + len(contentTokens) + 3, true
}
func (p *OrderedListParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
indent := 0
for _, token := range tokens {
if token.Type == tokenizer.Space {
indent++
} else {
break
}
}
contentTokens := tokens[indent+3 : size]
children, err := ParseInline(contentTokens) children, err := ParseInline(contentTokens)
if err != nil { if err != nil {
return nil, err return nil, 0
} }
return &ast.OrderedList{ return &ast.OrderedList{
Number: tokens[indent].Value, Number: matchedTokens[indent].Value,
Indent: indent, Indent: indent,
Children: children, Children: children,
}, nil }, indent + 3 + len(contentTokens)
} }

View File

@ -65,7 +65,7 @@ func TestOrderedListParser(t *testing.T) {
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewOrderedListParser().Parse(tokens) node, _ := NewOrderedListParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.node}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.node}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -15,31 +13,17 @@ func NewParagraphParser() *ParagraphParser {
return &ParagraphParser{} return &ParagraphParser{}
} }
func (*ParagraphParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *ParagraphParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
contentTokens := []*tokenizer.Token{} matchedTokens := tokenizer.GetFirstLine(tokens)
for _, token := range tokens { if len(matchedTokens) == 0 {
if token.Type == tokenizer.Newline { return nil, 0
break
}
contentTokens = append(contentTokens, token)
}
if len(contentTokens) == 0 {
return 0, false
}
return len(contentTokens), true
}
func (p *ParagraphParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
} }
children, err := ParseInline(tokens[:size]) children, err := ParseInline(matchedTokens)
if err != nil { if err != nil {
return nil, err return nil, 0
} }
return &ast.Paragraph{ return &ast.Paragraph{
Children: children, Children: children,
}, nil }, len(matchedTokens)
} }

View File

@ -57,7 +57,7 @@ func TestParagraphParser(t *testing.T) {
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewParagraphParser().Parse(tokens) node, _ := NewParagraphParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.paragraph}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.paragraph}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,8 +11,7 @@ type Context struct {
} }
type BaseParser interface { type BaseParser interface {
Match(tokens []*tokenizer.Token) (int, bool) Match(tokens []*tokenizer.Token) (ast.Node, int)
Parse(tokens []*tokenizer.Token) (ast.Node, error)
} }
type InlineParser interface { type InlineParser interface {
@ -53,13 +50,9 @@ func ParseBlockWithParsers(tokens []*tokenizer.Token, blockParsers []BlockParser
var prevNode ast.Node var prevNode ast.Node
for len(tokens) > 0 { for len(tokens) > 0 {
for _, blockParser := range blockParsers { for _, blockParser := range blockParsers {
size, matched := blockParser.Match(tokens) node, size := blockParser.Match(tokens)
if matched { if node != nil {
node, err := blockParser.Parse(tokens) // Consume matched tokens.
if err != nil {
return nil, errors.New("parse error")
}
tokens = tokens[size:] tokens = tokens[size:]
if prevNode != nil { if prevNode != nil {
prevNode.SetNextSibling(node) prevNode.SetNextSibling(node)
@ -102,13 +95,9 @@ func ParseInlineWithParsers(tokens []*tokenizer.Token, inlineParsers []InlinePar
var prevNode ast.Node var prevNode ast.Node
for len(tokens) > 0 { for len(tokens) > 0 {
for _, inlineParser := range inlineParsers { for _, inlineParser := range inlineParsers {
size, matched := inlineParser.Match(tokens) node, size := inlineParser.Match(tokens)
if matched { if node != nil {
node, err := inlineParser.Parse(tokens) // Consume matched tokens.
if err != nil {
return nil, errors.New("parse error")
}
tokens = tokens[size:] tokens = tokens[size:]
if prevNode != nil { if prevNode != nil {
// Merge text nodes if possible. // Merge text nodes if possible.
@ -120,8 +109,8 @@ func ParseInlineWithParsers(tokens []*tokenizer.Token, inlineParsers []InlinePar
prevNode.SetNextSibling(node) prevNode.SetNextSibling(node)
node.SetPrevSibling(prevNode) node.SetPrevSibling(prevNode)
} }
nodes = append(nodes, node)
prevNode = node prevNode = node
nodes = append(nodes, node)
break break
} }
} }
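With Parse gone, the two driver loops above simply ask each registered parser for (node, size), take the first non-nil node and advance by size. A hedged sketch of what a parser now has to provide to plug into that loop; HelloParser is hypothetical and exists only to illustrate the contract:

// Hypothetical parser demonstrating the unified Match contract.
type HelloParser struct{}

func (*HelloParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
	matchedTokens := tokenizer.GetFirstLine(tokens)
	if len(matchedTokens) == 0 {
		return nil, 0 // no match: report (nil, 0) so the driver tries the next parser
	}
	// ... inspect matchedTokens and build the node here ...
	return &ast.Paragraph{}, len(matchedTokens) // node plus the number of consumed tokens
}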

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,39 +11,29 @@ func NewStrikethroughParser() *StrikethroughParser {
return &StrikethroughParser{} return &StrikethroughParser{}
} }
func (*StrikethroughParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *StrikethroughParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 5 { matchedTokens := tokenizer.GetFirstLine(tokens)
return 0, false if len(matchedTokens) < 5 {
return nil, 0
} }
if tokens[0].Type != tokenizer.Tilde || tokens[1].Type != tokenizer.Tilde { if matchedTokens[0].Type != tokenizer.Tilde || matchedTokens[1].Type != tokenizer.Tilde {
return 0, false return nil, 0
} }
cursor, matched := 2, false contentTokens := []*tokenizer.Token{}
for ; cursor < len(tokens)-1; cursor++ { matched := false
token, nextToken := tokens[cursor], tokens[cursor+1] for cursor := 2; cursor < len(matchedTokens)-1; cursor++ {
if token.Type == tokenizer.Newline || nextToken.Type == tokenizer.Newline { token, nextToken := matchedTokens[cursor], matchedTokens[cursor+1]
return 0, false
}
if token.Type == tokenizer.Tilde && nextToken.Type == tokenizer.Tilde { if token.Type == tokenizer.Tilde && nextToken.Type == tokenizer.Tilde {
matched = true matched = true
break break
} }
contentTokens = append(contentTokens, token)
} }
if !matched { if !matched || len(contentTokens) == 0 {
return 0, false return nil, 0
} }
return cursor + 2, true
}
func (p *StrikethroughParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
contentTokens := tokens[2 : size-2]
return &ast.Strikethrough{ return &ast.Strikethrough{
Content: tokenizer.Stringify(contentTokens), Content: tokenizer.Stringify(contentTokens),
}, nil }, len(contentTokens) + 4
} }

View File

@ -41,7 +41,7 @@ func TestStrikethroughParser(t *testing.T) {
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewStrikethroughParser().Parse(tokens) node, _ := NewStrikethroughParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.strikethrough}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.strikethrough}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,20 +11,18 @@ func NewSubscriptParser() *SubscriptParser {
return &SubscriptParser{} return &SubscriptParser{}
} }
func (*SubscriptParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *SubscriptParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 3 { matchedTokens := tokenizer.GetFirstLine(tokens)
return 0, false if len(matchedTokens) < 3 {
return nil, 0
} }
if tokens[0].Type != tokenizer.Tilde { if matchedTokens[0].Type != tokenizer.Tilde {
return 0, false return nil, 0
} }
contentTokens := []*tokenizer.Token{} contentTokens := []*tokenizer.Token{}
matched := false matched := false
for _, token := range tokens[1:] { for _, token := range matchedTokens[1:] {
if token.Type == tokenizer.Newline {
return 0, false
}
if token.Type == tokenizer.Tilde { if token.Type == tokenizer.Tilde {
matched = true matched = true
break break
@ -34,20 +30,10 @@ func (*SubscriptParser) Match(tokens []*tokenizer.Token) (int, bool) {
contentTokens = append(contentTokens, token) contentTokens = append(contentTokens, token)
} }
if !matched || len(contentTokens) == 0 { if !matched || len(contentTokens) == 0 {
return 0, false return nil, 0
} }
return len(contentTokens) + 2, true
}
func (p *SubscriptParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
contentTokens := tokens[1 : size-1]
return &ast.Subscript{ return &ast.Subscript{
Content: tokenizer.Stringify(contentTokens), Content: tokenizer.Stringify(contentTokens),
}, nil }, len(contentTokens) + 2
} }

View File

@ -41,7 +41,7 @@ func TestSubscriptParser(t *testing.T) {
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewSubscriptParser().Parse(tokens) node, _ := NewSubscriptParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.subscript}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.subscript}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,20 +11,18 @@ func NewSuperscriptParser() *SuperscriptParser {
return &SuperscriptParser{} return &SuperscriptParser{}
} }
func (*SuperscriptParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *SuperscriptParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 3 { matchedTokens := tokenizer.GetFirstLine(tokens)
return 0, false if len(matchedTokens) < 3 {
return nil, 0
} }
if tokens[0].Type != tokenizer.Caret { if matchedTokens[0].Type != tokenizer.Caret {
return 0, false return nil, 0
} }
contentTokens := []*tokenizer.Token{} contentTokens := []*tokenizer.Token{}
matched := false matched := false
for _, token := range tokens[1:] { for _, token := range matchedTokens[1:] {
if token.Type == tokenizer.Newline {
return 0, false
}
if token.Type == tokenizer.Caret { if token.Type == tokenizer.Caret {
matched = true matched = true
break break
@ -34,20 +30,10 @@ func (*SuperscriptParser) Match(tokens []*tokenizer.Token) (int, bool) {
contentTokens = append(contentTokens, token) contentTokens = append(contentTokens, token)
} }
if !matched || len(contentTokens) == 0 { if !matched || len(contentTokens) == 0 {
return 0, false return nil, 0
} }
return len(contentTokens) + 2, true
}
func (p *SuperscriptParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
contentTokens := tokens[1 : size-1]
return &ast.Superscript{ return &ast.Superscript{
Content: tokenizer.Stringify(contentTokens), Content: tokenizer.Stringify(contentTokens),
}, nil }, len(contentTokens) + 2
} }
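A hedged usage sketch for the superscript matcher above, which applies equally to the subscript matcher earlier in this commit. The assumption that "2" survives as a single content token comes from the tokenizer, not from this diff.

// Sketch only: assumes the parser package context of this repo.
func superscriptSketch() (string, int) {
	tokens := tokenizer.Tokenize("^2^ and ~sub~")
	node, size := NewSuperscriptParser().Match(tokens)
	sup, ok := node.(*ast.Superscript)
	if !ok {
		return "", 0
	}
	// Expected (assumption): sup.Content == "2" and size == 3, i.e. the
	// opening caret, one content token, and the closing caret.
	// NewSubscriptParser().Match treats "~sub~" the same way, returning an
	// *ast.Subscript plus the number of tokens it consumed.
	return sup.Content, size
}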

View File

@ -41,7 +41,7 @@ func TestSuperscriptParser(t *testing.T) {
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewSuperscriptParser().Parse(tokens) node, _ := NewSuperscriptParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.superscript}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.superscript}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,138 +11,113 @@ func NewTableParser() *TableParser {
return &TableParser{} return &TableParser{}
} }
func (*TableParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *TableParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
headerTokens := []*tokenizer.Token{} rawRows := tokenizer.Split(tokens, tokenizer.Newline)
for _, token := range tokens { if len(rawRows) < 3 {
if token.Type == tokenizer.Newline { return nil, 0
break
}
headerTokens = append(headerTokens, token)
}
if len(headerTokens) < 5 || len(tokens) < len(headerTokens)+3 {
return 0, false
} }
delimiterTokens := []*tokenizer.Token{} headerTokens := rawRows[0]
for _, token := range tokens[len(headerTokens)+1:] { if len(headerTokens) < 3 {
if token.Type == tokenizer.Newline { return nil, 0
break
}
delimiterTokens = append(delimiterTokens, token)
}
if len(delimiterTokens) < 5 || len(tokens) < len(headerTokens)+len(delimiterTokens)+3 {
return 0, false
} }
rowTokens := []*tokenizer.Token{} delimiterTokens := rawRows[1]
for index, token := range tokens[len(headerTokens)+len(delimiterTokens)+2:] { if len(delimiterTokens) < 3 {
temp := len(headerTokens) + len(delimiterTokens) + 2 + index return nil, 0
if token.Type == tokenizer.Newline {
if (temp == len(tokens)-1) || (temp+1 == len(tokens)-1 && tokens[temp+1].Type == tokenizer.Newline) {
break
}
}
rowTokens = append(rowTokens, token)
}
if len(rowTokens) < 5 {
return 0, false
} }
// Check header. // Check header.
if len(headerTokens) < 5 { if len(headerTokens) < 5 {
return 0, false return nil, 0
} }
headerCells, ok := matchTableCellTokens(headerTokens) headerCells, ok := matchTableCellTokens(headerTokens)
if headerCells == 0 || !ok { if headerCells == 0 || !ok {
return 0, false return nil, 0
} }
// Check delimiter. // Check delimiter.
if len(delimiterTokens) < 5 { if len(delimiterTokens) < 5 {
return 0, false return nil, 0
} }
delimiterCells, ok := matchTableCellTokens(delimiterTokens) delimiterCells, ok := matchTableCellTokens(delimiterTokens)
if delimiterCells != headerCells || !ok { if delimiterCells != headerCells || !ok {
return 0, false return nil, 0
} }
for index, t := range tokenizer.Split(delimiterTokens, tokenizer.Pipe) { for index, t := range tokenizer.Split(delimiterTokens, tokenizer.Pipe) {
if index == 0 || index == headerCells { if index == 0 || index == headerCells {
if len(t) != 0 { if len(t) != 0 {
return 0, false return nil, 0
} }
continue continue
} }
// Each delimiter cell should be like ` --- `, ` :-- `, ` --: `, ` :-: `.
if len(t) < 5 { if len(t) < 5 {
return 0, false return nil, 0
} }
delimiterTokens := t[1 : len(t)-1] delimiterTokens := t[1 : len(t)-1]
if len(delimiterTokens) < 3 { if len(delimiterTokens) < 3 {
return 0, false return nil, 0
} }
if (delimiterTokens[0].Type != tokenizer.Colon && delimiterTokens[0].Type != tokenizer.Hyphen) || (delimiterTokens[len(delimiterTokens)-1].Type != tokenizer.Colon && delimiterTokens[len(delimiterTokens)-1].Type != tokenizer.Hyphen) { if (delimiterTokens[0].Type != tokenizer.Colon &&
return 0, false delimiterTokens[0].Type != tokenizer.Hyphen) ||
(delimiterTokens[len(delimiterTokens)-1].Type != tokenizer.Colon &&
delimiterTokens[len(delimiterTokens)-1].Type != tokenizer.Hyphen) {
return nil, 0
} }
for _, token := range delimiterTokens[1 : len(delimiterTokens)-1] { for _, token := range delimiterTokens[1 : len(delimiterTokens)-1] {
if token.Type != tokenizer.Hyphen { if token.Type != tokenizer.Hyphen {
return 0, false return nil, 0
} }
} }
} }
// Check rows. // Check rows.
if len(rowTokens) < 5 { rows := rawRows[2:]
return 0, false matchedRows := 0
} for _, rowTokens := range rows {
rows := tokenizer.Split(rowTokens, tokenizer.Newline) cells, ok := matchTableCellTokens(rowTokens)
if len(rows) == 0 {
return 0, false
}
for _, row := range rows {
cells, ok := matchTableCellTokens(row)
if cells != headerCells || !ok { if cells != headerCells || !ok {
return 0, false break
} }
matchedRows++
} }
if matchedRows == 0 {
return len(headerTokens) + len(delimiterTokens) + len(rowTokens) + 2, true return nil, 0
}
func (p *TableParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
} }
rows = rows[:matchedRows]
rawRows := tokenizer.Split(tokens[:size-1], tokenizer.Newline)
headerTokens := rawRows[0]
dilimiterTokens := rawRows[1]
rowTokens := rawRows[2:]
header := make([]string, 0) header := make([]string, 0)
delimiter := make([]string, 0) delimiter := make([]string, 0)
rows := make([][]string, 0) rowsStr := make([][]string, 0)
cols := len(tokenizer.Split(headerTokens, tokenizer.Pipe)) - 2 cols := len(tokenizer.Split(headerTokens, tokenizer.Pipe)) - 2
for _, t := range tokenizer.Split(headerTokens, tokenizer.Pipe)[1 : cols+1] { for _, t := range tokenizer.Split(headerTokens, tokenizer.Pipe)[1 : cols+1] {
header = append(header, tokenizer.Stringify(t[1:len(t)-1])) header = append(header, tokenizer.Stringify(t[1:len(t)-1]))
} }
for _, t := range tokenizer.Split(dilimiterTokens, tokenizer.Pipe)[1 : cols+1] { for _, t := range tokenizer.Split(delimiterTokens, tokenizer.Pipe)[1 : cols+1] {
delimiter = append(delimiter, tokenizer.Stringify(t[1:len(t)-1])) delimiter = append(delimiter, tokenizer.Stringify(t[1:len(t)-1]))
} }
for _, row := range rowTokens { for _, row := range rows {
cells := make([]string, 0) cells := make([]string, 0)
for _, t := range tokenizer.Split(row, tokenizer.Pipe)[1 : cols+1] { for _, t := range tokenizer.Split(row, tokenizer.Pipe)[1 : cols+1] {
cells = append(cells, tokenizer.Stringify(t[1:len(t)-1])) cells = append(cells, tokenizer.Stringify(t[1:len(t)-1]))
} }
rows = append(rows, cells) rowsStr = append(rowsStr, cells)
} }
size := len(headerTokens) + len(delimiterTokens) + 2
for _, row := range rows {
size += len(row)
}
size = size + len(rows) - 1
return &ast.Table{ return &ast.Table{
Header: header, Header: header,
Delimiter: delimiter, Delimiter: delimiter,
Rows: rows, Rows: rowsStr,
}, nil }, size
} }
func matchTableCellTokens(tokens []*tokenizer.Token) (int, bool) { func matchTableCellTokens(tokens []*tokenizer.Token) (int, bool) {
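To make the header/delimiter/row checks above concrete, here is a hedged sketch of the matcher applied to a three-line table. The expected field values follow from the code in this hunk, while the exact token boundaries depend on tokenizer behaviour not shown here.

// Sketch only: assumes the parser package context of this repo.
func tableSketch() ([]string, int) {
	text := "| a | b |\n| --- | --- |\n| 1 | 2 |"
	node, size := NewTableParser().Match(tokenizer.Tokenize(text))
	table, ok := node.(*ast.Table)
	if !ok {
		return nil, 0
	}
	// Expected (assumption): table.Header == {"a", "b"},
	// table.Delimiter == {"---", "---"}, table.Rows == {{"1", "2"}}, and
	// size spans the header, delimiter and matched row tokens plus the
	// newlines separating those three lines.
	return table.Header, size
}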

View File

@ -51,7 +51,7 @@ func TestTableParser(t *testing.T) {
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewTableParser().Parse(tokens) node, _ := NewTableParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.table}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.table}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,35 +11,27 @@ func NewTagParser() *TagParser {
return &TagParser{} return &TagParser{}
} }
func (*TagParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *TagParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 2 { matchedTokens := tokenizer.GetFirstLine(tokens)
return 0, false if len(matchedTokens) < 2 {
return nil, 0
} }
if tokens[0].Type != tokenizer.PoundSign { if matchedTokens[0].Type != tokenizer.PoundSign {
return 0, false return nil, 0
} }
contentTokens := []*tokenizer.Token{} contentTokens := []*tokenizer.Token{}
for _, token := range tokens[1:] { for _, token := range matchedTokens[1:] {
if token.Type == tokenizer.Newline || token.Type == tokenizer.Space || token.Type == tokenizer.PoundSign { if token.Type == tokenizer.Space || token.Type == tokenizer.PoundSign {
break break
} }
contentTokens = append(contentTokens, token) contentTokens = append(contentTokens, token)
} }
if len(contentTokens) == 0 { if len(contentTokens) == 0 {
return 0, false return nil, 0
} }
return len(contentTokens) + 1, true
}
func (p *TagParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
contentTokens := tokens[1:size]
return &ast.Tag{ return &ast.Tag{
Content: tokenizer.Stringify(contentTokens), Content: tokenizer.Stringify(contentTokens),
}, nil }, len(contentTokens) + 1
} }
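A short usage sketch for the tag matcher; the assumption that "memos" arrives as one text token is about the tokenizer, not something this diff guarantees.

// Sketch only: assumes the parser package context of this repo.
func tagSketch() (string, int) {
	node, size := NewTagParser().Match(tokenizer.Tokenize("#memos rest of the line"))
	tag, ok := node.(*ast.Tag)
	if !ok {
		return "", 0
	}
	// Expected (assumption): tag.Content == "memos" and size == 2, i.e. the
	// pound sign plus one text token, because the first space ends the match.
	return tag.Content, size
}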

View File

@ -39,7 +39,7 @@ func TestTagParser(t *testing.T) {
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewTagParser().Parse(tokens) node, _ := NewTagParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.tag}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.tag}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,71 +11,47 @@ func NewTaskListParser() *TaskListParser {
return &TaskListParser{} return &TaskListParser{}
} }
func (*TaskListParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *TaskListParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 7 { matchedTokens := tokenizer.GetFirstLine(tokens)
return 0, false
}
indent := 0 indent := 0
for _, token := range tokens { for _, token := range matchedTokens {
if token.Type == tokenizer.Space { if token.Type == tokenizer.Space {
indent++ indent++
} else { } else {
break break
} }
} }
symbolToken := tokens[indent] if len(matchedTokens) < indent+6 {
return nil, 0
}
symbolToken := matchedTokens[indent]
if symbolToken.Type != tokenizer.Hyphen && symbolToken.Type != tokenizer.Asterisk && symbolToken.Type != tokenizer.PlusSign { if symbolToken.Type != tokenizer.Hyphen && symbolToken.Type != tokenizer.Asterisk && symbolToken.Type != tokenizer.PlusSign {
return 0, false return nil, 0
} }
if tokens[indent+1].Type != tokenizer.Space { if matchedTokens[indent+1].Type != tokenizer.Space {
return 0, false return nil, 0
} }
if tokens[indent+2].Type != tokenizer.LeftSquareBracket || (tokens[indent+3].Type != tokenizer.Space && tokens[indent+3].Value != "x") || tokens[indent+4].Type != tokenizer.RightSquareBracket { if matchedTokens[indent+2].Type != tokenizer.LeftSquareBracket || (matchedTokens[indent+3].Type != tokenizer.Space && matchedTokens[indent+3].Value != "x") || matchedTokens[indent+4].Type != tokenizer.RightSquareBracket {
return 0, false return nil, 0
} }
if tokens[indent+5].Type != tokenizer.Space { if matchedTokens[indent+5].Type != tokenizer.Space {
return 0, false return nil, 0
} }
contentTokens := []*tokenizer.Token{} contentTokens := matchedTokens[indent+6:]
for _, token := range tokens[indent+6:] {
if token.Type == tokenizer.Newline {
break
}
contentTokens = append(contentTokens, token)
}
if len(contentTokens) == 0 { if len(contentTokens) == 0 {
return 0, false return nil, 0
} }
return indent + len(contentTokens) + 6, true
}
func (p *TaskListParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
indent := 0
for _, token := range tokens {
if token.Type == tokenizer.Space {
indent++
} else {
break
}
}
symbolToken := tokens[indent]
contentTokens := tokens[indent+6 : size]
children, err := ParseInline(contentTokens) children, err := ParseInline(contentTokens)
if err != nil { if err != nil {
return nil, err return nil, 0
} }
return &ast.TaskList{ return &ast.TaskList{
Symbol: symbolToken.Type, Symbol: symbolToken.Type,
Indent: indent, Indent: indent,
Complete: tokens[indent+3].Value == "x", Complete: matchedTokens[indent+3].Value == "x",
Children: children, Children: children,
}, nil }, indent + len(contentTokens) + 6
} }
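A hedged sketch of the task-list matcher on a checked item; the indent and the returned size follow the arithmetic above, while the inline children come from ParseInline, which sits outside this hunk.

// Sketch only: assumes the parser package context of this repo.
func taskListSketch() (bool, int) {
	node, size := NewTaskListParser().Match(tokenizer.Tokenize("- [x] ship it"))
	item, ok := node.(*ast.TaskList)
	if !ok {
		return false, 0
	}
	// Expected (assumption): item.Symbol == tokenizer.Hyphen, item.Indent == 0,
	// item.Complete == true, and size counts the "- [x] " prefix plus every
	// content token on the first line.
	return item.Complete, size
}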

View File

@ -65,7 +65,7 @@ func TestTaskListParser(t *testing.T) {
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewTaskListParser().Parse(tokens) node, _ := NewTaskListParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.node}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.node}), restore.Restore([]ast.Node{node}))
} }
} }

View File

@ -13,18 +13,11 @@ func NewTextParser() *TextParser {
return &TextParser{} return &TextParser{}
} }
func (*TextParser) Match(tokens []*tokenizer.Token) (int, bool) { func (*TextParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) == 0 { if len(tokens) == 0 {
return 0, false return nil, 0
}
return 1, true
}
func (*TextParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
if len(tokens) == 0 {
return &ast.Text{}, nil
} }
return &ast.Text{ return &ast.Text{
Content: tokens[0].String(), Content: tokens[0].String(),
}, nil }, 1
} }

View File

@ -2,6 +2,7 @@ package tokenizer
type TokenType = string type TokenType = string
// Special character tokens.
const ( const (
Underscore TokenType = "_" Underscore TokenType = "_"
Asterisk TokenType = "*" Asterisk TokenType = "*"
@ -29,6 +30,7 @@ const (
Space TokenType = " " Space TokenType = " "
) )
// Text based tokens.
const ( const (
Number TokenType = "number" Number TokenType = "number"
Text TokenType = "" Text TokenType = ""
@ -153,11 +155,29 @@ func Split(tokens []*Token, delimiter TokenType) [][]*Token {
return result return result
} }
func Find(tokens []*Token, delimiter TokenType) (int, bool) { func Find(tokens []*Token, target TokenType) int {
for index, token := range tokens { for i, token := range tokens {
if token.Type == delimiter { if token.Type == target {
return index, true return i
} }
} }
return 0, false return -1
}
func FindUnescaped(tokens []*Token, target TokenType) int {
for i, token := range tokens {
if token.Type == target && (i == 0 || (i > 0 && tokens[i-1].Type != Backslash)) {
return i
}
}
return -1
}
func GetFirstLine(tokens []*Token) []*Token {
for i, token := range tokens {
if token.Type == Newline {
return tokens[:i]
}
}
return tokens
} }
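The three helpers above replace the ad-hoc newline and bracket scanning that the individual matchers used to carry. A hedged illustration follows, assuming it sits inside the tokenizer package (so the helpers are called unqualified) and that a backslash is emitted as a Backslash token.

// Sketch only: token values in the comments are expectations, not asserted
// by this commit's tests.
func helpersSketch() {
	tokens := Tokenize("<https://example.com> tail\nsecond line")
	line := GetFirstLine(tokens) // every token before the first Newline
	if idx := FindUnescaped(line, GreaterThan); idx >= 0 {
		_ = Stringify(line[1:idx]) // expected (assumption): "https://example.com"
	}
	// Find keeps its first-occurrence semantics but now returns a plain
	// index, with -1 signalling "not found".
	_ = Find(line, Pipe) // -1 for this input
}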

View File

@ -1,8 +1,6 @@
package parser package parser
import ( import (
"errors"
"github.com/usememos/memos/plugin/gomark/ast" "github.com/usememos/memos/plugin/gomark/ast"
"github.com/usememos/memos/plugin/gomark/parser/tokenizer" "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
) )
@ -13,62 +11,36 @@ func NewUnorderedListParser() *UnorderedListParser {
return &UnorderedListParser{} return &UnorderedListParser{}
} }
func (*UnorderedListParser) Match(tokens []*tokenizer.Token) (int, bool) { func (p *UnorderedListParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
if len(tokens) < 3 { matchedTokens := tokenizer.GetFirstLine(tokens)
return 0, false
}
indent := 0 indent := 0
for _, token := range tokens { for _, token := range matchedTokens {
if token.Type == tokenizer.Space { if token.Type == tokenizer.Space {
indent++ indent++
} else { } else {
break break
} }
} }
corsor := indent if len(matchedTokens) < indent+2 {
symbolToken := tokens[corsor] return nil, 0
if (symbolToken.Type != tokenizer.Hyphen && symbolToken.Type != tokenizer.Asterisk && symbolToken.Type != tokenizer.PlusSign) || tokens[corsor+1].Type != tokenizer.Space {
return 0, false
} }
contentTokens := []*tokenizer.Token{} symbolToken := matchedTokens[indent]
for _, token := range tokens[corsor+2:] { if (symbolToken.Type != tokenizer.Hyphen && symbolToken.Type != tokenizer.Asterisk && symbolToken.Type != tokenizer.PlusSign) || matchedTokens[indent+1].Type != tokenizer.Space {
if token.Type == tokenizer.Newline { return nil, 0
break
}
contentTokens = append(contentTokens, token)
} }
contentTokens := matchedTokens[indent+2:]
if len(contentTokens) == 0 { if len(contentTokens) == 0 {
return 0, false return nil, 0
} }
return indent + len(contentTokens) + 2, true
}
func (p *UnorderedListParser) Parse(tokens []*tokenizer.Token) (ast.Node, error) {
size, ok := p.Match(tokens)
if size == 0 || !ok {
return nil, errors.New("not matched")
}
indent := 0
for _, token := range tokens {
if token.Type == tokenizer.Space {
indent++
} else {
break
}
}
symbolToken := tokens[indent]
contentTokens := tokens[indent+2 : size]
children, err := ParseInline(contentTokens) children, err := ParseInline(contentTokens)
if err != nil { if err != nil {
return nil, err return nil, 0
} }
return &ast.UnorderedList{ return &ast.UnorderedList{
Symbol: symbolToken.Type, Symbol: symbolToken.Type,
Indent: indent, Indent: indent,
Children: children, Children: children,
}, nil }, indent + len(contentTokens) + 2
} }
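Finally, a hedged sketch of the unordered-list matcher with leading indentation; the two-space indent surviving as two Space tokens is an assumption about the tokenizer.

// Sketch only: assumes the parser package context of this repo.
func unorderedListSketch() (int, int) {
	node, size := NewUnorderedListParser().Match(tokenizer.Tokenize("  * nested item\nnext line"))
	list, ok := node.(*ast.UnorderedList)
	if !ok {
		return 0, 0
	}
	// Expected (assumption): list.Symbol == tokenizer.Asterisk, list.Indent == 2,
	// and size covers the indent, the "* " marker and the first-line content,
	// but not the trailing newline.
	return list.Indent, size
}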

View File

@ -50,7 +50,7 @@ func TestUnorderedListParser(t *testing.T) {
for _, test := range tests { for _, test := range tests {
tokens := tokenizer.Tokenize(test.text) tokens := tokenizer.Tokenize(test.text)
node, _ := NewUnorderedListParser().Parse(tokens) node, _ := NewUnorderedListParser().Match(tokens)
require.Equal(t, restore.Restore([]ast.Node{test.node}), restore.Restore([]ast.Node{node})) require.Equal(t, restore.Restore([]ast.Node{test.node}), restore.Restore([]ast.Node{node}))
} }
} }