feat: implement gomark parsers

Steven
2023-12-13 21:00:13 +08:00
parent 2d9c5d16e1
commit 453707d18c
28 changed files with 625 additions and 209 deletions

View File

@@ -1,6 +1,7 @@
 package ast
 
 type BaseBlock struct {
+    Node
 }
 
 type LineBreak struct {
@@ -50,3 +51,28 @@ var NodeTypeHeading = NewNodeType("Heading")
 func (*Heading) Type() NodeType {
     return NodeTypeHeading
 }
+
+type HorizontalRule struct {
+    BaseBlock
+
+    // Symbol is "*" or "-" or "_".
+    Symbol string
+}
+
+var NodeTypeHorizontalRule = NewNodeType("HorizontalRule")
+
+func (*HorizontalRule) Type() NodeType {
+    return NodeTypeHorizontalRule
+}
+
+type Blockquote struct {
+    BaseBlock
+
+    Children []Node
+}
+
+var NodeTypeBlockquote = NewNodeType("Blockquote")
+
+func (*Blockquote) Type() NodeType {
+    return NodeTypeBlockquote
+}
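
Note: the new block nodes follow the file's existing pattern: embed BaseBlock, register a NodeType with NewNodeType, and implement Type(). A minimal sketch of how a consumer might branch on them; renderBlock and the HTML strings are hypothetical, not part of this commit:

package main

import "github.com/usememos/memos/plugin/gomark/ast"

// renderBlock is a hypothetical consumer of the new block nodes; only the
// ast types come from this commit, the HTML output is illustrative.
func renderBlock(node ast.Node) string {
    switch n := node.(type) {
    case *ast.HorizontalRule:
        // n.Symbol records whether "-", "*" or "_" was used; HTML drops it.
        return "<hr />"
    case *ast.Blockquote:
        out := ""
        for _, child := range n.Children {
            out += renderBlock(child)
        }
        return "<blockquote>" + out + "</blockquote>"
    default:
        return ""
    }
}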

View File

@@ -1,6 +1,8 @@
 package ast
 
-type BaseInline struct{}
+type BaseInline struct {
+    Node
+}
 
 type Text struct {
     BaseInline
@@ -28,6 +30,34 @@ func (*Bold) Type() NodeType {
     return NodeTypeBold
 }
 
+type Italic struct {
+    BaseInline
+
+    // Symbol is "*" or "_"
+    Symbol  string
+    Content string
+}
+
+var NodeTypeItalic = NewNodeType("Italic")
+
+func (*Italic) Type() NodeType {
+    return NodeTypeItalic
+}
+
+type BoldItalic struct {
+    BaseInline
+
+    // Symbol is "*" or "_"
+    Symbol  string
+    Content string
+}
+
+var NodeTypeBoldItalic = NewNodeType("BoldItalic")
+
+func (*BoldItalic) Type() NodeType {
+    return NodeTypeBoldItalic
+}
+
 type Code struct {
     BaseInline
@@ -66,20 +96,6 @@ func (*Link) Type() NodeType {
     return NodeTypeLink
 }
 
-type Italic struct {
-    BaseInline
-
-    // Symbol is "*" or "_"
-    Symbol  string
-    Content string
-}
-
-var NodeTypeItalic = NewNodeType("Italic")
-
-func (*Italic) Type() NodeType {
-    return NodeTypeItalic
-}
-
 type Tag struct {
     BaseInline
@@ -91,3 +107,15 @@ var NodeTypeTag = NewNodeType("Tag")
 func (*Tag) Type() NodeType {
     return NodeTypeTag
 }
+
+type Strikethrough struct {
+    BaseInline
+
+    Content string
+}
+
+var NodeTypeStrikethrough = NewNodeType("Strikethrough")
+
+func (*Strikethrough) Type() NodeType {
+    return NodeTypeStrikethrough
+}
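
Note: Italic, BoldItalic, and Strikethrough keep the raw Content (and the Symbol where more than one marker is allowed), so a consumer can reproduce the source markup. A hedged sketch; restoreInline is illustrative, not part of this commit:

package main

import (
    "strings"

    "github.com/usememos/memos/plugin/gomark/ast"
)

// restoreInline re-serializes the new inline nodes from their stored
// Symbol/Content fields; illustrative only.
func restoreInline(node ast.Node) string {
    switch n := node.(type) {
    case *ast.Italic:
        return n.Symbol + n.Content + n.Symbol
    case *ast.BoldItalic:
        marker := strings.Repeat(n.Symbol, 3)
        return marker + n.Content + marker
    case *ast.Strikethrough:
        return "~~" + n.Content + "~~"
    default:
        return ""
    }
}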

View File

@@ -0,0 +1,47 @@
+package parser
+
+import (
+    "github.com/usememos/memos/plugin/gomark/ast"
+    "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
+)
+
+type BlockquoteParser struct{}
+
+func NewBlockquoteParser() *BlockquoteParser {
+    return &BlockquoteParser{}
+}
+
+func (*BlockquoteParser) Match(tokens []*tokenizer.Token) (int, bool) {
+    if len(tokens) < 4 {
+        return 0, false
+    }
+    if tokens[0].Type != tokenizer.GreaterThan || tokens[1].Type != tokenizer.Space {
+        return 0, false
+    }
+
+    contentTokens := []*tokenizer.Token{}
+    for _, token := range tokens[2:] {
+        if token.Type == tokenizer.Newline {
+            break
+        }
+        contentTokens = append(contentTokens, token)
+    }
+    if len(contentTokens) == 0 {
+        return 0, false
+    }
+    return len(contentTokens) + 2, true
+}
+
+func (p *BlockquoteParser) Parse(tokens []*tokenizer.Token) ast.Node {
+    size, ok := p.Match(tokens)
+    if size == 0 || !ok {
+        return nil
+    }
+
+    contentTokens := tokens[2:size]
+    children := ParseInline(contentTokens)
+    return &ast.Blockquote{
+        Children: children,
+    }
+}
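
Note: every parser in this commit follows the same two-phase contract: Match returns how many tokens the construct spans, and Parse re-runs Match before building the node. For BlockquoteParser the size is len(contentTokens) + 2, the content plus the "> " prefix pair. A minimal usage sketch under that assumption; the printed format is illustrative:

package main

import (
    "fmt"

    "github.com/usememos/memos/plugin/gomark/parser"
    "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)

func main() {
    // "> Hello world" tokenizes to [>][ ][Hello][ ][world], five tokens.
    // Match consumes all of them: 3 content tokens + 2 prefix tokens.
    tokens := tokenizer.Tokenize("> Hello world")
    node := parser.NewBlockquoteParser().Parse(tokens)
    fmt.Printf("%+v\n", node) // a *ast.Blockquote with one Text child
}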

View File

@@ -0,0 +1,47 @@
+package parser
+
+import (
+    "testing"
+
+    "github.com/stretchr/testify/require"
+
+    "github.com/usememos/memos/plugin/gomark/ast"
+    "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
+)
+
+func TestBlockquoteParser(t *testing.T) {
+    tests := []struct {
+        text       string
+        blockquote ast.Node
+    }{
+        {
+            text: "> Hello world",
+            blockquote: &ast.Blockquote{
+                Children: []ast.Node{
+                    &ast.Text{
+                        Content: "Hello world",
+                    },
+                },
+            },
+        },
+        {
+            text: "> Hello\nworld",
+            blockquote: &ast.Blockquote{
+                Children: []ast.Node{
+                    &ast.Text{
+                        Content: "Hello",
+                    },
+                },
+            },
+        },
+        {
+            text:       ">Hello\nworld",
+            blockquote: nil,
+        },
+    }
+
+    for _, test := range tests {
+        tokens := tokenizer.Tokenize(test.text)
+        require.Equal(t, test.blockquote, NewBlockquoteParser().Parse(tokens))
+    }
+}

View File

@@ -7,10 +7,8 @@ import (
 
 type BoldParser struct{}
 
-var defaultBoldParser = &BoldParser{}
-
 func NewBoldParser() InlineParser {
-    return defaultBoldParser
+    return &BoldParser{}
 }
 
 func (*BoldParser) Match(tokens []*tokenizer.Token) (int, bool) {
@@ -23,7 +21,7 @@ func (*BoldParser) Match(tokens []*tokenizer.Token) (int, bool) {
         return 0, false
     }
     prefixTokenType := prefixTokens[0].Type
-    if prefixTokenType != tokenizer.Star && prefixTokenType != tokenizer.Underline {
+    if prefixTokenType != tokenizer.Asterisk && prefixTokenType != tokenizer.Underline {
         return 0, false
     }

View File

@@ -0,0 +1,58 @@
+package parser
+
+import (
+    "github.com/usememos/memos/plugin/gomark/ast"
+    "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
+)
+
+type BoldItalicParser struct{}
+
+func NewBoldItalicParser() InlineParser {
+    return &BoldItalicParser{}
+}
+
+func (*BoldItalicParser) Match(tokens []*tokenizer.Token) (int, bool) {
+    if len(tokens) < 7 {
+        return 0, false
+    }
+
+    prefixTokens := tokens[:3]
+    if prefixTokens[0].Type != prefixTokens[1].Type || prefixTokens[0].Type != prefixTokens[2].Type || prefixTokens[1].Type != prefixTokens[2].Type {
+        return 0, false
+    }
+    prefixTokenType := prefixTokens[0].Type
+    if prefixTokenType != tokenizer.Asterisk && prefixTokenType != tokenizer.Underline {
+        return 0, false
+    }
+
+    cursor, matched := 3, false
+    for ; cursor < len(tokens)-2; cursor++ {
+        token, nextToken, endToken := tokens[cursor], tokens[cursor+1], tokens[cursor+2]
+        if token.Type == tokenizer.Newline || nextToken.Type == tokenizer.Newline || endToken.Type == tokenizer.Newline {
+            return 0, false
+        }
+        if token.Type == prefixTokenType && nextToken.Type == prefixTokenType && endToken.Type == prefixTokenType {
+            matched = true
+            break
+        }
+    }
+    if !matched {
+        return 0, false
+    }
+    return cursor + 3, true
+}
+
+func (p *BoldItalicParser) Parse(tokens []*tokenizer.Token) ast.Node {
+    size, ok := p.Match(tokens)
+    if size == 0 || !ok {
+        return nil
+    }
+
+    prefixTokenType := tokens[0].Type
+    contentTokens := tokens[3 : size-3]
+    return &ast.BoldItalic{
+        Symbol:  prefixTokenType,
+        Content: tokenizer.Stringify(contentTokens),
+    }
+}

View File

@@ -0,0 +1,49 @@
+package parser
+
+import (
+    "testing"
+
+    "github.com/stretchr/testify/require"
+
+    "github.com/usememos/memos/plugin/gomark/ast"
+    "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
+)
+
+func TestBoldItalicParser(t *testing.T) {
+    tests := []struct {
+        text       string
+        boldItalic ast.Node
+    }{
+        {
+            text:       "*Hello world!",
+            boldItalic: nil,
+        },
+        {
+            text: "***Hello***",
+            boldItalic: &ast.BoldItalic{
+                Symbol:  "*",
+                Content: "Hello",
+            },
+        },
+        {
+            text: "*** Hello ***",
+            boldItalic: &ast.BoldItalic{
+                Symbol:  "*",
+                Content: " Hello ",
+            },
+        },
+        {
+            text:       "*** Hello * *",
+            boldItalic: nil,
+        },
+        {
+            text:       "*** Hello **",
+            boldItalic: nil,
+        },
+    }
+
+    for _, test := range tests {
+        tokens := tokenizer.Tokenize(test.text)
+        require.Equal(t, test.boldItalic, NewBoldItalicParser().Parse(tokens))
+    }
+}

View File

@@ -7,10 +7,8 @@ import (
 
 type CodeParser struct{}
 
-var defaultCodeParser = &CodeParser{}
-
 func NewCodeParser() *CodeParser {
-    return defaultCodeParser
+    return &CodeParser{}
 }
 
 func (*CodeParser) Match(tokens []*tokenizer.Token) (int, bool) {

View File

@@ -10,10 +10,8 @@ type CodeBlockParser struct {
     Content string
 }
 
-var defaultCodeBlockParser = &CodeBlockParser{}
-
 func NewCodeBlockParser() *CodeBlockParser {
-    return defaultCodeBlockParser
+    return &CodeBlockParser{}
 }
 
 func (*CodeBlockParser) Match(tokens []*tokenizer.Token) (int, bool) {

View File

@@ -62,11 +62,7 @@ func (p *HeadingParser) Parse(tokens []*tokenizer.Token) ast.Node {
         }
     }
 
     contentTokens := tokens[level+1 : size]
-    children := ParseInline(contentTokens, []InlineParser{
-        NewBoldParser(),
-        NewCodeParser(),
-        NewTextParser(),
-    })
+    children := ParseInline(contentTokens)
     return &ast.Heading{
         Level:    level,
         Children: children,

View File

@@ -0,0 +1,39 @@
+package parser
+
+import (
+    "github.com/usememos/memos/plugin/gomark/ast"
+    "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
+)
+
+type HorizontalRuleParser struct{}
+
+func NewHorizontalRuleParser() *HorizontalRuleParser {
+    return &HorizontalRuleParser{}
+}
+
+func (*HorizontalRuleParser) Match(tokens []*tokenizer.Token) (int, bool) {
+    if len(tokens) < 3 {
+        return 0, false
+    }
+    if tokens[0].Type != tokens[1].Type || tokens[0].Type != tokens[2].Type || tokens[1].Type != tokens[2].Type {
+        return 0, false
+    }
+    if tokens[0].Type != tokenizer.Dash && tokens[0].Type != tokenizer.Underline && tokens[0].Type != tokenizer.Asterisk {
+        return 0, false
+    }
+    if len(tokens) > 3 && tokens[3].Type != tokenizer.Newline {
+        return 0, false
+    }
+    return 3, true
+}
+
+func (p *HorizontalRuleParser) Parse(tokens []*tokenizer.Token) ast.Node {
+    size, ok := p.Match(tokens)
+    if size == 0 || !ok {
+        return nil
+    }
+
+    return &ast.HorizontalRule{
+        Symbol: tokens[0].Type,
+    }
+}

View File

@@ -0,0 +1,49 @@
+package parser
+
+import (
+    "testing"
+
+    "github.com/stretchr/testify/require"
+
+    "github.com/usememos/memos/plugin/gomark/ast"
+    "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
+)
+
+func TestHorizontalRuleParser(t *testing.T) {
+    tests := []struct {
+        text           string
+        horizontalRule ast.Node
+    }{
+        {
+            text: "---",
+            horizontalRule: &ast.HorizontalRule{
+                Symbol: "-",
+            },
+        },
+        {
+            text:           "****",
+            horizontalRule: nil,
+        },
+        {
+            text: "***",
+            horizontalRule: &ast.HorizontalRule{
+                Symbol: "*",
+            },
+        },
+        {
+            text:           "-*-",
+            horizontalRule: nil,
+        },
+        {
+            text: "___",
+            horizontalRule: &ast.HorizontalRule{
+                Symbol: "_",
+            },
+        },
+    }
+
+    for _, test := range tests {
+        tokens := tokenizer.Tokenize(test.text)
+        require.Equal(t, test.horizontalRule, NewHorizontalRuleParser().Parse(tokens))
+    }
+}

View File

@@ -7,10 +7,8 @@ import (
 
 type ImageParser struct{}
 
-var defaultImageParser = &ImageParser{}
-
 func NewImageParser() *ImageParser {
-    return defaultImageParser
+    return &ImageParser{}
 }
 
 func (*ImageParser) Match(tokens []*tokenizer.Token) (int, bool) {

View File

@@ -1,6 +1,9 @@
 package parser
 
-import "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
+import (
+    "github.com/usememos/memos/plugin/gomark/ast"
+    "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
+)
 
 type ItalicParser struct {
     ContentTokens []*tokenizer.Token
@@ -10,21 +13,21 @@ func NewItalicParser() *ItalicParser {
     return &ItalicParser{}
 }
 
-func (*ItalicParser) Match(tokens []*tokenizer.Token) *ItalicParser {
+func (*ItalicParser) Match(tokens []*tokenizer.Token) (int, bool) {
     if len(tokens) < 3 {
-        return nil
+        return 0, false
     }
 
     prefixTokens := tokens[:1]
-    if prefixTokens[0].Type != tokenizer.Star && prefixTokens[0].Type != tokenizer.Underline {
-        return nil
+    if prefixTokens[0].Type != tokenizer.Asterisk && prefixTokens[0].Type != tokenizer.Underline {
+        return 0, false
     }
     prefixTokenType := prefixTokens[0].Type
     contentTokens := []*tokenizer.Token{}
     matched := false
     for _, token := range tokens[1:] {
         if token.Type == tokenizer.Newline {
-            return nil
+            return 0, false
         }
         if token.Type == prefixTokenType {
             matched = true
@@ -33,10 +36,22 @@ func (*ItalicParser) Match(tokens []*tokenizer.Token) (int, bool) {
         contentTokens = append(contentTokens, token)
     }
     if !matched || len(contentTokens) == 0 {
+        return 0, false
+    }
+    return len(contentTokens) + 2, true
+}
+
+func (p *ItalicParser) Parse(tokens []*tokenizer.Token) ast.Node {
+    size, ok := p.Match(tokens)
+    if size == 0 || !ok {
         return nil
     }
 
-    return &ItalicParser{
-        ContentTokens: contentTokens,
+    prefixTokenType := tokens[0].Type
+    contentTokens := tokens[1 : size-1]
+    return &ast.Italic{
+        Symbol:  prefixTokenType,
+        Content: tokenizer.Stringify(contentTokens),
     }
 }

View File

@@ -5,13 +5,14 @@ import (
 
     "github.com/stretchr/testify/require"
 
+    "github.com/usememos/memos/plugin/gomark/ast"
     "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
 )
 
 func TestItalicParser(t *testing.T) {
     tests := []struct {
         text   string
-        italic *ItalicParser
+        italic ast.Node
     }{
         {
             text: "*Hello world!",
@@ -19,76 +20,29 @@ func TestItalicParser(t *testing.T) {
         },
         {
             text: "*Hello*",
-            italic: &ItalicParser{
-                ContentTokens: []*tokenizer.Token{
-                    {
-                        Type:  tokenizer.Text,
-                        Value: "Hello",
-                    },
-                },
+            italic: &ast.Italic{
+                Symbol:  "*",
+                Content: "Hello",
             },
         },
         {
             text: "* Hello *",
-            italic: &ItalicParser{
-                ContentTokens: []*tokenizer.Token{
-                    {
-                        Type:  tokenizer.Space,
-                        Value: " ",
-                    },
-                    {
-                        Type:  tokenizer.Text,
-                        Value: "Hello",
-                    },
-                    {
-                        Type:  tokenizer.Space,
-                        Value: " ",
-                    },
-                },
+            italic: &ast.Italic{
+                Symbol:  "*",
+                Content: " Hello ",
             },
         },
         {
             text:   "** Hello * *",
             italic: nil,
         },
         {
             text: "*1* Hello * *",
-            italic: &ItalicParser{
-                ContentTokens: []*tokenizer.Token{
-                    {
-                        Type:  tokenizer.Text,
-                        Value: "1",
-                    },
-                },
+            italic: &ast.Italic{
+                Symbol:  "*",
+                Content: "1",
             },
         },
-        {
-            text: `* \n * Hello * *`,
-            italic: &ItalicParser{
-                ContentTokens: []*tokenizer.Token{
-                    {
-                        Type:  tokenizer.Space,
-                        Value: " ",
-                    },
-                    {
-                        Type:  tokenizer.Text,
-                        Value: `\n`,
-                    },
-                    {
-                        Type:  tokenizer.Space,
-                        Value: " ",
-                    },
-                },
-            },
-        },
-        {
-            text:   "* \n * Hello * *",
-            italic: nil,
-        },
     }
 
     for _, test := range tests {
         tokens := tokenizer.Tokenize(test.text)
-        require.Equal(t, test.italic, NewItalicParser().Match(tokens))
+        require.Equal(t, test.italic, NewItalicParser().Parse(tokens))
     }
 }

View File

@@ -7,10 +7,8 @@ import (
 
 type LineBreakParser struct{}
 
-var defaultLineBreakParser = &LineBreakParser{}
-
 func NewLineBreakParser() *LineBreakParser {
-    return defaultLineBreakParser
+    return &LineBreakParser{}
 }
 
 func (*LineBreakParser) Match(tokens []*tokenizer.Token) (int, bool) {

View File

@@ -1,58 +1,72 @@
 package parser
 
-import "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
+import (
+    "github.com/usememos/memos/plugin/gomark/ast"
+    "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
+)
 
-type LinkParser struct {
-    ContentTokens []*tokenizer.Token
-    URL           string
-}
+type LinkParser struct{}
 
 func NewLinkParser() *LinkParser {
     return &LinkParser{}
 }
 
-func (*LinkParser) Match(tokens []*tokenizer.Token) *LinkParser {
-    if len(tokens) < 4 {
-        return nil
+func (*LinkParser) Match(tokens []*tokenizer.Token) (int, bool) {
+    if len(tokens) < 5 {
+        return 0, false
     }
     if tokens[0].Type != tokenizer.LeftSquareBracket {
-        return nil
+        return 0, false
     }
-    cursor, contentTokens := 1, []*tokenizer.Token{}
-    for ; cursor < len(tokens)-2; cursor++ {
-        if tokens[cursor].Type == tokenizer.Newline {
-            return nil
+    textTokens := []*tokenizer.Token{}
+    for _, token := range tokens[1:] {
+        if token.Type == tokenizer.Newline {
+            return 0, false
         }
-        if tokens[cursor].Type == tokenizer.RightSquareBracket {
+        if token.Type == tokenizer.RightSquareBracket {
             break
         }
-        contentTokens = append(contentTokens, tokens[cursor])
+        textTokens = append(textTokens, token)
     }
-    if tokens[cursor+1].Type != tokenizer.LeftParenthesis {
-        return nil
+    if len(textTokens)+4 >= len(tokens) {
+        return 0, false
     }
-    matched, url := false, ""
-    for _, token := range tokens[cursor+2:] {
+    if tokens[2+len(textTokens)].Type != tokenizer.LeftParenthesis {
+        return 0, false
+    }
+    urlTokens := []*tokenizer.Token{}
+    for _, token := range tokens[3+len(textTokens):] {
         if token.Type == tokenizer.Newline || token.Type == tokenizer.Space {
-            return nil
+            return 0, false
         }
         if token.Type == tokenizer.RightParenthesis {
-            matched = true
             break
         }
-        url += token.Value
+        urlTokens = append(urlTokens, token)
     }
-    if !matched || url == "" {
+    if 4+len(urlTokens)+len(textTokens) > len(tokens) {
+        return 0, false
+    }
+    return 4 + len(urlTokens) + len(textTokens), true
+}
+
+func (p *LinkParser) Parse(tokens []*tokenizer.Token) ast.Node {
+    size, ok := p.Match(tokens)
+    if size == 0 || !ok {
         return nil
     }
-    if len(contentTokens) == 0 {
-        contentTokens = append(contentTokens, &tokenizer.Token{
-            Type:  tokenizer.Text,
-            Value: url,
-        })
-    }
-    return &LinkParser{
-        ContentTokens: contentTokens,
-        URL:           url,
+    textTokens := []*tokenizer.Token{}
+    for _, token := range tokens[1:] {
+        if token.Type == tokenizer.RightSquareBracket {
+            break
+        }
+        textTokens = append(textTokens, token)
+    }
+    urlTokens := tokens[2+len(textTokens)+1 : size-1]
+    return &ast.Link{
+        Text: tokenizer.Stringify(textTokens),
+        URL:  tokenizer.Stringify(urlTokens),
     }
 }
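
Note: the size returned by LinkParser.Match spans the whole [text](url) construct: the four bracket and parenthesis tokens plus the text and URL tokens. A small sketch working through the arithmetic; the expected values are inferred from the diff, not from running the code:

package main

import (
    "fmt"

    "github.com/usememos/memos/plugin/gomark/parser"
    "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)

func main() {
    // "[hi](x)" tokenizes to "[", "hi", "]", "(", "x", ")": six tokens.
    // textTokens = ["hi"], urlTokens = ["x"], so Match should return
    // 4 + len(textTokens) + len(urlTokens) = 6, the full construct.
    tokens := tokenizer.Tokenize("[hi](x)")
    link := parser.NewLinkParser().Parse(tokens)
    fmt.Printf("%+v\n", link) // expected: Text "hi", URL "x"
}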

View File

@@ -5,23 +5,19 @@ import (
 
     "github.com/stretchr/testify/require"
 
+    "github.com/usememos/memos/plugin/gomark/ast"
     "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
 )
 
 func TestLinkParser(t *testing.T) {
     tests := []struct {
         text string
-        link *LinkParser
+        link ast.Node
     }{
         {
             text: "[](https://example.com)",
-            link: &LinkParser{
-                ContentTokens: []*tokenizer.Token{
-                    {
-                        Type:  tokenizer.Text,
-                        Value: "https://example.com",
-                    },
-                },
+            link: &ast.Link{
+                Text: "",
                 URL:  "https://example.com",
             },
         },
@@ -35,27 +31,14 @@ func TestLinkParser(t *testing.T) {
         },
         {
             text: "[hello world](https://example.com)",
-            link: &LinkParser{
-                ContentTokens: []*tokenizer.Token{
-                    {
-                        Type:  tokenizer.Text,
-                        Value: "hello",
-                    },
-                    {
-                        Type:  tokenizer.Space,
-                        Value: " ",
-                    },
-                    {
-                        Type:  tokenizer.Text,
-                        Value: "world",
-                    },
-                },
+            link: &ast.Link{
+                Text: "hello world",
                 URL:  "https://example.com",
             },
         },
     }
 
     for _, test := range tests {
         tokens := tokenizer.Tokenize(test.text)
-        require.Equal(t, test.link, NewLinkParser().Match(tokens))
+        require.Equal(t, test.link, NewLinkParser().Parse(tokens))
     }
 }

View File

@@ -9,10 +9,8 @@ type ParagraphParser struct {
     ContentTokens []*tokenizer.Token
 }
 
-var defaultParagraphParser = &ParagraphParser{}
-
 func NewParagraphParser() *ParagraphParser {
-    return defaultParagraphParser
+    return &ParagraphParser{}
 }
 
 func (*ParagraphParser) Match(tokens []*tokenizer.Token) (int, bool) {
@@ -38,10 +36,7 @@ func (p *ParagraphParser) Parse(tokens []*tokenizer.Token) ast.Node {
     }
 
     contentTokens := tokens[:size]
-    children := ParseInline(contentTokens, []InlineParser{
-        NewBoldParser(),
-        NewTextParser(),
-    })
+    children := ParseInline(contentTokens)
     return &ast.Paragraph{
         Children: children,
     }

View File

@@ -1,6 +1,8 @@
 package parser
 
 import (
+    "errors"
+
     "github.com/usememos/memos/plugin/gomark/ast"
     "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
 )
@@ -23,32 +25,51 @@ type BlockParser interface {
     BaseParser
 }
 
-func Parse(tokens []*tokenizer.Token) []ast.Node {
-    nodes := []ast.Node{}
-    blockParsers := []BlockParser{
-        NewCodeBlockParser(),
-        NewParagraphParser(),
-        NewLineBreakParser(),
-    }
+var defaultBlockParsers = []BlockParser{
+    NewCodeBlockParser(),
+    NewHorizontalRuleParser(),
+    NewHeadingParser(),
+    NewBlockquoteParser(),
+    NewParagraphParser(),
+    NewLineBreakParser(),
+}
+
+func Parse(tokens []*tokenizer.Token) ([]ast.Node, error) {
+    nodes := []ast.Node{}
     for len(tokens) > 0 {
-        for _, blockParser := range blockParsers {
+        for _, blockParser := range defaultBlockParsers {
             cursor, matched := blockParser.Match(tokens)
             if matched {
                 node := blockParser.Parse(tokens)
+                if node == nil {
+                    return nil, errors.New("parse error")
+                }
                 nodes = append(nodes, node)
                 tokens = tokens[cursor:]
                 break
             }
         }
     }
-    return nodes
+    return nodes, nil
 }
 
-func ParseInline(tokens []*tokenizer.Token, inlineParsers []InlineParser) []ast.Node {
+var defaultInlineParsers = []InlineParser{
+    NewBoldItalicParser(),
+    NewImageParser(),
+    NewLinkParser(),
+    NewBoldParser(),
+    NewItalicParser(),
+    NewCodeParser(),
+    NewTagParser(),
+    NewStrikethroughParser(),
+    NewTextParser(),
+}
+
+func ParseInline(tokens []*tokenizer.Token) []ast.Node {
     nodes := []ast.Node{}
     var lastNode ast.Node
     for len(tokens) > 0 {
-        for _, inlineParser := range inlineParsers {
+        for _, inlineParser := range defaultInlineParsers {
             cursor, matched := inlineParser.Match(tokens)
             if matched {
                 node := inlineParser.Parse(tokens)
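
Note: with the parser registries now package-level, callers no longer pass parser lists, and Parse returns an error when a matched parser fails to produce a node. defaultInlineParsers also deliberately lists BoldItalicParser ahead of BoldParser and ItalicParser, so that ***x*** is not claimed by the shorter matchers first. A minimal end-to-end sketch; how a NodeType prints depends on NewNodeType, which is not in this diff:

package main

import (
    "fmt"
    "log"

    "github.com/usememos/memos/plugin/gomark/parser"
    "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)

func main() {
    tokens := tokenizer.Tokenize("# Title\n> quoted **bold** text")
    nodes, err := parser.Parse(tokens)
    if err != nil {
        log.Fatal(err)
    }
    for _, node := range nodes {
        fmt.Println(node.Type()) // e.g. Heading, then Blockquote
    }
}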

View File

@@ -89,6 +89,8 @@ func TestParser(t *testing.T) {
     for _, test := range tests {
         tokens := tokenizer.Tokenize(test.text)
-        require.Equal(t, test.nodes, Parse(tokens))
+        nodes, err := Parse(tokens)
+        require.NoError(t, err)
+        require.Equal(t, test.nodes, nodes)
     }
 }

View File

@@ -0,0 +1,49 @@
+package parser
+
+import (
+    "github.com/usememos/memos/plugin/gomark/ast"
+    "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
+)
+
+type StrikethroughParser struct{}
+
+func NewStrikethroughParser() *StrikethroughParser {
+    return &StrikethroughParser{}
+}
+
+func (*StrikethroughParser) Match(tokens []*tokenizer.Token) (int, bool) {
+    if len(tokens) < 5 {
+        return 0, false
+    }
+    if tokens[0].Type != tokenizer.Tilde || tokens[1].Type != tokenizer.Tilde {
+        return 0, false
+    }
+
+    cursor, matched := 2, false
+    for ; cursor < len(tokens)-1; cursor++ {
+        token, nextToken := tokens[cursor], tokens[cursor+1]
+        if token.Type == tokenizer.Newline || nextToken.Type == tokenizer.Newline {
+            return 0, false
+        }
+        if token.Type == tokenizer.Tilde && nextToken.Type == tokenizer.Tilde {
+            matched = true
+            break
+        }
+    }
+    if !matched {
+        return 0, false
+    }
+    return cursor + 2, true
+}
+
+func (p *StrikethroughParser) Parse(tokens []*tokenizer.Token) ast.Node {
+    size, ok := p.Match(tokens)
+    if size == 0 || !ok {
+        return nil
+    }
+
+    contentTokens := tokens[2 : size-2]
+    return &ast.Strikethrough{
+        Content: tokenizer.Stringify(contentTokens),
+    }
+}

View File

@@ -0,0 +1,45 @@
+package parser
+
+import (
+    "testing"
+
+    "github.com/stretchr/testify/require"
+
+    "github.com/usememos/memos/plugin/gomark/ast"
+    "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
+)
+
+func TestStrikethroughParser(t *testing.T) {
+    tests := []struct {
+        text          string
+        strikethrough ast.Node
+    }{
+        {
+            text:          "~~Hello world",
+            strikethrough: nil,
+        },
+        {
+            text: "~~Hello~~",
+            strikethrough: &ast.Strikethrough{
+                Content: "Hello",
+            },
+        },
+        {
+            text: "~~ Hello ~~",
+            strikethrough: &ast.Strikethrough{
+                Content: " Hello ",
+            },
+        },
+        {
+            text: "~~1~~ Hello ~~~",
+            strikethrough: &ast.Strikethrough{
+                Content: "1",
+            },
+        },
+    }
+
+    for _, test := range tests {
+        tokens := tokenizer.Tokenize(test.text)
+        require.Equal(t, test.strikethrough, NewStrikethroughParser().Parse(tokens))
+    }
+}

View File

@@ -1,21 +1,22 @@
 package parser
 
-import "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
+import (
+    "github.com/usememos/memos/plugin/gomark/ast"
+    "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
+)
 
-type TagParser struct {
-    ContentTokens []*tokenizer.Token
-}
+type TagParser struct{}
 
 func NewTagParser() *TagParser {
     return &TagParser{}
 }
 
-func (*TagParser) Match(tokens []*tokenizer.Token) *TagParser {
+func (*TagParser) Match(tokens []*tokenizer.Token) (int, bool) {
     if len(tokens) < 2 {
-        return nil
+        return 0, false
     }
     if tokens[0].Type != tokenizer.Hash {
-        return nil
+        return 0, false
     }
 
     contentTokens := []*tokenizer.Token{}
     for _, token := range tokens[1:] {
@@ -25,10 +26,20 @@ func (*TagParser) Match(tokens []*tokenizer.Token) (int, bool) {
         contentTokens = append(contentTokens, token)
     }
     if len(contentTokens) == 0 {
+        return 0, false
+    }
+    return len(contentTokens) + 1, true
+}
+
+func (p *TagParser) Parse(tokens []*tokenizer.Token) ast.Node {
+    size, ok := p.Match(tokens)
+    if size == 0 || !ok {
         return nil
     }
-    return &TagParser{
-        ContentTokens: contentTokens,
+    contentTokens := tokens[1:size]
+    return &ast.Tag{
+        Content: tokenizer.Stringify(contentTokens),
     }
 }

View File

@@ -5,13 +5,14 @@ import (
 
     "github.com/stretchr/testify/require"
 
+    "github.com/usememos/memos/plugin/gomark/ast"
     "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
 )
 
 func TestTagParser(t *testing.T) {
     tests := []struct {
         text string
-        tag  *TagParser
+        tag  ast.Node
     }{
         {
             text: "*Hello world",
@@ -23,30 +24,20 @@ func TestTagParser(t *testing.T) {
         },
         {
             text: "#tag",
-            tag: &TagParser{
-                ContentTokens: []*tokenizer.Token{
-                    {
-                        Type:  tokenizer.Text,
-                        Value: "tag",
-                    },
-                },
+            tag: &ast.Tag{
+                Content: "tag",
             },
         },
         {
-            text: "#tag/subtag",
-            tag: &TagParser{
-                ContentTokens: []*tokenizer.Token{
-                    {
-                        Type:  tokenizer.Text,
-                        Value: "tag/subtag",
-                    },
-                },
+            text: "#tag/subtag 123",
+            tag: &ast.Tag{
+                Content: "tag/subtag",
             },
         },
     }
 
     for _, test := range tests {
         tokens := tokenizer.Tokenize(test.text)
-        require.Equal(t, test.tag, NewTagParser().Match(tokens))
+        require.Equal(t, test.tag, NewTagParser().Parse(tokens))
     }
 }

View File

@@ -9,10 +9,8 @@ type TextParser struct {
     Content string
 }
 
-var defaultTextParser = &TextParser{}
-
 func NewTextParser() *TextParser {
-    return defaultTextParser
+    return &TextParser{}
 }
 
 func (*TextParser) Match(tokens []*tokenizer.Token) (int, bool) {

View File

@@ -4,7 +4,7 @@ type TokenType = string
 const (
     Underline         TokenType = "_"
-    Star              TokenType = "*"
+    Asterisk          TokenType = "*"
     Hash              TokenType = "#"
     Backtick          TokenType = "`"
     LeftSquareBracket TokenType = "["
@@ -12,6 +12,9 @@ const (
     LeftParenthesis   TokenType = "("
     RightParenthesis  TokenType = ")"
     ExclamationMark   TokenType = "!"
+    Tilde             TokenType = "~"
+    Dash              TokenType = "-"
+    GreaterThan       TokenType = ">"
     Newline           TokenType = "\n"
     Space             TokenType = " "
 )
@@ -39,7 +42,7 @@ func Tokenize(text string) []*Token {
         case '_':
             tokens = append(tokens, NewToken(Underline, "_"))
         case '*':
-            tokens = append(tokens, NewToken(Star, "*"))
+            tokens = append(tokens, NewToken(Asterisk, "*"))
         case '#':
             tokens = append(tokens, NewToken(Hash, "#"))
         case '`':
@@ -54,6 +57,12 @@ func Tokenize(text string) []*Token {
             tokens = append(tokens, NewToken(RightParenthesis, ")"))
         case '!':
             tokens = append(tokens, NewToken(ExclamationMark, "!"))
+        case '~':
+            tokens = append(tokens, NewToken(Tilde, "~"))
+        case '-':
+            tokens = append(tokens, NewToken(Dash, "-"))
+        case '>':
+            tokens = append(tokens, NewToken(GreaterThan, ">"))
         case '\n':
            tokens = append(tokens, NewToken(Newline, "\n"))
         case ' ':
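
Note: the new parsers all call tokenizer.Stringify to flatten content tokens back into a string, but its definition is not part of this diff. Judging from the call sites, it presumably concatenates token values in order, roughly:

package tokenizer

// Presumed shape of Stringify; not part of this diff. The call sites use
// it as if it joins each token's Value back into the original text.
func Stringify(tokens []*Token) string {
    text := ""
    for _, token := range tokens {
        text += token.Value
    }
    return text
}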

View File

@@ -15,7 +15,7 @@ func TestTokenize(t *testing.T) {
             text: "*Hello world!",
             tokens: []*Token{
                 {
-                    Type:  Star,
+                    Type:  Asterisk,
                     Value: "*",
                 },
                 {