feat: add heading tokenizer (#1723)

2025-06-05 22:09:59 +02:00 · 2023-05-23 19:52:31 +08:00
parent 616b8b0ee6
commit fa53a2550a
5 changed files with 191 additions and 50 deletions
--- a/plugin/gomark/parser/heading.go
+++ b/plugin/gomark/parser/heading.go
@ -1,41 +1,52 @@
 package parser
 import (
-	"strings"
+	"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
 	"github.com/usememos/memos/plugin/gomark/ast"
 )
 // HeadingTokenizer describes a matched heading: how deep it is and which
 // tokens form its content. Match returns a populated value on success.
 type HeadingTokenizer struct {
 	// Level is the count of leading Hash tokens; Match only yields 1 through 6.
 	Level         int
 	// ContentTokens are the tokens after the space that follows the hashes,
 	// up to but not including the first Newline token.
 	ContentTokens []*tokenizer.Token
 }
 // NewHeadingTokenizer returns a pointer to a zero-valued HeadingTokenizer.
 func NewHeadingTokenizer() *HeadingTokenizer {
 	return new(HeadingTokenizer)
 }
-func (*HeadingTokenizer) Trigger() []byte {
 // Match (the "+" lines of this hunk) replaces the removed Trigger/Parse pair
 // (the "-" lines). It counts the leading Hash tokens into cursor, returns nil
 // unless more than cursor+1 tokens exist and the token right after the hashes
 // is a Space, and accepts only levels 1 through 6. The tokens after that
 // space, up to but not including the first Newline, become ContentTokens;
 // nil is also returned when no content tokens remain.
 // NOTE(review): some unprefixed lines below (the rune loop over `line`,
 // `return node`) look like leftovers of the removed Parse body that lost
 // their "-" marker — confirm against the actual file.
+func (*HeadingTokenizer) Match(tokens []*tokenizer.Token) *HeadingTokenizer {
-	return []byte{'#'}
+	cursor := 0
-}
+	for _, token := range tokens {
-
+		if token.Type == tokenizer.Hash {
-func (*HeadingTokenizer) Parse(parent *ast.Node, block string) *ast.Node {
+			cursor++
 	line := block
 	level := 0
 	for _, c := range line {
 		if c == '#' {
 			level++
 		} else if c == ' ' {
 			break
 		} else {
-			return nil
+			break
 		}
 	}
 	if len(tokens) <= cursor+1 {
 		return nil
 	}
 	if tokens[cursor].Type != tokenizer.Space {
 		return nil
 	}
 	level := cursor
 	if level == 0 || level > 6 {
 		return nil
 	}
-	text := strings.TrimSpace(line[level+1:])
+
-	node := ast.NewNode("h1", text)
+	cursor++
-	if parent != nil {
+	contentTokens := []*tokenizer.Token{}
-		parent.AddChild(node)
+	for _, token := range tokens[cursor:] {
 		if token.Type == tokenizer.Newline {
 			break
 		}
 		contentTokens = append(contentTokens, token)
 	}
 	if len(contentTokens) == 0 {
 		return nil
 	}
 	return &HeadingTokenizer{
 		Level:         level,
 		ContentTokens: contentTokens,
 	}
 	return node
 }
--- a/plugin/gomark/parser/heading_test.go
+++ b/plugin/gomark/parser/heading_test.go
@ -1 +1,95 @@
 package parser
 import (
 	"testing"
 	"github.com/stretchr/testify/require"
 	"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
 )
 // TestHeadingParser tokenizes each sample text and checks that
 // HeadingTokenizer.Match returns exactly the expected heading (or nil).
 func TestHeadingParser(t *testing.T) {
 	tests := []struct {
 		text    string
 		heading *HeadingTokenizer
 	}{
 		// No leading hash: not a heading.
 		{
 			text:    "*Hello world!",
 			heading: nil,
 		},
 		// Two hashes followed by a space: level-2 heading.
 		{
 			text: "## Hello World!",
 			heading: &HeadingTokenizer{
 				Level: 2,
 				ContentTokens: []*tokenizer.Token{
 					{
 						Type:  tokenizer.Text,
 						Value: "Hello",
 					},
 					{
 						Type:  tokenizer.Space,
 						Value: " ",
 					},
 					{
 						Type:  tokenizer.Text,
 						Value: "World!",
 					},
 				},
 			},
 		},
 		// Only the first hash run sets the level; the second hash is content.
 		{
 			text: "# # Hello World",
 			heading: &HeadingTokenizer{
 				Level: 1,
 				ContentTokens: []*tokenizer.Token{
 					{
 						Type:  tokenizer.Hash,
 						Value: "#",
 					},
 					{
 						Type:  tokenizer.Space,
 						Value: " ",
 					},
 					{
 						Type:  tokenizer.Text,
 						Value: "Hello",
 					},
 					{
 						Type:  tokenizer.Space,
 						Value: " ",
 					},
 					{
 						Type:  tokenizer.Text,
 						Value: "World",
 					},
 				},
 			},
 		},
 		// A leading space before the hash prevents a match.
 		{
 			text:    " # 123123 Hello World!",
 			heading: nil,
 		},
 		// Content stops at the newline; the trailing space before it is kept.
 		{
 			text: `# 123 
Hello World!`,
 			heading: &HeadingTokenizer{
 				Level: 1,
 				ContentTokens: []*tokenizer.Token{
 					{
 						Type:  tokenizer.Text,
 						Value: "123",
 					},
 					{
 						Type:  tokenizer.Space,
 						Value: " ",
 					},
 				},
 			},
 		},
 	}
 	for _, test := range tests {
 		tokens := tokenizer.Tokenize(test.text)
 		headingTokenizer := NewHeadingTokenizer()
 		require.Equal(t, test.heading, headingTokenizer.Match(tokens))
 	}
 }
--- a/plugin/gomark/parser/tokenizer/token.go
+++ b/plugin/gomark/parser/tokenizer/token.go
@ -1,27 +0,0 @@
 package tokenizer
 // TokenType is an alias of string, so token types are plain strings.
 type TokenType = string
 // Marker token types; each constant's value is the character it stands for.
 const (
 	Underline TokenType = "_"
 	Star      TokenType = "*"
 	Newline   TokenType = "\n"
 	Hash      TokenType = "#"
 	Space     TokenType = " "
 )
 // Text is declared separately from the marker types and has the empty string
 // as its value; the tests use it for tokens whose Value is a word such as "hello".
 const (
 	Text TokenType = ""
 )
 // Token is a single lexical unit: a type tag plus the text it carries.
 type Token struct {
 	// Type is one of the TokenType constants.
 	Type  TokenType
 	// Value is the token's text (e.g. "#" for a Hash token in the tests).
 	Value string
 }
 // NewToken allocates a Token whose Type is tp and whose Value is text.
 func NewToken(tp, text string) *Token {
 	tok := new(Token)
 	tok.Type = tp
 	tok.Value = text
 	return tok
 }
--- a/plugin/gomark/parser/tokenizer/tokenizer.go
+++ b/plugin/gomark/parser/tokenizer/tokenizer.go
@ -1,6 +1,32 @@
 package tokenizer
-func tokenize(text string) []*Token {
+type TokenType = string // TokenType is an alias of string, so token types are plain strings.
 // Marker token types; each constant's value is the character it stands for.
 const (
 	Underline TokenType = "_"
 	Star      TokenType = "*"
 	Hash      TokenType = "#"
 	Newline   TokenType = "\n"
 	Space     TokenType = " "
 )
 // Text is declared separately from the marker types and has the empty string
 // as its value; the tests use it for tokens whose Value is a word such as "hello".
 const (
 	Text TokenType = ""
 )
 // Token is a single lexical unit: a type tag plus the text it carries.
 type Token struct {
 	// Type is one of the TokenType constants.
 	Type  TokenType
 	// Value is the token's text (e.g. "#" for a Hash token).
 	Value string
 }
 // NewToken allocates a Token whose Type is tp and whose Value is text.
 func NewToken(tp, text string) *Token {
 	tok := new(Token)
 	tok.Type = tp
 	tok.Value = text
 	return tok
 }
 func Tokenize(text string) []*Token {
 	tokens := []*Token{}
 	for _, c := range text {
 		switch c {
@ -8,6 +34,8 @@ func tokenize(text string) []*Token {
 			tokens = append(tokens, NewToken(Underline, "_"))
 		case '*':
 			tokens = append(tokens, NewToken(Star, "*"))
 		case '#':
 			tokens = append(tokens, NewToken(Hash, "#"))
 		case '\n':
 			tokens = append(tokens, NewToken(Newline, "\n"))
 		case ' ':
--- a/plugin/gomark/parser/tokenizer/tokenizer_test.go
+++ b/plugin/gomark/parser/tokenizer/tokenizer_test.go
@ -32,9 +32,44 @@ func TestTokenize(t *testing.T) {
 				},
 			},
 		},
 		{
 			text: `# hello 
 world`,
 			tokens: []*Token{
 				{
 					Type:  Hash,
 					Value: "#",
 				},
 				{
 					Type:  Space,
 					Value: " ",
 				},
 				{
 					Type:  Text,
 					Value: "hello",
 				},
 				{
 					Type:  Space,
 					Value: " ",
 				},
 				{
 					Type:  Newline,
 					Value: "\n",
 				},
 				{
 					Type:  Space,
 					Value: " ",
 				},
 				{
 					Type:  Text,
 					Value: "world",
 				},
 			},
 		},
 	}
 	for _, test := range tests {
-		result := tokenize(test.text)
+		result := Tokenize(test.text)
 		require.Equal(t, test.tokens, result)
 	}
 }