feat: implement code block parser (#1727)

2025-06-05 22:09:59 +02:00 · 2023-05-24 00:31:37 +08:00
parent 42c653e1a4
commit 65890bc257
5 changed files with 191 additions and 0 deletions
--- a/plugin/gomark/parser/code.go
+++ b/plugin/gomark/parser/code.go
@@ -0,0 +1,38 @@
 package parser
 import "github.com/usememos/memos/plugin/gomark/parser/tokenizer"
 // CodeParser holds the result of matching an inline code span, i.e. text
 // wrapped in a pair of single backticks on one line.
 type CodeParser struct {
 	// Content is the concatenation of the token values found between the
 	// opening and the closing backtick.
 	Content string
 }
 // NewCodeParser returns a pointer to a zero-valued CodeParser, ready to have
 // Match called on it.
 func NewCodeParser() *CodeParser {
 	return new(CodeParser)
 }
 // Match checks whether tokens start with an inline code span.
 //
 // The first token must be a backtick. Every following token up to the next
 // backtick contributes its value to the span content. Match returns nil when
 // there are fewer than three tokens, when a newline is met before the closing
 // backtick, when no closing backtick exists, or when the span is empty.
 func (*CodeParser) Match(tokens []*tokenizer.Token) *CodeParser {
 	if len(tokens) < 3 || tokens[0].Type != tokenizer.Backtick {
 		return nil
 	}
 	text := ""
 	for i := 1; i < len(tokens); i++ {
 		switch tok := tokens[i]; tok.Type {
 		case tokenizer.Newline:
 			// Inline code may not span several lines.
 			return nil
 		case tokenizer.Backtick:
 			// Closing backtick reached; an empty span is not a match.
 			if text == "" {
 				return nil
 			}
 			return &CodeParser{Content: text}
 		default:
 			text += tok.Value
 		}
 	}
 	// Ran out of tokens without finding the closing backtick.
 	return nil
 }
--- a/plugin/gomark/parser/code_block.go
+++ b/plugin/gomark/parser/code_block.go
@@ -0,0 +1,52 @@
 package parser
 import (
 	"strings"
 
 	"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
 )
 // CodeBlockParser holds the result of matching a fenced code block, i.e.
 // content enclosed between two lines of three backticks.
 type CodeBlockParser struct {
 	// Language is the value of the single token that sits between the opening
 	// fence and the first newline; it is empty when the newline follows the
 	// fence directly.
 	Language string
 	// Content is the concatenation of the token values between the opening
 	// line and the newline that precedes the closing fence.
 	Content  string
 }
 // NewCodeBlockParser returns a pointer to a zero-valued CodeBlockParser, ready
 // to have Match called on it.
 func NewCodeBlockParser() *CodeBlockParser {
 	return new(CodeBlockParser)
 }
 // Match checks whether tokens start with a fenced code block and, if so,
 // returns a CodeBlockParser carrying its language and content; otherwise it
 // returns nil.
 //
 // The accepted shape is: three backticks, at most one language token, a
 // newline, the content tokens, a newline, and three closing backticks that are
 // either the final tokens or are immediately followed by a newline. Inputs
 // shorter than nine tokens are rejected outright.
 func (*CodeBlockParser) Match(tokens []*tokenizer.Token) *CodeBlockParser {
 	if len(tokens) < 9 {
 		return nil
 	}
 	// Opening fence.
 	for _, token := range tokens[:3] {
 		if token.Type != tokenizer.Backtick {
 			return nil
 		}
 	}
 	// The opening line ends either right after the fence or after exactly one
 	// token, which is then taken as the language.
 	language, cursor := "", 4
 	switch {
 	case tokens[3].Type == tokenizer.Newline:
 		// No language given; content starts at index 4.
 	case tokens[4].Type == tokenizer.Newline:
 		language = tokens[3].Value
 		cursor = 5
 	default:
 		return nil
 	}
 	// Accumulate content in a builder so large blocks are not re-copied on
 	// every token.
 	var content strings.Builder
 	last := len(tokens) - 1
 	for ; cursor+3 <= last; cursor++ {
 		closingFence := tokens[cursor].Type == tokenizer.Newline &&
 			tokens[cursor+1].Type == tokenizer.Backtick &&
 			tokens[cursor+2].Type == tokenizer.Backtick &&
 			tokens[cursor+3].Type == tokenizer.Backtick
 		// A closing fence only counts when nothing else shares its line. The
 		// cursor+4 access is safe because cursor+3 < last in that branch.
 		if closingFence && (cursor+3 == last || tokens[cursor+4].Type == tokenizer.Newline) {
 			return &CodeBlockParser{
 				Language: language,
 				Content:  content.String(),
 			}
 		}
 		content.WriteString(tokens[cursor].Value)
 	}
 	// No valid closing fence was found.
 	return nil
 }
--- a/plugin/gomark/parser/code_block_test.go
+++ b/plugin/gomark/parser/code_block_test.go
@@ -0,0 +1,62 @@
 package parser
 import (
 	"testing"
 	"github.com/stretchr/testify/require"
 	"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
 )
 // TestCodeBlockParser tokenizes each sample text and compares the result of
 // CodeBlockParser.Match with the expected value (nil meaning "no match").
 func TestCodeBlockParser(t *testing.T) {
 	type testCase struct {
 		input string
 		want  *CodeBlockParser
 	}
 	cases := []testCase{
 		// Fences without surrounding newlines are not a block.
 		{input: "```Hello world!```", want: nil},
 		{input: "```\nHello\n```", want: &CodeBlockParser{Content: "Hello"}},
 		{input: "```\nHello world!\n```", want: &CodeBlockParser{Content: "Hello world!"}},
 		{
 			input: "```java\nHello \n world!\n```",
 			want:  &CodeBlockParser{Language: "java", Content: "Hello \n world!"},
 		},
 		// Anything other than a newline after the closing fence rejects it.
 		{input: "```java\nHello \n world!\n```111", want: nil},
 		{input: "```java\nHello \n world!\n``` 111", want: nil},
 		// Text on the lines after the closing fence is ignored.
 		{
 			input: "```java\nHello \n world!\n```\n123123",
 			want:  &CodeBlockParser{Language: "java", Content: "Hello \n world!"},
 		},
 	}
 	for i := range cases {
 		got := NewCodeBlockParser().Match(tokenizer.Tokenize(cases[i].input))
 		require.Equal(t, cases[i].want, got)
 	}
 }
--- a/plugin/gomark/parser/code_test.go
+++ b/plugin/gomark/parser/code_test.go
@@ -0,0 +1,36 @@
 package parser
 import (
 	"testing"
 	"github.com/stretchr/testify/require"
 	"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
 )
 // TestCodeParser tokenizes each sample text and compares the result of
 // CodeParser.Match with the expected value (nil meaning "no match").
 func TestCodeParser(t *testing.T) {
 	type testCase struct {
 		input string
 		want  *CodeParser
 	}
 	cases := []testCase{
 		// Missing closing backtick.
 		{input: "`Hello world!", want: nil},
 		{input: "`Hello world!`", want: &CodeParser{Content: "Hello world!"}},
 		// A newline inside the span rejects it.
 		{input: "`Hello \nworld!`", want: nil},
 	}
 	for i := range cases {
 		got := NewCodeParser().Match(tokenizer.Tokenize(cases[i].input))
 		require.Equal(t, cases[i].want, got)
 	}
 }
--- a/plugin/gomark/parser/tokenizer/tokenizer.go
+++ b/plugin/gomark/parser/tokenizer/tokenizer.go
@@ -6,6 +6,7 @@ const (
 	Underline TokenType = "_"
 	Star      TokenType = "*"
 	Hash      TokenType = "#"
 	Backtick  TokenType = "`"
 	Newline   TokenType = "\n"
 	Space     TokenType = " "
 )
@@ -38,6 +39,8 @@ func Tokenize(text string) []*Token {
 			tokens = append(tokens, NewToken(Hash, "#"))
 		case '\n':
 			tokens = append(tokens, NewToken(Newline, "\n"))
 		case '`':
 			tokens = append(tokens, NewToken(Backtick, "`"))
 		case ' ':
 			tokens = append(tokens, NewToken(Space, " "))
 		default: