diff --git a/go.mod b/go.mod index dba4f0e22..79b4eb0d1 100644 --- a/go.mod +++ b/go.mod @@ -47,7 +47,7 @@ require ( github.com/superseriousbusiness/activity v1.4.0-gts github.com/superseriousbusiness/exif-terminator v0.6.0 github.com/superseriousbusiness/oauth2/v4 v4.3.2-SSB.0.20230227143000-f4900831d6c8 - github.com/tdewolff/minify/v2 v2.20.9 + github.com/tdewolff/minify/v2 v2.20.12 github.com/technologize/otel-go-contrib v1.1.0 github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 github.com/ulule/limiter/v3 v3.11.2 @@ -160,7 +160,7 @@ require ( github.com/subosito/gotenv v1.4.2 // indirect github.com/superseriousbusiness/go-jpeg-image-structure/v2 v2.0.0-20220321154430-d89a106fdabe // indirect github.com/superseriousbusiness/go-png-image-structure/v2 v2.0.1-SSB // indirect - github.com/tdewolff/parse/v2 v2.7.6 // indirect + github.com/tdewolff/parse/v2 v2.7.7 // indirect github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.2.11 // indirect diff --git a/go.sum b/go.sum index 31f5e05d2..316052452 100644 --- a/go.sum +++ b/go.sum @@ -511,12 +511,13 @@ github.com/superseriousbusiness/go-png-image-structure/v2 v2.0.1-SSB h1:8psprYSK github.com/superseriousbusiness/go-png-image-structure/v2 v2.0.1-SSB/go.mod h1:ymKGfy9kg4dIdraeZRAdobMS/flzLk3VcRPLpEWOAXg= github.com/superseriousbusiness/oauth2/v4 v4.3.2-SSB.0.20230227143000-f4900831d6c8 h1:nTIhuP157oOFcscuoK1kCme1xTeGIzztSw70lX9NrDQ= github.com/superseriousbusiness/oauth2/v4 v4.3.2-SSB.0.20230227143000-f4900831d6c8/go.mod h1:uYC/W92oVRJ49Vh1GcvTqpeFqHi+Ovrl2sMllQWRAEo= -github.com/tdewolff/minify/v2 v2.20.9 h1:0RGsL+jBpm77obkuNCjNZ2eiN81CZzTnjeVmTqxCmYk= -github.com/tdewolff/minify/v2 v2.20.9/go.mod h1:hZnNtFqXVQ5QIAR05tdgvS7h6E80jyRwHSGVmM4jbzQ= -github.com/tdewolff/parse/v2 v2.7.6 h1:PGZH2b/itDSye9RatReRn4GBhsT+KFEMtAMjHRuY1h8= -github.com/tdewolff/parse/v2 v2.7.6/go.mod h1:3FbJWZp3XT9OWVN3Hmfp0p/a08v4h8J9W1aghka0soA= -github.com/tdewolff/test v1.0.11-0.20231101010635-f1265d231d52 h1:gAQliwn+zJrkjAHVcBEYW/RFvd2St4yYimisvozAYlA= +github.com/tdewolff/minify/v2 v2.20.12 h1:ie5+91QGUUeEDbLkexhx2tlI9BQgwwnfY+/Qdj4BlQ4= +github.com/tdewolff/minify/v2 v2.20.12/go.mod h1:8ktdncc9Rh41MkTX2KYaicHT9+VnpvIDjCyIVsr/nN8= +github.com/tdewolff/parse/v2 v2.7.7 h1:V+50eFDH7Piw4IBwH8D8FtYeYbZp3T4SCtIvmBSIMyc= +github.com/tdewolff/parse/v2 v2.7.7/go.mod h1:3FbJWZp3XT9OWVN3Hmfp0p/a08v4h8J9W1aghka0soA= github.com/tdewolff/test v1.0.11-0.20231101010635-f1265d231d52/go.mod h1:6DAvZliBAAnD7rhVgwaM7DE5/d9NMOAJ09SqYqeK4QE= +github.com/tdewolff/test v1.0.11-0.20240106005702-7de5f7df4739 h1:IkjBCtQOOjIn03u/dMQK9g+Iw9ewps4mCl1nB8Sscbo= +github.com/tdewolff/test v1.0.11-0.20240106005702-7de5f7df4739/go.mod h1:XPuWBzvdUzhCuxWO1ojpXsyzsA5bFoS3tO/Q3kFuTG8= github.com/technologize/otel-go-contrib v1.1.0 h1:gl9bxxJAgXFnKJzoprJOfbvNRE1k3Ky9O7ppVJDb9gg= github.com/technologize/otel-go-contrib v1.1.0/go.mod h1:dCN/wj2WyUO8aFZFdIN+6tfJHImjTML/8r2YVYAy3So= github.com/tidwall/btree v0.0.0-20191029221954-400434d76274 h1:G6Z6HvJuPjG6XfNGi/feOATzeJrfgTNJY+rGrHbA04E= diff --git a/vendor/github.com/tdewolff/minify/v2/html/html.go b/vendor/github.com/tdewolff/minify/v2/html/html.go index 1a5aa9450..ea817037b 100644 --- a/vendor/github.com/tdewolff/minify/v2/html/html.go +++ b/vendor/github.com/tdewolff/minify/v2/html/html.go @@ -126,6 +126,7 @@ func (o *Minifier) Minify(m *minify.M, w io.Writer, r io.Reader, _ map[string]st } w.Write(t.Data) } + omitSpace = false case 
html.MathToken:
 		if err := m.MinifyMimetype(mathMimeBytes, w, buffer.NewReader(t.Data), nil); err != nil {
 			if err != minify.ErrNotExist {
@@ -133,6 +134,7 @@ func (o *Minifier) Minify(m *minify.M, w io.Writer, r io.Reader, _ map[string]st
 			}
 			w.Write(t.Data)
 		}
+		omitSpace = false
 	case html.TextToken:
 		if t.HasTemplate {
 			w.Write(t.Data)
diff --git a/vendor/github.com/tdewolff/parse/v2/css/README.md b/vendor/github.com/tdewolff/parse/v2/css/README.md
new file mode 100644
index 000000000..02797a711
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/v2/css/README.md
@@ -0,0 +1,170 @@
+# CSS [![API reference](https://img.shields.io/badge/godoc-reference-5272B4)](https://pkg.go.dev/github.com/tdewolff/parse/v2/css?tab=doc)
+
+This package is a CSS3 lexer and parser written in [Go][1]. Both follow the specification at [CSS Syntax Module Level 3](http://www.w3.org/TR/css-syntax-3/). The lexer takes an io.Reader and converts it into tokens until EOF. The parser returns a parse tree of the full io.Reader input stream, but the low-level `Next` function can be used for stream parsing to return grammar units until EOF.
+
+## Installation
+Run the following command
+
+    go get -u github.com/tdewolff/parse/v2/css
+
+or add the following import and run the project with `go get`
+
+    import "github.com/tdewolff/parse/v2/css"
+
+## Lexer
+### Usage
+The following initializes a new Lexer with io.Reader `r`:
+``` go
+l := css.NewLexer(parse.NewInput(r))
+```
+
+To tokenize until EOF or an error, use:
+``` go
+for {
+	tt, text := l.Next()
+	switch tt {
+	case css.ErrorToken:
+		// error or EOF set in l.Err()
+		return
+	// ...
+	}
+}
+```
+
+All tokens (see [CSS Syntax Module Level 3](http://www.w3.org/TR/css3-syntax/)):
+``` go
+ErrorToken // non-official token, returned when errors occur
+IdentToken
+FunctionToken // rgb( rgba( ...
+AtKeywordToken // @abc
+HashToken // #abc
+StringToken
+BadStringToken
+URLToken // url(
+BadURLToken
+DelimToken // any unmatched character
+NumberToken // 5
+PercentageToken // 5%
+DimensionToken // 5em
+UnicodeRangeToken
+IncludeMatchToken // ~=
+DashMatchToken // |=
+PrefixMatchToken // ^=
+SuffixMatchToken // $=
+SubstringMatchToken // *=
+ColumnToken // ||
+WhitespaceToken
+CDOToken // <!--
+CDCToken // -->
+ColonToken
+SemicolonToken
+CommaToken
+BracketToken // ( ) [ ] { }, all bracket tokens use this, Data() can distinguish between the brackets
+CommentToken // non-official token
+```
+
+### Examples
+``` go
+package main
+
+import (
+	"fmt"
+	"io"
+	"os"
+
+	"github.com/tdewolff/parse/v2"
+	"github.com/tdewolff/parse/v2/css"
+)
+
+// Tokenize CSS3 from stdin.
+func main() {
+	l := css.NewLexer(parse.NewInput(os.Stdin))
+	for {
+		tt, text := l.Next()
+		switch tt {
+		case css.ErrorToken:
+			if l.Err() != io.EOF {
+				fmt.Println("Error:", l.Err())
+			}
+			return
+		case css.IdentToken:
+			fmt.Println("Identifier", string(text))
+		case css.NumberToken:
+			fmt.Println("Number", string(text))
+		// ...
+		}
+	}
+}
+```
+
+## Parser
+### Usage
+The following creates a new Parser.
+``` go
+// true because this is the content of an inline style attribute
+p := css.NewParser(parse.NewInput(bytes.NewBufferString("color: red;")), true)
+```
+
+To iterate over the stylesheet, use:
+``` go
+for {
+	gt, _, data := p.Next()
+	if gt == css.ErrorGrammar {
+		break
+	}
+	// ...
+}
+```
+
+All grammar units returned by `Next`:
+``` go
+ErrorGrammar
+CommentGrammar
+AtRuleGrammar
+BeginAtRuleGrammar
+EndAtRuleGrammar
+QualifiedRuleGrammar
+BeginRulesetGrammar
+EndRulesetGrammar
+DeclarationGrammar
+TokenGrammar
+CustomPropertyGrammar
+```
+
+### Examples
+``` go
+package main
+
+import (
+	"bytes"
+	"fmt"
+
+	"github.com/tdewolff/parse/v2"
+	"github.com/tdewolff/parse/v2/css"
+)
+
+func main() {
+	// true because this is the content of an inline style attribute
+	p := css.NewParser(parse.NewInput(bytes.NewBufferString("color: red;")), true)
+	out := ""
+	for {
+		gt, _, data := p.Next()
+		if gt == css.ErrorGrammar {
+			break
+		} else if gt == css.AtRuleGrammar || gt == css.BeginAtRuleGrammar || gt == css.BeginRulesetGrammar || gt == css.DeclarationGrammar {
+			out += string(data)
+			if gt == css.DeclarationGrammar {
+				out += ":"
+			}
+			for _, val := range p.Values() {
+				out += string(val.Data)
+			}
+			if gt == css.BeginAtRuleGrammar || gt == css.BeginRulesetGrammar {
+				out += "{"
+			} else if gt == css.AtRuleGrammar || gt == css.DeclarationGrammar {
+				out += ";"
+			}
+		} else {
+			out += string(data)
+		}
+	}
+	fmt.Println(out)
+}
+```
+
+## License
+Released under the [MIT license](https://github.com/tdewolff/parse/blob/master/LICENSE.md).
+
+[1]: http://golang.org/ "Go Language"
diff --git a/vendor/github.com/tdewolff/parse/v2/css/hash.go b/vendor/github.com/tdewolff/parse/v2/css/hash.go
new file mode 100644
index 000000000..25d2f7cf0
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/v2/css/hash.go
@@ -0,0 +1,75 @@
+package css
+
+// generated by hasher -type=Hash -file=hash.go; DO NOT EDIT, except for adding more constants to the list and rerun go generate
+
+// uses github.com/tdewolff/hasher
+//go:generate hasher -type=Hash -file=hash.go
+
+// Hash defines perfect hashes for a predefined list of strings
+type Hash uint32
+
+// Unique hash definitions to be used instead of strings
+const (
+	Document  Hash = 0x8    // document
+	Font_Face Hash = 0x809  // font-face
+	Keyframes Hash = 0x1109 // keyframes
+	Media     Hash = 0x2105 // media
+	Page      Hash = 0x2604 // page
+	Supports  Hash = 0x1908 // supports
+)
+
+// String returns the hash' name.
+func (i Hash) String() string {
+	start := uint32(i >> 8)
+	n := uint32(i & 0xff)
+	if start+n > uint32(len(_Hash_text)) {
+		return ""
+	}
+	return _Hash_text[start : start+n]
+}
+
+// ToHash returns the hash whose name is s. It returns zero if there is no
+// such hash. It is case sensitive.
+func ToHash(s []byte) Hash {
+	if len(s) == 0 || len(s) > _Hash_maxLen {
+		return 0
+	}
+	h := uint32(_Hash_hash0)
+	for i := 0; i < len(s); i++ {
+		h ^= uint32(s[i])
+		h *= 16777619
+	}
+	if i := _Hash_table[h&uint32(len(_Hash_table)-1)]; int(i&0xff) == len(s) {
+		t := _Hash_text[i>>8 : i>>8+i&0xff]
+		for i := 0; i < len(s); i++ {
+			if t[i] != s[i] {
+				goto NEXT
+			}
+		}
+		return i
+	}
+NEXT:
+	if i := _Hash_table[(h>>16)&uint32(len(_Hash_table)-1)]; int(i&0xff) == len(s) {
+		t := _Hash_text[i>>8 : i>>8+i&0xff]
+		for i := 0; i < len(s); i++ {
+			if t[i] != s[i] {
+				return 0
+			}
+		}
+		return i
+	}
+	return 0
+}
+
+const _Hash_hash0 = 0x9acb0442
+const _Hash_maxLen = 9
+const _Hash_text = "documentfont-facekeyframesupportsmediapage"
+
+var _Hash_table = [1 << 3]Hash{
+	0x1: 0x2604, // page
+	0x2: 0x2105, // media
+	0x3: 0x809,  // font-face
+	0x5: 0x1109, // keyframes
+	0x6: 0x1908, // supports
+	0x7: 0x8,    // document
+}
diff --git a/vendor/github.com/tdewolff/parse/v2/css/lex.go b/vendor/github.com/tdewolff/parse/v2/css/lex.go
new file mode 100644
index 000000000..3d1ff7ea3
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/v2/css/lex.go
@@ -0,0 +1,698 @@
+// Package css is a CSS3 lexer and parser following the specifications at http://www.w3.org/TR/css-syntax-3/.
+package css
+
+// TODO: \uFFFD replacement character for NULL bytes in strings for example, or atleast don't end the string early
+
+import (
+	"bytes"
+	"io"
+	"strconv"
+
+	"github.com/tdewolff/parse/v2"
+)
+
+// TokenType determines the type of token, eg. a number or a semicolon.
+type TokenType uint32
+
+// TokenType values.
+const (
+	ErrorToken TokenType = iota // extra token when errors occur
+	IdentToken
+	FunctionToken  // rgb( rgba( ...
+	AtKeywordToken // @abc
+	HashToken      // #abc
+	StringToken
+	BadStringToken
+	URLToken
+	BadURLToken
+	DelimToken            // any unmatched character
+	NumberToken           // 5
+	PercentageToken       // 5%
+	DimensionToken        // 5em
+	UnicodeRangeToken     // U+554A
+	IncludeMatchToken     // ~=
+	DashMatchToken        // |=
+	PrefixMatchToken      // ^=
+	SuffixMatchToken      // $=
+	SubstringMatchToken   // *=
+	ColumnToken           // ||
+	WhitespaceToken       // space \t \r \n \f
+	CDOToken              // <!--
+	CDCToken              // -->
+	ColonToken            // :
+	SemicolonToken        // ;
+	CommaToken            // ,
+	LeftBracketToken      // [
+	RightBracketToken     // ]
+	LeftParenthesisToken  // (
+	RightParenthesisToken // )
+	LeftBraceToken        // {
+	RightBraceToken       // }
+	CommentToken          // extra token for comments
+	EmptyToken
+	CustomPropertyNameToken
+	CustomPropertyValueToken
+)
+
+// String returns the string representation of a TokenType.
+func (tt TokenType) String() string { + switch tt { + case ErrorToken: + return "Error" + case IdentToken: + return "Ident" + case FunctionToken: + return "Function" + case AtKeywordToken: + return "AtKeyword" + case HashToken: + return "Hash" + case StringToken: + return "String" + case BadStringToken: + return "BadString" + case URLToken: + return "URL" + case BadURLToken: + return "BadURL" + case DelimToken: + return "Delim" + case NumberToken: + return "Number" + case PercentageToken: + return "Percentage" + case DimensionToken: + return "Dimension" + case UnicodeRangeToken: + return "UnicodeRange" + case IncludeMatchToken: + return "IncludeMatch" + case DashMatchToken: + return "DashMatch" + case PrefixMatchToken: + return "PrefixMatch" + case SuffixMatchToken: + return "SuffixMatch" + case SubstringMatchToken: + return "SubstringMatch" + case ColumnToken: + return "Column" + case WhitespaceToken: + return "Whitespace" + case CDOToken: + return "CDO" + case CDCToken: + return "CDC" + case ColonToken: + return "Colon" + case SemicolonToken: + return "Semicolon" + case CommaToken: + return "Comma" + case LeftBracketToken: + return "LeftBracket" + case RightBracketToken: + return "RightBracket" + case LeftParenthesisToken: + return "LeftParenthesis" + case RightParenthesisToken: + return "RightParenthesis" + case LeftBraceToken: + return "LeftBrace" + case RightBraceToken: + return "RightBrace" + case CommentToken: + return "Comment" + case EmptyToken: + return "Empty" + case CustomPropertyNameToken: + return "CustomPropertyName" + case CustomPropertyValueToken: + return "CustomPropertyValue" + } + return "Invalid(" + strconv.Itoa(int(tt)) + ")" +} + +//////////////////////////////////////////////////////////////// + +// Lexer is the state for the lexer. +type Lexer struct { + r *parse.Input +} + +// NewLexer returns a new Lexer for a given io.Reader. +func NewLexer(r *parse.Input) *Lexer { + return &Lexer{ + r: r, + } +} + +// Err returns the error encountered during lexing, this is often io.EOF but also other errors can be returned. +func (l *Lexer) Err() error { + return l.r.Err() +} + +// Next returns the next Token. It returns ErrorToken when an error was encountered. Using Err() one can retrieve the error message. 
+func (l *Lexer) Next() (TokenType, []byte) { + switch l.r.Peek(0) { + case ' ', '\t', '\n', '\r', '\f': + l.r.Move(1) + for l.consumeWhitespace() { + } + return WhitespaceToken, l.r.Shift() + case ':': + l.r.Move(1) + return ColonToken, l.r.Shift() + case ';': + l.r.Move(1) + return SemicolonToken, l.r.Shift() + case ',': + l.r.Move(1) + return CommaToken, l.r.Shift() + case '(', ')', '[', ']', '{', '}': + if t := l.consumeBracket(); t != ErrorToken { + return t, l.r.Shift() + } + case '#': + if l.consumeHashToken() { + return HashToken, l.r.Shift() + } + case '"', '\'': + if t := l.consumeString(); t != ErrorToken { + return t, l.r.Shift() + } + case '.', '+': + if t := l.consumeNumeric(); t != ErrorToken { + return t, l.r.Shift() + } + case '-': + if t := l.consumeNumeric(); t != ErrorToken { + return t, l.r.Shift() + } else if t := l.consumeIdentlike(); t != ErrorToken { + return t, l.r.Shift() + } else if l.consumeCDCToken() { + return CDCToken, l.r.Shift() + } else if l.consumeCustomVariableToken() { + return CustomPropertyNameToken, l.r.Shift() + } + case '@': + if l.consumeAtKeywordToken() { + return AtKeywordToken, l.r.Shift() + } + case '$', '*', '^', '~': + if t := l.consumeMatch(); t != ErrorToken { + return t, l.r.Shift() + } + case '/': + if l.consumeComment() { + return CommentToken, l.r.Shift() + } + case '<': + if l.consumeCDOToken() { + return CDOToken, l.r.Shift() + } + case '\\': + if t := l.consumeIdentlike(); t != ErrorToken { + return t, l.r.Shift() + } + case 'u', 'U': + if l.consumeUnicodeRangeToken() { + return UnicodeRangeToken, l.r.Shift() + } else if t := l.consumeIdentlike(); t != ErrorToken { + return t, l.r.Shift() + } + case '|': + if t := l.consumeMatch(); t != ErrorToken { + return t, l.r.Shift() + } else if l.consumeColumnToken() { + return ColumnToken, l.r.Shift() + } + case 0: + if l.r.Err() != nil { + return ErrorToken, nil + } + default: + if t := l.consumeNumeric(); t != ErrorToken { + return t, l.r.Shift() + } else if t := l.consumeIdentlike(); t != ErrorToken { + return t, l.r.Shift() + } + } + // can't be rune because consumeIdentlike consumes that as an identifier + l.r.Move(1) + return DelimToken, l.r.Shift() +} + +//////////////////////////////////////////////////////////////// + +/* +The following functions follow the railroad diagrams in http://www.w3.org/TR/css3-syntax/ +*/ + +func (l *Lexer) consumeByte(c byte) bool { + if l.r.Peek(0) == c { + l.r.Move(1) + return true + } + return false +} + +func (l *Lexer) consumeComment() bool { + if l.r.Peek(0) != '/' || l.r.Peek(1) != '*' { + return false + } + l.r.Move(2) + for { + c := l.r.Peek(0) + if c == 0 && l.r.Err() != nil { + break + } else if c == '*' && l.r.Peek(1) == '/' { + l.r.Move(2) + return true + } + l.r.Move(1) + } + return true +} + +func (l *Lexer) consumeNewline() bool { + c := l.r.Peek(0) + if c == '\n' || c == '\f' { + l.r.Move(1) + return true + } else if c == '\r' { + if l.r.Peek(1) == '\n' { + l.r.Move(2) + } else { + l.r.Move(1) + } + return true + } + return false +} + +func (l *Lexer) consumeWhitespace() bool { + c := l.r.Peek(0) + if c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' { + l.r.Move(1) + return true + } + return false +} + +func (l *Lexer) consumeDigit() bool { + c := l.r.Peek(0) + if c >= '0' && c <= '9' { + l.r.Move(1) + return true + } + return false +} + +func (l *Lexer) consumeHexDigit() bool { + c := l.r.Peek(0) + if (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') { + l.r.Move(1) + return true + } + return false 
+} + +func (l *Lexer) consumeEscape() bool { + if l.r.Peek(0) != '\\' { + return false + } + mark := l.r.Pos() + l.r.Move(1) + if l.consumeNewline() { + l.r.Rewind(mark) + return false + } else if l.consumeHexDigit() { + for k := 1; k < 6; k++ { + if !l.consumeHexDigit() { + break + } + } + l.consumeWhitespace() + return true + } else { + c := l.r.Peek(0) + if c >= 0xC0 { + _, n := l.r.PeekRune(0) + l.r.Move(n) + return true + } else if c == 0 && l.r.Err() != nil { + l.r.Rewind(mark) + return false + } + } + l.r.Move(1) + return true +} + +func (l *Lexer) consumeIdentToken() bool { + mark := l.r.Pos() + if l.r.Peek(0) == '-' { + l.r.Move(1) + } + c := l.r.Peek(0) + if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c >= 0x80) { + if c != '\\' || !l.consumeEscape() { + l.r.Rewind(mark) + return false + } + } else { + l.r.Move(1) + } + for { + c := l.r.Peek(0) + if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || c == '-' || c >= 0x80) { + if c != '\\' || !l.consumeEscape() { + break + } + } else { + l.r.Move(1) + } + } + return true +} + +// support custom variables, https://www.w3.org/TR/css-variables-1/ +func (l *Lexer) consumeCustomVariableToken() bool { + // expect to be on a '-' + l.r.Move(1) + if l.r.Peek(0) != '-' { + l.r.Move(-1) + return false + } + if !l.consumeIdentToken() { + l.r.Move(-1) + return false + } + return true +} + +func (l *Lexer) consumeAtKeywordToken() bool { + // expect to be on an '@' + l.r.Move(1) + if !l.consumeIdentToken() { + l.r.Move(-1) + return false + } + return true +} + +func (l *Lexer) consumeHashToken() bool { + // expect to be on a '#' + mark := l.r.Pos() + l.r.Move(1) + c := l.r.Peek(0) + if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || c == '-' || c >= 0x80) { + if c != '\\' || !l.consumeEscape() { + l.r.Rewind(mark) + return false + } + } else { + l.r.Move(1) + } + for { + c := l.r.Peek(0) + if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || c == '-' || c >= 0x80) { + if c != '\\' || !l.consumeEscape() { + break + } + } else { + l.r.Move(1) + } + } + return true +} + +func (l *Lexer) consumeNumberToken() bool { + mark := l.r.Pos() + c := l.r.Peek(0) + if c == '+' || c == '-' { + l.r.Move(1) + } + firstDigit := l.consumeDigit() + if firstDigit { + for l.consumeDigit() { + } + } + if l.r.Peek(0) == '.' { + l.r.Move(1) + if l.consumeDigit() { + for l.consumeDigit() { + } + } else if firstDigit { + // . 
could belong to the next token + l.r.Move(-1) + return true + } else { + l.r.Rewind(mark) + return false + } + } else if !firstDigit { + l.r.Rewind(mark) + return false + } + mark = l.r.Pos() + c = l.r.Peek(0) + if c == 'e' || c == 'E' { + l.r.Move(1) + c = l.r.Peek(0) + if c == '+' || c == '-' { + l.r.Move(1) + } + if !l.consumeDigit() { + // e could belong to next token + l.r.Rewind(mark) + return true + } + for l.consumeDigit() { + } + } + return true +} + +func (l *Lexer) consumeUnicodeRangeToken() bool { + c := l.r.Peek(0) + if (c != 'u' && c != 'U') || l.r.Peek(1) != '+' { + return false + } + mark := l.r.Pos() + l.r.Move(2) + + // consume up to 6 hexDigits + k := 0 + for l.consumeHexDigit() { + k++ + } + + // either a minus or a question mark or the end is expected + if l.consumeByte('-') { + if k == 0 || 6 < k { + l.r.Rewind(mark) + return false + } + + // consume another up to 6 hexDigits + if l.consumeHexDigit() { + k = 1 + for l.consumeHexDigit() { + k++ + } + } else { + l.r.Rewind(mark) + return false + } + } else if l.consumeByte('?') { + // could be filled up to 6 characters with question marks or else regular hexDigits + k++ + for l.consumeByte('?') { + k++ + } + } + if k == 0 || 6 < k { + l.r.Rewind(mark) + return false + } + return true +} + +func (l *Lexer) consumeColumnToken() bool { + if l.r.Peek(0) == '|' && l.r.Peek(1) == '|' { + l.r.Move(2) + return true + } + return false +} + +func (l *Lexer) consumeCDOToken() bool { + if l.r.Peek(0) == '<' && l.r.Peek(1) == '!' && l.r.Peek(2) == '-' && l.r.Peek(3) == '-' { + l.r.Move(4) + return true + } + return false +} + +func (l *Lexer) consumeCDCToken() bool { + if l.r.Peek(0) == '-' && l.r.Peek(1) == '-' && l.r.Peek(2) == '>' { + l.r.Move(3) + return true + } + return false +} + +//////////////////////////////////////////////////////////////// + +// consumeMatch consumes any MatchToken. +func (l *Lexer) consumeMatch() TokenType { + if l.r.Peek(1) == '=' { + switch l.r.Peek(0) { + case '~': + l.r.Move(2) + return IncludeMatchToken + case '|': + l.r.Move(2) + return DashMatchToken + case '^': + l.r.Move(2) + return PrefixMatchToken + case '$': + l.r.Move(2) + return SuffixMatchToken + case '*': + l.r.Move(2) + return SubstringMatchToken + } + } + return ErrorToken +} + +// consumeBracket consumes any bracket token. +func (l *Lexer) consumeBracket() TokenType { + switch l.r.Peek(0) { + case '(': + l.r.Move(1) + return LeftParenthesisToken + case ')': + l.r.Move(1) + return RightParenthesisToken + case '[': + l.r.Move(1) + return LeftBracketToken + case ']': + l.r.Move(1) + return RightBracketToken + case '{': + l.r.Move(1) + return LeftBraceToken + case '}': + l.r.Move(1) + return RightBraceToken + } + return ErrorToken +} + +// consumeNumeric consumes NumberToken, PercentageToken or DimensionToken. +func (l *Lexer) consumeNumeric() TokenType { + if l.consumeNumberToken() { + if l.consumeByte('%') { + return PercentageToken + } else if l.consumeIdentToken() { + return DimensionToken + } + return NumberToken + } + return ErrorToken +} + +// consumeString consumes a string and may return BadStringToken when a newline is encountered. 
+func (l *Lexer) consumeString() TokenType { + // assume to be on " or ' + delim := l.r.Peek(0) + l.r.Move(1) + for { + c := l.r.Peek(0) + if c == 0 && l.r.Err() != nil { + break + } else if c == '\n' || c == '\r' || c == '\f' { + l.r.Move(1) + return BadStringToken + } else if c == delim { + l.r.Move(1) + break + } else if c == '\\' { + if !l.consumeEscape() { + // either newline or EOF after backslash + l.r.Move(1) + l.consumeNewline() + } + } else { + l.r.Move(1) + } + } + return StringToken +} + +func (l *Lexer) consumeUnquotedURL() bool { + for { + c := l.r.Peek(0) + if c == 0 && l.r.Err() != nil || c == ')' { + break + } else if c == '"' || c == '\'' || c == '(' || c == '\\' || c == ' ' || c <= 0x1F || c == 0x7F { + if c != '\\' || !l.consumeEscape() { + return false + } + } else { + l.r.Move(1) + } + } + return true +} + +// consumeRemnantsBadUrl consumes bytes of a BadUrlToken so that normal tokenization may continue. +func (l *Lexer) consumeRemnantsBadURL() { + for { + if l.consumeByte(')') || l.r.Err() != nil { + break + } else if !l.consumeEscape() { + l.r.Move(1) + } + } +} + +// consumeIdentlike consumes IdentToken, FunctionToken or UrlToken. +func (l *Lexer) consumeIdentlike() TokenType { + if l.consumeIdentToken() { + if l.r.Peek(0) != '(' { + return IdentToken + } else if !parse.EqualFold(bytes.Replace(l.r.Lexeme(), []byte{'\\'}, nil, -1), []byte{'u', 'r', 'l'}) { + l.r.Move(1) + return FunctionToken + } + l.r.Move(1) + + // consume url + for l.consumeWhitespace() { + } + if c := l.r.Peek(0); c == '"' || c == '\'' { + if l.consumeString() == BadStringToken { + l.consumeRemnantsBadURL() + return BadURLToken + } + } else if !l.consumeUnquotedURL() && !l.consumeWhitespace() { // if unquoted URL fails due to encountering whitespace, continue + l.consumeRemnantsBadURL() + return BadURLToken + } + for l.consumeWhitespace() { + } + if !l.consumeByte(')') && l.r.Err() != io.EOF { + l.consumeRemnantsBadURL() + return BadURLToken + } + return URLToken + } + return ErrorToken +} diff --git a/vendor/github.com/tdewolff/parse/v2/css/parse.go b/vendor/github.com/tdewolff/parse/v2/css/parse.go new file mode 100644 index 000000000..381db4146 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/v2/css/parse.go @@ -0,0 +1,493 @@ +package css + +import ( + "bytes" + "fmt" + "strconv" + + "github.com/tdewolff/parse/v2" + "github.com/tdewolff/parse/v2/buffer" +) + +var wsBytes = []byte(" ") +var endBytes = []byte("}") +var emptyBytes = []byte("") + +// GrammarType determines the type of grammar. +type GrammarType uint32 + +// GrammarType values. +const ( + ErrorGrammar GrammarType = iota // extra token when errors occur + CommentGrammar + AtRuleGrammar + BeginAtRuleGrammar + EndAtRuleGrammar + QualifiedRuleGrammar + BeginRulesetGrammar + EndRulesetGrammar + DeclarationGrammar + TokenGrammar + CustomPropertyGrammar +) + +// String returns the string representation of a GrammarType. 
+func (tt GrammarType) String() string { + switch tt { + case ErrorGrammar: + return "Error" + case CommentGrammar: + return "Comment" + case AtRuleGrammar: + return "AtRule" + case BeginAtRuleGrammar: + return "BeginAtRule" + case EndAtRuleGrammar: + return "EndAtRule" + case QualifiedRuleGrammar: + return "QualifiedRule" + case BeginRulesetGrammar: + return "BeginRuleset" + case EndRulesetGrammar: + return "EndRuleset" + case DeclarationGrammar: + return "Declaration" + case TokenGrammar: + return "Token" + case CustomPropertyGrammar: + return "CustomProperty" + } + return "Invalid(" + strconv.Itoa(int(tt)) + ")" +} + +//////////////////////////////////////////////////////////////// + +// State is the state function the parser currently is in. +type State func(*Parser) GrammarType + +// Token is a single TokenType and its associated data. +type Token struct { + TokenType + Data []byte +} + +func (t Token) String() string { + return t.TokenType.String() + "('" + string(t.Data) + "')" +} + +// Parser is the state for the parser. +type Parser struct { + l *Lexer + state []State + err string + errPos int + + buf []Token + level int + + data []byte + tt TokenType + keepWS bool + prevWS bool + prevEnd bool + prevComment bool +} + +// NewParser returns a new CSS parser from an io.Reader. isInline specifies whether this is an inline style attribute. +func NewParser(r *parse.Input, isInline bool) *Parser { + l := NewLexer(r) + p := &Parser{ + l: l, + state: make([]State, 0, 4), + } + + if isInline { + p.state = append(p.state, (*Parser).parseDeclarationList) + } else { + p.state = append(p.state, (*Parser).parseStylesheet) + } + return p +} + +// HasParseError returns true if there is a parse error (and not a read error). +func (p *Parser) HasParseError() bool { + return p.err != "" +} + +// Err returns the error encountered during parsing, this is often io.EOF but also other errors can be returned. +func (p *Parser) Err() error { + if p.err != "" { + r := buffer.NewReader(p.l.r.Bytes()) + return parse.NewError(r, p.errPos, p.err) + } + return p.l.Err() +} + +// Next returns the next Grammar. It returns ErrorGrammar when an error was encountered. Using Err() one can retrieve the error message. +func (p *Parser) Next() (GrammarType, TokenType, []byte) { + p.err = "" + + if p.prevEnd { + p.tt, p.data = RightBraceToken, endBytes + p.prevEnd = false + } else { + p.tt, p.data = p.popToken(true) + } + gt := p.state[len(p.state)-1](p) + return gt, p.tt, p.data +} + +// Offset return offset for current Grammar +func (p *Parser) Offset() int { + return p.l.r.Offset() +} + +// Values returns a slice of Tokens for the last Grammar. Only AtRuleGrammar, BeginAtRuleGrammar, BeginRulesetGrammar and Declaration will return the at-rule components, ruleset selector and declaration values respectively. 
+func (p *Parser) Values() []Token { + return p.buf +} + +func (p *Parser) popToken(allowComment bool) (TokenType, []byte) { + p.prevWS = false + p.prevComment = false + tt, data := p.l.Next() + for !p.keepWS && tt == WhitespaceToken || tt == CommentToken { + if tt == WhitespaceToken { + p.prevWS = true + } else { + p.prevComment = true + if allowComment && len(p.state) == 1 { + break + } + } + tt, data = p.l.Next() + } + return tt, data +} + +func (p *Parser) initBuf() { + p.buf = p.buf[:0] +} + +func (p *Parser) pushBuf(tt TokenType, data []byte) { + p.buf = append(p.buf, Token{tt, data}) +} + +//////////////////////////////////////////////////////////////// + +func (p *Parser) parseStylesheet() GrammarType { + if p.tt == CDOToken || p.tt == CDCToken { + return TokenGrammar + } else if p.tt == AtKeywordToken { + return p.parseAtRule() + } else if p.tt == CommentToken { + return CommentGrammar + } else if p.tt == ErrorToken { + return ErrorGrammar + } + return p.parseQualifiedRule() +} + +func (p *Parser) parseDeclarationList() GrammarType { + if p.tt == CommentToken { + p.tt, p.data = p.popToken(false) + } + for p.tt == SemicolonToken { + p.tt, p.data = p.popToken(false) + } + + // IE hack: *color:red; + if p.tt == DelimToken && p.data[0] == '*' { + tt, data := p.popToken(false) + p.tt = tt + p.data = append(p.data, data...) + } + + if p.tt == ErrorToken { + return ErrorGrammar + } else if p.tt == AtKeywordToken { + return p.parseAtRule() + } else if p.tt == IdentToken || p.tt == DelimToken { + return p.parseDeclaration() + } else if p.tt == CustomPropertyNameToken { + return p.parseCustomProperty() + } + + // parse error + p.initBuf() + p.l.r.Move(-len(p.data)) + p.err, p.errPos = fmt.Sprintf("unexpected token '%s' in declaration", string(p.data)), p.l.r.Offset() + p.l.r.Move(len(p.data)) + + if p.tt == RightBraceToken { + // right brace token will occur when we've had a decl error that ended in a right brace token + // as these are not handled by decl error, we handle it here explicitly. Normally its used to end eg. the qual rule. 
+ p.pushBuf(p.tt, p.data) + return ErrorGrammar + } + return p.parseDeclarationError(p.tt, p.data) +} + +//////////////////////////////////////////////////////////////// + +func (p *Parser) parseAtRule() GrammarType { + p.initBuf() + p.data = parse.ToLower(parse.Copy(p.data)) + atRuleName := p.data + if len(atRuleName) > 0 && atRuleName[1] == '-' { + if i := bytes.IndexByte(atRuleName[2:], '-'); i != -1 { + atRuleName = atRuleName[i+2:] // skip vendor specific prefix + } + } + atRule := ToHash(atRuleName[1:]) + + first := true + skipWS := false + for { + tt, data := p.popToken(false) + if tt == LeftBraceToken && p.level == 0 { + if atRule == Font_Face || atRule == Page { + p.state = append(p.state, (*Parser).parseAtRuleDeclarationList) + } else if atRule == Document || atRule == Keyframes || atRule == Media || atRule == Supports { + p.state = append(p.state, (*Parser).parseAtRuleRuleList) + } else { + p.state = append(p.state, (*Parser).parseAtRuleUnknown) + } + return BeginAtRuleGrammar + } else if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken { + p.prevEnd = (tt == RightBraceToken) + return AtRuleGrammar + } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken { + p.level++ + } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken { + if p.level == 0 { + // TODO: buggy + p.pushBuf(tt, data) + if 1 < len(p.state) { + p.state = p.state[:len(p.state)-1] + } + p.err, p.errPos = "unexpected ending in at rule", p.l.r.Offset() + return ErrorGrammar + } + p.level-- + } + if first { + if tt == LeftParenthesisToken || tt == LeftBracketToken { + p.prevWS = false + } + first = false + } + if len(data) == 1 && (data[0] == ',' || data[0] == ':') { + skipWS = true + } else if p.prevWS && !skipWS && tt != RightParenthesisToken { + p.pushBuf(WhitespaceToken, wsBytes) + } else { + skipWS = false + } + if tt == LeftParenthesisToken { + skipWS = true + } + p.pushBuf(tt, data) + } +} + +func (p *Parser) parseAtRuleRuleList() GrammarType { + if p.tt == RightBraceToken || p.tt == ErrorToken { + p.state = p.state[:len(p.state)-1] + return EndAtRuleGrammar + } else if p.tt == AtKeywordToken { + return p.parseAtRule() + } else { + return p.parseQualifiedRule() + } +} + +func (p *Parser) parseAtRuleDeclarationList() GrammarType { + for p.tt == SemicolonToken { + p.tt, p.data = p.popToken(false) + } + if p.tt == RightBraceToken || p.tt == ErrorToken { + p.state = p.state[:len(p.state)-1] + return EndAtRuleGrammar + } + return p.parseDeclarationList() +} + +func (p *Parser) parseAtRuleUnknown() GrammarType { + p.keepWS = true + if p.tt == RightBraceToken && p.level == 0 || p.tt == ErrorToken { + p.state = p.state[:len(p.state)-1] + p.keepWS = false + return EndAtRuleGrammar + } + if p.tt == LeftParenthesisToken || p.tt == LeftBraceToken || p.tt == LeftBracketToken || p.tt == FunctionToken { + p.level++ + } else if p.tt == RightParenthesisToken || p.tt == RightBraceToken || p.tt == RightBracketToken { + p.level-- + } + return TokenGrammar +} + +func (p *Parser) parseQualifiedRule() GrammarType { + p.initBuf() + first := true + inAttrSel := false + skipWS := true + var tt TokenType + var data []byte + for { + if first { + tt, data = p.tt, p.data + p.tt = WhitespaceToken + p.data = emptyBytes + first = false + } else { + tt, data = p.popToken(false) + } + if tt == LeftBraceToken && p.level == 0 { + p.state = append(p.state, (*Parser).parseQualifiedRuleDeclarationList) + return 
BeginRulesetGrammar + } else if tt == ErrorToken { + p.err, p.errPos = "unexpected ending in qualified rule", p.l.r.Offset() + return ErrorGrammar + } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken { + p.level++ + } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken { + if p.level == 0 { + // TODO: buggy + p.pushBuf(tt, data) + if 1 < len(p.state) { + p.state = p.state[:len(p.state)-1] + } + p.err, p.errPos = "unexpected ending in qualified rule", p.l.r.Offset() + return ErrorGrammar + } + p.level-- + } + if len(data) == 1 && (data[0] == ',' || data[0] == '>' || data[0] == '+' || data[0] == '~') { + if data[0] == ',' { + return QualifiedRuleGrammar + } + skipWS = true + } else if p.prevWS && !skipWS && !inAttrSel { + p.pushBuf(WhitespaceToken, wsBytes) + } else { + skipWS = false + } + if tt == LeftBracketToken { + inAttrSel = true + } else if tt == RightBracketToken { + inAttrSel = false + } + p.pushBuf(tt, data) + } +} + +func (p *Parser) parseQualifiedRuleDeclarationList() GrammarType { + for p.tt == SemicolonToken { + p.tt, p.data = p.popToken(false) + } + if p.tt == RightBraceToken || p.tt == ErrorToken { + p.state = p.state[:len(p.state)-1] + return EndRulesetGrammar + } + return p.parseDeclarationList() +} + +func (p *Parser) parseDeclaration() GrammarType { + p.initBuf() + p.data = parse.ToLower(parse.Copy(p.data)) + + ttName, dataName := p.tt, p.data + tt, data := p.popToken(false) + if tt != ColonToken { + p.l.r.Move(-len(data)) + p.err, p.errPos = "expected colon in declaration", p.l.r.Offset() + p.l.r.Move(len(data)) + p.pushBuf(ttName, dataName) + return p.parseDeclarationError(tt, data) + } + + skipWS := true + for { + tt, data := p.popToken(false) + if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken { + p.prevEnd = (tt == RightBraceToken) + return DeclarationGrammar + } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken { + p.level++ + } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken { + if p.level == 0 { + // TODO: buggy + p.err, p.errPos = "unexpected ending in declaration", p.l.r.Offset() + p.pushBuf(ttName, dataName) + p.pushBuf(ColonToken, []byte{':'}) + return p.parseDeclarationError(tt, data) + } + p.level-- + } + if len(data) == 1 && (data[0] == ',' || data[0] == '/' || data[0] == ':' || data[0] == '!' 
|| data[0] == '=') { + skipWS = true + } else if (p.prevWS || p.prevComment) && !skipWS { + p.pushBuf(WhitespaceToken, wsBytes) + } else { + skipWS = false + } + p.pushBuf(tt, data) + } +} + +func (p *Parser) parseDeclarationError(tt TokenType, data []byte) GrammarType { + // we're on the offending (tt,data), keep popping tokens till we reach ;, }, or EOF + p.tt, p.data = tt, data + for { + if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken { + p.prevEnd = (tt == RightBraceToken) + if tt == SemicolonToken { + p.pushBuf(tt, data) + } + return ErrorGrammar + } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken { + p.level++ + } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken { + p.level-- + } + + if p.prevWS { + p.pushBuf(WhitespaceToken, wsBytes) + } + p.pushBuf(tt, data) + + tt, data = p.popToken(false) + } +} + +func (p *Parser) parseCustomProperty() GrammarType { + p.initBuf() + if tt, data := p.popToken(false); tt != ColonToken { + p.l.r.Move(-len(data)) + p.err, p.errPos = "expected colon in custom property", p.l.r.Offset() + p.l.r.Move(len(data)) + return ErrorGrammar + } + val := []byte{} + for { + tt, data := p.l.Next() + if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken { + p.prevEnd = (tt == RightBraceToken) + p.pushBuf(CustomPropertyValueToken, val) + return CustomPropertyGrammar + } else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken { + p.level++ + } else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken { + if p.level == 0 { + // TODO: buggy + p.pushBuf(tt, data) + p.err, p.errPos = "unexpected ending in custom property", p.l.r.Offset() + return ErrorGrammar + } + p.level-- + } + val = append(val, data...) + } +} diff --git a/vendor/github.com/tdewolff/parse/v2/css/util.go b/vendor/github.com/tdewolff/parse/v2/css/util.go new file mode 100644 index 000000000..20b99a711 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/v2/css/util.go @@ -0,0 +1,47 @@ +package css + +import "github.com/tdewolff/parse/v2" + +// IsIdent returns true if the bytes are a valid identifier. +func IsIdent(b []byte) bool { + l := NewLexer(parse.NewInputBytes(b)) + l.consumeIdentToken() + l.r.Restore() + return l.r.Pos() == len(b) +} + +// IsURLUnquoted returns true if the bytes are a valid unquoted URL. 
+func IsURLUnquoted(b []byte) bool { + l := NewLexer(parse.NewInputBytes(b)) + l.consumeUnquotedURL() + l.r.Restore() + return l.r.Pos() == len(b) +} + +// HSL2RGB converts HSL to RGB with all of range [0,1] +// from http://www.w3.org/TR/css3-color/#hsl-color +func HSL2RGB(h, s, l float64) (float64, float64, float64) { + m2 := l * (s + 1) + if l > 0.5 { + m2 = l + s - l*s + } + m1 := l*2 - m2 + return hue2rgb(m1, m2, h+1.0/3.0), hue2rgb(m1, m2, h), hue2rgb(m1, m2, h-1.0/3.0) +} + +func hue2rgb(m1, m2, h float64) float64 { + if h < 0.0 { + h += 1.0 + } + if h > 1.0 { + h -= 1.0 + } + if h*6.0 < 1.0 { + return m1 + (m2-m1)*h*6.0 + } else if h*2.0 < 1.0 { + return m2 + } else if h*3.0 < 2.0 { + return m1 + (m2-m1)*(2.0/3.0-h)*6.0 + } + return m1 +} diff --git a/vendor/github.com/tdewolff/parse/v2/html/lex.go b/vendor/github.com/tdewolff/parse/v2/html/lex.go index e3cb9bd04..c000edccc 100644 --- a/vendor/github.com/tdewolff/parse/v2/html/lex.go +++ b/vendor/github.com/tdewolff/parse/v2/html/lex.go @@ -166,6 +166,7 @@ func (l *Lexer) Next() (TokenType, []byte) { isEndTag := c == '/' && l.r.Peek(2) != '>' && (l.r.Peek(2) != 0 || l.r.PeekErr(2) == nil) if !isEndTag && (c < 'a' || 'z' < c) && (c < 'A' || 'Z' < c) && c != '!' && c != '?' { // not a tag + l.r.Move(1) } else if 0 < l.r.Pos() { // return currently buffered texttoken so that we can return tag next iteration l.text = l.r.Shift() @@ -202,8 +203,9 @@ func (l *Lexer) Next() (TokenType, []byte) { return TextToken, l.text } return ErrorToken, nil + } else { + l.r.Move(1) } - l.r.Move(1) } } @@ -539,19 +541,19 @@ func (l *Lexer) shiftXML(rawTag Hash) []byte { func (l *Lexer) moveTemplate() { for { - if c := l.r.Peek(0); l.at(l.tmplEnd...) || c == 0 && l.r.Err() != nil { - if c != 0 { - l.r.Move(len(l.tmplEnd)) - } - break + if c := l.r.Peek(0); c == 0 && l.r.Err() != nil { + return + } else if l.at(l.tmplEnd...) 
{ + l.r.Move(len(l.tmplEnd)) + return } else if c == '"' || c == '\'' { l.r.Move(1) escape := false for { - if c2 := l.r.Peek(0); !escape && c2 == c || c2 == 0 && l.r.Err() != nil { - if c2 != 0 { - l.r.Move(1) - } + if c2 := l.r.Peek(0); c2 == 0 && l.r.Err() != nil { + return + } else if !escape && c2 == c { + l.r.Move(1) break } else if c2 == '\\' { escape = !escape diff --git a/vendor/github.com/tdewolff/parse/v2/html/parse.go b/vendor/github.com/tdewolff/parse/v2/html/parse.go new file mode 100644 index 000000000..b7e1ba3dd --- /dev/null +++ b/vendor/github.com/tdewolff/parse/v2/html/parse.go @@ -0,0 +1,403 @@ +package html + +import ( + "bytes" + "fmt" + "io" + "strings" + + "github.com/tdewolff/parse/v2" + "github.com/tdewolff/parse/v2/css" +) + +type AST struct { + Children []*Tag + Text []byte +} + +func (ast *AST) String() string { + sb := strings.Builder{} + for i, child := range ast.Children { + if i != 0 { + sb.WriteString("\n") + } + sb.WriteString(child.ASTString()) + } + return sb.String() +} + +type Attr struct { + Key, Val []byte +} + +func (attr *Attr) String() string { + return fmt.Sprintf(`%s="%s"`, string(attr.Key), string(attr.Val)) +} + +type Tag struct { + Root *AST + Parent *Tag + Prev, Next *Tag + Children []*Tag + Index int + + Name []byte + Attrs []Attr + textStart, textEnd int +} + +func (tag *Tag) getAttr(key []byte) ([]byte, bool) { + for _, attr := range tag.Attrs { + if bytes.Equal(key, attr.Key) { + return attr.Val, true + } + } + return nil, false +} + +func (tag *Tag) GetAttr(key string) (string, bool) { + val, ok := tag.getAttr([]byte(key)) + return string(val), ok +} + +func (tag *Tag) Text() string { + return string(tag.Root.Text[tag.textStart:tag.textEnd]) +} + +func (tag *Tag) String() string { + sb := strings.Builder{} + sb.WriteString("<") + sb.Write(tag.Name) + for _, attr := range tag.Attrs { + sb.WriteString(" ") + sb.WriteString(attr.String()) + } + sb.WriteString(">") + return sb.String() +} + +func (tag *Tag) ASTString() string { + sb := strings.Builder{} + sb.WriteString(tag.String()) + for _, child := range tag.Children { + sb.WriteString("\n ") + s := child.ASTString() + s = strings.ReplaceAll(s, "\n", "\n ") + sb.WriteString(s) + } + return sb.String() +} + +func Parse(r *parse.Input) (*AST, error) { + ast := &AST{} + root := &Tag{} + cur := root + + l := NewLexer(r) + for { + tt, data := l.Next() + switch tt { + case ErrorToken: + if err := l.Err(); err != io.EOF { + return nil, err + } + ast.Children = root.Children + return ast, nil + case TextToken: + ast.Text = append(ast.Text, data...) 
+ case StartTagToken: + child := &Tag{ + Root: ast, + Parent: cur, + Index: len(cur.Children), + Name: l.Text(), + textStart: len(ast.Text), + } + if 0 < len(cur.Children) { + child.Prev = cur.Children[len(cur.Children)-1] + child.Prev.Next = child + } + cur.Children = append(cur.Children, child) + cur = child + case AttributeToken: + val := l.AttrVal() + if 0 < len(val) && (val[0] == '"' || val[0] == '\'') { + val = val[1 : len(val)-1] + } + cur.Attrs = append(cur.Attrs, Attr{l.AttrKey(), val}) + case StartTagCloseToken: + if voidTags[string(cur.Name)] { + cur.textEnd = len(ast.Text) + cur = cur.Parent + } + case EndTagToken, StartTagVoidToken: + start := cur + for start != root && !bytes.Equal(l.Text(), start.Name) { + start = start.Parent + } + if start == root { + // ignore + } else { + parent := start.Parent + for cur != parent { + cur.textEnd = len(ast.Text) + cur = cur.Parent + } + } + } + } +} + +func (ast *AST) Query(s string) (*Tag, error) { + sel, err := ParseSelector(s) + if err != nil { + return nil, err + } + + for _, child := range ast.Children { + if match := child.query(sel); match != nil { + return match, nil + } + } + return nil, nil +} + +func (tag *Tag) query(sel selector) *Tag { + if sel.AppliesTo(tag) { + return tag + } + for _, child := range tag.Children { + if match := child.query(sel); match != nil { + return match + } + } + return nil +} + +func (ast *AST) QueryAll(s string) ([]*Tag, error) { + sel, err := ParseSelector(s) + if err != nil { + return nil, err + } + + matches := []*Tag{} + for _, child := range ast.Children { + child.queryAll(&matches, sel) + } + return matches, nil +} + +func (tag *Tag) queryAll(matches *[]*Tag, sel selector) { + if sel.AppliesTo(tag) { + *matches = append(*matches, tag) + } + for _, child := range tag.Children { + child.queryAll(matches, sel) + } +} + +type attrSelector struct { + op byte // empty, =, ~, | + attr []byte + val []byte +} + +func (sel attrSelector) AppliesTo(tag *Tag) bool { + val, ok := tag.getAttr(sel.attr) + if !ok { + return false + } + + switch sel.op { + case 0: + return true + case '=': + return bytes.Equal(val, sel.val) + case '~': + if 0 < len(sel.val) { + vals := bytes.Split(val, []byte(" ")) + for _, val := range vals { + if bytes.Equal(val, sel.val) { + return true + } + } + } + case '|': + return bytes.Equal(val, sel.val) || bytes.HasPrefix(val, append(sel.val, '-')) + } + return false +} + +func (attr attrSelector) String() string { + sb := strings.Builder{} + sb.Write(attr.attr) + if attr.op != 0 { + sb.WriteByte(attr.op) + if attr.op != '=' { + sb.WriteByte('=') + } + sb.WriteByte('"') + sb.Write(attr.val) + sb.WriteByte('"') + } + return sb.String() +} + +type selectorNode struct { + typ []byte // is * for universal + attrs []attrSelector + op byte // space or >, last is NULL +} + +func (sel selectorNode) AppliesTo(tag *Tag) bool { + if 0 < len(sel.typ) && !bytes.Equal(sel.typ, []byte("*")) && !bytes.Equal(sel.typ, tag.Name) { + return false + } + for _, attr := range sel.attrs { + if !attr.AppliesTo(tag) { + return false + } + } + return true +} + +func (sel selectorNode) String() string { + sb := strings.Builder{} + sb.Write(sel.typ) + for _, attr := range sel.attrs { + if bytes.Equal(attr.attr, []byte("id")) && attr.op == '=' { + sb.WriteByte('#') + sb.Write(attr.val) + } else if bytes.Equal(attr.attr, []byte("class")) && attr.op == '~' { + sb.WriteByte('.') + sb.Write(attr.val) + } else { + sb.WriteByte('[') + sb.WriteString(attr.String()) + sb.WriteByte(']') + } + } + if sel.op != 0 { + 
sb.WriteByte(' ') + sb.WriteByte(sel.op) + sb.WriteByte(' ') + } + return sb.String() +} + +type token struct { + tt css.TokenType + data []byte +} + +type selector []selectorNode + +func ParseSelector(s string) (selector, error) { + ts := []token{} + l := css.NewLexer(parse.NewInputString(s)) + for { + tt, data := l.Next() + if tt == css.ErrorToken { + if err := l.Err(); err != io.EOF { + return selector{}, err + } + break + } + ts = append(ts, token{ + tt: tt, + data: data, + }) + } + + sel := selector{} + node := selectorNode{} + for i := 0; i < len(ts); i++ { + t := ts[i] + if 0 < i && (t.tt == css.WhitespaceToken || t.tt == css.DelimToken && t.data[0] == '>') { + if t.tt == css.DelimToken { + node.op = '>' + } else { + node.op = ' ' + } + sel = append(sel, node) + node = selectorNode{} + } else if t.tt == css.IdentToken || t.tt == css.DelimToken && t.data[0] == '*' { + node.typ = t.data + } else if t.tt == css.DelimToken && (t.data[0] == '.' || t.data[0] == '#') && i+1 < len(ts) && ts[i+1].tt == css.IdentToken { + if t.data[0] == '#' { + node.attrs = append(node.attrs, attrSelector{op: '=', attr: []byte("id"), val: ts[i+1].data}) + } else { + node.attrs = append(node.attrs, attrSelector{op: '~', attr: []byte("class"), val: ts[i+1].data}) + } + i++ + } else if t.tt == css.DelimToken && t.data[0] == '[' && i+2 < len(ts) && ts[i+1].tt == css.IdentToken && ts[i+2].tt == css.DelimToken { + if ts[i+2].data[0] == ']' { + node.attrs = append(node.attrs, attrSelector{op: 0, attr: ts[i+1].data}) + i += 2 + } else if i+4 < len(ts) && ts[i+3].tt == css.IdentToken && ts[i+4].tt == css.DelimToken && ts[i+4].data[0] == ']' { + node.attrs = append(node.attrs, attrSelector{op: ts[i+2].data[0], attr: ts[i+1].data, val: ts[i+3].data}) + i += 4 + } + } + } + sel = append(sel, node) + return sel, nil +} + +func (sels selector) AppliesTo(tag *Tag) bool { + if len(sels) == 0 { + return true + } else if !sels[len(sels)-1].AppliesTo(tag) { + return false + } + + tag = tag.Parent + isel := len(sels) - 2 + for 0 <= isel && tag != nil { + switch sels[isel].op { + case ' ': + for tag != nil { + if sels[isel].AppliesTo(tag) { + break + } + tag = tag.Parent + } + case '>': + if !sels[isel].AppliesTo(tag) { + return false + } + tag = tag.Parent + default: + return false + } + isel-- + } + return len(sels) != 0 && isel == -1 +} + +func (sels selector) String() string { + if len(sels) == 0 { + return "" + } + sb := strings.Builder{} + for _, sel := range sels { + sb.WriteString(sel.String()) + } + return sb.String()[1:] +} + +var voidTags = map[string]bool{ + "area": true, + "base": true, + "br": true, + "col": true, + "embed": true, + "hr": true, + "img": true, + "input": true, + "link": true, + "meta": true, + "source": true, + "track": true, + "wbr": true, +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 4f7b1eae4..8bd0cfa10 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -672,14 +672,15 @@ github.com/superseriousbusiness/oauth2/v4/generates github.com/superseriousbusiness/oauth2/v4/manage github.com/superseriousbusiness/oauth2/v4/models github.com/superseriousbusiness/oauth2/v4/server -# github.com/tdewolff/minify/v2 v2.20.9 +# github.com/tdewolff/minify/v2 v2.20.12 ## explicit; go 1.18 github.com/tdewolff/minify/v2 github.com/tdewolff/minify/v2/html -# github.com/tdewolff/parse/v2 v2.7.6 +# github.com/tdewolff/parse/v2 v2.7.7 ## explicit; go 1.13 github.com/tdewolff/parse/v2 github.com/tdewolff/parse/v2/buffer +github.com/tdewolff/parse/v2/css github.com/tdewolff/parse/v2/html 
github.com/tdewolff/parse/v2/strconv # github.com/technologize/otel-go-contrib v1.1.0