// Copyright 2019 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package strs provides string manipulation functionality specific to protobuf. package strs import ( "go/token" "strings" "unicode" "unicode/utf8" "google.golang.org/protobuf/internal/flags" "google.golang.org/protobuf/reflect/protoreflect" ) // EnforceUTF8 reports whether to enforce strict UTF-8 validation. func EnforceUTF8(fd protoreflect.FieldDescriptor) bool { if flags.ProtoLegacy || fd.Syntax() == protoreflect.Editions { if fd, ok := fd.(interface{ EnforceUTF8() bool }); ok { return fd.EnforceUTF8() } } return fd.Syntax() == protoreflect.Proto3 } // GoCamelCase camel-cases a protobuf name for use as a Go identifier. // // If there is an interior underscore followed by a lower case letter, // drop the underscore and convert the letter to upper case. func GoCamelCase(s string) string { // Invariant: if the next letter is lower case, it must be converted // to upper case. // That is, we process a word at a time, where words are marked by _ or // upper case letter. Digits are treated as words. var b []byte for i := 0; i < len(s); i++ { c := s[i] switch { case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]): // Skip over '.' in ".{{lowercase}}". case c == '.': b = append(b, '_') // convert '.' to '_' case c == '_' && (i == 0 || s[i-1] == '.'): // Convert initial '_' to ensure we start with a capital letter. // Do the same for '_' after '.' to match historic behavior. b = append(b, 'X') // convert '_' to 'X' case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]): // Skip over '_' in "_{{lowercase}}". case isASCIIDigit(c): b = append(b, c) default: // Assume we have a letter now - if not, it's a bogus identifier. // The next word is a sequence of characters that must start upper case. if isASCIILower(c) { c -= 'a' - 'A' // convert lowercase to uppercase } b = append(b, c) // Accept lower case sequence that follows. for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ { b = append(b, s[i+1]) } } } return string(b) } // GoSanitized converts a string to a valid Go identifier. func GoSanitized(s string) string { // Sanitize the input to the set of valid characters, // which must be '_' or be in the Unicode L or N categories. s = strings.Map(func(r rune) rune { if unicode.IsLetter(r) || unicode.IsDigit(r) { return r } return '_' }, s) // Prepend '_' in the event of a Go keyword conflict or if // the identifier is invalid (does not start in the Unicode L category). r, _ := utf8.DecodeRuneInString(s) if token.Lookup(s).IsKeyword() || !unicode.IsLetter(r) { return "_" + s } return s } // JSONCamelCase converts a snake_case identifier to a camelCase identifier, // according to the protobuf JSON specification. func JSONCamelCase(s string) string { var b []byte var wasUnderscore bool for i := 0; i < len(s); i++ { // proto identifiers are always ASCII c := s[i] if c != '_' { if wasUnderscore && isASCIILower(c) { c -= 'a' - 'A' // convert to uppercase } b = append(b, c) } wasUnderscore = c == '_' } return string(b) } // JSONSnakeCase converts a camelCase identifier to a snake_case identifier, // according to the protobuf JSON specification. func JSONSnakeCase(s string) string { var b []byte for i := 0; i < len(s); i++ { // proto identifiers are always ASCII c := s[i] if isASCIIUpper(c) { b = append(b, '_') c += 'a' - 'A' // convert to lowercase } b = append(b, c) } return string(b) } // MapEntryName derives the name of the map entry message given the field name. // See protoc v3.8.0: src/google/protobuf/descriptor.cc:254-276,6057 func MapEntryName(s string) string { var b []byte upperNext := true for _, c := range s { switch { case c == '_': upperNext = true case upperNext: b = append(b, byte(unicode.ToUpper(c))) upperNext = false default: b = append(b, byte(c)) } } b = append(b, "Entry"...) return string(b) } // EnumValueName derives the camel-cased enum value name. // See protoc v3.8.0: src/google/protobuf/descriptor.cc:297-313 func EnumValueName(s string) string { var b []byte upperNext := true for _, c := range s { switch { case c == '_': upperNext = true case upperNext: b = append(b, byte(unicode.ToUpper(c))) upperNext = false default: b = append(b, byte(unicode.ToLower(c))) upperNext = false } } return string(b) } // TrimEnumPrefix trims the enum name prefix from an enum value name, // where the prefix is all lowercase without underscores. // See protoc v3.8.0: src/google/protobuf/descriptor.cc:330-375 func TrimEnumPrefix(s, prefix string) string { s0 := s // original input for len(s) > 0 && len(prefix) > 0 { if s[0] == '_' { s = s[1:] continue } if unicode.ToLower(rune(s[0])) != rune(prefix[0]) { return s0 // no prefix match } s, prefix = s[1:], prefix[1:] } if len(prefix) > 0 { return s0 // no prefix match } s = strings.TrimLeft(s, "_") if len(s) == 0 { return s0 // avoid returning empty string } return s } func isASCIILower(c byte) bool { return 'a' <= c && c <= 'z' } func isASCIIUpper(c byte) bool { return 'A' <= c && c <= 'Z' } func isASCIIDigit(c byte) bool { return '0' <= c && c <= '9' }