[feature] make nollamas difficulty configurable (#4119)

Makes the NoLLaMas proof-of-work scraper deterrence difficulty configurable.

Reviewed-on: https://codeberg.org/superseriousbusiness/gotosocial/pulls/4119
Co-authored-by: kim <grufwub@gmail.com>
Co-committed-by: kim <grufwub@gmail.com>
This commit is contained in:
kim
2025-05-07 11:59:39 +00:00
committed by kim
parent 1f0c261fd2
commit 4c96e2571d
11 changed files with 161 additions and 75 deletions

View File

@@ -1,7 +1,9 @@
# Scraper Deterrence
GoToSocial provides an optional proof-of-work based scraper and automated HTTP client deterrence that can be enabled on profile and status web views. The way
it works is that it generates a unique but deterministic challenge for each incoming HTTP request based on client information and current time, that-is a hex encoded SHA256 hash, and asks the client to find an addition to a portion of this that will generate a hex encoded SHA256 hash with at least 4 leading '0' characters. This is served to the client as a minimal holding page with a single JavaScript worker that computes a solution to this.
it works is that it generates a unique but deterministic challenge for each incoming HTTP request based on client information and current time, that is, a hex-encoded SHA256 hash, and asks the client to find an addition to a portion of this that will generate a hex-encoded SHA256 hash with a pre-determined number of leading '0' characters. This is served to the client as a minimal holding page with a single JavaScript worker that computes a solution to this.
The number of required leading '0' characters can be configured to your liking, where higher values take longer to solve, and lower values take less. But this is not exact, as the challenges themselves are random, so you can only affect the **average amount of time** it may take. If your challenges take too long to solve, you may deter users from accessing your web pages. And conversely, the longer it takes for a solution to be found, the more cost you'll be incurring for scrapers (and in some cases, causing their operation to time out). That balance is up to you to configure, hence why this is an advanced feature.
Once a solution to this challenge has been provided, by refreshing the page with the solution in the query parameter, GoToSocial will verify this solution and on success will return the expected profile / status page with a cookie that provides challenge-less access to the instance for up to the next hour.

View File

@@ -1264,4 +1264,23 @@ advanced-header-filter-mode: ""
#
# Options: [true, false]
# Default: true
advanced-scraper-deterrence: false
advanced-scraper-deterrence-enabled: false
# Uint. Allows tweaking the difficulty of the proof-of-work algorithm
# used in the scraper deterrence. This determines how many leading '0'
# characters are required to be generated in each solution. Higher
# values will on average take longer to find solutions for, and the
# inverse is also true.
#
# The downside is that if your deterrence takes too long to solve,
# it may deter some users from viewing your web status / profile page.
# And conversely, the longer it takes for a solution to be found, the
# more CPU usage you'll be incurring for scrapers, possibly even
# causing their operation to time out before completion.
#
# For more details please check the documentation at:
# https://docs.gotosocial.org/en/latest/advanced/scraper_deterrence
#
# Examples: [3, 4, 5]
# Default: 4
advanced-scraper-deterrence-difficulty: 4

View File

@@ -258,9 +258,9 @@ type AdvancedConfig struct {
SenderMultiplier int `name:"sender-multiplier" usage:"Multiplier to use per cpu for batching outgoing fedi messages. 0 or less turns batching off (not recommended)."`
CSPExtraURIs []string `name:"csp-extra-uris" usage:"Additional URIs to allow when building content-security-policy for media + images."`
HeaderFilterMode string `name:"header-filter-mode" usage:"Set incoming request header filtering mode."`
ScraperDeterrence bool `name:"scraper-deterrence" usage:"Enable proof-of-work based scraper deterrence on profile / status pages"`
RateLimit RateLimitConfig `name:"rate-limit"`
Throttling ThrottlingConfig `name:"throttling"`
ScraperDeterrence ScraperDeterrenceConfig `name:"scraper-deterrence"`
}
type RateLimitConfig struct {
@@ -273,7 +273,7 @@ type ThrottlingConfig struct {
RetryAfter time.Duration `name:"retry-after" usage:"Retry-After duration response to send for throttled requests."`
}
// type ScraperDeterrenceConfig struct {
// Enabled bool `name:"enabled" usage:"Enable proof-of-work based scraper deterrence on profile / status pages"`
// Difficulty uint8 `name:"difficulty" usage:"The proof-of-work difficulty, which determines how many leading zeros to try solve in hash solutions."`
// }
type ScraperDeterrenceConfig struct {
Enabled bool `name:"enabled" usage:"Enable proof-of-work based scraper deterrence on profile / status pages"`
Difficulty uint8 `name:"difficulty" usage:"The proof-of-work difficulty, which determines how many leading zeros to try solve in hash solutions."`
}

View File

@@ -175,7 +175,7 @@ func TestCLIParsing(t *testing.T) {
"--config-path", "testdata/test3.yaml",
},
expected: expectedKV(
kv.Field{"advanced-scraper-deterrence", true},
kv.Field{"advanced-scraper-deterrence-enabled", true},
kv.Field{"advanced-rate-limit-requests", 5000},
),
},

View File

@@ -135,7 +135,6 @@ var Defaults = Configuration{
CSPExtraURIs: []string{},
HeaderFilterMode: RequestHeaderFilterModeDisabled,
CookiesSamesite: "lax",
ScraperDeterrence: false,
RateLimit: RateLimitConfig{
Requests: 300, // 1 per second per 5 minutes
@@ -146,6 +145,11 @@ var Defaults = Configuration{
Multiplier: 8, // 8 open requests per CPU
RetryAfter: 30 * time.Second,
},
ScraperDeterrence: ScraperDeterrenceConfig{
Enabled: false,
Difficulty: 4,
},
},
Cache: CacheConfiguration{

View File

@@ -136,11 +136,12 @@ func (cfg *Configuration) RegisterFlags(flags *pflag.FlagSet) {
flags.Int("advanced-sender-multiplier", cfg.Advanced.SenderMultiplier, "Multiplier to use per cpu for batching outgoing fedi messages. 0 or less turns batching off (not recommended).")
flags.StringSlice("advanced-csp-extra-uris", cfg.Advanced.CSPExtraURIs, "Additional URIs to allow when building content-security-policy for media + images.")
flags.String("advanced-header-filter-mode", cfg.Advanced.HeaderFilterMode, "Set incoming request header filtering mode.")
flags.Bool("advanced-scraper-deterrence", cfg.Advanced.ScraperDeterrence, "Enable proof-of-work based scraper deterrence on profile / status pages")
flags.Int("advanced-rate-limit-requests", cfg.Advanced.RateLimit.Requests, "Amount of HTTP requests to permit within a 5 minute window. 0 or less turns rate limiting off.")
flags.StringSlice("advanced-rate-limit-exceptions", cfg.Advanced.RateLimit.Exceptions.Strings(), "Slice of CIDRs to exclude from rate limit restrictions.")
flags.Int("advanced-throttling-multiplier", cfg.Advanced.Throttling.Multiplier, "Multiplier to use per cpu for http request throttling. 0 or less turns throttling off.")
flags.Duration("advanced-throttling-retry-after", cfg.Advanced.Throttling.RetryAfter, "Retry-After duration response to send for throttled requests.")
flags.Bool("advanced-scraper-deterrence-enabled", cfg.Advanced.ScraperDeterrence.Enabled, "Enable proof-of-work based scraper deterrence on profile / status pages")
flags.Uint8("advanced-scraper-deterrence-difficulty", cfg.Advanced.ScraperDeterrence.Difficulty, "The proof-of-work difficulty, which determines how many leading zeros to try solve in hash solutions.")
flags.StringSlice("http-client-allow-ips", cfg.HTTPClient.AllowIPs, "")
flags.StringSlice("http-client-block-ips", cfg.HTTPClient.BlockIPs, "")
flags.Duration("http-client-timeout", cfg.HTTPClient.Timeout, "")
@@ -205,7 +206,7 @@ func (cfg *Configuration) RegisterFlags(flags *pflag.FlagSet) {
}
func (cfg *Configuration) MarshalMap() map[string]any {
cfgmap := make(map[string]any, 180)
cfgmap := make(map[string]any, 181)
cfgmap["log-level"] = cfg.LogLevel
cfgmap["log-timestamp-format"] = cfg.LogTimestampFormat
cfgmap["log-db-queries"] = cfg.LogDbQueries
@@ -313,11 +314,12 @@ func (cfg *Configuration) MarshalMap() map[string]any {
cfgmap["advanced-sender-multiplier"] = cfg.Advanced.SenderMultiplier
cfgmap["advanced-csp-extra-uris"] = cfg.Advanced.CSPExtraURIs
cfgmap["advanced-header-filter-mode"] = cfg.Advanced.HeaderFilterMode
cfgmap["advanced-scraper-deterrence"] = cfg.Advanced.ScraperDeterrence
cfgmap["advanced-rate-limit-requests"] = cfg.Advanced.RateLimit.Requests
cfgmap["advanced-rate-limit-exceptions"] = cfg.Advanced.RateLimit.Exceptions.Strings()
cfgmap["advanced-throttling-multiplier"] = cfg.Advanced.Throttling.Multiplier
cfgmap["advanced-throttling-retry-after"] = cfg.Advanced.Throttling.RetryAfter
cfgmap["advanced-scraper-deterrence-enabled"] = cfg.Advanced.ScraperDeterrence.Enabled
cfgmap["advanced-scraper-deterrence-difficulty"] = cfg.Advanced.ScraperDeterrence.Difficulty
cfgmap["http-client-allow-ips"] = cfg.HTTPClient.AllowIPs
cfgmap["http-client-block-ips"] = cfg.HTTPClient.BlockIPs
cfgmap["http-client-timeout"] = cfg.HTTPClient.Timeout
@@ -1277,14 +1279,6 @@ func (cfg *Configuration) UnmarshalMap(cfgmap map[string]any) error {
}
}
if ival, ok := cfgmap["advanced-scraper-deterrence"]; ok {
var err error
cfg.Advanced.ScraperDeterrence, err = cast.ToBoolE(ival)
if err != nil {
return fmt.Errorf("error casting %#v -> bool for 'advanced-scraper-deterrence': %w", ival, err)
}
}
if ival, ok := cfgmap["advanced-rate-limit-requests"]; ok {
var err error
cfg.Advanced.RateLimit.Requests, err = cast.ToIntE(ival)
@@ -1322,6 +1316,22 @@ func (cfg *Configuration) UnmarshalMap(cfgmap map[string]any) error {
}
}
if ival, ok := cfgmap["advanced-scraper-deterrence-enabled"]; ok {
var err error
cfg.Advanced.ScraperDeterrence.Enabled, err = cast.ToBoolE(ival)
if err != nil {
return fmt.Errorf("error casting %#v -> bool for 'advanced-scraper-deterrence-enabled': %w", ival, err)
}
}
if ival, ok := cfgmap["advanced-scraper-deterrence-difficulty"]; ok {
var err error
cfg.Advanced.ScraperDeterrence.Difficulty, err = cast.ToUint8E(ival)
if err != nil {
return fmt.Errorf("error casting %#v -> uint8 for 'advanced-scraper-deterrence-difficulty': %w", ival, err)
}
}
if ival, ok := cfgmap["http-client-allow-ips"]; ok {
var err error
cfg.HTTPClient.AllowIPs, err = toStringSlice(ival)
@@ -4553,31 +4563,6 @@ func GetAdvancedHeaderFilterMode() string { return global.GetAdvancedHeaderFilte
// SetAdvancedHeaderFilterMode safely sets the value for global configuration 'Advanced.HeaderFilterMode' field
func SetAdvancedHeaderFilterMode(v string) { global.SetAdvancedHeaderFilterMode(v) }
// AdvancedScraperDeterrenceFlag returns the flag name for the 'Advanced.ScraperDeterrence' field
func AdvancedScraperDeterrenceFlag() string { return "advanced-scraper-deterrence" }
// GetAdvancedScraperDeterrence safely fetches the Configuration value for state's 'Advanced.ScraperDeterrence' field
func (st *ConfigState) GetAdvancedScraperDeterrence() (v bool) {
st.mutex.RLock()
v = st.config.Advanced.ScraperDeterrence
st.mutex.RUnlock()
return
}
// SetAdvancedScraperDeterrence safely sets the Configuration value for state's 'Advanced.ScraperDeterrence' field
func (st *ConfigState) SetAdvancedScraperDeterrence(v bool) {
st.mutex.Lock()
defer st.mutex.Unlock()
st.config.Advanced.ScraperDeterrence = v
st.reloadToViper()
}
// GetAdvancedScraperDeterrence safely fetches the value for global configuration 'Advanced.ScraperDeterrence' field
func GetAdvancedScraperDeterrence() bool { return global.GetAdvancedScraperDeterrence() }
// SetAdvancedScraperDeterrence safely sets the value for global configuration 'Advanced.ScraperDeterrence' field
func SetAdvancedScraperDeterrence(v bool) { global.SetAdvancedScraperDeterrence(v) }
// AdvancedRateLimitRequestsFlag returns the flag name for the 'Advanced.RateLimit.Requests' field
func AdvancedRateLimitRequestsFlag() string { return "advanced-rate-limit-requests" }
@@ -4678,6 +4663,62 @@ func GetAdvancedThrottlingRetryAfter() time.Duration { return global.GetAdvanced
// SetAdvancedThrottlingRetryAfter safely sets the value for global configuration 'Advanced.Throttling.RetryAfter' field
func SetAdvancedThrottlingRetryAfter(v time.Duration) { global.SetAdvancedThrottlingRetryAfter(v) }
// AdvancedScraperDeterrenceEnabledFlag returns the flag name for the 'Advanced.ScraperDeterrence.Enabled' field
func AdvancedScraperDeterrenceEnabledFlag() string { return "advanced-scraper-deterrence-enabled" }
// GetAdvancedScraperDeterrenceEnabled safely fetches the Configuration value for state's 'Advanced.ScraperDeterrence.Enabled' field
func (st *ConfigState) GetAdvancedScraperDeterrenceEnabled() (v bool) {
st.mutex.RLock()
v = st.config.Advanced.ScraperDeterrence.Enabled
st.mutex.RUnlock()
return
}
// SetAdvancedScraperDeterrenceEnabled safely sets the Configuration value for state's 'Advanced.ScraperDeterrence.Enabled' field
func (st *ConfigState) SetAdvancedScraperDeterrenceEnabled(v bool) {
st.mutex.Lock()
defer st.mutex.Unlock()
st.config.Advanced.ScraperDeterrence.Enabled = v
st.reloadToViper()
}
// GetAdvancedScraperDeterrenceEnabled safely fetches the value for global configuration 'Advanced.ScraperDeterrence.Enabled' field
func GetAdvancedScraperDeterrenceEnabled() bool { return global.GetAdvancedScraperDeterrenceEnabled() }
// SetAdvancedScraperDeterrenceEnabled safely sets the value for global configuration 'Advanced.ScraperDeterrence.Enabled' field
func SetAdvancedScraperDeterrenceEnabled(v bool) { global.SetAdvancedScraperDeterrenceEnabled(v) }
// AdvancedScraperDeterrenceDifficultyFlag returns the flag name for the 'Advanced.ScraperDeterrence.Difficulty' field
func AdvancedScraperDeterrenceDifficultyFlag() string {
return "advanced-scraper-deterrence-difficulty"
}
// GetAdvancedScraperDeterrenceDifficulty safely fetches the Configuration value for state's 'Advanced.ScraperDeterrence.Difficulty' field
func (st *ConfigState) GetAdvancedScraperDeterrenceDifficulty() (v uint8) {
st.mutex.RLock()
v = st.config.Advanced.ScraperDeterrence.Difficulty
st.mutex.RUnlock()
return
}
// SetAdvancedScraperDeterrenceDifficulty safely sets the Configuration value for state's 'Advanced.ScraperDeterrence.Difficulty' field
func (st *ConfigState) SetAdvancedScraperDeterrenceDifficulty(v uint8) {
st.mutex.Lock()
defer st.mutex.Unlock()
st.config.Advanced.ScraperDeterrence.Difficulty = v
st.reloadToViper()
}
// GetAdvancedScraperDeterrenceDifficulty safely fetches the value for global configuration 'Advanced.ScraperDeterrence.Difficulty' field
func GetAdvancedScraperDeterrenceDifficulty() uint8 {
return global.GetAdvancedScraperDeterrenceDifficulty()
}
// SetAdvancedScraperDeterrenceDifficulty safely sets the value for global configuration 'Advanced.ScraperDeterrence.Difficulty' field
func SetAdvancedScraperDeterrenceDifficulty(v uint8) {
global.SetAdvancedScraperDeterrenceDifficulty(v)
}
// HTTPClientAllowIPsFlag returns the flag name for the 'HTTPClient.AllowIPs' field
func HTTPClientAllowIPsFlag() string { return "http-client-allow-ips" }
@@ -6450,17 +6491,6 @@ func flattenConfigMap(cfgmap map[string]any) {
}
}
for _, key := range [][]string{
{"advanced", "scraper-deterrence"},
} {
ival, ok := mapGet(cfgmap, key...)
if ok {
cfgmap["advanced-scraper-deterrence"] = ival
nestedKeys[key[0]] = struct{}{}
break
}
}
for _, key := range [][]string{
{"advanced-rate-limit", "requests"},
{"advanced", "rate-limit", "requests"},
@@ -6509,6 +6539,30 @@ func flattenConfigMap(cfgmap map[string]any) {
}
}
for _, key := range [][]string{
{"advanced-scraper-deterrence", "enabled"},
{"advanced", "scraper-deterrence", "enabled"},
} {
ival, ok := mapGet(cfgmap, key...)
if ok {
cfgmap["advanced-scraper-deterrence-enabled"] = ival
nestedKeys[key[0]] = struct{}{}
break
}
}
for _, key := range [][]string{
{"advanced-scraper-deterrence", "difficulty"},
{"advanced", "scraper-deterrence", "difficulty"},
} {
ival, ok := mapGet(cfgmap, key...)
if ok {
cfgmap["advanced-scraper-deterrence-difficulty"] = ival
nestedKeys[key[0]] = struct{}{}
break
}
}
for _, key := range [][]string{
{"http-client", "allow-ips"},
} {

View File

@@ -1,4 +1,5 @@
advanced:
scraper-deterrence: true
scraper-deterrence:
enabled: true
rate-limit:
requests: 5000

View File

@@ -55,7 +55,7 @@ func NoLLaMas(
getInstanceV1 func(context.Context) (*apimodel.InstanceV1, gtserror.WithCode),
) gin.HandlerFunc {
if !config.GetAdvancedScraperDeterrence() {
if !config.GetAdvancedScraperDeterrenceEnabled() {
// NoLLaMas middleware disabled.
return func(*gin.Context) {}
}
@@ -72,7 +72,7 @@ func NoLLaMas(
var nollamas nollamas
nollamas.seed = seed
nollamas.ttl = time.Hour
nollamas.diff = 4
nollamas.diff = config.GetAdvancedScraperDeterrenceDifficulty()
nollamas.getInstanceV1 = getInstanceV1
nollamas.policy = cookiePolicy
return nollamas.Serve

View File

@@ -45,7 +45,7 @@ func TestNoLLaMasMiddleware(t *testing.T) {
e := gin.New()
// Setup necessary configuration variables.
config.SetAdvancedScraperDeterrence(true)
config.SetAdvancedScraperDeterrenceEnabled(true)
config.SetWebTemplateBaseDir("../../web/template")
// Load templates into engine.

View File

@@ -19,7 +19,8 @@ EXPECT=$(cat << "EOF"
"127.0.0.1/32"
],
"advanced-rate-limit-requests": 6969,
"advanced-scraper-deterrence": true,
"advanced-scraper-deterrence-difficulty": 5,
"advanced-scraper-deterrence-enabled": true,
"advanced-sender-multiplier": -1,
"advanced-throttling-multiplier": -1,
"advanced-throttling-retry-after": 10000000000,
@@ -302,7 +303,8 @@ GTS_SYSLOG_ADDRESS='127.0.0.1:6969' \
GTS_ADVANCED_COOKIES_SAMESITE='strict' \
GTS_ADVANCED_RATE_LIMIT_EXCEPTIONS="192.0.2.0/24,127.0.0.1/32" \
GTS_ADVANCED_RATE_LIMIT_REQUESTS=6969 \
GTS_ADVANCED_SCRAPER_DETERRENCE=true \
GTS_ADVANCED_SCRAPER_DETERRENCE_DIFFICULTY=5 \
GTS_ADVANCED_SCRAPER_DETERRENCE_ENABLED=true \
GTS_ADVANCED_SENDER_MULTIPLIER=-1 \
GTS_ADVANCED_THROTTLING_MULTIPLIER=-1 \
GTS_ADVANCED_THROTTLING_RETRY_AFTER='10s' \

View File

@@ -164,7 +164,6 @@ func testDefaults() config.Configuration {
Advanced: config.AdvancedConfig{
CookiesSamesite: "lax",
SenderMultiplier: 0, // 1 sender only, regardless of CPU
ScraperDeterrence: envBool("GTS_ADVANCED_SCRAPER_DETERRENCE", false),
RateLimit: config.RateLimitConfig{
Requests: 0, // disabled
@@ -173,6 +172,11 @@ func testDefaults() config.Configuration {
Throttling: config.ThrottlingConfig{
Multiplier: 0, // disabled
},
ScraperDeterrence: config.ScraperDeterrenceConfig{
Enabled: envBool("GTS_ADVANCED_SCRAPER_DETERRENCE_ENABLED", false),
Difficulty: uint8(envInt("GTS_ADVANCED_SCRAPER_DETERRENCE_DIFFICULTY", 4)), //nolint
},
},
SoftwareVersion: "0.0.0-testrig",