[feature] proof of work scraper deterrence (#4043)

This adds a proof-of-work based scraper deterrence to GoToSocial's middleware stack on profile and status web pages. Heavily inspired by https://github.com/TecharoHQ/anubis, but massively stripped back for our own usecase.

Todo:
- ~~add configuration option so this is disabled by default~~
- ~~fix whatever weirdness is preventing this working with CSP (even in debug)~~
- ~~use our standard templating mechanism going through apiutil helper func~~
- ~~probably some absurdly small performance improvements to be made in pooling re-used hex encode / hash encode buffers~~ the web endpoints aren't as hot a path as API / ActivityPub, will leave as-is for now as it is already very minimal and well optimized
- ~~verify the cryptographic assumptions re: using a portion of token as challenge data~~ this isn't a serious application of cryptography, if it turns out to be a problem we'll fix it, but it definitely should not be easily possible to guess a SHA256 hash from the first 1/4 of it even if mathematically it might make it a bit easier
- ~~theme / make look nice??~~
- ~~add a spinner~~
- ~~add entry in example configuration~~
- ~~add documentation~~

Verification page originally based on https://github.com/LucienV1/powtect

Co-authored-by: tobi <tobi.smethurst@protonmail.com>
Reviewed-on: https://codeberg.org/superseriousbusiness/gotosocial/pulls/4043
Reviewed-by: tobi <tsmethurst@noreply.codeberg.org>
Co-authored-by: kim <grufwub@gmail.com>
Co-committed-by: kim <grufwub@gmail.com>
This commit is contained in:
kim
2025-04-28 20:12:27 +00:00
committed by kim
parent 2b82fa7481
commit d8c4d9fc5a
16 changed files with 759 additions and 19 deletions

View File

@@ -150,11 +150,6 @@ func isHeaderBlocked(state *state.State, c *gin.Context) (bool, error) {
}
if key != "" {
// if expr != "" {
// // TODO: replace expvar with build
// // taggable metrics types in State{}.
// }
// A header was matched against!
// i.e. this request is blocked.
return true, nil
@@ -185,11 +180,6 @@ func isHeaderAllowed(state *state.State, c *gin.Context) (bool, error) {
}
if key != "" {
// if expr != "" {
// // TODO: replace expvar with build
// // taggable metrics types in State{}.
// }
// A header was matched against!
// i.e. this request is allowed.
return true, nil
@@ -220,11 +210,6 @@ func isHeaderNotAllowed(state *state.State, c *gin.Context) (bool, error) {
}
if key != "" {
// if expr != "" {
// // TODO: replace expvar with build
// // taggable metrics types in State{}.
// }
// A header was matched against!
// i.e. request is NOT allowed.
return true, nil

View File

@@ -0,0 +1,309 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package middleware
import (
"context"
"crypto/rand"
"crypto/sha256"
"crypto/subtle"
"encoding/hex"
"hash"
"io"
"net/http"
"time"
apimodel "code.superseriousbusiness.org/gotosocial/internal/api/model"
apiutil "code.superseriousbusiness.org/gotosocial/internal/api/util"
"code.superseriousbusiness.org/gotosocial/internal/config"
"code.superseriousbusiness.org/gotosocial/internal/gtscontext"
"code.superseriousbusiness.org/gotosocial/internal/gtserror"
"code.superseriousbusiness.org/gotosocial/internal/log"
"code.superseriousbusiness.org/gotosocial/internal/oauth"
"codeberg.org/gruf/go-byteutil"
"github.com/gin-gonic/gin"
)
// NoLLaMas returns a piece of HTTP middleware that provides a deterrence
// on routes it is applied to, against bots and scrapers. It generates a
// unique but deterministic challenge for each HTTP client within an hour
// TTL that requires a proof-of-work solution to pass onto the next handler.
// On successful solution, the client is provided a cookie that allows them
// to bypass this check within that hour TTL. The outcome of this is that it
// should make scraping of these endpoints economically unfeasible, when enabled,
// and with an absurdly minimal performance impact. The downside is that it
// requires javascript to be enabled on the client to pass the middleware check.
//
// Heavily inspired by: https://github.com/TecharoHQ/anubis
func NoLLaMas(getInstanceV1 func(context.Context) (*apimodel.InstanceV1, gtserror.WithCode)) gin.HandlerFunc {
if !config.GetAdvancedScraperDeterrence() {
// NoLLaMas middleware disabled.
return func(*gin.Context) {}
}
seed := make([]byte, 32)
// Read random data for the token seed.
_, err := io.ReadFull(rand.Reader, seed)
if err != nil {
panic(err)
}
// Configure nollamas.
var nollamas nollamas
nollamas.seed = seed
nollamas.ttl = time.Hour
nollamas.diff = 4
nollamas.getInstanceV1 = getInstanceV1
return nollamas.Serve
}
// hashWithBufs encompasses a hash along
// with the necessary buffers to generate
// a hashsum and then encode that sum.
type hashWithBufs struct {
hash hash.Hash
hbuf []byte
ebuf []byte
}
type nollamas struct {
seed []byte // unique token seed
ttl time.Duration
diff uint8
// extra fields required for
// our template rendering.
getInstanceV1 func(ctx context.Context) (*apimodel.InstanceV1, gtserror.WithCode)
}
func (m *nollamas) Serve(c *gin.Context) {
if c.Request.Method != http.MethodGet {
// Only interested in protecting
// crawlable 'GET' endpoints.
c.Next()
return
}
// Extract request context.
ctx := c.Request.Context()
if ctx.Value(oauth.SessionAuthorizedToken) != nil {
// Don't guard against requests
// providing valid OAuth tokens.
c.Next()
return
}
if gtscontext.HTTPSignature(ctx) != "" {
// Don't guard against requests
// providing HTTP signatures.
c.Next()
return
}
// i.e. outputted hash slice length.
const hashLen = sha256.Size
// i.e. hex.EncodedLen(hashLen).
const encodedHashLen = 2 * hashLen
// Prepare hash + buffers.
hash := hashWithBufs{
hash: sha256.New(),
hbuf: make([]byte, 0, hashLen),
ebuf: make([]byte, encodedHashLen),
}
// Extract client fingerprint data.
userAgent := c.GetHeader("User-Agent")
clientIP := c.ClientIP()
// Generate a unique token for this request,
// only valid for a period of now +- m.ttl.
token := m.token(&hash, userAgent, clientIP)
// For unique challenge string just use a
// single portion of their 'success' token.
// SHA256 is not yet cracked, this is not an
// application of a hash requiring serious
// cryptographic security and it rotates on
// a TTL basis, so it should be fine.
challenge := token[:len(token)/4]
// Check for a provided success token.
cookie, _ := c.Cookie("gts-nollamas")
// Check whether passed cookie
// is the expected success token.
if subtle.ConstantTimeCompare(
byteutil.S2B(token),
byteutil.S2B(cookie),
) == 1 {
// They passed us a valid, expected
// token. They already passed checks.
c.Next()
return
}
// Prepare new log entry.
l := log.WithContext(ctx).
WithField("userAgent", userAgent).
WithField("challenge", challenge)
// Extract and parse query.
query := c.Request.URL.Query()
// Check query to see if an in-progress
// challenge solution has been provided.
nonce := query.Get("nollamas_solution")
if nonce == "" || len(nonce) > 20 {
// noting that here, 20 is
// max integer string len.
//
// An invalid solution string, just
// present them with new challenge.
l.Info("posing new challenge")
m.renderChallenge(c, challenge)
return
}
// Reset the hash.
hash.hash.Reset()
// Check challenge+nonce as possible solution.
if !m.checkChallenge(&hash, challenge, nonce) {
// They failed challenge,
// re-present challenge page.
l.Info("invalid solution provided")
m.renderChallenge(c, challenge)
return
}
l.Infof("challenge passed: %s", nonce)
// Don't pass to further
// handlers, we'll redirect.
c.Abort()
// Drop solution query and encode.
query.Del("nollamas_solution")
c.Request.URL.RawQuery = query.Encode()
// They passed the challenge! Set success token
// cookie and allow them to continue to next handlers.
c.SetCookie("gts-nollamas", token, int(m.ttl/time.Second), "", "", false, false)
c.Redirect(http.StatusTemporaryRedirect, c.Request.URL.RequestURI())
}
func (m *nollamas) renderChallenge(c *gin.Context, challenge string) {
// Don't pass to further
// handlers, they only get
// our challenge page.
c.Abort()
// Fetch current instance information for templating vars.
instance, errWithCode := m.getInstanceV1(c.Request.Context())
if errWithCode != nil {
apiutil.ErrorHandler(c, errWithCode, m.getInstanceV1)
return
}
// Write templated challenge response to client.
apiutil.TemplateWebPage(c, apiutil.WebPage{
Template: "nollamas.tmpl",
Instance: instance,
Stylesheets: []string{
"/assets/dist/nollamas.css",
// Include fork-awesome stylesheet
// to get nice loading spinner.
"/assets/Fork-Awesome/css/fork-awesome.min.css",
},
Extra: map[string]any{
"challenge": challenge,
"difficulty": m.diff,
},
Javascript: []apiutil.JavascriptEntry{
{
Src: "/assets/dist/nollamas.js",
Defer: true,
},
},
})
}
func (m *nollamas) token(hash *hashWithBufs, userAgent, clientIP string) string {
// Use our unique seed to seed hash,
// to ensure we have cryptographically
// unique, yet deterministic, tokens
// generated for a given http client.
hash.hash.Write(m.seed)
// Include difficulty level in
// hash input data so if config
// changes then token invalidates.
hash.hash.Write([]byte{m.diff})
// Also seed the generated input with
// current time rounded to TTL, so our
// single comparison handles expiries.
now := time.Now().Round(m.ttl).Unix()
hash.hash.Write([]byte{
byte(now >> 56),
byte(now >> 48),
byte(now >> 40),
byte(now >> 32),
byte(now >> 24),
byte(now >> 16),
byte(now >> 8),
byte(now),
})
// Finally, append unique client request data.
hash.hash.Write(byteutil.S2B(userAgent))
hash.hash.Write(byteutil.S2B(clientIP))
// Return hex encoded hash output.
hash.hbuf = hash.hash.Sum(hash.hbuf[:0])
hex.Encode(hash.ebuf, hash.hbuf)
return string(hash.ebuf)
}
func (m *nollamas) checkChallenge(hash *hashWithBufs, challenge, nonce string) bool {
// Hash and encode input challenge with
// proposed nonce as a possible solution.
hash.hash.Write(byteutil.S2B(challenge))
hash.hash.Write(byteutil.S2B(nonce))
hash.hbuf = hash.hash.Sum(hash.hbuf[:0])
hex.Encode(hash.ebuf, hash.hbuf)
solution := hash.ebuf
// Check that the first 'diff'
// many chars are indeed zeroes.
for i := range m.diff {
if solution[i] != '0' {
return false
}
}
return true
}

View File

@@ -0,0 +1,178 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package middleware_test
import (
"context"
"crypto/sha256"
"encoding/hex"
"io"
"net/http"
"net/http/httptest"
"slices"
"strconv"
"strings"
"testing"
"code.superseriousbusiness.org/gotosocial/internal/api/model"
"code.superseriousbusiness.org/gotosocial/internal/config"
"code.superseriousbusiness.org/gotosocial/internal/gtserror"
"code.superseriousbusiness.org/gotosocial/internal/middleware"
"code.superseriousbusiness.org/gotosocial/internal/router"
"codeberg.org/gruf/go-byteutil"
"github.com/gin-gonic/gin"
"github.com/stretchr/testify/assert"
)
func TestNoLLaMasMiddleware(t *testing.T) {
// Gin test engine.
e := gin.New()
// Setup necessary configuration variables.
config.SetAdvancedScraperDeterrence(true)
config.SetWebTemplateBaseDir("../../web/template")
// Load templates into engine.
err := router.LoadTemplates(e)
assert.NoError(t, err)
// Add middleware to the gin engine handler stack.
middleware := middleware.NoLLaMas(getInstanceV1)
e.Use(middleware)
// Set test handler we can
// easily check if was used.
e.Handle("GET", "/", testHandler)
// Test with differing user-agents.
for _, userAgent := range []string{
"CURL",
"Mozilla FireSox",
"Google Gnome",
} {
testNoLLaMasMiddleware(t, e, userAgent)
}
}
func testNoLLaMasMiddleware(t *testing.T, e *gin.Engine, userAgent string) {
// Prepare a test request for gin engine.
r := httptest.NewRequest("GET", "/", nil)
r.Header.Set("User-Agent", userAgent)
rw := httptest.NewRecorder()
// Pass req through
// engine handler.
e.ServeHTTP(rw, r)
// Get http result.
res := rw.Result()
// It should have been stopped
// by middleware and NOT used
// the expected test handler.
ok := usedTestHandler(res)
assert.False(t, ok)
// Read entire response body.
b, err := io.ReadAll(res.Body)
if err != nil {
panic(err)
}
var difficulty uint64
var challenge string
// Parse output body and find the challenge / difficulty.
for _, line := range strings.Split(string(b), "\n") {
line = strings.TrimSpace(line)
switch {
case strings.HasPrefix(line, "data-nollamas-challenge=\""):
line = line[25:]
line = line[:len(line)-1]
challenge = line
case strings.HasPrefix(line, "data-nollamas-difficulty=\""):
line = line[26:]
line = line[:len(line)-1]
var err error
difficulty, err = strconv.ParseUint(line, 10, 8)
assert.NoError(t, err)
}
}
// Ensure valid posed challenge.
assert.NotZero(t, difficulty)
assert.NotEmpty(t, challenge)
// Prepare a test request for gin engine.
r = httptest.NewRequest("GET", "/", nil)
r.Header.Set("User-Agent", userAgent)
rw = httptest.NewRecorder()
// Now compute and set solution query paramater.
solution := computeSolution(challenge, difficulty)
r.URL.RawQuery = "nollamas_solution=" + solution
// Pass req through
// engine handler.
e.ServeHTTP(rw, r)
// Get http result.
res = rw.Result()
// Should have received redirect.
uri, err := res.Location()
assert.NoError(t, err)
assert.Equal(t, uri.String(), "/")
// Ensure our expected solution cookie (to bypass challenge) was set.
ok = slices.ContainsFunc(res.Cookies(), func(c *http.Cookie) bool {
return c.Name == "gts-nollamas"
})
assert.True(t, ok)
}
// computeSolution does the functional equivalent of our nollamas workerTask.js.
func computeSolution(challenge string, difficulty uint64) string {
outer:
for i := 0; ; i++ {
solution := strconv.Itoa(i)
combined := challenge + solution
hash := sha256.Sum256(byteutil.S2B(combined))
encoded := hex.EncodeToString(hash[:])
for i := range difficulty {
if encoded[i] != '0' {
continue outer
}
}
return solution
}
}
// usedTestHandler returns whether testHandler() was used.
func usedTestHandler(res *http.Response) bool {
return res.Header.Get("test-handler") == "ok"
}
func testHandler(c *gin.Context) {
c.Writer.Header().Set("test-handler", "ok")
c.Writer.WriteHeader(http.StatusOK)
}
func getInstanceV1(context.Context) (*model.InstanceV1, gtserror.WithCode) {
return &model.InstanceV1{}, nil
}