[feature] proof of work scraper deterrence (#4043)

This adds proof-of-work based scraper deterrence to GoToSocial's middleware stack on profile and status web pages. Heavily inspired by https://github.com/TecharoHQ/anubis, but massively stripped back for our own use case.

Todo:
- ~~add configuration option so this is disabled by default~~
- ~~fix whatever weirdness is preventing this working with CSP (even in debug)~~
- ~~use our standard templating mechanism going through apiutil helper func~~
- ~~probably some absurdly small performance improvements to be made in pooling re-used hex encode / hash encode buffers~~ the web endpoints aren't as hot a path as API / ActivityPub, will leave as-is for now as it is already very minimal and well optimized
- ~~verify the cryptographic assumptions re: using a portion of token as challenge data~~ this isn't a serious application of cryptography, if it turns out to be a problem we'll fix it, but it definitely should not be easily possible to guess a SHA256 hash from the first 1/4 of it even if mathematically it might make it a bit easier
- ~~theme / make look nice??~~
- ~~add a spinner~~
- ~~add entry in example configuration~~
- ~~add documentation~~

Verification page originally based on https://github.com/LucienV1/powtect

Co-authored-by: tobi <tobi.smethurst@protonmail.com>
Reviewed-on: https://codeberg.org/superseriousbusiness/gotosocial/pulls/4043
Reviewed-by: tobi <tsmethurst@noreply.codeberg.org>
Co-authored-by: kim <grufwub@gmail.com>
Co-committed-by: kim <grufwub@gmail.com>
This commit is contained in:
kim
2025-04-28 20:12:27 +00:00
committed by kim
parent 2b82fa7481
commit d8c4d9fc5a
16 changed files with 759 additions and 19 deletions

View File

@@ -0,0 +1,178 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package middleware_test
import (
"context"
"crypto/sha256"
"encoding/hex"
"io"
"net/http"
"net/http/httptest"
"slices"
"strconv"
"strings"
"testing"
"code.superseriousbusiness.org/gotosocial/internal/api/model"
"code.superseriousbusiness.org/gotosocial/internal/config"
"code.superseriousbusiness.org/gotosocial/internal/gtserror"
"code.superseriousbusiness.org/gotosocial/internal/middleware"
"code.superseriousbusiness.org/gotosocial/internal/router"
"codeberg.org/gruf/go-byteutil"
"github.com/gin-gonic/gin"
"github.com/stretchr/testify/assert"
)
// TestNoLLaMasMiddleware builds a gin engine with the NoLLaMas middleware
// in front of a marker handler, then runs the challenge / solution round
// trip (see testNoLLaMasMiddleware) once per user-agent as its own subtest.
func TestNoLLaMasMiddleware(t *testing.T) {
	// Gin test engine.
	e := gin.New()

	// Setup necessary configuration variables.
	config.SetAdvancedScraperDeterrence(true)
	config.SetWebTemplateBaseDir("../../web/template")

	// Load templates into engine. Without them the middleware
	// cannot be exercised, so stop here instead of producing
	// a cascade of unrelated failures further down.
	if err := router.LoadTemplates(e); !assert.NoError(t, err) {
		return
	}

	// Add middleware to the gin engine handler stack. Passed
	// directly so no local shadows the "middleware" package.
	e.Use(middleware.NoLLaMas(getInstanceV1))

	// Set test handler we can
	// easily check if was used.
	e.Handle("GET", "/", testHandler)

	// Test with differing user-agents, each as a named
	// subtest so a failure points at the offending agent.
	for _, userAgent := range []string{
		"CURL",
		"Mozilla FireSox",
		"Google Gnome",
	} {
		t.Run(userAgent, func(t *testing.T) {
			testNoLLaMasMiddleware(t, e, userAgent)
		})
	}
}
// testNoLLaMasMiddleware performs one challenge / solution round trip
// against engine e using the given User-Agent header:
//
//  1. a plain GET must be intercepted (the test handler is not reached)
//     and the response body must carry a challenge and a difficulty;
//  2. a second GET carrying the computed solution in the
//     "nollamas_solution" query parameter must answer with a redirect
//     to "/" and set the "gts-nollamas" cookie.
func testNoLLaMasMiddleware(t *testing.T, e *gin.Engine, userAgent string) {
	// Prepare a test request for gin engine.
	r := httptest.NewRequest("GET", "/", nil)
	r.Header.Set("User-Agent", userAgent)
	rw := httptest.NewRecorder()

	// Pass req through
	// engine handler.
	e.ServeHTTP(rw, r)

	// Get http result.
	res := rw.Result()

	// It should have been stopped
	// by middleware and NOT used
	// the expected test handler.
	ok := usedTestHandler(res)
	assert.False(t, ok)

	// Read entire response body, then release it.
	b, err := io.ReadAll(res.Body)
	res.Body.Close()
	if !assert.NoError(t, err) {
		return
	}

	// attr returns the double-quoted value of the named attribute
	// when line begins with `name="`. Cutting at the closing quote
	// (instead of dropping a fixed number of bytes) tolerates any
	// trailing markup and cannot slice out of range.
	attr := func(line, name string) (string, bool) {
		rest, found := strings.CutPrefix(line, name+"=\"")
		if !found {
			return "", false
		}
		value, _, closed := strings.Cut(rest, "\"")
		return value, closed
	}

	var difficulty uint64
	var challenge string

	// Parse output body and find the challenge / difficulty.
	for _, line := range strings.Split(string(b), "\n") {
		line = strings.TrimSpace(line)

		if value, found := attr(line, "data-nollamas-challenge"); found {
			challenge = value
			continue
		}

		if value, found := attr(line, "data-nollamas-difficulty"); found {
			difficulty, err = strconv.ParseUint(value, 10, 8)
			assert.NoError(t, err)
		}
	}

	// Ensure valid posed challenge. Both checks are evaluated so
	// each reports on its own; without a challenge there is
	// nothing meaningful left to solve, so stop here.
	gotDifficulty := assert.NotZero(t, difficulty)
	gotChallenge := assert.NotEmpty(t, challenge)
	if !gotDifficulty || !gotChallenge {
		return
	}

	// Prepare a test request for gin engine.
	r = httptest.NewRequest("GET", "/", nil)
	r.Header.Set("User-Agent", userAgent)
	rw = httptest.NewRecorder()

	// Now compute and set solution query parameter.
	solution := computeSolution(challenge, difficulty)
	r.URL.RawQuery = "nollamas_solution=" + solution

	// Pass req through
	// engine handler.
	e.ServeHTTP(rw, r)

	// Get http result.
	res = rw.Result()
	defer res.Body.Close()

	// Should have received redirect. On error the returned
	// URL is nil, so bail out rather than dereference it.
	uri, err := res.Location()
	if !assert.NoError(t, err) {
		return
	}
	assert.Equal(t, "/", uri.String())

	// Ensure our expected solution cookie (to bypass challenge) was set.
	ok = slices.ContainsFunc(res.Cookies(), func(c *http.Cookie) bool {
		return c.Name == "gts-nollamas"
	})
	assert.True(t, ok)
}
// computeSolution does the functional equivalent of our nollamas workerTask.js:
// it tries the decimal strings "0", "1", "2", ... in order and returns the first
// one for which the hex-encoded SHA-256 of challenge+candidate starts with
// 'difficulty' characters that are all '0'.
func computeSolution(challenge string, difficulty uint64) string {
	// leadingZeroes reports whether the first
	// 'difficulty' hex characters are all '0'.
	leadingZeroes := func(encoded string) bool {
		for pos := uint64(0); pos < difficulty; pos++ {
			if encoded[pos] != '0' {
				return false
			}
		}
		return true
	}

	nonce := 0
	for {
		solution := strconv.Itoa(nonce)
		sum := sha256.Sum256(byteutil.S2B(challenge + solution))
		if leadingZeroes(hex.EncodeToString(sum[:])) {
			return solution
		}
		nonce++
	}
}
// usedTestHandler reports whether the response carries the
// "test-handler: ok" marker header that testHandler() sets.
func usedTestHandler(res *http.Response) bool {
	marker := res.Header.Get("test-handler")
	return marker == "ok"
}
// testHandler is the terminal handler for the test route: it tags the
// response with a "test-handler: ok" header and answers 200 OK, so that
// usedTestHandler() can tell whether a request got past the middleware.
func testHandler(c *gin.Context) {
	w := c.Writer
	w.Header().Set("test-handler", "ok")
	w.WriteHeader(http.StatusOK)
}
// getInstanceV1 is a stub instance getter for tests: it ignores
// the context and always yields an empty instance model, no error.
func getInstanceV1(context.Context) (*model.InstanceV1, gtserror.WithCode) {
	instance := new(model.InstanceV1)
	return instance, nil
}