mirror of
https://github.com/superseriousbusiness/gotosocial
synced 2025-06-05 21:59:39 +02:00
[feature] proof of work scraper deterrence (#4043)
This adds a proof-of-work based scraper deterrence to GoToSocial's middleware stack on profile and status web pages. Heavily inspired by https://github.com/TecharoHQ/anubis, but massively stripped back for our own usecase. Todo: - ~~add configuration option so this is disabled by default~~ - ~~fix whatever weirdness is preventing this working with CSP (even in debug)~~ - ~~use our standard templating mechanism going through apiutil helper func~~ - ~~probably some absurdly small performance improvements to be made in pooling re-used hex encode / hash encode buffers~~ the web endpoints aren't as hot a path as API / ActivityPub, will leave as-is for now as it is already very minimal and well optimized - ~~verify the cryptographic assumptions re: using a portion of token as challenge data~~ this isn't a serious application of cryptography, if it turns out to be a problem we'll fix it, but it definitely should not be easily possible to guess a SHA256 hash from the first 1/4 of it even if mathematically it might make it a bit easier - ~~theme / make look nice??~~ - ~~add a spinner~~ - ~~add entry in example configuration~~ - ~~add documentation~~ Verification page originally based on https://github.com/LucienV1/powtect Co-authored-by: tobi <tobi.smethurst@protonmail.com> Reviewed-on: https://codeberg.org/superseriousbusiness/gotosocial/pulls/4043 Reviewed-by: tobi <tsmethurst@noreply.codeberg.org> Co-authored-by: kim <grufwub@gmail.com> Co-committed-by: kim <grufwub@gmail.com>
This commit is contained in:
309
internal/middleware/nollamas.go
Normal file
309
internal/middleware/nollamas.go
Normal file
@@ -0,0 +1,309 @@
|
||||
// GoToSocial
|
||||
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package middleware
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"crypto/sha256"
|
||||
"crypto/subtle"
|
||||
"encoding/hex"
|
||||
"hash"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
apimodel "code.superseriousbusiness.org/gotosocial/internal/api/model"
|
||||
apiutil "code.superseriousbusiness.org/gotosocial/internal/api/util"
|
||||
"code.superseriousbusiness.org/gotosocial/internal/config"
|
||||
"code.superseriousbusiness.org/gotosocial/internal/gtscontext"
|
||||
"code.superseriousbusiness.org/gotosocial/internal/gtserror"
|
||||
"code.superseriousbusiness.org/gotosocial/internal/log"
|
||||
"code.superseriousbusiness.org/gotosocial/internal/oauth"
|
||||
"codeberg.org/gruf/go-byteutil"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// NoLLaMas returns a piece of HTTP middleware that provides a deterrence
|
||||
// on routes it is applied to, against bots and scrapers. It generates a
|
||||
// unique but deterministic challenge for each HTTP client within an hour
|
||||
// TTL that requires a proof-of-work solution to pass onto the next handler.
|
||||
// On successful solution, the client is provided a cookie that allows them
|
||||
// to bypass this check within that hour TTL. The outcome of this is that it
|
||||
// should make scraping of these endpoints economically unfeasible, when enabled,
|
||||
// and with an absurdly minimal performance impact. The downside is that it
|
||||
// requires javascript to be enabled on the client to pass the middleware check.
|
||||
//
|
||||
// Heavily inspired by: https://github.com/TecharoHQ/anubis
|
||||
func NoLLaMas(getInstanceV1 func(context.Context) (*apimodel.InstanceV1, gtserror.WithCode)) gin.HandlerFunc {
|
||||
|
||||
if !config.GetAdvancedScraperDeterrence() {
|
||||
// NoLLaMas middleware disabled.
|
||||
return func(*gin.Context) {}
|
||||
}
|
||||
|
||||
seed := make([]byte, 32)
|
||||
|
||||
// Read random data for the token seed.
|
||||
_, err := io.ReadFull(rand.Reader, seed)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// Configure nollamas.
|
||||
var nollamas nollamas
|
||||
nollamas.seed = seed
|
||||
nollamas.ttl = time.Hour
|
||||
nollamas.diff = 4
|
||||
nollamas.getInstanceV1 = getInstanceV1
|
||||
return nollamas.Serve
|
||||
}
|
||||
|
||||
// hashWithBufs encompasses a hash along
|
||||
// with the necessary buffers to generate
|
||||
// a hashsum and then encode that sum.
|
||||
type hashWithBufs struct {
|
||||
hash hash.Hash
|
||||
hbuf []byte
|
||||
ebuf []byte
|
||||
}
|
||||
|
||||
type nollamas struct {
|
||||
seed []byte // unique token seed
|
||||
ttl time.Duration
|
||||
diff uint8
|
||||
|
||||
// extra fields required for
|
||||
// our template rendering.
|
||||
getInstanceV1 func(ctx context.Context) (*apimodel.InstanceV1, gtserror.WithCode)
|
||||
}
|
||||
|
||||
func (m *nollamas) Serve(c *gin.Context) {
|
||||
if c.Request.Method != http.MethodGet {
|
||||
// Only interested in protecting
|
||||
// crawlable 'GET' endpoints.
|
||||
c.Next()
|
||||
return
|
||||
}
|
||||
|
||||
// Extract request context.
|
||||
ctx := c.Request.Context()
|
||||
|
||||
if ctx.Value(oauth.SessionAuthorizedToken) != nil {
|
||||
// Don't guard against requests
|
||||
// providing valid OAuth tokens.
|
||||
c.Next()
|
||||
return
|
||||
}
|
||||
|
||||
if gtscontext.HTTPSignature(ctx) != "" {
|
||||
// Don't guard against requests
|
||||
// providing HTTP signatures.
|
||||
c.Next()
|
||||
return
|
||||
}
|
||||
|
||||
// i.e. outputted hash slice length.
|
||||
const hashLen = sha256.Size
|
||||
|
||||
// i.e. hex.EncodedLen(hashLen).
|
||||
const encodedHashLen = 2 * hashLen
|
||||
|
||||
// Prepare hash + buffers.
|
||||
hash := hashWithBufs{
|
||||
hash: sha256.New(),
|
||||
hbuf: make([]byte, 0, hashLen),
|
||||
ebuf: make([]byte, encodedHashLen),
|
||||
}
|
||||
|
||||
// Extract client fingerprint data.
|
||||
userAgent := c.GetHeader("User-Agent")
|
||||
clientIP := c.ClientIP()
|
||||
|
||||
// Generate a unique token for this request,
|
||||
// only valid for a period of now +- m.ttl.
|
||||
token := m.token(&hash, userAgent, clientIP)
|
||||
|
||||
// For unique challenge string just use a
|
||||
// single portion of their 'success' token.
|
||||
// SHA256 is not yet cracked, this is not an
|
||||
// application of a hash requiring serious
|
||||
// cryptographic security and it rotates on
|
||||
// a TTL basis, so it should be fine.
|
||||
challenge := token[:len(token)/4]
|
||||
|
||||
// Check for a provided success token.
|
||||
cookie, _ := c.Cookie("gts-nollamas")
|
||||
|
||||
// Check whether passed cookie
|
||||
// is the expected success token.
|
||||
if subtle.ConstantTimeCompare(
|
||||
byteutil.S2B(token),
|
||||
byteutil.S2B(cookie),
|
||||
) == 1 {
|
||||
|
||||
// They passed us a valid, expected
|
||||
// token. They already passed checks.
|
||||
c.Next()
|
||||
return
|
||||
}
|
||||
|
||||
// Prepare new log entry.
|
||||
l := log.WithContext(ctx).
|
||||
WithField("userAgent", userAgent).
|
||||
WithField("challenge", challenge)
|
||||
|
||||
// Extract and parse query.
|
||||
query := c.Request.URL.Query()
|
||||
|
||||
// Check query to see if an in-progress
|
||||
// challenge solution has been provided.
|
||||
nonce := query.Get("nollamas_solution")
|
||||
if nonce == "" || len(nonce) > 20 {
|
||||
|
||||
// noting that here, 20 is
|
||||
// max integer string len.
|
||||
//
|
||||
// An invalid solution string, just
|
||||
// present them with new challenge.
|
||||
l.Info("posing new challenge")
|
||||
m.renderChallenge(c, challenge)
|
||||
return
|
||||
}
|
||||
|
||||
// Reset the hash.
|
||||
hash.hash.Reset()
|
||||
|
||||
// Check challenge+nonce as possible solution.
|
||||
if !m.checkChallenge(&hash, challenge, nonce) {
|
||||
|
||||
// They failed challenge,
|
||||
// re-present challenge page.
|
||||
l.Info("invalid solution provided")
|
||||
m.renderChallenge(c, challenge)
|
||||
return
|
||||
}
|
||||
|
||||
l.Infof("challenge passed: %s", nonce)
|
||||
|
||||
// Don't pass to further
|
||||
// handlers, we'll redirect.
|
||||
c.Abort()
|
||||
|
||||
// Drop solution query and encode.
|
||||
query.Del("nollamas_solution")
|
||||
c.Request.URL.RawQuery = query.Encode()
|
||||
|
||||
// They passed the challenge! Set success token
|
||||
// cookie and allow them to continue to next handlers.
|
||||
c.SetCookie("gts-nollamas", token, int(m.ttl/time.Second), "", "", false, false)
|
||||
c.Redirect(http.StatusTemporaryRedirect, c.Request.URL.RequestURI())
|
||||
}
|
||||
|
||||
func (m *nollamas) renderChallenge(c *gin.Context, challenge string) {
|
||||
// Don't pass to further
|
||||
// handlers, they only get
|
||||
// our challenge page.
|
||||
c.Abort()
|
||||
|
||||
// Fetch current instance information for templating vars.
|
||||
instance, errWithCode := m.getInstanceV1(c.Request.Context())
|
||||
if errWithCode != nil {
|
||||
apiutil.ErrorHandler(c, errWithCode, m.getInstanceV1)
|
||||
return
|
||||
}
|
||||
|
||||
// Write templated challenge response to client.
|
||||
apiutil.TemplateWebPage(c, apiutil.WebPage{
|
||||
Template: "nollamas.tmpl",
|
||||
Instance: instance,
|
||||
Stylesheets: []string{
|
||||
"/assets/dist/nollamas.css",
|
||||
// Include fork-awesome stylesheet
|
||||
// to get nice loading spinner.
|
||||
"/assets/Fork-Awesome/css/fork-awesome.min.css",
|
||||
},
|
||||
Extra: map[string]any{
|
||||
"challenge": challenge,
|
||||
"difficulty": m.diff,
|
||||
},
|
||||
Javascript: []apiutil.JavascriptEntry{
|
||||
{
|
||||
Src: "/assets/dist/nollamas.js",
|
||||
Defer: true,
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func (m *nollamas) token(hash *hashWithBufs, userAgent, clientIP string) string {
|
||||
// Use our unique seed to seed hash,
|
||||
// to ensure we have cryptographically
|
||||
// unique, yet deterministic, tokens
|
||||
// generated for a given http client.
|
||||
hash.hash.Write(m.seed)
|
||||
|
||||
// Include difficulty level in
|
||||
// hash input data so if config
|
||||
// changes then token invalidates.
|
||||
hash.hash.Write([]byte{m.diff})
|
||||
|
||||
// Also seed the generated input with
|
||||
// current time rounded to TTL, so our
|
||||
// single comparison handles expiries.
|
||||
now := time.Now().Round(m.ttl).Unix()
|
||||
hash.hash.Write([]byte{
|
||||
byte(now >> 56),
|
||||
byte(now >> 48),
|
||||
byte(now >> 40),
|
||||
byte(now >> 32),
|
||||
byte(now >> 24),
|
||||
byte(now >> 16),
|
||||
byte(now >> 8),
|
||||
byte(now),
|
||||
})
|
||||
|
||||
// Finally, append unique client request data.
|
||||
hash.hash.Write(byteutil.S2B(userAgent))
|
||||
hash.hash.Write(byteutil.S2B(clientIP))
|
||||
|
||||
// Return hex encoded hash output.
|
||||
hash.hbuf = hash.hash.Sum(hash.hbuf[:0])
|
||||
hex.Encode(hash.ebuf, hash.hbuf)
|
||||
return string(hash.ebuf)
|
||||
}
|
||||
|
||||
func (m *nollamas) checkChallenge(hash *hashWithBufs, challenge, nonce string) bool {
|
||||
// Hash and encode input challenge with
|
||||
// proposed nonce as a possible solution.
|
||||
hash.hash.Write(byteutil.S2B(challenge))
|
||||
hash.hash.Write(byteutil.S2B(nonce))
|
||||
hash.hbuf = hash.hash.Sum(hash.hbuf[:0])
|
||||
hex.Encode(hash.ebuf, hash.hbuf)
|
||||
solution := hash.ebuf
|
||||
|
||||
// Check that the first 'diff'
|
||||
// many chars are indeed zeroes.
|
||||
for i := range m.diff {
|
||||
if solution[i] != '0' {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
Reference in New Issue
Block a user