2023-03-12 16:00:57 +01:00
|
|
|
// GoToSocial
|
|
|
|
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
|
|
|
// SPDX-License-Identifier: AGPL-3.0-or-later
|
|
|
|
//
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU Affero General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
2022-12-14 10:55:36 +01:00
|
|
|
|
|
|
|
package domain
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
2023-05-01 12:36:46 +02:00
|
|
|
"strings"
|
|
|
|
"sync/atomic"
|
|
|
|
"unsafe"
|
2022-12-14 10:55:36 +01:00
|
|
|
|
2023-05-01 12:36:46 +02:00
|
|
|
"golang.org/x/exp/slices"
|
2022-12-14 10:55:36 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
// BlockCache is an in-memory cache of blocked domains, intended to take
// lookup load off a slower backing store such as a database.
//
// The cache never talks to the backing store itself. Instead, every call
// to .IsBlocked() is handed a loader function; when the cache holds no
// block list, that loader is invoked to fetch the current set of blocked
// domains and the cache is rebuilt from the result. Call .Clear() to
// invalidate the cache whenever the stored block list changes (e.g. a
// domain block was created or removed), so that the next lookup reloads.
//
// The zero value is an empty (not yet hydrated) cache and is ready to use.
type BlockCache struct {
	// rootptr points at the *root of the radix trie currently
	// in use, or is nil while the cache is not hydrated. It is
	// only ever read and written via the sync/atomic functions.
	rootptr unsafe.Pointer
}
|
|
|
|
|
|
|
|
// IsBlocked checks whether domain is blocked. If the cache is not currently loaded, then the provided load function is used to hydrate it.
|
|
|
|
func (b *BlockCache) IsBlocked(domain string, load func() ([]string, error)) (bool, error) {
|
2023-05-01 12:36:46 +02:00
|
|
|
// Load the current root pointer value.
|
|
|
|
ptr := atomic.LoadPointer(&b.rootptr)
|
2022-12-14 10:55:36 +01:00
|
|
|
|
2023-05-01 12:36:46 +02:00
|
|
|
if ptr == nil {
|
|
|
|
// Cache is not hydrated.
|
2022-12-14 10:55:36 +01:00
|
|
|
//
|
2023-05-01 12:36:46 +02:00
|
|
|
// Load domains from callback.
|
2022-12-14 10:55:36 +01:00
|
|
|
domains, err := load()
|
|
|
|
if err != nil {
|
|
|
|
return false, fmt.Errorf("error reloading cache: %w", err)
|
|
|
|
}
|
|
|
|
|
2023-05-01 12:36:46 +02:00
|
|
|
// Allocate new radix trie
|
|
|
|
// node to store matches.
|
|
|
|
root := new(root)
|
2022-12-14 10:55:36 +01:00
|
|
|
|
2023-05-01 12:36:46 +02:00
|
|
|
// Add each domain to the trie.
|
|
|
|
for _, domain := range domains {
|
|
|
|
root.Add(domain)
|
2022-12-14 10:55:36 +01:00
|
|
|
}
|
|
|
|
|
2023-05-01 12:36:46 +02:00
|
|
|
// Sort the trie.
|
|
|
|
root.Sort()
|
2022-12-14 10:55:36 +01:00
|
|
|
|
2023-05-01 12:36:46 +02:00
|
|
|
// Store the new node ptr.
|
|
|
|
ptr = unsafe.Pointer(root)
|
|
|
|
atomic.StorePointer(&b.rootptr, ptr)
|
2022-12-14 10:55:36 +01:00
|
|
|
}
|
|
|
|
|
2023-05-01 12:36:46 +02:00
|
|
|
// Look for a match in the trie node.
|
|
|
|
return (*root)(ptr).Match(domain), nil
|
2022-12-14 10:55:36 +01:00
|
|
|
}
|
|
|
|
|
2023-05-01 12:36:46 +02:00
|
|
|
// Clear will drop the currently loaded domain list,
|
|
|
|
// triggering a reload on next call to .IsBlocked().
|
2022-12-14 10:55:36 +01:00
|
|
|
func (b *BlockCache) Clear() {
|
2023-05-01 12:36:46 +02:00
|
|
|
atomic.StorePointer(&b.rootptr, nil)
|
|
|
|
}
|
|
|
|
|
2023-05-09 16:18:51 +02:00
|
|
|
// String returns a string representation of stored domains in block cache.
|
|
|
|
func (b *BlockCache) String() string {
|
|
|
|
if ptr := atomic.LoadPointer(&b.rootptr); ptr != nil {
|
|
|
|
return (*root)(ptr).String()
|
|
|
|
}
|
|
|
|
return "<empty>"
|
|
|
|
}
|
|
|
|
|
2023-05-01 12:36:46 +02:00
|
|
|
// root is the root node in the domain
|
|
|
|
// block cache radix trie. this is the
|
|
|
|
// singular access point to the trie.
|
|
|
|
type root struct{ root node }
|
|
|
|
|
|
|
|
// Add will add the given domain to the radix trie.
|
|
|
|
func (r *root) Add(domain string) {
|
|
|
|
r.root.add(strings.Split(domain, "."))
|
|
|
|
}
|
|
|
|
|
|
|
|
// Match will return whether the given domain matches
|
|
|
|
// an existing stored domain block in this radix trie.
|
|
|
|
func (r *root) Match(domain string) bool {
|
|
|
|
return r.root.match(strings.Split(domain, "."))
|
|
|
|
}
|
|
|
|
|
|
|
|
// Sort will sort the entire radix trie ensuring that
|
|
|
|
// child nodes are stored in alphabetical order. This
|
|
|
|
// MUST be done to finalize the block cache in order
|
|
|
|
// to speed up the binary search of node child parts.
|
|
|
|
func (r *root) Sort() {
|
|
|
|
r.root.sort()
|
2022-12-14 10:55:36 +01:00
|
|
|
}
|
|
|
|
|
2023-05-09 16:18:51 +02:00
|
|
|
// String returns a string representation of node (and its descendants).
|
|
|
|
func (r *root) String() string {
|
|
|
|
buf := new(strings.Builder)
|
|
|
|
r.root.writestr(buf, "")
|
|
|
|
return buf.String()
|
|
|
|
}
|
|
|
|
|
2023-05-01 12:36:46 +02:00
|
|
|
// node is a single entry in the domain block radix trie.
//
// Domains are stored label-by-label from right to left, so the
// children of the top-level node are top-level domains, their
// children are second-level labels, and so on. A node without
// any children terminates a stored domain block.
type node struct {
	// part is the single domain label held by this node.
	part string

	// child holds the nodes for the next label to the left.
	child []*node
}

// add inserts the domain described by parts (its dot-separated
// labels, in their usual left-to-right order) below this node.
//
// Labels are consumed from the end of the slice, so the right-most
// label is stored closest to n. Insertion keeps only the broadest
// block on any path, regardless of the order domains are added in:
//
//   - adding a domain below which narrower blocks already exist
//     drops those narrower entries, and
//   - adding a domain that is already covered by an existing,
//     broader block is a no-op.
//
// It panics with "invalid domain" if parts is empty.
func (n *node) add(parts []string) {
	if len(parts) == 0 {
		panic("invalid domain")
	}

	for {
		// Pop next domain part.
		i := len(parts) - 1
		part := parts[i]
		parts = parts[:i]

		var nn *node

		// Look for existing child node
		// that matches next domain part.
		for _, child := range n.child {
			if child.part == part {
				nn = child
				break
			}
		}

		switch {
		case nn == nil:
			// Alloc new child node.
			nn = &node{part: part}
			n.child = append(n.child, nn)

		case len(nn.child) == 0:
			// Every non-terminal node gains a child
			// before add returns, so an existing node
			// without children is the end of a stored
			// block. That block already covers the domain
			// being added; hanging more nodes below it
			// would stop it from matching as a block.
			return
		}

		if len(parts) == 0 {
			// Drop all children here as
			// this is a higher-level block
			// than that we previously had.
			nn.child = nil
			return
		}

		// Re-iter with
		// child node.
		n = nn
	}
}
|
|
|
|
|
2023-05-01 12:36:46 +02:00
|
|
|
func (n *node) match(parts []string) bool {
|
2023-05-09 16:18:51 +02:00
|
|
|
for len(parts) > 0 {
|
2023-05-01 12:36:46 +02:00
|
|
|
// Pop next domain part.
|
|
|
|
i := len(parts) - 1
|
|
|
|
part := parts[i]
|
|
|
|
parts = parts[:i]
|
|
|
|
|
|
|
|
// Look for existing child
|
|
|
|
// that matches next part.
|
|
|
|
nn := n.getChild(part)
|
|
|
|
|
|
|
|
if nn == nil {
|
|
|
|
// No match :(
|
2022-12-14 10:55:36 +01:00
|
|
|
return false
|
|
|
|
}
|
2023-05-01 12:36:46 +02:00
|
|
|
|
|
|
|
if len(nn.child) == 0 {
|
|
|
|
// It's a match!
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
// Re-iter with
|
|
|
|
// child node.
|
|
|
|
n = nn
|
|
|
|
}
|
2023-05-09 16:18:51 +02:00
|
|
|
|
|
|
|
// Ran out of parts
|
|
|
|
// without a match.
|
|
|
|
return false
|
2023-05-01 12:36:46 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// getChild fetches child node with given domain part string
|
|
|
|
// using a binary search. THIS ASSUMES CHILDREN ARE SORTED.
|
|
|
|
func (n *node) getChild(part string) *node {
|
|
|
|
i, j := 0, len(n.child)
|
|
|
|
|
|
|
|
for i < j {
|
|
|
|
// avoid overflow when computing h
|
|
|
|
h := int(uint(i+j) >> 1)
|
|
|
|
// i ≤ h < j
|
|
|
|
|
|
|
|
if n.child[h].part < part {
|
|
|
|
// preserves:
|
|
|
|
// n.child[i-1].part != part
|
|
|
|
i = h + 1
|
|
|
|
} else {
|
|
|
|
// preserves:
|
|
|
|
// n.child[h].part == part
|
|
|
|
j = h
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if i >= len(n.child) || n.child[i].part != part {
|
|
|
|
return nil // no match
|
2022-12-14 10:55:36 +01:00
|
|
|
}
|
|
|
|
|
2023-05-01 12:36:46 +02:00
|
|
|
return n.child[i]
|
|
|
|
}
|
|
|
|
|
|
|
|
func (n *node) sort() {
|
|
|
|
// Sort this node's slice of child nodes.
|
|
|
|
slices.SortFunc(n.child, func(i, j *node) bool {
|
|
|
|
return i.part < j.part
|
|
|
|
})
|
|
|
|
|
|
|
|
// Sort each child node's children.
|
|
|
|
for _, child := range n.child {
|
|
|
|
child.sort()
|
|
|
|
}
|
2022-12-14 10:55:36 +01:00
|
|
|
}
|
2023-05-09 16:18:51 +02:00
|
|
|
|
|
|
|
func (n *node) writestr(buf *strings.Builder, prefix string) {
|
|
|
|
if prefix != "" {
|
|
|
|
// Suffix joining '.'
|
|
|
|
prefix += "."
|
|
|
|
}
|
|
|
|
|
|
|
|
// Append current part.
|
|
|
|
prefix += n.part
|
|
|
|
|
|
|
|
// Dump current prefix state.
|
|
|
|
buf.WriteString(prefix)
|
|
|
|
buf.WriteByte('\n')
|
|
|
|
|
|
|
|
// Iterate through node children.
|
|
|
|
for _, child := range n.child {
|
|
|
|
child.writestr(buf, prefix)
|
|
|
|
}
|
|
|
|
}
|