[feature] various worker / request queue improvements (#995)

* greatly simplify httpclient request queuing

Signed-off-by: kim <grufwub@gmail.com>

* improved request queue mutex logic

Signed-off-by: kim <grufwub@gmail.com>

* use improved hashmap library

Signed-off-by: kim <grufwub@gmail.com>

* add warn logging when request queues are full

Signed-off-by: kim <grufwub@gmail.com>

* improve worker pool prefix var naming

Signed-off-by: kim <grufwub@gmail.com>

* improved worker pool error logging

Signed-off-by: kim <grufwub@gmail.com>

* move error message into separate field

Signed-off-by: kim <grufwub@gmail.com>

* remove old log statement

Signed-off-by: kim <grufwub@gmail.com>

* don't export worker message, it gets very spammy :')

Signed-off-by: kim <grufwub@gmail.com>

Signed-off-by: kim <grufwub@gmail.com>
kim 2022-11-08 09:35:24 +00:00 committed by GitHub
parent 7c0bbd3f6a
commit 0e57246083
5 changed files with 88 additions and 208 deletions

go.mod

@@ -15,6 +15,7 @@ require (
codeberg.org/gruf/go-store/v2 v2.0.7
github.com/buckket/go-blurhash v1.1.0
github.com/coreos/go-oidc/v3 v3.4.0
github.com/cornelk/hashmap v1.0.8
github.com/disintegration/imaging v1.6.2
github.com/gin-contrib/cors v1.4.0
github.com/gin-contrib/gzip v0.0.6
@@ -67,7 +68,6 @@ require (
codeberg.org/gruf/go-pools v1.1.0 // indirect
codeberg.org/gruf/go-sched v1.1.1 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
github.com/cornelk/hashmap v1.0.8 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dsoprea/go-exif/v3 v3.0.0-20210625224831-a6301f85c82b // indirect
github.com/dsoprea/go-iptc v0.0.0-20200610044640-bc9ca208b413 // indirect
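
The cornelk/hashmap module moves from the indirect block into the direct requirements because the httpclient package below now imports it for its per-host request queues. For reference, a minimal sketch of the two calls relied on there (the key and value types here are illustrative only):

package main

import (
    "fmt"

    "github.com/cornelk/hashmap"
)

func main() {
    m := hashmap.New[string, int]()

    // Get returns the zero value and false when the key is absent.
    if _, ok := m.Get("example.org"); !ok {
        fmt.Println("no entry yet")
    }

    // GetOrInsert stores the value only if the key is still absent, and
    // returns whichever value ended up in the map, so concurrent callers
    // racing to insert the same key all share a single winner.
    v, _ := m.GetOrInsert("example.org", 1)
    fmt.Println(v) // 1

    v, _ = m.GetOrInsert("example.org", 2)
    fmt.Println(v) // still 1: the existing entry wins
}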


@@ -26,6 +26,7 @@ import (
"reflect"
"runtime"
"codeberg.org/gruf/go-kv"
"codeberg.org/gruf/go-runners"
"github.com/superseriousbusiness/gotosocial/internal/log"
)
@@ -35,7 +36,7 @@ type WorkerPool[MsgType any] struct {
workers runners.WorkerPool
process func(context.Context, MsgType) error
nw, nq int
prefix string // contains type prefix for logging
wtype string // contains worker type for logging
}
// New returns a new WorkerPool[MsgType] with given number of workers and queue ratio,
@@ -61,12 +62,12 @@ func NewWorkerPool[MsgType any](workers int, queueRatio int) *WorkerPool[MsgType
process: nil,
nw: workers,
nq: workers * queueRatio,
prefix: fmt.Sprintf("worker.Worker[%s]", msgType),
wtype: fmt.Sprintf("worker.Worker[%s]", msgType),
}
// Log new worker creation with type prefix
// Log new worker creation with worker type prefix
log.Infof("%s created with workers=%d queue=%d",
w.prefix,
w.wtype,
workers,
workers*queueRatio,
)
@@ -76,7 +77,7 @@ func NewWorkerPool[MsgType any](workers int, queueRatio int) *WorkerPool[MsgType
// Start will attempt to start the underlying worker pool, or return error.
func (w *WorkerPool[MsgType]) Start() error {
log.Infof("%s starting", w.prefix)
log.Infof("%s starting", w.wtype)
// Check processor was set
if w.process == nil {
@@ -93,7 +94,7 @@ func (w *WorkerPool[MsgType]) Start() error {
// Stop will attempt to stop the underlying worker pool, or return error.
func (w *WorkerPool[MsgType]) Stop() error {
log.Infof("%s stopping", w.prefix)
log.Infof("%s stopping", w.wtype)
// Attempt to stop pool
if !w.workers.Stop() {
@@ -106,19 +107,34 @@ func (w *WorkerPool[MsgType]) Stop() error {
// SetProcessor will set the Worker's processor function, which is called for each queued message.
func (w *WorkerPool[MsgType]) SetProcessor(fn func(context.Context, MsgType) error) {
if w.process != nil {
log.Fatalf("%s Worker.process is already set", w.prefix)
log.Panicf("%s Worker.process is already set", w.wtype)
}
w.process = fn
}
// Queue will queue provided message to be processed when there's a free worker.
func (w *WorkerPool[MsgType]) Queue(msg MsgType) {
log.Tracef("%s queueing message (queue=%d): %+v",
w.prefix, w.workers.Queue(), msg,
)
w.workers.Enqueue(func(ctx context.Context) {
log.Tracef("%s queueing message: %+v", w.wtype, msg)
// Create new process function for msg
process := func(ctx context.Context) {
if err := w.process(ctx, msg); err != nil {
log.Errorf("%s %v", w.prefix, err)
log.WithFields(kv.Fields{
kv.Field{K: "type", V: w.wtype},
kv.Field{K: "error", V: err},
}...).Error("message processing error")
}
})
}
// Attempt a fast-enqueue of process
if !w.workers.EnqueueNow(process) {
// No spot acquired, log warning
log.WithFields(kv.Fields{
kv.Field{K: "type", V: w.wtype},
kv.Field{K: "queue", V: w.workers.Queue()},
}...).Warn("full worker queue")
// Block on enqueuing process func
w.workers.Enqueue(process)
}
}
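
For context, this is roughly how a caller drives the pool once this change is in. A sketch only: the Delivery type and processor body are made up for illustration, and it assumes the package's existing imports (context, log):

// Delivery is a hypothetical message type; any type works as MsgType.
type Delivery struct{ Target string }

func exampleUsage() {
    pool := NewWorkerPool[Delivery](4, 100) // 4 workers, queue capacity 4*100

    // The processor must be set exactly once before Start; setting it twice
    // now panics via log.Panicf rather than exiting via log.Fatalf.
    pool.SetProcessor(func(ctx context.Context, msg Delivery) error {
        // ... perform the work for one queued message ...
        return nil
    })

    if err := pool.Start(); err != nil {
        log.Errorf("error starting pool: %v", err)
        return
    }

    // Queue first tries a non-blocking EnqueueNow; if the queue is full it
    // logs a "full worker queue" warning and then blocks in Enqueue.
    pool.Queue(Delivery{Target: "https://example.org/inbox"})

    _ = pool.Stop()
}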


@@ -26,6 +26,11 @@ import (
"net/netip"
"runtime"
"time"
"codeberg.org/gruf/go-bytesize"
"codeberg.org/gruf/go-kv"
"github.com/cornelk/hashmap"
"github.com/superseriousbusiness/gotosocial/internal/log"
)
// ErrInvalidRequest is returned if a given HTTP request is invalid and cannot be performed.
@@ -42,8 +47,8 @@ var ErrBodyTooLarge = errors.New("body size too large")
// configuration values passed to initialized http.Transport{}
// and http.Client{}, along with httpclient.Client{} specific.
type Config struct {
// MaxOpenConns limits the max number of concurrent open connections.
MaxOpenConns int
// MaxOpenConnsPerHost limits the max number of open connections to a host.
MaxOpenConnsPerHost int
// MaxIdleConns: see http.Transport{}.MaxIdleConns.
MaxIdleConns int
@@ -80,8 +85,9 @@ type Config struct {
// is available (context channels still respected)
type Client struct {
client http.Client
rc *requestQueue
bmax int64
queue *hashmap.Map[string, chan struct{}]
bmax int64 // max response body size
cmax int // max open conns per host
}
// New returns a new instance of Client initialized using configuration.
@@ -94,20 +100,20 @@ func New(cfg Config) *Client {
Resolver: &net.Resolver{},
}
if cfg.MaxOpenConns <= 0 {
if cfg.MaxOpenConnsPerHost <= 0 {
// By default base this value on GOMAXPROCS.
maxprocs := runtime.GOMAXPROCS(0)
cfg.MaxOpenConns = maxprocs * 10
cfg.MaxOpenConnsPerHost = maxprocs * 20
}
if cfg.MaxIdleConns <= 0 {
// By default base this value on MaxOpenConns
cfg.MaxIdleConns = cfg.MaxOpenConns * 10
cfg.MaxIdleConns = cfg.MaxOpenConnsPerHost * 10
}
if cfg.MaxBodySize <= 0 {
// By default set this to a reasonable 40MB
cfg.MaxBodySize = 40 * 1024 * 1024
cfg.MaxBodySize = int64(40 * bytesize.MiB)
}
// Protect dialer with IP range sanitizer
@@ -117,11 +123,10 @@ func New(cfg Config) *Client {
}).Sanitize
// Prepare client fields
c.bmax = cfg.MaxBodySize
c.rc = &requestQueue{
maxOpenConns: cfg.MaxOpenConns,
}
c.client.Timeout = cfg.Timeout
c.cmax = cfg.MaxOpenConnsPerHost
c.bmax = cfg.MaxBodySize
c.queue = hashmap.New[string, chan struct{}]()
// Set underlying HTTP client roundtripper
c.client.Transport = &http.Transport{
@@ -145,17 +150,16 @@ func New(cfg Config) *Client {
// as the standard http.Client{}.Do() implementation except that response body will
// be wrapped by an io.LimitReader() to limit response body sizes.
func (c *Client) Do(req *http.Request) (*http.Response, error) {
// request a spot in the wait queue...
wait, release := c.rc.getWaitSpot(req.Host, req.Method)
// Get host's wait queue
wait := c.wait(req.Host)
var ok bool
// ... and wait our turn
select {
case <-req.Context().Done():
// the request was canceled before we
// got to our turn: no need to release
return nil, req.Context().Err()
// Quickly try grab a spot
case wait <- struct{}{}:
// it's our turn!
ok = true
// NOTE:
// Ideally here we would set the slot release to happen either
@@ -167,7 +171,27 @@ func (c *Client) Do(req *http.Request) (*http.Response, error) {
// that connections may not be closed until response body is closed.
// The current implementation will reduce the viability of denial of
// service attacks, but if there are future issues heed this advice :]
defer release()
defer func() { <-wait }()
default:
}
if !ok {
// No spot acquired, log warning
log.WithFields(kv.Fields{
{K: "queue", V: len(wait)},
{K: "method", V: req.Method},
{K: "host", V: req.Host},
{K: "uri", V: req.URL.RequestURI()},
}...).Warn("full request queue")
select {
case <-req.Context().Done():
// the request was canceled before we
// got to our turn: no need to release
return nil, req.Context().Err()
case wait <- struct{}{}:
defer func() { <-wait }()
}
}
// Firstly, ensure this is a valid request
@@ -208,3 +232,17 @@ func (c *Client) Do(req *http.Request) (*http.Response, error) {
return rsp, nil
}
// wait acquires the 'wait' queue for the given host string, or allocates new.
func (c *Client) wait(host string) chan struct{} {
// Look for an existing queue
queue, ok := c.queue.Get(host)
if ok {
return queue
}
// Allocate a new host queue (or return a sneaky existing one).
queue, _ = c.queue.GetOrInsert(host, make(chan struct{}, c.cmax))
return queue
}
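
Taken together, Do() and wait() replace the old per-host-per-method requestQueue with a single buffered channel per host, used as a semaphore capped at MaxOpenConnsPerHost. A self-contained sketch of that pattern in isolation (all names here are illustrative, not part of the commit):

package main

import (
    "context"
    "fmt"
    "time"
)

// acquire sketches the queueing logic in Do(): try a non-blocking send first,
// and only on a full queue fall back to a blocking send that still honours
// the request context.
func acquire(ctx context.Context, wait chan struct{}) (release func(), err error) {
    select {
    case wait <- struct{}{}:
        // Fast path: a slot was free.
    default:
        // Queue is full: this is where the client logs "full request queue".
        fmt.Println("full request queue, waiting for a slot")
        select {
        case wait <- struct{}{}:
        case <-ctx.Done():
            return nil, ctx.Err()
        }
    }
    return func() { <-wait }, nil
}

func main() {
    wait := make(chan struct{}, 2) // e.g. MaxOpenConnsPerHost = 2

    // Fill both slots without releasing them.
    r1, _ := acquire(context.Background(), wait)
    r2, _ := acquire(context.Background(), wait)

    // A third caller hits the full-queue branch, then gives up when its
    // context times out before any slot is released.
    ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
    defer cancel()
    if _, err := acquire(ctx, wait); err != nil {
        fmt.Println("third caller:", err) // context deadline exceeded
    }

    r1()
    r2()
}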


@@ -1,68 +0,0 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package httpclient
import (
"strings"
"sync"
"github.com/superseriousbusiness/gotosocial/internal/log"
)
type requestQueue struct {
hostQueues sync.Map // map of `hostQueue`
maxOpenConns int // max open conns per host per request method
}
type hostQueue struct {
slotsByMethod sync.Map
}
// getWaitSpot returns a wait channel and release function for http clients
// that want to do requests politely: that is, wait for their turn.
//
// To wait, a caller should do a select on an attempted insert into the
// returned wait channel. Once the insert succeeds, then the caller should
// proceed with the http request that pertains to the given host + method.
// It doesn't matter what's put into the wait channel, just any interface{}.
//
// When the caller is finished with their http request, they should free up the
// slot they were occupying in the wait queue, by calling the release function.
//
// The reason for the caller needing to provide host and method, is that each
// remote host has a separate wait queue, and there's a separate wait queue
// per method for that host as well. This ensures that outgoing requests can still
proceed for other hosts and methods while other requests are ongoing,
// while also preventing one host from being spammed with, for example, a
// shitload of GET requests all at once.
func (rc *requestQueue) getWaitSpot(host string, method string) (wait chan<- interface{}, release func()) {
hostQueueI, _ := rc.hostQueues.LoadOrStore(host, new(hostQueue))
hostQueue, ok := hostQueueI.(*hostQueue)
if !ok {
log.Panic("hostQueueI was not a *hostQueue")
}
waitSlotI, _ := hostQueue.slotsByMethod.LoadOrStore(strings.ToUpper(method), make(chan interface{}, rc.maxOpenConns))
methodQueue, ok := waitSlotI.(chan interface{})
if !ok {
log.Panic("waitSlotI was not a chan interface{}")
}
return methodQueue, func() { <-methodQueue }
}
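
The doc comment above described the old usage protocol in prose; as a short sketch of the same thing in code (the function and its arguments are illustrative, written against the package as it was before this commit):

func politeRequest(ctx context.Context, rc *requestQueue, req *http.Request) error {
    wait, release := rc.getWaitSpot(req.Host, req.Method)
    select {
    case <-ctx.Done():
        // gave up before a slot freed up
        return ctx.Err()
    case wait <- struct{}{}:
        // our turn: free the slot again once the request is done
        defer release()
    }
    // ... perform the request for req.Host + req.Method ...
    return nil
}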


@@ -1,106 +0,0 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package httpclient
import (
"net/http"
"testing"
"time"
"github.com/stretchr/testify/suite"
)
type QueueTestSuite struct {
suite.Suite
}
func (suite *QueueTestSuite) TestQueue() {
maxOpenConns := 5
waitTimeout := 1 * time.Second
rc := &requestQueue{
maxOpenConns: maxOpenConns,
}
// fill all the open connections
var release func()
for i, n := range make([]interface{}, maxOpenConns) {
w, r := rc.getWaitSpot("example.org", http.MethodPost)
w <- n
if i == maxOpenConns-1 {
// save the last release function
release = r
}
}
// try to wait again for the same host/method combo, it should timeout
waitAgain, _ := rc.getWaitSpot("example.org", "post")
select {
case waitAgain <- struct{}{}:
suite.FailNow("first wait did not time out")
case <-time.After(waitTimeout):
break
}
// now close the final release that we derived earlier
release()
// try waiting again, it should work this time
select {
case waitAgain <- struct{}{}:
break
case <-time.After(waitTimeout):
suite.FailNow("second wait timed out")
}
// the POST queue is now sitting on full
suite.Len(waitAgain, maxOpenConns)
// we should still be able to make a GET for the same host though
getWait, getRelease := rc.getWaitSpot("example.org", http.MethodGet)
select {
case getWait <- struct{}{}:
break
case <-time.After(waitTimeout):
suite.FailNow("get wait timed out")
}
// the GET queue has one request waiting
suite.Len(getWait, 1)
// clear it...
getRelease()
suite.Empty(getWait)
// even though the POST queue for example.org is full, we
// should still be able to make a POST request to another host :)
waitForAnotherHost, _ := rc.getWaitSpot("somewhere.else", http.MethodPost)
select {
case waitForAnotherHost <- struct{}{}:
break
case <-time.After(waitTimeout):
suite.FailNow("get wait timed out")
}
suite.Len(waitForAnotherHost, 1)
}
func TestQueueTestSuite(t *testing.T) {
suite.Run(t, &QueueTestSuite{})
}
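
The suite above covered the old per-host, per-method queues and is removed without a direct replacement. A hypothetical sketch (not part of this commit) of how the same guarantees could be exercised against the new per-host queue, assuming a testify suite in package httpclient like the one removed, the usual testing imports, and the unexported wait() helper from the diff above:

func (suite *QueueTestSuite) TestPerHostQueue() {
    waitTimeout := 1 * time.Second
    c := New(Config{MaxOpenConnsPerHost: 2})

    // Fill both slots for example.org without releasing them.
    wait := c.wait("example.org")
    wait <- struct{}{}
    wait <- struct{}{}
    suite.Len(wait, 2)

    // A third attempt should block, i.e. not succeed before the timeout.
    select {
    case wait <- struct{}{}:
        suite.FailNow("queue should have been full")
    case <-time.After(waitTimeout):
    }

    // Another host gets its own, independent queue.
    other := c.wait("somewhere.else")
    select {
    case other <- struct{}{}:
    case <-time.After(waitTimeout):
        suite.FailNow("other host's queue should not have been full")
    }

    // Releasing one slot frees capacity for example.org again.
    <-wait
    select {
    case wait <- struct{}{}:
    case <-time.After(waitTimeout):
        suite.FailNow("released slot was not reusable")
    }
}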