[bugfix] Tidy up rss feed serving; don't error on empty feed (#1970)

* [bugfix] Tidy up rss feed serving; don't error on empty feed

* fall back to account creation time as rss feed update time

* return feed early when account has no eligible statuses
This commit is contained in:
tobi 2023-07-10 17:05:59 +02:00 committed by GitHub
parent a29b5affc8
commit ca5492b65f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 267 additions and 126 deletions

View File

@ -27,82 +27,151 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
const rssFeedLength = 20
const (
rssFeedLength = 20
)
type GetRSSFeed func() (string, gtserror.WithCode)
// GetRSSFeedForUsername returns a function to return the RSS feed of a local account
// with the given username, and the last-modified time (time that the account last
// posted a status eligible to be included in the rss feed).
//
// To save db calls, callers to this function should only call the returned GetRSSFeed
// func if the last-modified time is newer than the last-modified time they have cached.
//
// If the account has not yet posted an RSS-eligible status, the returned last-modified
// time will be zero, and the GetRSSFeed func will return a valid RSS xml with no items.
func (p *Processor) GetRSSFeedForUsername(ctx context.Context, username string) (GetRSSFeed, time.Time, gtserror.WithCode) {
var (
never = time.Time{}
)
// GetRSSFeedForUsername returns RSS feed for the given local username.
func (p *Processor) GetRSSFeedForUsername(ctx context.Context, username string) (func() (string, gtserror.WithCode), time.Time, gtserror.WithCode) {
account, err := p.state.DB.GetAccountByUsernameDomain(ctx, username, "")
if err != nil {
if err == db.ErrNoEntries {
return nil, time.Time{}, gtserror.NewErrorNotFound(errors.New("GetRSSFeedForUsername: account not found"))
}
return nil, time.Time{}, gtserror.NewErrorInternalError(fmt.Errorf("GetRSSFeedForUsername: db error: %s", err))
if errors.Is(err, db.ErrNoEntries) {
// Simply no account with this username.
err = gtserror.New("account not found")
return nil, never, gtserror.NewErrorNotFound(err)
}
// Real db error.
err = gtserror.Newf("db error getting account %s: %w", username, err)
return nil, never, gtserror.NewErrorInternalError(err)
}
// Ensure account has rss feed enabled.
if !*account.EnableRSS {
return nil, time.Time{}, gtserror.NewErrorNotFound(errors.New("GetRSSFeedForUsername: account RSS feed not enabled"))
err = gtserror.New("account RSS feed not enabled")
return nil, never, gtserror.NewErrorNotFound(err)
}
lastModified, err := p.state.DB.GetAccountLastPosted(ctx, account.ID, true)
if err != nil {
return nil, time.Time{}, gtserror.NewErrorInternalError(fmt.Errorf("GetRSSFeedForUsername: db error: %s", err))
// LastModified time is needed by callers to check freshness for cacheing.
// This might be a zero time.Time if account has never posted a status that's
// eligible to appear in the RSS feed; that's fine.
lastPostAt, err := p.state.DB.GetAccountLastPosted(ctx, account.ID, true)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
err = gtserror.Newf("db error getting account %s last posted: %w", username, err)
return nil, never, gtserror.NewErrorInternalError(err)
}
return func() (string, gtserror.WithCode) {
statuses, err := p.state.DB.GetAccountWebStatuses(ctx, account.ID, rssFeedLength, "")
if err != nil && err != db.ErrNoEntries {
return "", gtserror.NewErrorInternalError(fmt.Errorf("GetRSSFeedForUsername: db error: %s", err))
}
// Assemble author namestring once only.
author := "@" + account.Username + "@" + config.GetAccountDomain()
title := "Posts from " + author
description := "Posts from " + author
link := &feeds.Link{Href: account.URL}
var image *feeds.Image
if account.AvatarMediaAttachmentID != "" {
if account.AvatarMediaAttachment == nil {
avatar, err := p.state.DB.GetAttachmentByID(ctx, account.AvatarMediaAttachmentID)
if err != nil {
return "", gtserror.NewErrorInternalError(fmt.Errorf("GetRSSFeedForUsername: db error fetching avatar attachment: %s", err))
}
account.AvatarMediaAttachment = avatar
}
image = &feeds.Image{
Url: account.AvatarMediaAttachment.Thumbnail.URL,
Title: "Avatar for " + author,
Link: account.URL,
}
// Derive image/thumbnail for this account (may be nil).
image, errWithCode := p.rssImageForAccount(ctx, account, author)
if errWithCode != nil {
return "", errWithCode
}
feed := &feeds.Feed{
Title: title,
Description: description,
Link: link,
Title: "Posts from " + author,
Description: "Posts from " + author,
Link: &feeds.Link{Href: account.URL},
Image: image,
}
for i, s := range statuses {
// take the date of the first (ie., latest) status as feed updated value
if i == 0 {
feed.Updated = s.UpdatedAt
// If the account has never posted anything, just use
// account creation time as Updated value for the feed;
// we could use time.Now() here but this would likely
// mess up cacheing; we want something determinate.
//
// We can also return early rather than wasting a db call,
// since we already know there's no eligible statuses.
if lastPostAt.IsZero() {
feed.Updated = account.CreatedAt
return stringifyFeed(feed)
}
item, err := p.tc.StatusToRSSItem(ctx, s)
// Account has posted at least one status that's
// eligible to appear in the RSS feed.
//
// Reuse the lastPostAt value for feed.Updated.
feed.Updated = lastPostAt
// Retrieve latest statuses as they'd be shown on the web view of the account profile.
statuses, err := p.state.DB.GetAccountWebStatuses(ctx, account.ID, rssFeedLength, "")
if err != nil && !errors.Is(err, db.ErrNoEntries) {
err = fmt.Errorf("db error getting account web statuses: %w", err)
return "", gtserror.NewErrorInternalError(err)
}
// Add each status to the rss feed.
for _, status := range statuses {
item, err := p.tc.StatusToRSSItem(ctx, status)
if err != nil {
return "", gtserror.NewErrorInternalError(fmt.Errorf("GetRSSFeedForUsername: error converting status to feed item: %s", err))
err = gtserror.Newf("error converting status to feed item: %w", err)
return "", gtserror.NewErrorInternalError(err)
}
feed.Add(item)
}
return stringifyFeed(feed)
}, lastPostAt, nil
}
func (p *Processor) rssImageForAccount(ctx context.Context, account *gtsmodel.Account, author string) (*feeds.Image, gtserror.WithCode) {
if account.AvatarMediaAttachmentID == "" {
// No image, no problem!
return nil, nil
}
// Ensure account avatar attachment populated.
if account.AvatarMediaAttachment == nil {
var err error
account.AvatarMediaAttachment, err = p.state.DB.GetAttachmentByID(ctx, account.AvatarMediaAttachmentID)
if err != nil {
if errors.Is(err, db.ErrNoEntries) {
// No attachment found with this ID (race condition?).
return nil, nil
}
// Real db error.
err = gtserror.Newf("db error fetching avatar media attachment: %w", err)
return nil, gtserror.NewErrorInternalError(err)
}
}
return &feeds.Image{
Url: account.AvatarMediaAttachment.Thumbnail.URL,
Title: "Avatar for " + author,
Link: account.URL,
}, nil
}
func stringifyFeed(feed *feeds.Feed) (string, gtserror.WithCode) {
// Stringify the feed. Even with no statuses,
// this will still produce valid rss xml.
rss, err := feed.ToRss()
if err != nil {
return "", gtserror.NewErrorInternalError(fmt.Errorf("GetRSSFeedForUsername: error converting feed to rss string: %s", err))
err := gtserror.Newf("error converting feed to rss string: %w", err)
return "", gtserror.NewErrorInternalError(err)
}
return rss, nil
}, lastModified, nil
}

View File

@ -55,6 +55,34 @@ func (suite *GetRSSTestSuite) TestGetAccountRSSZork() {
suite.Equal("<?xml version=\"1.0\" encoding=\"UTF-8\"?><rss version=\"2.0\" xmlns:content=\"http://purl.org/rss/1.0/modules/content/\">\n <channel>\n <title>Posts from @the_mighty_zork@localhost:8080</title>\n <link>http://localhost:8080/@the_mighty_zork</link>\n <description>Posts from @the_mighty_zork@localhost:8080</description>\n <pubDate>Wed, 20 Oct 2021 10:40:37 +0000</pubDate>\n <lastBuildDate>Wed, 20 Oct 2021 10:40:37 +0000</lastBuildDate>\n <image>\n <url>http://localhost:8080/fileserver/01F8MH1H7YV1Z7D2C8K2730QBF/avatar/small/01F8MH58A357CV5K7R7TJMSH6S.jpg</url>\n <title>Avatar for @the_mighty_zork@localhost:8080</title>\n <link>http://localhost:8080/@the_mighty_zork</link>\n </image>\n <item>\n <title>introduction post</title>\n <link>http://localhost:8080/@the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY</link>\n <description>@the_mighty_zork@localhost:8080 made a new post: &#34;hello everyone!&#34;</description>\n <content:encoded><![CDATA[hello everyone!]]></content:encoded>\n <author>@the_mighty_zork@localhost:8080</author>\n <guid>http://localhost:8080/@the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY</guid>\n <pubDate>Wed, 20 Oct 2021 10:40:37 +0000</pubDate>\n <source>http://localhost:8080/@the_mighty_zork/feed.rss</source>\n </item>\n </channel>\n</rss>", feed)
}
func (suite *GetRSSTestSuite) TestGetAccountRSSZorkNoPosts() {
ctx := context.Background()
// Get all of zork's posts.
statuses, err := suite.db.GetAccountStatuses(ctx, suite.testAccounts["local_account_1"].ID, 0, false, false, "", "", false, false)
if err != nil {
suite.FailNow(err.Error())
}
// Now delete them! Hahaha!
for _, status := range statuses {
if err := suite.db.DeleteStatusByID(ctx, status.ID); err != nil {
suite.FailNow(err.Error())
}
}
getFeed, lastModified, err := suite.accountProcessor.GetRSSFeedForUsername(ctx, "the_mighty_zork")
suite.NoError(err)
suite.Empty(lastModified)
feed, err := getFeed()
suite.NoError(err)
fmt.Println(feed)
suite.Equal("<?xml version=\"1.0\" encoding=\"UTF-8\"?><rss version=\"2.0\" xmlns:content=\"http://purl.org/rss/1.0/modules/content/\">\n <channel>\n <title>Posts from @the_mighty_zork@localhost:8080</title>\n <link>http://localhost:8080/@the_mighty_zork</link>\n <description>Posts from @the_mighty_zork@localhost:8080</description>\n <pubDate>Fri, 20 May 2022 11:09:18 +0000</pubDate>\n <lastBuildDate>Fri, 20 May 2022 11:09:18 +0000</lastBuildDate>\n <image>\n <url>http://localhost:8080/fileserver/01F8MH1H7YV1Z7D2C8K2730QBF/avatar/small/01F8MH58A357CV5K7R7TJMSH6S.jpg</url>\n <title>Avatar for @the_mighty_zork@localhost:8080</title>\n <link>http://localhost:8080/@the_mighty_zork</link>\n </image>\n </channel>\n</rss>", feed)
}
func TestGetRSSTestSuite(t *testing.T) {
suite.Run(t, new(GetRSSTestSuite))
}

View File

@ -19,8 +19,6 @@ package web
import (
"bytes"
"errors"
"fmt"
"net/http"
"strings"
"time"
@ -31,86 +29,50 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/log"
)
const appRSSUTF8 = string(apiutil.AppRSSXML + "; charset=utf-8")
func (m *Module) GetRSSETag(urlPath string, lastModified time.Time, getRSSFeed func() (string, gtserror.WithCode)) (string, error) {
if cachedETag, ok := m.eTagCache.Get(urlPath); ok && !lastModified.After(cachedETag.lastModified) {
// only return our cached etag if the file wasn't
// modified since last time, otherwise generate a
// new one; eat fresh!
return cachedETag.eTag, nil
}
rssFeed, errWithCode := getRSSFeed()
if errWithCode != nil {
return "", fmt.Errorf("error getting rss feed: %s", errWithCode)
}
eTag, err := generateEtag(bytes.NewReader([]byte(rssFeed)))
if err != nil {
return "", fmt.Errorf("error generating etag: %s", err)
}
// put new entry in cache before we return
m.eTagCache.Set(urlPath, eTagCacheEntry{
eTag: eTag,
lastModified: lastModified,
})
return eTag, nil
}
func extractIfModifiedSince(r *http.Request) time.Time {
hdr := r.Header.Get(ifModifiedSinceHeader)
if hdr == "" {
return time.Time{}
}
t, err := http.ParseTime(hdr)
if err != nil {
log.Errorf(r.Context(), "couldn't parse if-modified-since %s: %s", hdr, err)
return time.Time{}
}
return t
}
const appRSSUTF8 = string(apiutil.AppRSSXML) + "; charset=utf-8"
func (m *Module) rssFeedGETHandler(c *gin.Context) {
// set this Cache-Control header to instruct clients to validate the response with us
// before each reuse (https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control)
c.Header(cacheControlHeader, cacheControlNoCache)
ctx := c.Request.Context()
if _, err := apiutil.NegotiateAccept(c, apiutil.AppRSSXML); err != nil {
apiutil.WebErrorHandler(c, gtserror.NewErrorNotAcceptable(err, err.Error()), m.processor.InstanceGetV1)
return
}
// usernames on our instance will always be lowercase
username := strings.ToLower(c.Param(usernameKey))
if username == "" {
err := errors.New("no account username specified")
apiutil.WebErrorHandler(c, gtserror.NewErrorBadRequest(err, err.Error()), m.processor.InstanceGetV1)
return
}
ifNoneMatch := c.Request.Header.Get(ifNoneMatchHeader)
ifModifiedSince := extractIfModifiedSince(c.Request)
getRssFeed, accountLastPostedPublic, errWithCode := m.processor.Account().GetRSSFeedForUsername(ctx, username)
// Fetch + normalize username from URL.
username, errWithCode := apiutil.ParseWebUsername(c.Param(apiutil.WebUsernameKey))
if errWithCode != nil {
apiutil.WebErrorHandler(c, errWithCode, m.processor.InstanceGetV1)
return
}
var rssFeed string
cacheKey := c.Request.URL.Path
cacheEntry, ok := m.eTagCache.Get(cacheKey)
// Usernames on our instance will always be lowercase.
//
// todo: https://github.com/superseriousbusiness/gotosocial/issues/1813
username = strings.ToLower(username)
if !ok || cacheEntry.lastModified.Before(accountLastPostedPublic) {
// we either have no cache entry for this, or we have an expired cache entry; generate a new one
rssFeed, errWithCode = getRssFeed()
// Retrieve the getRSSFeed function from the processor.
// We'll only call the function if we need to, to save db calls.
// lastPostAt may be a zero time if account has never posted.
getRSSFeed, lastPostAt, errWithCode := m.processor.Account().GetRSSFeedForUsername(c.Request.Context(), username)
if errWithCode != nil {
apiutil.WebErrorHandler(c, errWithCode, m.processor.InstanceGetV1)
return
}
var (
rssFeed string // Stringified rss feed.
cacheKey = c.Request.URL.Path
cacheEntry, wasCached = m.eTagCache.Get(cacheKey)
)
if !wasCached || unixAfter(lastPostAt, cacheEntry.lastModified) {
// We either have no ETag cache entry for this account's feed,
// or we have an expired cache entry (account has posted since
// the cache entry was last generated).
//
// As such, we need to generate a new ETag, and for that we need
// the string representation of the RSS feed.
rssFeed, errWithCode = getRSSFeed()
if errWithCode != nil {
apiutil.WebErrorHandler(c, errWithCode, m.processor.InstanceGetV1)
return
@ -122,29 +84,73 @@ func (m *Module) rssFeedGETHandler(c *gin.Context) {
return
}
cacheEntry.lastModified = accountLastPostedPublic
cacheEntry.eTag = eTag
// We never want lastModified to be zero, so if account
// has never actually posted anything, just use Now as
// the lastModified time instead for cache control.
var lastModified time.Time
if lastPostAt.IsZero() {
lastModified = time.Now()
} else {
lastModified = lastPostAt
}
// Store the new cache entry.
cacheEntry = eTagCacheEntry{
eTag: eTag,
lastModified: lastModified,
}
m.eTagCache.Set(cacheKey, cacheEntry)
}
// Set 'ETag' and 'Last-Modified' headers no matter what;
// even if we return 304 in the next checks, caller may
// want to cache these header values.
c.Header(eTagHeader, cacheEntry.eTag)
c.Header(lastModifiedHeader, accountLastPostedPublic.Format(http.TimeFormat))
c.Header(lastModifiedHeader, cacheEntry.lastModified.Format(http.TimeFormat))
// Instruct caller to validate the response with us before
// each reuse, so that the 'ETag' and 'Last-Modified' headers
// actually take effect.
//
// "The no-cache response directive indicates that the response
// can be stored in caches, but the response must be validated
// with the origin server before each reuse, even when the cache
// is disconnected from the origin server."
//
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control
c.Header(cacheControlHeader, cacheControlNoCache)
// Check if caller submitted an ETag via 'If-None-Match'.
// If they did + it matches what we have, that means they've
// already seen the latest version of this feed, so just bail.
ifNoneMatch := c.Request.Header.Get(ifNoneMatchHeader)
if ifNoneMatch == cacheEntry.eTag {
c.AbortWithStatus(http.StatusNotModified)
return
}
lmUnix := cacheEntry.lastModified.Unix()
imsUnix := ifModifiedSince.Unix()
if lmUnix <= imsUnix {
// Check if the caller submitted a time via 'If-Modified-Since'.
// If they did, and our cached ETag entry is not newer than the
// given time, this means the caller has already seen the latest
// version of this feed, so just bail.
ifModifiedSince := extractIfModifiedSince(c.Request)
if !ifModifiedSince.IsZero() &&
!unixAfter(cacheEntry.lastModified, ifModifiedSince) {
c.AbortWithStatus(http.StatusNotModified)
return
}
// At this point we know that the client wants the newest
// representation of the RSS feed, either because they didn't
// submit any 'If-None-Match' / 'If-Modified-Since' cache headers,
// or because they did but the account has posted more recently
// than the values of the submitted headers would suggest.
//
// If we had a cache hit earlier, we may not have called the
// getRSSFeed function yet; if that's the case then do call it
// now because we definitely need it.
if rssFeed == "" {
// we had a cache entry already so we didn't call to get the rss feed yet
rssFeed, errWithCode = getRssFeed()
rssFeed, errWithCode = getRSSFeed()
if errWithCode != nil {
apiutil.WebErrorHandler(c, errWithCode, m.processor.InstanceGetV1)
return
@ -153,3 +159,41 @@ func (m *Module) rssFeedGETHandler(c *gin.Context) {
c.Data(http.StatusOK, appRSSUTF8, []byte(rssFeed))
}
// unixAfter returns true if the unix value of t1
// is greater than (ie., after) the unix value of t2.
func unixAfter(t1 time.Time, t2 time.Time) bool {
if t1.IsZero() {
// if t1 is zero then it cannot
// possibly be greater than t2.
return false
}
if t2.IsZero() {
// t1 is not zero but t2 is,
// so t1 is necessarily greater.
return true
}
return t1.Unix() > t2.Unix()
}
// extractIfModifiedSince parses a time.Time from the
// 'If-Modified-Since' header of the given request.
//
// If no time was provided, or the provided time was
// not parseable, it will return a zero time.
func extractIfModifiedSince(r *http.Request) time.Time {
imsStr := r.Header.Get(ifModifiedSinceHeader)
if imsStr == "" {
return time.Time{} // Nothing set.
}
ifModifiedSince, err := http.ParseTime(imsStr)
if err != nil {
log.Errorf(r.Context(), "couldn't parse %s value '%s' as time: %q", ifModifiedSinceHeader, imsStr, err)
return time.Time{}
}
return ifModifiedSince
}