mirror of
https://github.com/superseriousbusiness/gotosocial
synced 2025-06-05 21:59:39 +02:00
[bugfix] Ensure InReplyToID
set properly, update dereference ancestors func (#1921)
This commit is contained in:
@@ -19,6 +19,8 @@ package dereferencing
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
||||
"codeberg.org/gruf/go-kv"
|
||||
@@ -26,96 +28,184 @@ import (
|
||||
"github.com/superseriousbusiness/activity/streams/vocab"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/ap"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/config"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/db"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/log"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/uris"
|
||||
)
|
||||
|
||||
// maxIter defines how many iterations of descendants or
|
||||
// ancesters we are willing to follow before returning error.
|
||||
const maxIter = 1000
|
||||
|
||||
// dereferenceThread will dereference statuses both above and below the given status in a thread, it returns no error and is intended to be called asychronously.
|
||||
func (d *deref) dereferenceThread(ctx context.Context, username string, statusIRI *url.URL, status *gtsmodel.Status, statusable ap.Statusable) {
|
||||
// Ensure that ancestors have been fully dereferenced
|
||||
if err := d.dereferenceStatusAncestors(ctx, username, status); err != nil {
|
||||
log.Error(ctx, err) // log entry and error will include caller prefixes
|
||||
if err := d.DereferenceStatusAncestors(ctx, username, status); err != nil {
|
||||
log.Error(ctx, err)
|
||||
}
|
||||
|
||||
// Ensure that descendants have been fully dereferenced
|
||||
if err := d.dereferenceStatusDescendants(ctx, username, statusIRI, statusable); err != nil {
|
||||
log.Error(ctx, err) // log entry and error will include caller prefixes
|
||||
if err := d.DereferenceStatusDescendants(ctx, username, statusIRI, statusable); err != nil {
|
||||
log.Error(ctx, err)
|
||||
}
|
||||
}
|
||||
|
||||
// dereferenceAncestors has the goal of reaching the oldest ancestor of a given status, and stashing all statuses along the way.
|
||||
func (d *deref) dereferenceStatusAncestors(ctx context.Context, username string, status *gtsmodel.Status) error {
|
||||
// Take ref to original
|
||||
ogIRI := status.URI
|
||||
|
||||
// Start log entry with fields
|
||||
l := log.WithContext(ctx).
|
||||
WithFields(kv.Fields{
|
||||
{"username", username},
|
||||
{"statusIRI", ogIRI},
|
||||
}...)
|
||||
|
||||
// Log function start
|
||||
l.Trace("beginning")
|
||||
func (d *deref) DereferenceStatusAncestors(
|
||||
ctx context.Context,
|
||||
username string,
|
||||
status *gtsmodel.Status,
|
||||
) error {
|
||||
// Mark given status as the one
|
||||
// we're currently working on.
|
||||
var current = status
|
||||
|
||||
for i := 0; i < maxIter; i++ {
|
||||
if status.InReplyToURI == "" {
|
||||
// status doesn't reply to anything
|
||||
if current.InReplyToURI == "" {
|
||||
// Status has no parent, we've
|
||||
// reached the top of the chain.
|
||||
return nil
|
||||
}
|
||||
|
||||
// Parse this status's replied IRI
|
||||
replyIRI, err := url.Parse(status.InReplyToURI)
|
||||
if err != nil {
|
||||
return gtserror.Newf("invalid status InReplyToURI %q: %w", status.InReplyToURI, err)
|
||||
l := log.
|
||||
WithContext(ctx).
|
||||
WithFields(kv.Fields{
|
||||
{"username", username},
|
||||
{"originalStatusIRI", status.URI},
|
||||
{"currentStatusURI", current.URI},
|
||||
{"currentInReplyToURI", current.InReplyToURI},
|
||||
}...)
|
||||
|
||||
if current.InReplyToID != "" {
|
||||
// We already have an InReplyToID set. This means
|
||||
// the status's parent has, at some point, been
|
||||
// inserted into the database, either because it
|
||||
// is a status from our instance, or a status from
|
||||
// remote that we've dereferenced before, or found
|
||||
// out about in some other way.
|
||||
//
|
||||
// Working on this assumption, check if the parent
|
||||
// status exists, either as a copy pinned on the
|
||||
// current status, or in the database.
|
||||
|
||||
if current.InReplyTo != nil {
|
||||
// We have the parent already, and the child
|
||||
// doesn't need to be updated; keep iterating
|
||||
// from this parent upwards.
|
||||
current = current.InReplyTo
|
||||
continue
|
||||
}
|
||||
|
||||
// Parent isn't pinned to this status (yet), see
|
||||
// if we can get it from the db (we should be
|
||||
// able to, since it has an ID already).
|
||||
parent, err := d.state.DB.GetStatusByID(
|
||||
gtscontext.SetBarebones(ctx),
|
||||
current.InReplyToID,
|
||||
)
|
||||
if err != nil && !errors.Is(err, db.ErrNoEntries) {
|
||||
// Real db error, stop.
|
||||
return gtserror.Newf("db error getting status %s: %w", current.InReplyToID, err)
|
||||
}
|
||||
|
||||
if parent != nil {
|
||||
// We got the parent from the db, and the child
|
||||
// doesn't need to be updated; keep iterating
|
||||
// from this parent upwards.
|
||||
current.InReplyTo = parent
|
||||
current = parent
|
||||
continue
|
||||
}
|
||||
|
||||
// If we arrive here, we know this child *did* have
|
||||
// a parent at some point, but it no longer exists in
|
||||
// the database, presumably because it's been deleted
|
||||
// by another action.
|
||||
//
|
||||
// TODO: clean this up in a nightly task.
|
||||
l.Warnf("current status has been orphaned (parent %s no longer exists in database)", current.InReplyToID)
|
||||
return nil // Cannot iterate further.
|
||||
}
|
||||
|
||||
if replyIRI.Host == config.GetHost() {
|
||||
l.Tracef("following local status ancestors: %s", status.InReplyToURI)
|
||||
// If we reach this point, we know the status has
|
||||
// an InReplyToURI set, but it doesn't yet have an
|
||||
// InReplyToID, which means that the parent status
|
||||
// has not yet been dereferenced.
|
||||
inReplyToURI, err := url.Parse(current.InReplyToURI)
|
||||
if err != nil || inReplyToURI == nil {
|
||||
// Parent URI is not something we can handle.
|
||||
l.Debug("current status has been orphaned (invalid InReplyToURI)")
|
||||
return nil //nolint:nilerr
|
||||
}
|
||||
|
||||
// This is our status, extract ID from path
|
||||
_, id, err := uris.ParseStatusesPath(replyIRI)
|
||||
if err != nil {
|
||||
return gtserror.Newf("invalid local status IRI %q: %w", status.InReplyToURI, err)
|
||||
// Parent URI is valid, try to get it.
|
||||
// getStatusByURI guards against the following conditions:
|
||||
//
|
||||
// - remote domain is blocked (will return unretrievable)
|
||||
// - domain is local (will try to return something, or
|
||||
// return unretrievable).
|
||||
parent, _, err := d.getStatusByURI(ctx, username, inReplyToURI)
|
||||
if err == nil {
|
||||
// We successfully fetched the parent.
|
||||
// Update current status with new info.
|
||||
current.InReplyToID = parent.ID
|
||||
current.InReplyToAccountID = parent.AccountID
|
||||
if err := d.state.DB.UpdateStatus(
|
||||
ctx, current,
|
||||
"in_reply_to_id",
|
||||
"in_reply_to_account_id",
|
||||
); err != nil {
|
||||
return gtserror.Newf("db error updating status %s: %w", current.ID, err)
|
||||
}
|
||||
|
||||
// Fetch this status from the database
|
||||
localStatus, err := d.state.DB.GetStatusByID(ctx, id)
|
||||
if err != nil {
|
||||
return gtserror.Newf("error fetching local status %q: %w", id, err)
|
||||
// Mark parent as next status to
|
||||
// work on, and keep iterating.
|
||||
current = parent
|
||||
continue
|
||||
}
|
||||
|
||||
// We could not fetch the parent, check if we can do anything
|
||||
// useful with the error. For example, HTTP status code returned
|
||||
// from remote may indicate that the parent has been deleted.
|
||||
switch code := gtserror.StatusCode(err); {
|
||||
case code == http.StatusGone || code == http.StatusNotFound:
|
||||
// 410 means the status has definitely been deleted.
|
||||
// 404 means the status has *probably* been deleted.
|
||||
// Update this status to reflect that, then bail.
|
||||
l.Debugf("current status has been orphaned (call to parent returned code %d)", code)
|
||||
|
||||
current.InReplyToURI = ""
|
||||
if err := d.state.DB.UpdateStatus(
|
||||
ctx, current,
|
||||
"in_reply_to_uri",
|
||||
); err != nil {
|
||||
return gtserror.Newf("db error updating status %s: %w", current.ID, err)
|
||||
}
|
||||
return nil
|
||||
|
||||
// Set the fetched status
|
||||
status = localStatus
|
||||
case code != 0:
|
||||
// We had a code, but not one indicating deletion,
|
||||
// log the code but don't return error or update the
|
||||
// status; we can try again later.
|
||||
l.Warnf("cannot dereference parent (%q)", err)
|
||||
return nil
|
||||
|
||||
} else {
|
||||
l.Tracef("following remote status ancestors: %s", status.InReplyToURI)
|
||||
case gtserror.Unretrievable(err):
|
||||
// Not retrievable for some other reason, so just
|
||||
// bail; we can try again later if necessary.
|
||||
l.Debugf("parent unretrievable (%q)", err)
|
||||
return nil
|
||||
|
||||
// Fetch the remote status found at this IRI
|
||||
remoteStatus, _, err := d.getStatusByURI(
|
||||
ctx,
|
||||
username,
|
||||
replyIRI,
|
||||
)
|
||||
if err != nil {
|
||||
return gtserror.Newf("error fetching remote status %q: %w", status.InReplyToURI, err)
|
||||
}
|
||||
|
||||
// Set the fetched status
|
||||
status = remoteStatus
|
||||
default:
|
||||
// Some other error that stops us in our tracks.
|
||||
return gtserror.Newf("error dereferencing parent %s: %w", current.InReplyToURI, err)
|
||||
}
|
||||
}
|
||||
|
||||
return gtserror.Newf("reached %d ancestor iterations for %q", maxIter, ogIRI)
|
||||
return gtserror.Newf("reached %d ancestor iterations for %q", maxIter, status.URI)
|
||||
}
|
||||
|
||||
func (d *deref) dereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error {
|
||||
func (d *deref) DereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error {
|
||||
// Take ref to original
|
||||
ogIRI := statusIRI
|
||||
|
||||
@@ -256,9 +346,17 @@ stackLoop:
|
||||
}
|
||||
|
||||
// Dereference the remote status and store in the database.
|
||||
// getStatusByURI guards against the following conditions:
|
||||
//
|
||||
// - remote domain is blocked (will return unretrievable)
|
||||
// - domain is local (will try to return something, or
|
||||
// return unretrievable).
|
||||
_, statusable, err := d.getStatusByURI(ctx, username, itemIRI)
|
||||
if err != nil {
|
||||
l.Errorf("error dereferencing remote status %s: %v", itemIRI, err)
|
||||
if !gtserror.Unretrievable(err) {
|
||||
l.Errorf("error dereferencing remote status %s: %v", itemIRI, err)
|
||||
}
|
||||
|
||||
continue itemLoop
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user