2023-03-12 16:00:57 +01:00
|
|
|
// GoToSocial
|
|
|
|
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
|
|
|
// SPDX-License-Identifier: AGPL-3.0-or-later
|
|
|
|
//
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU Affero General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
2021-08-10 13:32:39 +02:00
|
|
|
|
|
|
|
package dereferencing
|
|
|
|
|
|
|
|
import (
|
2021-08-25 15:34:33 +02:00
|
|
|
"context"
|
2023-06-24 09:32:10 +02:00
|
|
|
"errors"
|
|
|
|
"net/http"
|
2021-08-10 13:32:39 +02:00
|
|
|
"net/url"
|
|
|
|
|
2022-07-19 10:47:55 +02:00
|
|
|
"codeberg.org/gruf/go-kv"
|
2023-06-03 11:35:15 +02:00
|
|
|
"github.com/superseriousbusiness/activity/pub"
|
2022-09-25 13:09:41 +02:00
|
|
|
"github.com/superseriousbusiness/activity/streams/vocab"
|
2021-08-10 13:32:39 +02:00
|
|
|
"github.com/superseriousbusiness/gotosocial/internal/ap"
|
2021-12-07 13:31:39 +01:00
|
|
|
"github.com/superseriousbusiness/gotosocial/internal/config"
|
2023-06-24 09:32:10 +02:00
|
|
|
"github.com/superseriousbusiness/gotosocial/internal/db"
|
|
|
|
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
|
2023-05-28 14:08:35 +02:00
|
|
|
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
|
2022-09-25 13:09:41 +02:00
|
|
|
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
|
2022-07-19 10:47:55 +02:00
|
|
|
"github.com/superseriousbusiness/gotosocial/internal/log"
|
2021-08-10 13:32:39 +02:00
|
|
|
)
|
|
|
|
|
2022-09-25 13:09:41 +02:00
|
|
|
// maxIter defines how many iterations of descendants or
|
|
|
|
// ancestors we are willing to follow before returning error.
|
|
|
|
const maxIter = 1000
|
|
|
|
|
2023-05-12 11:15:54 +02:00
|
|
|
func (d *deref) dereferenceThread(ctx context.Context, username string, statusIRI *url.URL, status *gtsmodel.Status, statusable ap.Statusable) {
|
2022-09-25 13:09:41 +02:00
|
|
|
// Ensure that ancestors have been fully dereferenced
|
2023-06-24 09:32:10 +02:00
|
|
|
if err := d.DereferenceStatusAncestors(ctx, username, status); err != nil {
|
|
|
|
log.Error(ctx, err)
|
2021-08-10 13:32:39 +02:00
|
|
|
}
|
|
|
|
|
2022-09-25 13:09:41 +02:00
|
|
|
// Ensure that descendants have been fully dereferenced
|
2023-06-24 09:32:10 +02:00
|
|
|
if err := d.DereferenceStatusDescendants(ctx, username, statusIRI, statusable); err != nil {
|
|
|
|
log.Error(ctx, err)
|
2021-08-10 13:32:39 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-06-24 09:32:10 +02:00
|
|
|
func (d *deref) DereferenceStatusAncestors(
|
|
|
|
ctx context.Context,
|
|
|
|
username string,
|
|
|
|
status *gtsmodel.Status,
|
|
|
|
) error {
|
|
|
|
// Mark given status as the one
|
|
|
|
// we're currently working on.
|
|
|
|
var current = status
|
2021-08-10 13:32:39 +02:00
|
|
|
|
2022-09-25 13:09:41 +02:00
|
|
|
for i := 0; i < maxIter; i++ {
|
2023-06-24 09:32:10 +02:00
|
|
|
if current.InReplyToURI == "" {
|
|
|
|
// Status has no parent, we've
|
|
|
|
// reached the top of the chain.
|
2021-08-10 13:32:39 +02:00
|
|
|
return nil
|
|
|
|
}
|
2022-06-11 16:25:41 +02:00
|
|
|
|
2023-06-24 09:32:10 +02:00
|
|
|
l := log.
|
|
|
|
WithContext(ctx).
|
|
|
|
WithFields(kv.Fields{
|
|
|
|
{"username", username},
|
|
|
|
{"originalStatusIRI", status.URI},
|
|
|
|
{"currentStatusURI", current.URI},
|
|
|
|
{"currentInReplyToURI", current.InReplyToURI},
|
|
|
|
}...)
|
|
|
|
|
|
|
|
if current.InReplyToID != "" {
|
|
|
|
// We already have an InReplyToID set. This means
|
|
|
|
// the status's parent has, at some point, been
|
|
|
|
// inserted into the database, either because it
|
|
|
|
// is a status from our instance, or a status from
|
|
|
|
// remote that we've dereferenced before, or found
|
|
|
|
// out about in some other way.
|
|
|
|
//
|
|
|
|
// Working on this assumption, check if the parent
|
|
|
|
// status exists, either as a copy pinned on the
|
|
|
|
// current status, or in the database.
|
|
|
|
|
|
|
|
if current.InReplyTo != nil {
|
|
|
|
// We have the parent already, and the child
|
|
|
|
// doesn't need to be updated; keep iterating
|
|
|
|
// from this parent upwards.
|
|
|
|
current = current.InReplyTo
|
|
|
|
continue
|
|
|
|
}
|
2021-08-10 13:32:39 +02:00
|
|
|
|
2023-06-24 09:32:10 +02:00
|
|
|
// Parent isn't pinned to this status (yet), see
|
|
|
|
// if we can get it from the db (we should be
|
|
|
|
// able to, since it has an ID already).
|
|
|
|
parent, err := d.state.DB.GetStatusByID(
|
|
|
|
gtscontext.SetBarebones(ctx),
|
|
|
|
current.InReplyToID,
|
|
|
|
)
|
|
|
|
if err != nil && !errors.Is(err, db.ErrNoEntries) {
|
|
|
|
// Real db error, stop.
|
|
|
|
return gtserror.Newf("db error getting status %s: %w", current.InReplyToID, err)
|
2022-09-25 13:09:41 +02:00
|
|
|
}
|
|
|
|
|
2023-06-24 09:32:10 +02:00
|
|
|
if parent != nil {
|
|
|
|
// We got the parent from the db, and the child
|
|
|
|
// doesn't need to be updated; keep iterating
|
|
|
|
// from this parent upwards.
|
|
|
|
current.InReplyTo = parent
|
|
|
|
current = parent
|
|
|
|
continue
|
2022-09-25 13:09:41 +02:00
|
|
|
}
|
|
|
|
|
2023-06-24 09:32:10 +02:00
|
|
|
// If we arrive here, we know this child *did* have
|
|
|
|
// a parent at some point, but it no longer exists in
|
|
|
|
// the database, presumably because it's been deleted
|
|
|
|
// by another action.
|
|
|
|
//
|
|
|
|
// TODO: clean this up in a nightly task.
|
|
|
|
l.Warnf("current status has been orphaned (parent %s no longer exists in database)", current.InReplyToID)
|
|
|
|
return nil // Cannot iterate further.
|
|
|
|
}
|
2022-09-25 13:09:41 +02:00
|
|
|
|
2023-06-24 09:32:10 +02:00
|
|
|
// If we reach this point, we know the status has
|
|
|
|
// an InReplyToURI set, but it doesn't yet have an
|
|
|
|
// InReplyToID, which means that the parent status
|
|
|
|
// has not yet been dereferenced.
|
|
|
|
inReplyToURI, err := url.Parse(current.InReplyToURI)
|
|
|
|
if err != nil || inReplyToURI == nil {
|
|
|
|
// Parent URI is not something we can handle.
|
|
|
|
l.Debug("current status has been orphaned (invalid InReplyToURI)")
|
|
|
|
return nil //nolint:nilerr
|
|
|
|
}
|
2022-09-25 13:09:41 +02:00
|
|
|
|
2023-06-24 09:32:10 +02:00
|
|
|
// Parent URI is valid, try to get it.
|
|
|
|
// getStatusByURI guards against the following conditions:
|
|
|
|
//
|
|
|
|
// - remote domain is blocked (will return unretrievable)
|
|
|
|
// - domain is local (will try to return something, or
|
|
|
|
// return unretrievable).
|
|
|
|
parent, _, err := d.getStatusByURI(ctx, username, inReplyToURI)
|
|
|
|
if err == nil {
|
|
|
|
// We successfully fetched the parent.
|
|
|
|
// Update current status with new info.
|
|
|
|
current.InReplyToID = parent.ID
|
|
|
|
current.InReplyToAccountID = parent.AccountID
|
|
|
|
if err := d.state.DB.UpdateStatus(
|
|
|
|
ctx, current,
|
|
|
|
"in_reply_to_id",
|
|
|
|
"in_reply_to_account_id",
|
|
|
|
); err != nil {
|
|
|
|
return gtserror.Newf("db error updating status %s: %w", current.ID, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Mark parent as next status to
|
|
|
|
// work on, and keep iterating.
|
|
|
|
current = parent
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// We could not fetch the parent, check if we can do anything
|
|
|
|
// useful with the error. For example, HTTP status code returned
|
|
|
|
// from remote may indicate that the parent has been deleted.
|
|
|
|
switch code := gtserror.StatusCode(err); {
|
2023-06-24 13:59:28 +02:00
|
|
|
case code == http.StatusGone:
|
2023-06-24 09:32:10 +02:00
|
|
|
// 410 means the status has definitely been deleted.
|
|
|
|
// Update this status to reflect that, then bail.
|
2023-06-24 13:59:28 +02:00
|
|
|
l.Debug("current status has been orphaned (call to parent returned code 410 Gone)")
|
2023-06-24 09:32:10 +02:00
|
|
|
|
|
|
|
current.InReplyToURI = ""
|
|
|
|
if err := d.state.DB.UpdateStatus(
|
|
|
|
ctx, current,
|
|
|
|
"in_reply_to_uri",
|
|
|
|
); err != nil {
|
|
|
|
return gtserror.Newf("db error updating status %s: %w", current.ID, err)
|
2022-09-25 13:09:41 +02:00
|
|
|
}
|
2023-06-24 09:32:10 +02:00
|
|
|
return nil
|
|
|
|
|
|
|
|
case code != 0:
|
|
|
|
// We had a code, but not one indicating deletion,
|
|
|
|
// log the code but don't return error or update the
|
|
|
|
// status; we can try again later.
|
|
|
|
l.Warnf("cannot dereference parent (%q)", err)
|
|
|
|
return nil
|
2021-08-10 13:32:39 +02:00
|
|
|
|
2023-06-24 09:32:10 +02:00
|
|
|
case gtserror.Unretrievable(err):
|
|
|
|
// Not retrievable for some other reason, so just
|
|
|
|
// bail; we can try again later if necessary.
|
|
|
|
l.Debugf("parent unretrievable (%q)", err)
|
|
|
|
return nil
|
|
|
|
|
|
|
|
default:
|
|
|
|
// Some other error that stops us in our tracks.
|
|
|
|
return gtserror.Newf("error dereferencing parent %s: %w", current.InReplyToURI, err)
|
2022-09-25 13:09:41 +02:00
|
|
|
}
|
2021-08-10 13:32:39 +02:00
|
|
|
}
|
|
|
|
|
2023-06-24 09:32:10 +02:00
|
|
|
return gtserror.Newf("reached %d ancestor iterations for %q", maxIter, status.URI)
|
2021-08-10 13:32:39 +02:00
|
|
|
}
|
|
|
|
|
2023-06-24 09:32:10 +02:00
|
|
|
func (d *deref) DereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error {
|
2022-09-25 13:09:41 +02:00
|
|
|
// Take ref to original
|
|
|
|
ogIRI := statusIRI
|
2022-07-19 10:47:55 +02:00
|
|
|
|
2022-09-25 13:09:41 +02:00
|
|
|
// Start log entry with fields
|
2023-02-17 12:02:29 +01:00
|
|
|
l := log.WithContext(ctx).
|
|
|
|
WithFields(kv.Fields{
|
|
|
|
{"username", username},
|
|
|
|
{"statusIRI", ogIRI},
|
|
|
|
}...)
|
2021-08-10 13:32:39 +02:00
|
|
|
|
2022-09-25 13:09:41 +02:00
|
|
|
// Log function start
|
|
|
|
l.Trace("beginning")
|
|
|
|
|
|
|
|
// frame represents a single stack frame when iteratively
|
|
|
|
// dereferencing status descendants. where statusIRI and
|
|
|
|
// statusable are of the status whose children we are to
|
|
|
|
// descend, page is the current activity streams collection
|
|
|
|
// page of entities we are on (as we often push a frame to
|
|
|
|
// stack mid-paging), and item___ are entity iterators for
|
|
|
|
// this activity streams collection page.
|
|
|
|
type frame struct {
|
|
|
|
statusIRI *url.URL
|
|
|
|
statusable ap.Statusable
|
|
|
|
page ap.CollectionPageable
|
|
|
|
itemIter vocab.ActivityStreamsItemsPropertyIterator
|
2021-08-10 13:32:39 +02:00
|
|
|
}
|
|
|
|
|
2022-09-25 13:09:41 +02:00
|
|
|
var (
|
|
|
|
// current is the current stack frame
|
|
|
|
current *frame
|
|
|
|
|
|
|
|
// stack is a list of "shelved" descendand iterator
|
|
|
|
// frames. this is pushed to when a child status frame
|
|
|
|
// is found that we need to further iterate down, and
|
|
|
|
// popped from into 'current' when that child's tree
|
|
|
|
// of further descendants is exhausted.
|
|
|
|
stack = []*frame{
|
|
|
|
{
|
|
|
|
// Starting input is first frame
|
|
|
|
statusIRI: statusIRI,
|
|
|
|
statusable: parent,
|
|
|
|
},
|
|
|
|
}
|
2021-08-10 13:32:39 +02:00
|
|
|
|
2022-09-25 13:09:41 +02:00
|
|
|
// popStack will remove and return the top frame
|
|
|
|
// from the stack, or nil if currently empty.
|
|
|
|
popStack = func() *frame {
|
|
|
|
if len(stack) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
2021-08-10 13:32:39 +02:00
|
|
|
|
2022-09-25 13:09:41 +02:00
|
|
|
// Get frame index
|
|
|
|
idx := len(stack) - 1
|
2021-08-10 13:32:39 +02:00
|
|
|
|
2022-09-25 13:09:41 +02:00
|
|
|
// Pop last frame
|
|
|
|
frame := stack[idx]
|
|
|
|
stack = stack[:idx]
|
2021-08-10 13:32:39 +02:00
|
|
|
|
2022-09-25 13:09:41 +02:00
|
|
|
return frame
|
|
|
|
}
|
|
|
|
)
|
2021-08-10 13:32:39 +02:00
|
|
|
|
2022-09-25 13:09:41 +02:00
|
|
|
stackLoop:
|
|
|
|
for i := 0; i < maxIter; i++ {
|
|
|
|
// Pop next frame, nil means we are at end
|
|
|
|
if current = popStack(); current == nil {
|
|
|
|
return nil
|
2021-08-10 13:32:39 +02:00
|
|
|
}
|
|
|
|
|
2022-09-25 13:09:41 +02:00
|
|
|
if current.page == nil {
|
|
|
|
if current.statusIRI.Host == config.GetHost() {
|
2023-06-03 11:35:15 +02:00
|
|
|
// This is a local status, no looping to do
|
2022-09-25 13:09:41 +02:00
|
|
|
continue stackLoop
|
|
|
|
}
|
|
|
|
|
|
|
|
l.Tracef("following remote status descendants: %s", current.statusIRI)
|
|
|
|
|
|
|
|
// Look for an attached status replies (as collection)
|
|
|
|
replies := current.statusable.GetActivityStreamsReplies()
|
2022-09-26 10:14:36 +02:00
|
|
|
if replies == nil {
|
2022-09-25 13:09:41 +02:00
|
|
|
continue stackLoop
|
|
|
|
}
|
|
|
|
|
|
|
|
// Get the status replies collection
|
|
|
|
collection := replies.GetActivityStreamsCollection()
|
2022-09-26 10:14:36 +02:00
|
|
|
if collection == nil {
|
|
|
|
continue stackLoop
|
|
|
|
}
|
2022-09-25 13:09:41 +02:00
|
|
|
|
|
|
|
// Get the "first" property of the replies collection
|
|
|
|
first := collection.GetActivityStreamsFirst()
|
2022-09-26 10:14:36 +02:00
|
|
|
if first == nil {
|
2022-09-25 13:09:41 +02:00
|
|
|
continue stackLoop
|
|
|
|
}
|
|
|
|
|
|
|
|
// Set the first activity stream collection page
|
|
|
|
current.page = first.GetActivityStreamsCollectionPage()
|
2022-09-26 10:14:36 +02:00
|
|
|
if current.page == nil {
|
|
|
|
continue stackLoop
|
|
|
|
}
|
2021-08-10 13:32:39 +02:00
|
|
|
}
|
|
|
|
|
2022-09-26 10:14:36 +02:00
|
|
|
pageLoop:
|
|
|
|
for {
|
2022-09-25 13:09:41 +02:00
|
|
|
if current.itemIter == nil {
|
2022-09-26 10:14:36 +02:00
|
|
|
// Get the items associated with this page
|
2022-09-25 13:09:41 +02:00
|
|
|
items := current.page.GetActivityStreamsItems()
|
2022-09-26 10:14:36 +02:00
|
|
|
if items == nil {
|
|
|
|
continue stackLoop
|
|
|
|
}
|
|
|
|
|
2022-09-25 13:09:41 +02:00
|
|
|
// Start off the item iterator
|
|
|
|
current.itemIter = items.Begin()
|
2021-08-10 13:32:39 +02:00
|
|
|
}
|
|
|
|
|
2022-09-25 13:09:41 +02:00
|
|
|
itemLoop:
|
2022-09-26 10:50:14 +02:00
|
|
|
for {
|
2023-06-03 11:35:15 +02:00
|
|
|
// Check for remaining iter
|
2022-09-26 10:50:14 +02:00
|
|
|
if current.itemIter == nil {
|
|
|
|
break itemLoop
|
|
|
|
}
|
2021-08-10 13:32:39 +02:00
|
|
|
|
2023-06-03 11:35:15 +02:00
|
|
|
// Get current item iterator
|
|
|
|
itemIter := current.itemIter
|
|
|
|
|
|
|
|
// Set the next available iterator
|
|
|
|
current.itemIter = itemIter.Next()
|
2022-09-25 13:09:41 +02:00
|
|
|
|
2023-06-03 11:35:15 +02:00
|
|
|
// Check for available IRI on item
|
|
|
|
itemIRI, _ := pub.ToId(itemIter)
|
2022-09-25 13:09:41 +02:00
|
|
|
if itemIRI == nil {
|
|
|
|
continue itemLoop
|
|
|
|
}
|
|
|
|
|
|
|
|
if itemIRI.Host == config.GetHost() {
|
|
|
|
// This child is one of ours,
|
|
|
|
continue itemLoop
|
|
|
|
}
|
|
|
|
|
2023-05-12 11:15:54 +02:00
|
|
|
// Dereference the remote status and store in the database.
|
2023-06-24 09:32:10 +02:00
|
|
|
// getStatusByURI guards against the following conditions:
|
|
|
|
//
|
|
|
|
// - remote domain is blocked (will return unretrievable)
|
|
|
|
// - domain is local (will try to return something, or
|
|
|
|
// return unretrievable).
|
2023-05-12 11:15:54 +02:00
|
|
|
_, statusable, err := d.getStatusByURI(ctx, username, itemIRI)
|
2022-09-25 13:09:41 +02:00
|
|
|
if err != nil {
|
2023-06-24 09:32:10 +02:00
|
|
|
if !gtserror.Unretrievable(err) {
|
|
|
|
l.Errorf("error dereferencing remote status %s: %v", itemIRI, err)
|
|
|
|
}
|
|
|
|
|
2023-05-12 11:15:54 +02:00
|
|
|
continue itemLoop
|
|
|
|
}
|
|
|
|
|
|
|
|
if statusable == nil {
|
|
|
|
// Already up-to-date.
|
2022-09-25 13:09:41 +02:00
|
|
|
continue itemLoop
|
2021-08-10 13:32:39 +02:00
|
|
|
}
|
2022-09-25 13:09:41 +02:00
|
|
|
|
|
|
|
// Put current and next frame at top of stack
|
|
|
|
stack = append(stack, current, &frame{
|
|
|
|
statusIRI: itemIRI,
|
|
|
|
statusable: statusable,
|
|
|
|
})
|
2022-09-26 09:39:59 +02:00
|
|
|
|
|
|
|
// Now start at top of loop
|
|
|
|
continue stackLoop
|
2021-08-10 13:32:39 +02:00
|
|
|
}
|
|
|
|
|
2022-09-25 13:09:41 +02:00
|
|
|
// Get the current page's "next" property
|
|
|
|
pageNext := current.page.GetActivityStreamsNext()
|
2023-06-22 21:46:36 +02:00
|
|
|
if pageNext == nil || !pageNext.IsIRI() {
|
2022-09-25 13:09:41 +02:00
|
|
|
continue stackLoop
|
|
|
|
}
|
|
|
|
|
2023-06-22 21:46:36 +02:00
|
|
|
// Get the IRI of the "next" property.
|
2022-09-25 13:09:41 +02:00
|
|
|
pageNextIRI := pageNext.GetIRI()
|
2023-06-22 21:46:36 +02:00
|
|
|
|
|
|
|
// Ensure this isn't a self-referencing page...
|
|
|
|
// We don't need to store / check against a map of IRIs
|
|
|
|
// as our getStatusByIRI() function above prevents iter'ing
|
|
|
|
// over statuses that have been dereferenced recently, due to
|
|
|
|
// the `fetched_at` field preventing frequent refetches.
|
|
|
|
if id := current.page.GetJSONLDId(); id != nil &&
|
|
|
|
pageNextIRI.String() == id.Get().String() {
|
|
|
|
log.Warnf(ctx, "self referencing collection page: %s", pageNextIRI)
|
2022-09-26 10:14:36 +02:00
|
|
|
continue stackLoop
|
|
|
|
}
|
2022-09-25 13:09:41 +02:00
|
|
|
|
|
|
|
// Dereference this next collection page by its IRI
|
2023-05-12 11:15:54 +02:00
|
|
|
collectionPage, err := d.dereferenceCollectionPage(ctx,
|
|
|
|
username,
|
|
|
|
pageNextIRI,
|
|
|
|
)
|
2022-09-25 13:09:41 +02:00
|
|
|
if err != nil {
|
|
|
|
l.Errorf("error dereferencing remote collection page %q: %s", pageNextIRI.String(), err)
|
|
|
|
continue stackLoop
|
|
|
|
}
|
|
|
|
|
|
|
|
// Set the updated collection page
|
|
|
|
current.page = collectionPage
|
2022-09-26 10:14:36 +02:00
|
|
|
continue pageLoop
|
2021-08-10 13:32:39 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-05-28 14:08:35 +02:00
|
|
|
return gtserror.Newf("reached %d descendant iterations for %q", maxIter, ogIRI.String())
|
2021-08-10 13:32:39 +02:00
|
|
|
}
|