GoToSocial/internal/processing/search.go

303 lines
10 KiB
Go
Raw Normal View History

// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package processing
import (
"context"
"errors"
2021-05-29 20:35:03 +02:00
"fmt"
"net/url"
"strings"
[chore] use our own logging implementation (#716) * first commit Signed-off-by: kim <grufwub@gmail.com> * replace logging with our own log library Signed-off-by: kim <grufwub@gmail.com> * fix imports Signed-off-by: kim <grufwub@gmail.com> * fix log imports Signed-off-by: kim <grufwub@gmail.com> * add license text Signed-off-by: kim <grufwub@gmail.com> * fix package import cycle between config and log package Signed-off-by: kim <grufwub@gmail.com> * fix empty kv.Fields{} being passed to WithFields() Signed-off-by: kim <grufwub@gmail.com> * fix uses of log.WithFields() with whitespace issues and empty slices Signed-off-by: kim <grufwub@gmail.com> * *linter related grumbling* Signed-off-by: kim <grufwub@gmail.com> * gofmt the codebase! also fix more log.WithFields() formatting issues Signed-off-by: kim <grufwub@gmail.com> * update testrig code to match new changes Signed-off-by: kim <grufwub@gmail.com> * fix error wrapping in non fmt.Errorf function Signed-off-by: kim <grufwub@gmail.com> * add benchmarking of log.Caller() vs non-cached Signed-off-by: kim <grufwub@gmail.com> * fix syslog tests, add standard build tags to test runner to ensure consistency Signed-off-by: kim <grufwub@gmail.com> * make syslog tests more robust Signed-off-by: kim <grufwub@gmail.com> * fix caller depth arithmatic (is that how you spell it?) Signed-off-by: kim <grufwub@gmail.com> * update to use unkeyed fields in kv.Field{} instances Signed-off-by: kim <grufwub@gmail.com> * update go-kv library Signed-off-by: kim <grufwub@gmail.com> * update libraries list Signed-off-by: kim <grufwub@gmail.com> * fuck you linter get nerfed Signed-off-by: kim <grufwub@gmail.com> Co-authored-by: tobi <31960611+tsmethurst@users.noreply.github.com>
2022-07-19 10:47:55 +02:00
"codeberg.org/gruf/go-kv"
"github.com/superseriousbusiness/gotosocial/internal/ap"
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/federation/dereferencing"
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
[chore] use our own logging implementation (#716) * first commit Signed-off-by: kim <grufwub@gmail.com> * replace logging with our own log library Signed-off-by: kim <grufwub@gmail.com> * fix imports Signed-off-by: kim <grufwub@gmail.com> * fix log imports Signed-off-by: kim <grufwub@gmail.com> * add license text Signed-off-by: kim <grufwub@gmail.com> * fix package import cycle between config and log package Signed-off-by: kim <grufwub@gmail.com> * fix empty kv.Fields{} being passed to WithFields() Signed-off-by: kim <grufwub@gmail.com> * fix uses of log.WithFields() with whitespace issues and empty slices Signed-off-by: kim <grufwub@gmail.com> * *linter related grumbling* Signed-off-by: kim <grufwub@gmail.com> * gofmt the codebase! also fix more log.WithFields() formatting issues Signed-off-by: kim <grufwub@gmail.com> * update testrig code to match new changes Signed-off-by: kim <grufwub@gmail.com> * fix error wrapping in non fmt.Errorf function Signed-off-by: kim <grufwub@gmail.com> * add benchmarking of log.Caller() vs non-cached Signed-off-by: kim <grufwub@gmail.com> * fix syslog tests, add standard build tags to test runner to ensure consistency Signed-off-by: kim <grufwub@gmail.com> * make syslog tests more robust Signed-off-by: kim <grufwub@gmail.com> * fix caller depth arithmatic (is that how you spell it?) Signed-off-by: kim <grufwub@gmail.com> * update to use unkeyed fields in kv.Field{} instances Signed-off-by: kim <grufwub@gmail.com> * update go-kv library Signed-off-by: kim <grufwub@gmail.com> * update libraries list Signed-off-by: kim <grufwub@gmail.com> * fuck you linter get nerfed Signed-off-by: kim <grufwub@gmail.com> Co-authored-by: tobi <31960611+tsmethurst@users.noreply.github.com>
2022-07-19 10:47:55 +02:00
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/oauth"
"github.com/superseriousbusiness/gotosocial/internal/util"
)
// Implementation note: in this function, we tend to log errors
// at debug level rather than return them. This is because the
// search has a sort of fallthrough logic: if we can't get a result
// with x search, we should try with y search rather than returning.
//
// If we get to the end and still haven't found anything, even then
// we shouldn't return an error, just return an empty search result.
//
// The only exception to this is when we get a malformed query, in
// which case we return a bad request error so the user knows they
// did something funky.
func (p *Processor) SearchGet(ctx context.Context, authed *oauth.Auth, search *apimodel.SearchQuery) (*apimodel.SearchResult, gtserror.WithCode) {
// tidy up the query and make sure it wasn't just spaces
query := strings.TrimSpace(search.Query)
if query == "" {
err := errors.New("search query was empty string after trimming space")
return nil, gtserror.NewErrorBadRequest(err, err.Error())
}
l := log.WithContext(ctx).
WithFields(kv.Fields{{"query", query}}...)
searchResult := &apimodel.SearchResult{
Accounts: []apimodel.Account{},
Statuses: []apimodel.Status{},
Hashtags: []apimodel.Tag{},
}
// currently the search will only ever return one result,
// so return nothing if the offset is greater than 0
if search.Offset > 0 {
return searchResult, nil
}
foundAccounts := []*gtsmodel.Account{}
foundStatuses := []*gtsmodel.Status{}
2021-05-29 20:35:03 +02:00
var foundOne bool
/*
SEARCH BY MENTION
check if the query is something like @whatever_username@example.org -- this means it's likely a remote account
*/
maybeNamestring := query
if maybeNamestring[0] != '@' {
maybeNamestring = "@" + maybeNamestring
}
if username, domain, err := util.ExtractNamestringParts(maybeNamestring); err == nil {
l.Trace("search term is a mention, looking it up...")
blocked, err := p.state.DB.IsDomainBlocked(ctx, domain)
if err != nil {
return nil, gtserror.NewErrorInternalError(fmt.Errorf("error checking domain block: %w", err))
}
if blocked {
l.Debug("domain is blocked")
return searchResult, nil
}
foundAccount, err := p.searchAccountByUsernameDomain(ctx, authed, username, domain, search.Resolve)
if err != nil {
var errNotRetrievable *dereferencing.ErrNotRetrievable
if !errors.As(err, &errNotRetrievable) {
// return a proper error only if it wasn't just not retrievable
return nil, gtserror.NewErrorInternalError(fmt.Errorf("error looking up account: %w", err))
}
return searchResult, nil
2021-05-29 20:35:03 +02:00
}
foundAccounts = append(foundAccounts, foundAccount)
foundOne = true
l.Trace("got an account by searching by mention")
2021-05-29 20:35:03 +02:00
}
/*
SEARCH BY URI
check if the query is a URI with a recognizable scheme and dereference it
*/
if !foundOne {
if uri, err := url.Parse(query); err == nil {
if uri.Scheme == "https" || uri.Scheme == "http" {
l.Trace("search term is a uri, looking it up...")
blocked, err := p.state.DB.IsURIBlocked(ctx, uri)
if err != nil {
return nil, gtserror.NewErrorInternalError(fmt.Errorf("error checking domain block: %w", err))
}
if blocked {
l.Debug("domain is blocked")
return searchResult, nil
}
// check if it's a status...
foundStatus, err := p.searchStatusByURI(ctx, authed, uri)
if err != nil {
// Check for semi-expected error types.
var (
errNotRetrievable *dereferencing.ErrNotRetrievable
errWrongType *ap.ErrWrongType
)
if !errors.As(err, &errNotRetrievable) && !errors.As(err, &errWrongType) {
return nil, gtserror.NewErrorInternalError(fmt.Errorf("error looking up status: %w", err))
}
} else {
foundStatuses = append(foundStatuses, foundStatus)
foundOne = true
l.Trace("got a status by searching by URI")
}
// ... or an account
if !foundOne {
foundAccount, err := p.searchAccountByURI(ctx, authed, uri, search.Resolve)
if err != nil {
// Check for semi-expected error types.
var (
errNotRetrievable *dereferencing.ErrNotRetrievable
errWrongType *ap.ErrWrongType
)
if !errors.As(err, &errNotRetrievable) && !errors.As(err, &errWrongType) {
return nil, gtserror.NewErrorInternalError(fmt.Errorf("error looking up account: %w", err))
}
} else {
foundAccounts = append(foundAccounts, foundAccount)
foundOne = true
l.Trace("got an account by searching by URI")
}
}
}
}
}
if !foundOne {
// we got nothing, we can return early
l.Trace("found nothing, returning")
return searchResult, nil
}
/*
FROM HERE ON we have our search results, it's just a matter of filtering them according to what this user is allowed to see,
and then converting them into our frontend format.
*/
for _, foundAccount := range foundAccounts {
// make sure there's no block in either direction between the account and the requester
[performance] refactoring + add fave / follow / request / visibility caching (#1607) * refactor visibility checking, add caching for visibility * invalidate visibility cache items on account / status deletes * fix requester ID passed to visibility cache nil ptr * de-interface caches, fix home / public timeline caching + visibility * finish adding code comments for visibility filter * fix angry goconst linter warnings * actually finish adding filter visibility code comments for timeline functions * move home timeline status author check to after visibility * remove now-unused code * add more code comments * add TODO code comment, update printed cache start names * update printed cache names on stop * start adding separate follow(request) delete db functions, add specific visibility cache tests * add relationship type caching * fix getting local account follows / followed-bys, other small codebase improvements * simplify invalidation using cache hooks, add more GetAccountBy___() functions * fix boosting to return 404 if not boostable but no error (to not leak status ID) * remove dead code * improved placement of cache invalidation * update license headers * add example follow, follow-request config entries * add example visibility cache configuration to config file * use specific PutFollowRequest() instead of just Put() * add tests for all GetAccountBy() * add GetBlockBy() tests * update block to check primitive fields * update and finish adding Get{Account,Block,Follow,FollowRequest}By() tests * fix copy-pasted code * update envparsing test * whitespace * fix bun struct tag * add license header to gtscontext * fix old license header * improved error creation to not use fmt.Errorf() when not needed * fix various rebase conflicts, fix account test * remove commented-out code, fix-up mention caching * fix mention select bun statement * ensure mention target account populated, pass in context to customrenderer logging * remove more uncommented code, fix typeutil test * 
add statusfave database model caching * add status fave cache configuration * add status fave cache example config * woops, catch missed error. nice catch linter! * add back testrig panic on nil db * update example configuration to match defaults, slight tweak to cache configuration defaults * update envparsing test with new defaults * fetch followingget to use the follow target account * use accounnt.IsLocal() instead of empty domain check * use constants for the cache visibility type check * use bun.In() for notification type restriction in db query * include replies when fetching PublicTimeline() (to account for single-author threads in Visibility{}.StatusPublicTimelineable()) * use bun query building for nested select statements to ensure working with postgres * update public timeline future status checks to match visibility filter * same as previous, for home timeline * update public timeline tests to dynamically check for appropriate statuses * migrate accounts to allow unique constraint on public_key * provide minimal account with publicKey --------- Signed-off-by: kim <grufwub@gmail.com> Co-authored-by: tsmethurst <tobi.smethurst@protonmail.com>
2023-03-28 15:03:14 +02:00
blocked, err := p.state.DB.IsEitherBlocked(ctx, authed.Account.ID, foundAccount.ID)
if err != nil {
err = fmt.Errorf("SearchGet: error checking block between %s and %s: %s", authed.Account.ID, foundAccount.ID, err)
return nil, gtserror.NewErrorInternalError(err)
}
if blocked {
l.Tracef("block exists between %s and %s, skipping this result", authed.Account.ID, foundAccount.ID)
continue
}
apiAcct, err := p.tc.AccountToAPIAccountPublic(ctx, foundAccount)
if err != nil {
err = fmt.Errorf("SearchGet: error converting account %s to api account: %s", foundAccount.ID, err)
return nil, gtserror.NewErrorInternalError(err)
}
searchResult.Accounts = append(searchResult.Accounts, *apiAcct)
}
for _, foundStatus := range foundStatuses {
// make sure each found status is visible to the requester
[performance] refactoring + add fave / follow / request / visibility caching (#1607) * refactor visibility checking, add caching for visibility * invalidate visibility cache items on account / status deletes * fix requester ID passed to visibility cache nil ptr * de-interface caches, fix home / public timeline caching + visibility * finish adding code comments for visibility filter * fix angry goconst linter warnings * actually finish adding filter visibility code comments for timeline functions * move home timeline status author check to after visibility * remove now-unused code * add more code comments * add TODO code comment, update printed cache start names * update printed cache names on stop * start adding separate follow(request) delete db functions, add specific visibility cache tests * add relationship type caching * fix getting local account follows / followed-bys, other small codebase improvements * simplify invalidation using cache hooks, add more GetAccountBy___() functions * fix boosting to return 404 if not boostable but no error (to not leak status ID) * remove dead code * improved placement of cache invalidation * update license headers * add example follow, follow-request config entries * add example visibility cache configuration to config file * use specific PutFollowRequest() instead of just Put() * add tests for all GetAccountBy() * add GetBlockBy() tests * update block to check primitive fields * update and finish adding Get{Account,Block,Follow,FollowRequest}By() tests * fix copy-pasted code * update envparsing test * whitespace * fix bun struct tag * add license header to gtscontext * fix old license header * improved error creation to not use fmt.Errorf() when not needed * fix various rebase conflicts, fix account test * remove commented-out code, fix-up mention caching * fix mention select bun statement * ensure mention target account populated, pass in context to customrenderer logging * remove more uncommented code, fix typeutil test * 
add statusfave database model caching * add status fave cache configuration * add status fave cache example config * woops, catch missed error. nice catch linter! * add back testrig panic on nil db * update example configuration to match defaults, slight tweak to cache configuration defaults * update envparsing test with new defaults * fetch followingget to use the follow target account * use accounnt.IsLocal() instead of empty domain check * use constants for the cache visibility type check * use bun.In() for notification type restriction in db query * include replies when fetching PublicTimeline() (to account for single-author threads in Visibility{}.StatusPublicTimelineable()) * use bun query building for nested select statements to ensure working with postgres * update public timeline future status checks to match visibility filter * same as previous, for home timeline * update public timeline tests to dynamically check for appropriate statuses * migrate accounts to allow unique constraint on public_key * provide minimal account with publicKey --------- Signed-off-by: kim <grufwub@gmail.com> Co-authored-by: tsmethurst <tobi.smethurst@protonmail.com>
2023-03-28 15:03:14 +02:00
visible, err := p.filter.StatusVisible(ctx, authed.Account, foundStatus)
if err != nil {
err = fmt.Errorf("SearchGet: error checking visibility of status %s for account %s: %s", foundStatus.ID, authed.Account.ID, err)
return nil, gtserror.NewErrorInternalError(err)
}
if !visible {
l.Tracef("status %s is not visible to account %s, skipping this result", foundStatus.ID, authed.Account.ID)
continue
}
apiStatus, err := p.tc.StatusToAPIStatus(ctx, foundStatus, authed.Account)
if err != nil {
err = fmt.Errorf("SearchGet: error converting status %s to api status: %s", foundStatus.ID, err)
return nil, gtserror.NewErrorInternalError(err)
}
searchResult.Statuses = append(searchResult.Statuses, *apiStatus)
}
return searchResult, nil
}
// searchStatusByURI asks the federator for the status with the given URI,
// using the authed account's username as the requesting user and a context
// marked via gtscontext.SetFastFail.
//
// When the status that comes back is not local and a statusable was
// returned alongside it, the federator is additionally asked to
// dereference the thread around that status before returning.
//
// Any error from the federator's GetStatus call is returned unchanged.
func (p *Processor) searchStatusByURI(ctx context.Context, authed *oauth.Auth, uri *url.URL) (*gtsmodel.Status, error) {
	var (
		fastFailCtx = gtscontext.SetFastFail(ctx)
		requester   = authed.Account.Username
	)

	status, statusable, err := p.federator.GetStatus(fastFailCtx, requester, uri, true, true)
	if err != nil {
		return nil, err
	}

	// Local statuses, and statuses that came back without
	// a statusable, are returned without touching the thread.
	if *status.Local || statusable == nil {
		return status, nil
	}

	// Attempt to dereference the status thread while we are here.
	p.federator.DereferenceThread(fastFailCtx, requester, uri, status, statusable)

	return status, nil
}
// searchAccountByURI finds the account identified by the given URI.
//
// With resolve set, the lookup is handed to the federator, using the
// authed account's username as the requesting user and a context marked
// via gtscontext.SetFastFail.
//
// Without resolve, only the database is consulted: first treating the
// string form of the URI as the account's URI, then as the account's URL.
// If neither lookup yields an account, the db.ErrNoEntries error from the
// URL lookup is returned wrapped via dereferencing.NewErrNotRetrievable.
// Any other database error is returned wrapped with context.
func (p *Processor) searchAccountByURI(ctx context.Context, authed *oauth.Auth, uri *url.URL, resolve bool) (*gtsmodel.Account, error) {
	if resolve {
		// Caller allows resolving: let the federator handle it.
		return p.federator.GetAccountByURI(
			gtscontext.SetFastFail(ctx),
			authed.Account.Username,
			uri, false,
		)
	}

	rawURI := uri.String()

	// First attempt: database account whose ID URI matches.
	acct, err := p.state.DB.GetAccountByURI(ctx, rawURI)
	if err != nil && !errors.Is(err, db.ErrNoEntries) {
		return nil, fmt.Errorf("searchAccountByURI: error checking database for account %s: %w", rawURI, err)
	}

	if acct != nil {
		return acct, nil
	}

	// Second attempt: database account whose URL matches.
	acct, err = p.state.DB.GetAccountByURL(ctx, rawURI)
	switch {
	case err == nil:
		return acct, nil

	case errors.Is(err, db.ErrNoEntries):
		// Nothing stored under this URI or URL.
		return nil, dereferencing.NewErrNotRetrievable(err)

	default:
		return nil, fmt.Errorf("searchAccountByURI: error checking database for account %s: %w", rawURI, err)
	}
}
// searchAccountByUsernameDomain finds the account with the given
// username and domain.
//
// With resolve set, the lookup is handed to the federator (with the
// username and domain passed through as given), using the authed account's
// username as the requesting user and a context marked via
// gtscontext.SetFastFail.
//
// Without resolve, only the database is consulted. A domain equal to the
// configured host or account domain is replaced by the empty string first,
// as local accounts are looked up using an empty domain. If the database
// has no matching entry, the db.ErrNoEntries error is returned wrapped via
// dereferencing.NewErrNotRetrievable. Any other database error is returned
// wrapped with context.
func (p *Processor) searchAccountByUsernameDomain(ctx context.Context, authed *oauth.Auth, username string, domain string, resolve bool) (*gtsmodel.Account, error) {
	if resolve {
		// Caller allows resolving: let the federator handle it.
		return p.federator.GetAccountByUsernameDomain(
			gtscontext.SetFastFail(ctx),
			authed.Account.Username,
			username, domain, false,
		)
	}

	switch domain {
	case config.GetHost(), config.GetAccountDomain():
		// We do local lookups using an empty domain,
		// else it will fail the db search below.
		domain = ""
	}

	// Search the database for existing account with USERNAME@DOMAIN.
	acct, err := p.state.DB.GetAccountByUsernameDomain(ctx, username, domain)
	switch {
	case err == nil:
		return acct, nil

	case errors.Is(err, db.ErrNoEntries):
		// No such account stored in the database.
		return nil, dereferencing.NewErrNotRetrievable(err)

	default:
		return nil, fmt.Errorf("searchAccountByUsernameDomain: error checking database for account %s@%s: %w", username, domain, err)
	}
}