mirror of
https://github.com/superseriousbusiness/gotosocial
synced 2025-06-05 21:59:39 +02:00
[feature] Federate status language in and out (#2366)
* [feature] Federate status language in + out
* go fmt
* tests, little fix
* improve comments
* unnest a bit
* avoid unnecessary nil check
* use more descriptive variable for contentMap
* prefer instance languages when selecting from contentMap
* update docs to reflect lang selection
* rename rdfLangString -> rdfLangs
* update comments to mention Pollable
* iter through slice instead of map
This commit is contained in:
@@ -244,9 +244,15 @@ func (c *Converter) ASStatusToStatus(ctx context.Context, statusable ap.Statusab
|
||||
}
|
||||
|
||||
// status.Content
|
||||
// status.Language
|
||||
//
|
||||
// The (html-formatted) content of this status.
|
||||
status.Content = ap.ExtractContent(statusable)
|
||||
// Many implementations set both content
|
||||
// and contentMap; we can use these to
|
||||
// infer the language of the status.
|
||||
status.Content, status.Language = ContentToContentLanguage(
|
||||
ctx,
|
||||
ap.ExtractContent(statusable),
|
||||
)
|
||||
|
||||
// status.Attachments
|
||||
//
|
||||
@@ -396,9 +402,6 @@ func (c *Converter) ASStatusToStatus(ctx context.Context, statusable ap.Statusab
|
||||
return &s
|
||||
}()
|
||||
|
||||
// language
|
||||
// TODO: we might be able to extract this from the contentMap field
|
||||
|
||||
// ActivityStreamsType
|
||||
status.ActivityStreamsType = statusable.GetTypeName()
|
||||
|
||||
@@ -707,7 +710,7 @@ func (c *Converter) ASFlagToReport(ctx context.Context, flaggable ap.Flaggable)
|
||||
// For Mastodon, this will just be a string, or nothing.
|
||||
// In Misskey's case, it may also contain the URLs of
|
||||
// one or more reported statuses, so extract these too.
|
||||
content := ap.ExtractContent(flaggable)
|
||||
content := ap.ExtractContent(flaggable).Content
|
||||
statusURIs := []*url.URL{}
|
||||
inlineURLs := misskeyReportInlineURLs(content)
|
||||
statusURIs = append(statusURIs, inlineURLs...)
|
||||
|
@@ -45,6 +45,10 @@ func (suite *ASToInternalTestSuite) jsonToType(in string) vocab.Type {
|
||||
suite.FailNow(err.Error())
|
||||
}
|
||||
|
||||
if statusable, ok := t.(ap.Statusable); ok {
|
||||
ap.NormalizeIncomingContent(statusable, m)
|
||||
}
|
||||
|
||||
return t
|
||||
}
|
||||
|
||||
@@ -103,7 +107,8 @@ func (suite *ASToInternalTestSuite) TestParsePublicStatus() {
|
||||
suite.NoError(err)
|
||||
|
||||
suite.Equal("reading: Punishment and Reward in the Corporate University", status.ContentWarning)
|
||||
suite.Equal(`<p>> So we have to examine critical thinking as a signifier, dynamic and ambiguous. It has a normative definition, a tacit definition, and an ideal definition. One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content)
|
||||
suite.Equal(`<p>> So we have to examine critical thinking as a signifier, dynamic and ambiguous. It has a normative definition, a tacit definition, and an ideal definition. One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content)
|
||||
suite.Equal("en", status.Language)
|
||||
}
|
||||
|
||||
func (suite *ASToInternalTestSuite) TestParsePublicStatusNoURL() {
|
||||
@@ -117,7 +122,7 @@ func (suite *ASToInternalTestSuite) TestParsePublicStatusNoURL() {
|
||||
suite.NoError(err)
|
||||
|
||||
suite.Equal("reading: Punishment and Reward in the Corporate University", status.ContentWarning)
|
||||
suite.Equal(`<p>> So we have to examine critical thinking as a signifier, dynamic and ambiguous. It has a normative definition, a tacit definition, and an ideal definition. One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content)
|
||||
suite.Equal(`<p>> So we have to examine critical thinking as a signifier, dynamic and ambiguous. It has a normative definition, a tacit definition, and an ideal definition. One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content)
|
||||
|
||||
// on statuses with no URL in them (like ones we get from pleroma sometimes) we should use the AP URI of the status as URL
|
||||
suite.Equal("http://fossbros-anonymous.io/users/foss_satan/statuses/108138763199405167", status.URL)
|
||||
|
@@ -607,9 +607,17 @@ func (c *Converter) StatusToAS(ctx context.Context, s *gtsmodel.Status) (ap.Stat
|
||||
// conversation
|
||||
// TODO
|
||||
|
||||
// content -- the actual post itself
|
||||
// content -- the actual post
|
||||
// itself, plus the language
|
||||
contentProp := streams.NewActivityStreamsContentProperty()
|
||||
contentProp.AppendXMLSchemaString(s.Content)
|
||||
|
||||
if s.Language != "" {
|
||||
contentProp.AppendRDFLangString(map[string]string{
|
||||
s.Language: s.Content,
|
||||
})
|
||||
}
|
||||
|
||||
status.SetActivityStreamsContent(contentProp)
|
||||
|
||||
// attachments
|
||||
|
@@ -340,6 +340,9 @@ func (suite *InternalToASTestSuite) TestStatusToAS() {
|
||||
"attributedTo": "http://localhost:8080/users/the_mighty_zork",
|
||||
"cc": "http://localhost:8080/users/the_mighty_zork/followers",
|
||||
"content": "hello everyone!",
|
||||
"contentMap": {
|
||||
"en": "hello everyone!"
|
||||
},
|
||||
"id": "http://localhost:8080/users/the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY",
|
||||
"published": "2021-10-20T12:40:37+02:00",
|
||||
"replies": {
|
||||
@@ -379,16 +382,21 @@ func (suite *InternalToASTestSuite) TestStatusWithTagsToASWithIDs() {
|
||||
// http://joinmastodon.org/ns, https://www.w3.org/ns/activitystreams --
|
||||
// will appear, so trim them out of the string for consistency
|
||||
trimmed := strings.SplitAfter(string(bytes), `"attachment":`)[1]
|
||||
suite.Equal(` {
|
||||
"blurhash": "LNJRdVM{00Rj%Mayt7j[4nWBofRj",
|
||||
"mediaType": "image/jpeg",
|
||||
"name": "Black and white image of some 50's style text saying: Welcome On Board",
|
||||
"type": "Document",
|
||||
"url": "http://localhost:8080/fileserver/01F8MH17FWEB39HZJ76B6VXSKF/attachment/original/01F8MH6NEM8D7527KZAECTCR76.jpg"
|
||||
},
|
||||
suite.Equal(` [
|
||||
{
|
||||
"blurhash": "LNJRdVM{00Rj%Mayt7j[4nWBofRj",
|
||||
"mediaType": "image/jpeg",
|
||||
"name": "Black and white image of some 50's style text saying: Welcome On Board",
|
||||
"type": "Document",
|
||||
"url": "http://localhost:8080/fileserver/01F8MH17FWEB39HZJ76B6VXSKF/attachment/original/01F8MH6NEM8D7527KZAECTCR76.jpg"
|
||||
}
|
||||
],
|
||||
"attributedTo": "http://localhost:8080/users/admin",
|
||||
"cc": "http://localhost:8080/users/admin/followers",
|
||||
"content": "hello world! #welcome ! first post on the instance :rainbow: !",
|
||||
"contentMap": {
|
||||
"en": "hello world! #welcome ! first post on the instance :rainbow: !"
|
||||
},
|
||||
"id": "http://localhost:8080/users/admin/statuses/01F8MH75CBF9JFX4ZAD54N0W0R",
|
||||
"published": "2021-10-20T11:36:45Z",
|
||||
"replies": {
|
||||
@@ -446,16 +454,21 @@ func (suite *InternalToASTestSuite) TestStatusWithTagsToASFromDB() {
|
||||
// http://joinmastodon.org/ns, https://www.w3.org/ns/activitystreams --
|
||||
// will appear, so trim them out of the string for consistency
|
||||
trimmed := strings.SplitAfter(string(bytes), `"attachment":`)[1]
|
||||
suite.Equal(` {
|
||||
"blurhash": "LNJRdVM{00Rj%Mayt7j[4nWBofRj",
|
||||
"mediaType": "image/jpeg",
|
||||
"name": "Black and white image of some 50's style text saying: Welcome On Board",
|
||||
"type": "Document",
|
||||
"url": "http://localhost:8080/fileserver/01F8MH17FWEB39HZJ76B6VXSKF/attachment/original/01F8MH6NEM8D7527KZAECTCR76.jpg"
|
||||
},
|
||||
suite.Equal(` [
|
||||
{
|
||||
"blurhash": "LNJRdVM{00Rj%Mayt7j[4nWBofRj",
|
||||
"mediaType": "image/jpeg",
|
||||
"name": "Black and white image of some 50's style text saying: Welcome On Board",
|
||||
"type": "Document",
|
||||
"url": "http://localhost:8080/fileserver/01F8MH17FWEB39HZJ76B6VXSKF/attachment/original/01F8MH6NEM8D7527KZAECTCR76.jpg"
|
||||
}
|
||||
],
|
||||
"attributedTo": "http://localhost:8080/users/admin",
|
||||
"cc": "http://localhost:8080/users/admin/followers",
|
||||
"content": "hello world! #welcome ! first post on the instance :rainbow: !",
|
||||
"contentMap": {
|
||||
"en": "hello world! #welcome ! first post on the instance :rainbow: !"
|
||||
},
|
||||
"id": "http://localhost:8080/users/admin/statuses/01F8MH75CBF9JFX4ZAD54N0W0R",
|
||||
"published": "2021-10-20T11:36:45Z",
|
||||
"replies": {
|
||||
@@ -519,6 +532,9 @@ func (suite *InternalToASTestSuite) TestStatusToASWithMentions() {
|
||||
"http://localhost:8080/users/the_mighty_zork"
|
||||
],
|
||||
"content": "hi @the_mighty_zork welcome to the instance!",
|
||||
"contentMap": {
|
||||
"en": "hi @the_mighty_zork welcome to the instance!"
|
||||
},
|
||||
"id": "http://localhost:8080/users/admin/statuses/01FF25D5Q0DH7CHD57CTRS6WK0",
|
||||
"inReplyTo": "http://localhost:8080/users/the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY",
|
||||
"published": "2021-11-20T13:32:16Z",
|
||||
|
@@ -31,6 +31,8 @@ import (
|
||||
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/config"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/language"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/log"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/regexes"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/text"
|
||||
)
|
||||
@@ -184,3 +186,102 @@ func placeholdUnknownAttachments(arr []apimodel.Attachment) (string, []apimodel.
|
||||
|
||||
return text.SanitizeToHTML(aside.String()), arr
|
||||
}
|
||||
|
||||
// ContentToContentLanguage tries to
|
||||
// extract a content string and language
|
||||
// tag string from the given intermediary
|
||||
// content.
|
||||
//
|
||||
// Either/both of the returned strings may
|
||||
// be empty, depending on how things go.
|
||||
func ContentToContentLanguage(
|
||||
ctx context.Context,
|
||||
content gtsmodel.Content,
|
||||
) (
|
||||
string, // content
|
||||
string, // language
|
||||
) {
|
||||
var (
|
||||
contentStr string
|
||||
langTagStr string
|
||||
)
|
||||
|
||||
switch contentMap := content.ContentMap; {
|
||||
// Simplest case: no `contentMap`.
|
||||
// Return `content`, even if empty.
|
||||
case contentMap == nil:
|
||||
return content.Content, ""
|
||||
|
||||
// `content` and `contentMap` set.
|
||||
// Try to infer "primary" language.
|
||||
case content.Content != "":
|
||||
// Assume `content` is intended
|
||||
// primary content, and look for
|
||||
// corresponding language tag.
|
||||
contentStr = content.Content
|
||||
|
||||
for t, c := range contentMap {
|
||||
if contentStr == c {
|
||||
langTagStr = t
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// `content` not set; `contentMap`
|
||||
// is set with only one value.
|
||||
// This must be the "primary" lang.
|
||||
case len(contentMap) == 1:
|
||||
// Use an empty loop to
|
||||
// get the values we want.
|
||||
// nolint:revive
|
||||
for langTagStr, contentStr = range contentMap {
|
||||
}
|
||||
|
||||
// Only `contentMap` is set, with more
|
||||
// than one value. Map order is not
|
||||
// guaranteed so we can't know the
|
||||
// "primary" language.
|
||||
//
|
||||
// Try to select content using our
|
||||
// instance's configured languages.
|
||||
//
|
||||
// In case of no hits, just take the
|
||||
// first tag and content in the map.
|
||||
default:
|
||||
instanceLangs := config.GetInstanceLanguages()
|
||||
for _, langTagStr = range instanceLangs.TagStrs() {
|
||||
if contentStr = contentMap[langTagStr]; contentStr != "" {
|
||||
// Hit!
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// If nothing found, just take
|
||||
// the first entry we can get by
|
||||
// breaking after the first iter.
|
||||
if contentStr == "" {
|
||||
for langTagStr, contentStr = range contentMap {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if langTagStr != "" {
|
||||
// Found a lang tag for this content,
|
||||
// make sure it's valid / parseable.
|
||||
lang, err := language.Parse(langTagStr)
|
||||
if err != nil {
|
||||
log.Warnf(
|
||||
ctx,
|
||||
"could not parse %s as BCP47 language tag in status contentMap: %v",
|
||||
langTagStr, err,
|
||||
)
|
||||
} else {
|
||||
// Inferred the language!
|
||||
// Use normalized version.
|
||||
langTagStr = lang.TagStr
|
||||
}
|
||||
}
|
||||
|
||||
return contentStr, langTagStr
|
||||
}
|
||||
|
@@ -18,7 +18,12 @@
|
||||
package typeutils
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/superseriousbusiness/gotosocial/internal/config"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/language"
|
||||
)
|
||||
|
||||
func TestMisskeyReportContentURLs1(t *testing.T) {
|
||||
@@ -44,3 +49,112 @@ misskey-formatted`
|
||||
t.Fatalf("wanted 0 urls, got %d", l)
|
||||
}
|
||||
}
|
||||
|
||||
func TestContentToContentLanguage(t *testing.T) {
|
||||
type testcase struct {
|
||||
content gtsmodel.Content
|
||||
instanceLanguages language.Languages
|
||||
expectedContent string
|
||||
expectedLang string
|
||||
}
|
||||
|
||||
ctx, cncl := context.WithCancel(context.Background())
|
||||
defer cncl()
|
||||
|
||||
for i, testcase := range []testcase{
|
||||
{
|
||||
content: gtsmodel.Content{
|
||||
Content: "hello world",
|
||||
ContentMap: nil,
|
||||
},
|
||||
expectedContent: "hello world",
|
||||
expectedLang: "",
|
||||
},
|
||||
{
|
||||
content: gtsmodel.Content{
|
||||
Content: "",
|
||||
ContentMap: map[string]string{
|
||||
"en": "hello world",
|
||||
},
|
||||
},
|
||||
expectedContent: "hello world",
|
||||
expectedLang: "en",
|
||||
},
|
||||
{
|
||||
content: gtsmodel.Content{
|
||||
Content: "bonjour le monde",
|
||||
ContentMap: map[string]string{
|
||||
"en": "hello world",
|
||||
"fr": "bonjour le monde",
|
||||
},
|
||||
},
|
||||
expectedContent: "bonjour le monde",
|
||||
expectedLang: "fr",
|
||||
},
|
||||
{
|
||||
content: gtsmodel.Content{
|
||||
Content: "bonjour le monde",
|
||||
ContentMap: map[string]string{
|
||||
"en": "hello world",
|
||||
},
|
||||
},
|
||||
expectedContent: "bonjour le monde",
|
||||
expectedLang: "",
|
||||
},
|
||||
{
|
||||
content: gtsmodel.Content{
|
||||
Content: "",
|
||||
ContentMap: map[string]string{
|
||||
"en": "hello world",
|
||||
"ru": "Привет, мир!",
|
||||
"nl": "hallo wereld!",
|
||||
"ca": "Hola món!",
|
||||
},
|
||||
},
|
||||
instanceLanguages: language.Languages{
|
||||
{TagStr: "en"},
|
||||
{TagStr: "ca"},
|
||||
},
|
||||
expectedContent: "hello world",
|
||||
expectedLang: "en",
|
||||
},
|
||||
{
|
||||
content: gtsmodel.Content{
|
||||
Content: "",
|
||||
ContentMap: map[string]string{
|
||||
"en": "hello world",
|
||||
"ru": "Привет, мир!",
|
||||
"nl": "hallo wereld!",
|
||||
"ca": "Hola món!",
|
||||
},
|
||||
},
|
||||
instanceLanguages: language.Languages{
|
||||
{TagStr: "ca"},
|
||||
{TagStr: "en"},
|
||||
},
|
||||
expectedContent: "Hola món!",
|
||||
expectedLang: "ca",
|
||||
},
|
||||
} {
|
||||
langs, err := language.InitLangs(testcase.instanceLanguages.TagStrs())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
config.SetInstanceLanguages(langs)
|
||||
|
||||
content, language := ContentToContentLanguage(ctx, testcase.content)
|
||||
if content != testcase.expectedContent {
|
||||
t.Errorf(
|
||||
"test %d expected content '%s' got '%s'",
|
||||
i, testcase.expectedContent, content,
|
||||
)
|
||||
}
|
||||
|
||||
if language != testcase.expectedLang {
|
||||
t.Errorf(
|
||||
"test %d expected language '%s' got '%s'",
|
||||
i, testcase.expectedLang, language,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -85,6 +85,9 @@ func (suite *WrapTestSuite) TestWrapNoteInCreate() {
|
||||
"attributedTo": "http://localhost:8080/users/the_mighty_zork",
|
||||
"cc": "http://localhost:8080/users/the_mighty_zork/followers",
|
||||
"content": "hello everyone!",
|
||||
"contentMap": {
|
||||
"en": "hello everyone!"
|
||||
},
|
||||
"id": "http://localhost:8080/users/the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY",
|
||||
"published": "2021-10-20T12:40:37+02:00",
|
||||
"replies": {
|
||||
|
Reference in New Issue
Block a user