testing hashtag extraction

This commit is contained in:
Nicolas Constant
2020-07-31 22:49:00 -04:00
parent bfc4dcb4fd
commit 83507614a4
4 changed files with 134 additions and 52 deletions

View File

@ -6,6 +6,7 @@ using System.Text.RegularExpressions;
using BirdsiteLive.ActivityPub;
using BirdsiteLive.ActivityPub.Models;
using BirdsiteLive.Common.Settings;
using BirdsiteLive.Domain.Tools;
using BirdsiteLive.Twitter.Models;
using Tweetinvi.Models;
using Tweetinvi.Models.Entities;
@ -20,11 +21,13 @@ namespace BirdsiteLive.Domain
public class StatusService : IStatusService
{
private readonly InstanceSettings _instanceSettings;
private readonly IStatusExtractor _statusExtractor;
#region Ctor
public StatusService(InstanceSettings instanceSettings)
/// <summary>
/// Creates the service and stores the supplied instance settings and
/// status extractor in their private fields.
/// </summary>
/// <param name="instanceSettings">Settings object kept in <c>_instanceSettings</c>.</param>
/// <param name="statusExtractor">Extractor kept in <c>_statusExtractor</c>.</param>
public StatusService(InstanceSettings instanceSettings, IStatusExtractor statusExtractor)
{
_instanceSettings = instanceSettings;
_statusExtractor = statusExtractor;
}
#endregion
@ -37,7 +40,7 @@ namespace BirdsiteLive.Domain
var to = $"{actorUrl}/followers";
var apPublic = "https://www.w3.org/ns/activitystreams#Public";
var extractedTags = ExtractTags(tweet.MessageContent);
var extractedTags = _statusExtractor.ExtractTags(tweet.MessageContent);
var note = new Note
{
@ -64,32 +67,6 @@ namespace BirdsiteLive.Domain
return note;
}
/// <summary>
/// Scans a status text for hashtags, replaces each one with an HTML anchor
/// pointing at this instance's tag page, and collects a <c>Tag</c> entry
/// for every match.
/// </summary>
/// <param name="messageContent">Status text to scan and rewrite.</param>
/// <returns>
/// The rewritten content together with the tags that were found, in match order.
/// </returns>
private (string content, Tag[] tags) ExtractTags(string messageContent)
{
    // A hashtag must be preceded by a non-word character and must not be
    // directly followed by ';'. Only group 1 holds the hashtag itself.
    var regex = new Regex(@"\W(\#[a-zA-Z0-9]+\b)(?!;)");
    var tags = new List<Tag>();

    foreach (Match m in regex.Matches(messageContent))
    {
        // Use the captured group rather than the whole match: the whole match
        // also contains the preceding non-word character (e.g. '(' or '>'),
        // which previously leaked into the tag name and URL.
        var tag = m.Groups[1].Value.Substring(1);
        var url = $"https://{_instanceSettings.Domain}/tags/{tag}";

        tags.Add(new Tag
        {
            name = $"#{tag}",
            href = url,
            type = "Hashtag"
        });

        messageContent = messageContent.Replace(
            $"#{tag}",
            $@"<a href=""{url}"" class=""mention hashtag"" rel=""tag"">#<span>{tag}</span></a>");
    }

    // Return the collected tags; the previous code returned an empty array
    // and silently dropped everything gathered above.
    return (messageContent, tags.ToArray());
}
private Attachment[] Convert(ExtractedMedia[] media)
{
if(media == null) return new Attachment[0];