diff --git a/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs b/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs
index c5e8ed7..99b2f32 100644
--- a/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs
+++ b/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs
@@ -4,6 +4,7 @@ namespace BirdsiteLive.Common.Regexes
{
public class HashtagRegexes
{
- public static readonly Regex Hashtag = new Regex(@"(.)(#[a-zA-Z0-9]+)(\s|$|[.,;:!?/|-])");
+ public static readonly Regex HashtagName = new Regex(@"^[a-zA-Z0-9_]+$"); // Anchored (^...$) check: one or more ASCII letters, digits or underscores.
+ public static readonly Regex Hashtag = new Regex(@"(.?)#([a-zA-Z0-9_]+)(\s|$|[<.,;:!?/|-])"); // Groups: 1 = optional character before '#', 2 = tag name without '#', 3 = trailing delimiter (whitespace, end of input, or one of < . , ; : ! ? / | -).
}
}
\ No newline at end of file
diff --git a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs
index a181ac2..9429096 100644
--- a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs
+++ b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs
@@ -4,6 +4,8 @@ using System.Text.RegularExpressions;
using BirdsiteLive.ActivityPub.Models;
using BirdsiteLive.Common.Regexes;
using BirdsiteLive.Common.Settings;
+using BirdsiteLive.Twitter;
+using Microsoft.Extensions.Logging;
namespace BirdsiteLive.Domain.Tools
{
@@ -14,7 +16,7 @@ namespace BirdsiteLive.Domain.Tools
public class StatusExtractor : IStatusExtractor
{
- private readonly Regex _hastagRegex = new Regex(@"\W(\#[a-zA-Z0-9_ー]+\b)(?!;)");
+ //private readonly Regex _hastagRegex = new Regex(@"\W(\#[a-zA-Z0-9_ー]+\b)(?!;)");
//private readonly Regex _hastagRegex = new Regex(@"#\w+");
//private readonly Regex _hastagRegex = new Regex(@"(?<=[\s>]|^)#(\w*[a-zA-Z0-9_ー]+\w*)\b(?!;)");
//private readonly Regex _hastagRegex = new Regex(@"(?<=[\s>]|^)#(\w*[a-zA-Z0-9_ー]+)\b(?!;)");
@@ -27,29 +29,31 @@ namespace BirdsiteLive.Domain.Tools
private readonly Regex _urlRegex = new Regex(@"((http|ftp|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)");
private readonly InstanceSettings _instanceSettings;
+ private readonly ILogger _logger;
#region Ctor
- public StatusExtractor(InstanceSettings instanceSettings)
+ public StatusExtractor(InstanceSettings instanceSettings, ILogger logger)
{
_instanceSettings = instanceSettings;
+ _logger = logger;
}
#endregion
public (string content, Tag[] tags) Extract(string messageContent, bool extractMentions = true)
{
var tags = new List();
- messageContent = $" {messageContent} ";
+ //messageContent = $" {messageContent} ";
// Replace return lines
- messageContent = Regex.Replace(messageContent, @"\r\n\r\n?|\n\n", "
");
- messageContent = Regex.Replace(messageContent, @"\r\n?|\n", "
");
- messageContent = Regex.Replace(messageContent, @"\(@", "( @");
- messageContent = Regex.Replace(messageContent, @"\(#", "( #");
+ messageContent = Regex.Replace(messageContent, @"\r\n\r\n?|\n\n", "
");
+ messageContent = Regex.Replace(messageContent, @"\r\n?|\n", "
");
+ //messageContent = Regex.Replace(messageContent, @"\(@", "( @");
+ //messageContent = Regex.Replace(messageContent, @"\(#", "( #");
- // Secure emojis
- var emojiMatch = EmojiRegexes.Emoji.Matches(messageContent);
- foreach (Match m in emojiMatch)
- messageContent = Regex.Replace(messageContent, m.ToString(), $" {m} ");
+ //// Secure emojis
+ //var emojiMatch = EmojiRegexes.Emoji.Matches(messageContent);
+ //foreach (Match m in emojiMatch)
+ // messageContent = Regex.Replace(messageContent, m.ToString(), $" {m} ");
// Extract Urls
var urlMatch = _urlRegex.Matches(messageContent);
@@ -83,12 +87,19 @@ namespace BirdsiteLive.Domain.Tools
}
// Extract Hashtags
- var hashtagMatch = OrderByLength(_hastagRegex.Matches(messageContent));
+ var hashtagMatch = OrderByLength(HashtagRegexes.Hashtag.Matches(messageContent));
foreach (Match m in hashtagMatch.OrderByDescending(x => x.Length))
{
- var tag = m.ToString().Replace("#", string.Empty).Replace("\n", string.Empty).Trim();
- var url = $"https://{_instanceSettings.Domain}/tags/{tag}";
+ var tag = m.Groups[2].ToString();
+ //var tag = m.ToString().Replace("#", string.Empty).Replace("\n", string.Empty).Trim();
+ if (!HashtagRegexes.HashtagName.IsMatch(tag))
+ {
+ _logger.LogError("Parsing Hashtag failed: {Tag} on {Content}", tag, messageContent);
+ continue;
+ }
+
+ var url = $"https://{_instanceSettings.Domain}/tags/{tag}";
tags.Add(new Tag
{
name = $"#{tag}",
@@ -96,8 +107,11 @@ namespace BirdsiteLive.Domain.Tools
type = "Hashtag"
});
- messageContent = Regex.Replace(messageContent, m.ToString(),
- $@" #{tag}");
+
+ // The matched text comes straight from the message and can contain regex
+ // metacharacters (group 1 is any character, group 3 may be '.', '?', '|', ...),
+ // so it must be escaped before being used as a pattern.
+ messageContent = Regex.Replace(messageContent, Regex.Escape(m.Groups[0].ToString()),
+ $@"{m.Groups[1]}#{tag}{m.Groups[3]}");
}
// Extract Mentions
diff --git a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs
index f5dc91a..f126b12 100644
--- a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs
+++ b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs
@@ -3,7 +3,9 @@ using System.Linq;
using BirdsiteLive.Common.Settings;
using BirdsiteLive.Domain.Tools;
using BirdsiteLive.Twitter.Models;
+using Microsoft.Extensions.Logging;
using Microsoft.VisualStudio.TestTools.UnitTesting;
+using Moq;
namespace BirdsiteLive.Domain.Tests.Tools
{
@@ -28,11 +30,16 @@ namespace BirdsiteLive.Domain.Tests.Tools
#region Stubs
var message = "Bla.\n\n@Mention blo. https://t.co/pgtrJi9600";
#endregion
-
- var service = new StatusExtractor(_settings);
+
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.IsTrue(result.content.Contains("Bla."));
Assert.IsTrue(result.content.Contains("
"));
#endregion
@@ -45,10 +52,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
var message = "Bla.\n@Mention blo. https://t.co/pgtrJi9600";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.IsTrue(result.content.Contains("Bla."));
Assert.IsTrue(result.content.Contains("
"));
#endregion
@@ -61,10 +73,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
var message = $"Bla!{Environment.NewLine}https://t.co/L8BpyHgg25";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(0, result.tags.Length);
Assert.IsTrue(result.content.Contains("Bla!"));
@@ -79,10 +96,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
var message = $"Bla!{Environment.NewLine}https://www.eff.org/deeplinks/2020/07/pact-act-not-solution-problem-harmful-online-content";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(0, result.tags.Length);
Assert.IsTrue(result.content.Contains("Bla!"));
@@ -97,10 +119,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
var message = $"Bla!{Environment.NewLine}https://www.eff.org/deeplinks/2020/07/pact";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(0, result.tags.Length);
Assert.IsTrue(result.content.Contains("Bla!"));
@@ -115,10 +142,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
var message = $"https://t.co/L8BpyHgg25 Bla!{Environment.NewLine}https://www.eff.org/deeplinks/2020/07/pact-act-not-solution-problem-harmful-online-content";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(0, result.tags.Length);
Assert.IsTrue(result.content.Contains("Bla!"));
@@ -132,13 +164,18 @@ namespace BirdsiteLive.Domain.Tests.Tools
public void Extract_SingleHashTag_Test()
{
#region Stubs
- var message = $"Bla!{Environment.NewLine}#mytag";
+ var message = $"Bla!{Environment.NewLine}#mytag";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(1, result.tags.Length);
Assert.AreEqual("#mytag", result.tags.First().name);
Assert.AreEqual("Hashtag", result.tags.First().type);
@@ -153,13 +190,18 @@ namespace BirdsiteLive.Domain.Tests.Tools
public void Extract_SingleHashTag_AtStart_Test()
{
#region Stubs
- var message = $"#mytag Bla!";
+ var message = "#mytag Bla!";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(1, result.tags.Length);
Assert.AreEqual("#mytag", result.tags.First().name);
Assert.AreEqual("Hashtag", result.tags.First().type);
@@ -174,20 +216,25 @@ namespace BirdsiteLive.Domain.Tests.Tools
public void Extract_SingleHashTag_SpecialChar_Test()
{
#region Stubs
- var message = $"Bla!{Environment.NewLine}#COVIDー19";
+ var message = $"Bla!{Environment.NewLine}#COVID_19";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(1, result.tags.Length);
- Assert.AreEqual("#COVIDー19", result.tags.First().name);
+ Assert.AreEqual("#COVID_19", result.tags.First().name);
Assert.AreEqual("Hashtag", result.tags.First().type);
- Assert.AreEqual("https://domain.name/tags/COVIDー19", result.tags.First().href);
+ Assert.AreEqual("https://domain.name/tags/COVID_19", result.tags.First().href);
Assert.IsTrue(result.content.Contains("Bla!"));
- Assert.IsTrue(result.content.Contains(@"#COVIDー19"));
+ Assert.IsTrue(result.content.Contains(@"#COVID_19"));
#endregion
}
@@ -195,13 +242,18 @@ namespace BirdsiteLive.Domain.Tests.Tools
public void Extract_MultiHashTags_Test()
{
#region Stubs
- var message = $"Bla!{Environment.NewLine}#mytag #mytag2 #mytag3{Environment.NewLine}Test #bal Test";
+ var message = $"Bla!{Environment.NewLine}#mytag #mytag2 #mytag3{Environment.NewLine}Test #bal Test";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(4, result.tags.Length);
Assert.IsTrue(result.content.Contains("Bla!"));
Assert.IsTrue(result.content.Contains(@"#mytag"));
@@ -218,10 +270,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
var message = $"Bla!{Environment.NewLine}@mynickname";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(1, result.tags.Length);
Assert.AreEqual("@mynickname@domain.name", result.tags.First().name);
Assert.AreEqual("Mention", result.tags.First().type);
@@ -239,10 +296,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
var message = $"Bla!{Environment.NewLine}@my___nickname";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(1, result.tags.Length);
Assert.AreEqual("@my___nickname@domain.name", result.tags.First().name);
Assert.AreEqual("Mention", result.tags.First().type);
@@ -260,10 +322,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
var message = $"@mynickname Bla!";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(1, result.tags.Length);
Assert.AreEqual("@mynickname@domain.name", result.tags.First().name);
Assert.AreEqual("Mention", result.tags.First().type);
@@ -281,10 +348,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
var message = $"Bla!{Environment.NewLine}@mynickname @mynickname2 @mynickname3{Environment.NewLine}Test @dada Test";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(4, result.tags.Length);
Assert.IsTrue(result.content.Contains("Bla!"));
Assert.IsTrue(result.content.Contains(@"@mynickname"));
@@ -301,10 +373,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
var message = $"Bla!{Environment.NewLine}@mynickname #mytag2 @mynickname3{Environment.NewLine}Test @dada #dada Test";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(5, result.tags.Length);
Assert.IsTrue(result.content.Contains("Bla!"));
Assert.IsTrue(result.content.Contains(@"@mynickname"));
@@ -324,10 +401,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
//var message = $"tests@mynickname";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(1, result.tags.Length);
Assert.IsTrue(result.content.Contains(
@"😤 @mynickname"));
@@ -344,10 +426,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
//var message = $"tests@mynickname";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(1, result.tags.Length);
Assert.IsTrue(result.content.Equals(@"bla ( @mynickname test)"));
#endregion