diff --git a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs
index c8067c1..722f369 100644
--- a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs
+++ b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs
@@ -18,6 +18,7 @@ namespace BirdsiteLive.Domain.Tools
         private readonly Regex _mentionRegex = new Regex(@"\W(\@[a-zA-Z0-9_ー]+\b)(?!;)");
         //private readonly Regex _mentionRegex = new Regex(@"(?<=[\s>]|^)@(\w*[a-zA-Z0-9_ー]+\w*)\b(?!;)");
         //private readonly Regex _mentionRegex = new Regex(@"(?<=[\s>]|^)@(\w*[a-zA-Z0-9_ー]+)\b(?!;)");
+        private readonly Regex _urlRegex = new Regex(@"((http|ftp|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)");
         private readonly InstanceSettings _instanceSettings;
 
         #region Ctor
@@ -32,6 +33,38 @@ namespace BirdsiteLive.Domain.Tools
             var tags = new List();
             messageContent = $" {messageContent} ";
 
+            // Extract Urls
+            var urlMatch = _urlRegex.Matches(messageContent);
+            foreach (var m in urlMatch)
+            {
+                var url = m.ToString().Replace("\n", string.Empty).Trim();
+
+                var protocol = "https://";
+                if (url.StartsWith("http://")) protocol = "http://";
+                else if (url.StartsWith("ftp://")) protocol = "ftp://";
+                // Strip only the leading scheme; a URL may legitimately embed another URL further on.
+                var truncatedUrl = url.Substring(protocol.Length);
+
+                if (truncatedUrl.StartsWith("www."))
+                {
+                    protocol += "www.";
+                    truncatedUrl = truncatedUrl.Substring("www.".Length); // drop the prefix only, keep any later "www."
+                }
+
+                var firstPart = truncatedUrl;
+                var secondPart = string.Empty;
+
+                if (truncatedUrl.Length > 30)
+                {
+                    firstPart = truncatedUrl.Substring(0, 30);
+                    secondPart = truncatedUrl.Substring(30);
+                }
+                // Literal replace: the matched URL is plain text, and '?', '+', '^' would act as regex operators.
+                messageContent = messageContent.Replace(m.ToString(),
+                    $@" {protocol}{firstPart}{secondPart}");
+            }
+
+            // Extract Hashtags
             var hashtagMatch = _hastagRegex.Matches(messageContent);
             foreach (var m in hashtagMatch)
             {
@@ -49,6 +82,7 @@ namespace BirdsiteLive.Domain.Tools
                     $@" #{tag}");
             }
 
+            // Extract Mentions
             var mentionMatch = _mentionRegex.Matches(messageContent);
             foreach (var m in mentionMatch)
             {
diff --git a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs
index d2f3bc9..790ba62 100644
--- a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs
+++ b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs
@@ -22,6 +22,80 @@ namespace BirdsiteLive.Domain.Tests.Tools
         }
         #endregion
 
+        [TestMethod]
+        public void Extract_FormatUrl_Test()
+        {
+            #region Stubs
+            var message = $"Bla!{Environment.NewLine}https://t.co/L8BpyHgg25";
+            #endregion
+
+            var service = new StatusExtractor(_settings);
+            var result = service.ExtractTags(message);
+
+            #region Validations
+            Assert.AreEqual(0, result.tags.Length);
+
+            Assert.IsTrue(result.content.Contains("Bla!"));
+            Assert.IsTrue(result.content.Contains(@"https://t.co/L8BpyHgg25"));
+            #endregion
+        }
+
+        [TestMethod]
+        public void Extract_FormatUrl_Long_Test()
+        {
+            #region Stubs
+            var message = $"Bla!{Environment.NewLine}https://www.eff.org/deeplinks/2020/07/pact-act-not-solution-problem-harmful-online-content";
+            #endregion
+
+            var service = new StatusExtractor(_settings);
+            var result = service.ExtractTags(message);
+
+            #region Validations
+            Assert.AreEqual(0, result.tags.Length);
+
+            Assert.IsTrue(result.content.Contains("Bla!"));
+            Assert.IsTrue(result.content.Contains(@"https://www.eff.org/deeplinks/2020/07/pact-act-not-solution-problem-harmful-online-content"));
+            #endregion
+        }
+
+        [TestMethod]
+        public void Extract_FormatUrl_Exact_Test()
+        {
+            #region Stubs
+            var message = $"Bla!{Environment.NewLine}https://www.eff.org/deeplinks/2020/07/pact";
+            #endregion
+
+            var service = new StatusExtractor(_settings);
+            var result = service.ExtractTags(message);
+
+            #region Validations
+            Assert.AreEqual(0, result.tags.Length);
+
+            Assert.IsTrue(result.content.Contains("Bla!"));
+            Assert.IsTrue(result.content.Contains(@"https://www.eff.org/deeplinks/2020/07/pact"));
+            #endregion
+        }
+
+        [TestMethod]
+        public void Extract_MultiUrls__Test()
+        {
+            #region Stubs
+            var message = $"https://t.co/L8BpyHgg25 Bla!{Environment.NewLine}https://www.eff.org/deeplinks/2020/07/pact-act-not-solution-problem-harmful-online-content";
+            #endregion
+
+            var service = new StatusExtractor(_settings);
+            var result = service.ExtractTags(message);
+
+            #region Validations
+            Assert.AreEqual(0, result.tags.Length);
+
+            Assert.IsTrue(result.content.Contains("Bla!"));
+            Assert.IsTrue(result.content.Contains(@"https://t.co/L8BpyHgg25"));
+
+            Assert.IsTrue(result.content.Contains(@"https://www.eff.org/deeplinks/2020/07/pact-act-not-solution-problem-harmful-online-content"));
+            #endregion
+        }
+
         [TestMethod]
         public void Extract_SingleHashTag_Test()
         {