mirror of
				https://github.com/NicolasConstant/BirdsiteLive
				synced 2025-06-05 21:49:16 +02:00 
			
		
		
		
	fix return line parsing
This commit is contained in:
		| @@ -1,4 +1,5 @@ | |||||||
| using System.Collections.Generic; | using System.Collections.Generic; | ||||||
|  | using System.Linq; | ||||||
| using System.Text.RegularExpressions; | using System.Text.RegularExpressions; | ||||||
| using BirdsiteLive.ActivityPub.Models; | using BirdsiteLive.ActivityPub.Models; | ||||||
| using BirdsiteLive.Common.Settings; | using BirdsiteLive.Common.Settings; | ||||||
| @@ -13,11 +14,15 @@ namespace BirdsiteLive.Domain.Tools | |||||||
|     public class StatusExtractor : IStatusExtractor |     public class StatusExtractor : IStatusExtractor | ||||||
|     { |     { | ||||||
|         private readonly Regex _hastagRegex = new Regex(@"\W(\#[a-zA-Z0-9_ー]+\b)(?!;)"); |         private readonly Regex _hastagRegex = new Regex(@"\W(\#[a-zA-Z0-9_ー]+\b)(?!;)"); | ||||||
|  |         //private readonly Regex _hastagRegex = new Regex(@"#\w+"); | ||||||
|         //private readonly Regex _hastagRegex = new Regex(@"(?<=[\s>]|^)#(\w*[a-zA-Z0-9_ー]+\w*)\b(?!;)"); |         //private readonly Regex _hastagRegex = new Regex(@"(?<=[\s>]|^)#(\w*[a-zA-Z0-9_ー]+\w*)\b(?!;)"); | ||||||
|         //private readonly Regex _hastagRegex = new Regex(@"(?<=[\s>]|^)#(\w*[a-zA-Z0-9_ー]+)\b(?!;)"); |         //private readonly Regex _hastagRegex = new Regex(@"(?<=[\s>]|^)#(\w*[a-zA-Z0-9_ー]+)\b(?!;)"); | ||||||
|  |  | ||||||
|         private readonly Regex _mentionRegex = new Regex(@"\W(\@[a-zA-Z0-9_ー]+\b)(?!;)"); |         private readonly Regex _mentionRegex = new Regex(@"\W(\@[a-zA-Z0-9_ー]+\b)(?!;)"); | ||||||
|  |         //private readonly Regex _mentionRegex = new Regex(@"@\w+"); | ||||||
|         //private readonly Regex _mentionRegex = new Regex(@"(?<=[\s>]|^)@(\w*[a-zA-Z0-9_ー]+\w*)\b(?!;)"); |         //private readonly Regex _mentionRegex = new Regex(@"(?<=[\s>]|^)@(\w*[a-zA-Z0-9_ー]+\w*)\b(?!;)"); | ||||||
|         //private readonly Regex _mentionRegex = new Regex(@"(?<=[\s>]|^)@(\w*[a-zA-Z0-9_ー]+)\b(?!;)"); |         //private readonly Regex _mentionRegex = new Regex(@"(?<=[\s>]|^)@(\w*[a-zA-Z0-9_ー]+)\b(?!;)"); | ||||||
|  |  | ||||||
|         private readonly Regex _urlRegex = new Regex(@"((http|ftp|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)"); |         private readonly Regex _urlRegex = new Regex(@"((http|ftp|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)"); | ||||||
|         private readonly InstanceSettings _instanceSettings; |         private readonly InstanceSettings _instanceSettings; | ||||||
|  |  | ||||||
| @@ -34,12 +39,12 @@ namespace BirdsiteLive.Domain.Tools | |||||||
|             messageContent = $" {messageContent} "; |             messageContent = $" {messageContent} "; | ||||||
|  |  | ||||||
|             // Replace return lines |             // Replace return lines | ||||||
|             messageContent = Regex.Replace(messageContent, @"\r\n\r\n?|\n\n", "</p><p>"); |             messageContent = Regex.Replace(messageContent, @"\r\n\r\n?|\n\n", "</p><p> "); | ||||||
|             messageContent = Regex.Replace(messageContent, @"\r\n?|\n", "<br/>"); |             messageContent = Regex.Replace(messageContent, @"\r\n?|\n", "<br/> "); | ||||||
|  |  | ||||||
|             // Extract Urls |             // Extract Urls | ||||||
|             var urlMatch = _urlRegex.Matches(messageContent); |             var urlMatch = _urlRegex.Matches(messageContent); | ||||||
|             foreach (var m in urlMatch) |             foreach (Match m in urlMatch) | ||||||
|             { |             { | ||||||
|                 var url = m.ToString().Replace("\n", string.Empty).Trim(); |                 var url = m.ToString().Replace("\n", string.Empty).Trim(); | ||||||
|  |  | ||||||
| @@ -69,8 +74,8 @@ namespace BirdsiteLive.Domain.Tools | |||||||
|             } |             } | ||||||
|  |  | ||||||
|             // Extract Hashtags |             // Extract Hashtags | ||||||
|             var hashtagMatch = _hastagRegex.Matches(messageContent); |             var hashtagMatch = OrderByLength(_hastagRegex.Matches(messageContent)); | ||||||
|             foreach (var m in hashtagMatch) |             foreach (Match m in hashtagMatch) | ||||||
|             { |             { | ||||||
|                 var tag = m.ToString().Replace("#", string.Empty).Replace("\n", string.Empty).Trim(); |                 var tag = m.ToString().Replace("#", string.Empty).Replace("\n", string.Empty).Trim(); | ||||||
|                 var url = $"https://{_instanceSettings.Domain}/tags/{tag}"; |                 var url = $"https://{_instanceSettings.Domain}/tags/{tag}"; | ||||||
| @@ -87,8 +92,8 @@ namespace BirdsiteLive.Domain.Tools | |||||||
|             } |             } | ||||||
|  |  | ||||||
|             // Extract Mentions |             // Extract Mentions | ||||||
|             var mentionMatch = _mentionRegex.Matches(messageContent); |             var mentionMatch = OrderByLength(_mentionRegex.Matches(messageContent)); | ||||||
|             foreach (var m in mentionMatch) |             foreach (Match m in mentionMatch) | ||||||
|             { |             { | ||||||
|                 var mention = m.ToString().Replace("@", string.Empty).Replace("\n", string.Empty).Trim(); |                 var mention = m.ToString().Replace("@", string.Empty).Replace("\n", string.Empty).Trim(); | ||||||
|                 var url = $"https://{_instanceSettings.Domain}/users/{mention}"; |                 var url = $"https://{_instanceSettings.Domain}/users/{mention}"; | ||||||
| @@ -105,7 +110,21 @@ namespace BirdsiteLive.Domain.Tools | |||||||
|                     $@" <span class=""h-card""><a href=""https://{_instanceSettings.Domain}/@{mention}"" class=""u-url mention"">@<span>{mention}</span></a></span>"); |                     $@" <span class=""h-card""><a href=""https://{_instanceSettings.Domain}/@{mention}"" class=""u-url mention"">@<span>{mention}</span></a></span>"); | ||||||
|             } |             } | ||||||
|  |  | ||||||
|  |             // Clean up return lines | ||||||
|  |             messageContent = Regex.Replace(messageContent, @"<p> ", "<p>"); | ||||||
|  |             messageContent = Regex.Replace(messageContent, @"<br/> ", "<br/>"); | ||||||
|  |  | ||||||
|             return (messageContent.Trim(), tags.ToArray()); |             return (messageContent.Trim(), tags.ToArray()); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |         private IEnumerable<Match> OrderByLength(MatchCollection matches) | ||||||
|  |         { | ||||||
|  |             var result = new List<Match>(); | ||||||
|  |  | ||||||
|  |             foreach (Match m in matches) result.Add(m); | ||||||
|  |             result = result.OrderByDescending(x => x.Length).ToList(); | ||||||
|  |  | ||||||
|  |             return result; | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -22,6 +22,38 @@ namespace BirdsiteLive.Domain.Tests.Tools | |||||||
|         } |         } | ||||||
|         #endregion |         #endregion | ||||||
|  |  | ||||||
|  |         [TestMethod] | ||||||
|  |         public void Extract_ReturnLines_Test() | ||||||
|  |         { | ||||||
|  |             #region Stubs | ||||||
|  |             var message = "Bla.\n\n@Mention blo. https://t.co/pgtrJi9600"; | ||||||
|  |             #endregion | ||||||
|  |              | ||||||
|  |             var service = new StatusExtractor(_settings); | ||||||
|  |             var result = service.ExtractTags(message); | ||||||
|  |  | ||||||
|  |             #region Validations | ||||||
|  |             Assert.IsTrue(result.content.Contains("Bla.")); | ||||||
|  |             Assert.IsTrue(result.content.Contains("</p><p>")); | ||||||
|  |             #endregion | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         [TestMethod] | ||||||
|  |         public void Extract_ReturnSingleLines_Test() | ||||||
|  |         { | ||||||
|  |             #region Stubs | ||||||
|  |             var message = "Bla.\n@Mention blo. https://t.co/pgtrJi9600"; | ||||||
|  |             #endregion | ||||||
|  |  | ||||||
|  |             var service = new StatusExtractor(_settings); | ||||||
|  |             var result = service.ExtractTags(message); | ||||||
|  |  | ||||||
|  |             #region Validations | ||||||
|  |             Assert.IsTrue(result.content.Contains("Bla.")); | ||||||
|  |             Assert.IsTrue(result.content.Contains("<br/>")); | ||||||
|  |             #endregion | ||||||
|  |         } | ||||||
|  |  | ||||||
|         [TestMethod] |         [TestMethod] | ||||||
|         public void Extract_FormatUrl_Test() |         public void Extract_FormatUrl_Test() | ||||||
|         { |         { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user