Also de-duplicate episodes whose media types are different but similar (#6776)

This commit is contained in:
ByteHamster 2023-11-26 11:15:14 +01:00 committed by GitHub
parent 95f431fec9
commit 6177cc2460
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 17 additions and 1 deletions

View File

@ -28,7 +28,7 @@ public class FeedItemDuplicateGuesser {
return titlesLookSimilar(item1, item2)
&& datesLookSimilar(item1, item2)
&& durationsLookSimilar(media1, media2)
&& TextUtils.equals(media1.getMime_type(), media2.getMime_type());
&& mimeTypeLooksSimilar(media1, media2);
}
private static boolean sameAndNotEmpty(String string1, String string2) {
@ -52,6 +52,19 @@ public class FeedItemDuplicateGuesser {
return Math.abs(media1.getDuration() - media2.getDuration()) < 10 * 60L * 1000L;
}
/**
 * Decides whether the MIME types of two media objects are close enough for the
 * surrounding duplicate check to keep treating the episodes as candidates.
 *
 * <p>If either MIME type is {@code null}, the types are considered similar. When both
 * values contain a {@code '/'}, only the part before the first slash is compared, so
 * {@code audio/mpeg} and {@code audio/mp3} match while {@code audio/*} and
 * {@code video/*} do not. If at least one value has no slash, the complete strings
 * are compared instead.
 *
 * <p>The comparison ignores letter case because MIME type names are not case
 * sensitive; {@code Audio/mpeg} and {@code audio/mpeg} describe the same type.
 *
 * @param media1 first media object; its MIME type may be {@code null}
 * @param media2 second media object; its MIME type may be {@code null}
 * @return {@code true} if the MIME types look similar, {@code false} otherwise
 */
private static boolean mimeTypeLooksSimilar(FeedMedia media1, FeedMedia media2) {
    String mimeType1 = media1.getMime_type();
    String mimeType2 = media2.getMime_type();
    if (mimeType1 == null || mimeType2 == null) {
        // A missing type is not counted as a mismatch.
        return true;
    }
    if (mimeType1.contains("/") && mimeType2.contains("/")) {
        // Keep only the part before the first slash (e.g. "audio" from "audio/mpeg").
        mimeType1 = mimeType1.substring(0, mimeType1.indexOf('/'));
        mimeType2 = mimeType2.substring(0, mimeType2.indexOf('/'));
    }
    // Both values are non-null here, so a plain case-insensitive comparison is safe.
    return mimeType1.equalsIgnoreCase(mimeType2);
}
/**
 * Compares the titles of two feed items after canonicalizing each of them.
 *
 * <p>Each title is run through {@code canonicalizeTitle}; the two results are then
 * passed to {@code sameAndNotEmpty}, whose verdict is returned unchanged.
 *
 * @param item1 first feed item
 * @param item2 second feed item
 * @return the result of {@code sameAndNotEmpty} for the two canonicalized titles
 */
private static boolean titlesLookSimilar(FeedItem item1, FeedItem item2) {
    final String firstTitle = canonicalizeTitle(item1.getTitle());
    final String secondTitle = canonicalizeTitle(item2.getTitle());
    return sameAndNotEmpty(firstTitle, secondTitle);
}

View File

@ -44,6 +44,9 @@ public class FeedItemDuplicateGuesserTest {
assertFalse(FeedItemDuplicateGuesser.seemDuplicates(
item("id1", "Title", "example.com/episode1", 10, 5 * MINUTES, "audio/*"),
item("id2", "Title", "example.com/episode2", 10, 5 * MINUTES, "video/*")));
assertTrue(FeedItemDuplicateGuesser.seemDuplicates(
item("id1", "Title", "example.com/episode1", 10, 5 * MINUTES, "audio/mpeg"),
item("id2", "Title", "example.com/episode2", 10, 5 * MINUTES, "audio/mp3")));
assertFalse(FeedItemDuplicateGuesser.seemDuplicates(
item("id1", "Title", "example.com/episode1", 5 * DAYS, 5 * MINUTES, "audio/*"),
item("id2", "Title", "example.com/episode2", 2 * DAYS, 5 * MINUTES, "audio/*")));