Only mark items as duplicates if duration and date are similar
This commit is contained in:
parent
19dfa08905
commit
dde499f5b1
|
@ -352,42 +352,13 @@ public final class DBTasks {
|
|||
*/
|
||||
private static FeedItem searchFeedItemGuessDuplicate(List<FeedItem> items, FeedItem searchItem) {
|
||||
for (FeedItem item : items) {
|
||||
if ((item.getMedia() != null)
|
||||
&& (searchItem.getMedia() != null)
|
||||
&& !TextUtils.isEmpty(item.getMedia().getStreamUrl())
|
||||
&& !TextUtils.isEmpty(searchItem.getMedia().getStreamUrl())
|
||||
&& TextUtils.equals(item.getMedia().getStreamUrl(), searchItem.getMedia().getStreamUrl())) {
|
||||
if (FeedItemDuplicateGuesser.seemDuplicates(item, searchItem)) {
|
||||
return item;
|
||||
} else if (titlesLookSimilar(item.getTitle(), searchItem.getTitle())) {
|
||||
if (searchItem.getPubDate() == null || item.getPubDate() == null) {
|
||||
continue;
|
||||
}
|
||||
long dateOriginal = item.getPubDate().getTime();
|
||||
long dateNew = searchItem.getPubDate().getTime();
|
||||
if (Math.abs(dateOriginal - dateNew) < 7L * 24L * 3600L * 1000L) { // Same week
|
||||
return item;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private static boolean titlesLookSimilar(String title1, String title2) {
|
||||
if (TextUtils.isEmpty(title1) || TextUtils.isEmpty(title2)) {
|
||||
return false;
|
||||
}
|
||||
return canonicalizeTitle(title1).equals(canonicalizeTitle(title2));
|
||||
}
|
||||
|
||||
private static String canonicalizeTitle(String title) {
|
||||
return title
|
||||
.trim()
|
||||
.replace('“', '"')
|
||||
.replace('”', '"')
|
||||
.replace('„', '"')
|
||||
.replace('—', '-');
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds new Feeds to the database or updates the old versions if they already exist. If another Feed with the same
|
||||
* identifying value already exists, this method will add new FeedItems from the new Feed to the existing Feed.
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
package de.danoeh.antennapod.core.storage;
|
||||
|
||||
import android.text.TextUtils;
|
||||
import de.danoeh.antennapod.model.feed.FeedItem;
|
||||
import de.danoeh.antennapod.model.feed.FeedMedia;
|
||||
|
||||
import java.text.DateFormat;
|
||||
import java.util.Locale;
|
||||
|
||||
/**
|
||||
* Publishers sometimes mess up their feed by adding episodes twice or by changing the ID of existing episodes.
|
||||
* This class tries to guess if publishers actually meant another episode,
|
||||
* even if their feed explicitly says that the episodes are different.
|
||||
*/
|
||||
public class FeedItemDuplicateGuesser {
|
||||
public static boolean seemDuplicates(FeedItem item1, FeedItem item2) {
|
||||
if (sameAndNotEmpty(item1.getItemIdentifier(), item2.getItemIdentifier())) {
|
||||
return true;
|
||||
}
|
||||
FeedMedia media1 = item1.getMedia();
|
||||
FeedMedia media2 = item2.getMedia();
|
||||
if (media1 == null || media2 == null) {
|
||||
return false;
|
||||
}
|
||||
if (sameAndNotEmpty(media1.getStreamUrl(), media2.getStreamUrl())) {
|
||||
return true;
|
||||
}
|
||||
return titlesLookSimilar(item1, item2)
|
||||
&& datesLookSimilar(item1, item2)
|
||||
&& durationsLookSimilar(media1, media2)
|
||||
&& TextUtils.equals(media1.getMime_type(), media2.getMime_type());
|
||||
}
|
||||
|
||||
private static boolean sameAndNotEmpty(String string1, String string2) {
|
||||
if (TextUtils.isEmpty(string1) || TextUtils.isEmpty(string2)) {
|
||||
return false;
|
||||
}
|
||||
return string1.equals(string2);
|
||||
}
|
||||
|
||||
private static boolean datesLookSimilar(FeedItem item1, FeedItem item2) {
|
||||
if (item1.getPubDate() == null || item2.getPubDate() == null) {
|
||||
return false;
|
||||
}
|
||||
DateFormat dateFormat = DateFormat.getDateInstance(DateFormat.SHORT, Locale.US); // MM/DD/YY
|
||||
String dateOriginal = dateFormat.format(item2.getPubDate());
|
||||
String dateNew = dateFormat.format(item1.getPubDate());
|
||||
return TextUtils.equals(dateOriginal, dateNew); // Same date; time is ignored.
|
||||
}
|
||||
|
||||
private static boolean durationsLookSimilar(FeedMedia media1, FeedMedia media2) {
|
||||
return Math.abs(media1.getDuration() - media2.getDuration()) < 10 * 60L * 1000L;
|
||||
}
|
||||
|
||||
private static boolean titlesLookSimilar(FeedItem item1, FeedItem item2) {
|
||||
return sameAndNotEmpty(canonicalizeTitle(item1.getTitle()), canonicalizeTitle(item2.getTitle()));
|
||||
}
|
||||
|
||||
private static String canonicalizeTitle(String title) {
|
||||
if (title == null) {
|
||||
return "";
|
||||
}
|
||||
return title
|
||||
.trim()
|
||||
.replace('“', '"')
|
||||
.replace('”', '"')
|
||||
.replace('„', '"')
|
||||
.replace('—', '-');
|
||||
}
|
||||
}
|
|
@ -0,0 +1,66 @@
|
|||
package de.danoeh.antennapod.core.storage;
|
||||
|
||||
import de.danoeh.antennapod.model.feed.FeedItem;
|
||||
import de.danoeh.antennapod.model.feed.FeedMedia;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Date;
|
||||
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
/**
|
||||
* Test class for {@link FeedItemDuplicateGuesser}.
|
||||
*/
|
||||
public class FeedItemDuplicateGuesserTest {
|
||||
private static final long MINUTES = 1000 * 60;
|
||||
private static final long DAYS = 24 * 60 * MINUTES;
|
||||
|
||||
@Test
|
||||
public void testSameId() {
|
||||
assertTrue(FeedItemDuplicateGuesser.seemDuplicates(
|
||||
item("id", "Title1", "example.com/episode1", 0, 5 * MINUTES, "audio/*"),
|
||||
item("id", "Title2", "example.com/episode2", 0, 20 * MINUTES, "video/*")));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDuplicateDownloadUrl() {
|
||||
assertTrue(FeedItemDuplicateGuesser.seemDuplicates(
|
||||
item("id1", "Title1", "example.com/episode", 0, 5 * MINUTES, "audio/*"),
|
||||
item("id2", "Title2", "example.com/episode", 0, 5 * MINUTES, "audio/*")));
|
||||
assertFalse(FeedItemDuplicateGuesser.seemDuplicates(
|
||||
item("id1", "Title1", "example.com/episode1", 0, 5 * MINUTES, "audio/*"),
|
||||
item("id2", "Title2", "example.com/episode2", 0, 5 * MINUTES, "audio/*")));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOtherAttributes() {
|
||||
assertTrue(FeedItemDuplicateGuesser.seemDuplicates(
|
||||
item("id1", "Title", "example.com/episode1", 10, 5 * MINUTES, "audio/*"),
|
||||
item("id2", "Title", "example.com/episode2", 10, 5 * MINUTES, "audio/*")));
|
||||
assertTrue(FeedItemDuplicateGuesser.seemDuplicates(
|
||||
item("id1", "Title", "example.com/episode1", 10, 5 * MINUTES, "audio/*"),
|
||||
item("id2", "Title", "example.com/episode2", 20, 6 * MINUTES, "audio/*")));
|
||||
assertFalse(FeedItemDuplicateGuesser.seemDuplicates(
|
||||
item("id1", "Title", "example.com/episode1", 10, 5 * MINUTES, "audio/*"),
|
||||
item("id2", "Title", "example.com/episode2", 10, 5 * MINUTES, "video/*")));
|
||||
assertFalse(FeedItemDuplicateGuesser.seemDuplicates(
|
||||
item("id1", "Title", "example.com/episode1", 5 * DAYS, 5 * MINUTES, "audio/*"),
|
||||
item("id2", "Title", "example.com/episode2", 2 * DAYS, 5 * MINUTES, "audio/*")));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoMediaType() {
|
||||
assertTrue(FeedItemDuplicateGuesser.seemDuplicates(
|
||||
item("id1", "Title", "example.com/episode1", 2 * DAYS, 5 * MINUTES, ""),
|
||||
item("id2", "Title", "example.com/episode2", 2 * DAYS, 5 * MINUTES, "")));
|
||||
}
|
||||
|
||||
private FeedItem item(String guid, String title, String downloadUrl,
|
||||
long date, long duration, String mime) {
|
||||
FeedItem item = new FeedItem(0, title, guid, "link", new Date(date), FeedItem.PLAYED, null);
|
||||
FeedMedia media = new FeedMedia(item, downloadUrl, duration, mime);
|
||||
item.setMedia(media);
|
||||
return item;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue