Parse podcast:transcript url and store in SQLite (#6739)

This commit is contained in:
Tony Tam 2023-12-06 10:26:20 -10:00 committed by ByteHamster
parent 59c5042a65
commit 8adbad9b66
7 changed files with 106 additions and 4 deletions

View File

@ -43,6 +43,8 @@ public class FeedItem implements Serializable {
private transient Feed feed;
private long feedId;
private String podcastIndexChapterUrl;
private String podcastIndexTranscriptUrl;
private String podcastIndexTranscriptType;
private int state;
public static final int NEW = -1;
@ -83,7 +85,8 @@ public class FeedItem implements Serializable {
* */
public FeedItem(long id, String title, String link, Date pubDate, String paymentLink, long feedId,
boolean hasChapters, String imageUrl, int state,
String itemIdentifier, boolean autoDownloadEnabled, String podcastIndexChapterUrl) {
String itemIdentifier, boolean autoDownloadEnabled, String podcastIndexChapterUrl,
String transcriptType, String transcriptUrl) {
this.id = id;
this.title = title;
this.link = link;
@ -96,6 +99,10 @@ public class FeedItem implements Serializable {
this.itemIdentifier = itemIdentifier;
this.autoDownloadEnabled = autoDownloadEnabled;
this.podcastIndexChapterUrl = podcastIndexChapterUrl;
if (transcriptUrl != null) {
this.podcastIndexTranscriptUrl = transcriptUrl;
this.podcastIndexTranscriptType = transcriptType;
}
}
/**
@ -162,6 +169,9 @@ public class FeedItem implements Serializable {
if (other.podcastIndexChapterUrl != null) {
podcastIndexChapterUrl = other.podcastIndexChapterUrl;
}
if (other.getPodcastIndexTranscriptUrl() != null) {
    // URL and type describe the same transcript, so they are copied as a pair;
    // copying only the URL would leave it paired with this item's previous (or null) type.
    podcastIndexTranscriptUrl = other.podcastIndexTranscriptUrl;
    podcastIndexTranscriptType = other.podcastIndexTranscriptType;
}
}
public long getId() {
@ -413,6 +423,45 @@ public class FeedItem implements Serializable {
podcastIndexChapterUrl = url;
}
/**
 * Offers a transcript candidate for this item.
 * The decision whether it replaces the currently stored transcript is
 * delegated to {@link #updateTranscriptPreferredFormat(String, String)}.
 *
 * @param type MIME type of the transcript
 * @param url  location of the transcript
 */
public void setPodcastIndexTranscriptUrl(String type, String url) {
    this.updateTranscriptPreferredFormat(type, url);
}
/**
 * @return the transcript URL currently stored for this item, or {@code null} if none has been set
 */
public String getPodcastIndexTranscriptUrl() {
    return this.podcastIndexTranscriptUrl;
}
/**
 * @return the MIME type stored alongside the transcript URL, or {@code null} if none has been set
 */
public String getPodcastIndexTranscriptType() {
    return this.podcastIndexTranscriptType;
}
/**
 * Considers a transcript candidate and keeps at most one transcript per item,
 * preferring JSON over SubRip.
 *
 * <ul>
 *   <li>An empty or {@code null} type or url is ignored.</li>
 *   <li>A JSON transcript always replaces whatever is stored.</li>
 *   <li>A SubRip transcript (any of the accepted spellings) is stored only when
 *       no transcript is stored yet or the stored one is not JSON; its type is
 *       normalized to a single canonical value.</li>
 *   <li>Any other type is reported on standard output and ignored.</li>
 * </ul>
 *
 * @param type MIME type of the candidate transcript
 * @param url  location of the candidate transcript
 */
public void updateTranscriptPreferredFormat(String type, String url) {
    if (StringUtils.isEmpty(type) || StringUtils.isEmpty(url)) {
        return;
    }

    // NOTE(review): "application/srr" looks like a misspelling of "application/srt", but it is
    // the value that gets stored, so other code may depend on it - confirm before changing.
    final String canonicalSrr = "application/srr";
    final String jsonType = "application/json";

    switch (type) {
        case jsonType:
            podcastIndexTranscriptUrl = url;
            podcastIndexTranscriptType = type;
            break;
        case canonicalSrr:
        case "application/srt":
        case "application/x-subrip":
            // Compare from the constant side: a URL can be stored with a null type
            // (the constructor accepts one), and calling equals() on it would throw.
            if (podcastIndexTranscriptUrl == null || !jsonType.equals(podcastIndexTranscriptType)) {
                podcastIndexTranscriptUrl = url;
                podcastIndexTranscriptType = canonicalSrr;
            }
            break;
        default:
            System.out.println("Invalid format for transcript " + type);
            break;
    }
}
@NonNull
@Override
public String toString() {

View File

@ -14,6 +14,8 @@ public class PodcastIndex extends Namespace {
private static final String URL = "url";
private static final String FUNDING = "funding";
private static final String CHAPTERS = "chapters";
private static final String TRANSCRIPT = "transcript";
private static final String TYPE = "type";
@Override
public SyndElement handleElementStart(String localName, HandlerState state,
@ -28,6 +30,12 @@ public class PodcastIndex extends Namespace {
if (!TextUtils.isEmpty(href)) {
state.getCurrentItem().setPodcastIndexChapterUrl(href);
}
} else if (TRANSCRIPT.equals(localName)) {
String href = attributes.getValue(URL);
String type = attributes.getValue(TYPE);
if (!TextUtils.isEmpty(href) && !TextUtils.isEmpty(type)) {
state.getCurrentItem().setPodcastIndexTranscriptUrl(type, href);
}
}
return new SyndElement(localName, this);
}

View File

@ -97,6 +97,14 @@ public class RssParserTest {
assertEquals("https://example.com/funding3", feed.getPaymentLinks().get(2).url);
}
/**
 * Parses a feed whose item declares both a JSON and an SRT transcript and
 * checks that the JSON one is the transcript exposed by the item.
 */
@Test
public void testPodcastIndexTranscript() throws Exception {
    Feed parsedFeed = FeedParserTestHelper.runFeedParser(
            FeedParserTestHelper.getFeedFile("feed-rss-testPodcastIndexTranscript.xml"));
    assertEquals("application/json",
            parsedFeed.getItems().get(0).getPodcastIndexTranscriptType());
    assertEquals("https://podnews.net/audio/podnews231011.mp3.json",
            parsedFeed.getItems().get(0).getPodcastIndexTranscriptUrl());
}
@Test
public void testUnsupportedElements() throws Exception {
File feedFile = FeedParserTestHelper.getFeedFile("feed-rss-testUnsupportedElements.xml");

View File

@ -0,0 +1,13 @@
<?xml version='1.0' encoding='UTF-8' ?>
<rss version="2.0" xmlns:podcast="https://podcastindex.org/namespace/1.0">
<channel>
<title>title</title>
<item>
<title>Podcasts in YouTube make it to the UK</title>
<link>https://podnews.net/update/youtube-podcasts-uk</link>
<pubDate>Tue, 10 Oct 2023 08:46:31 +0000</pubDate>
<podcast:transcript url="https://podnews.net/audio/podnews231011.mp3.json" type="application/json" />
<podcast:transcript url="https://podnews.net/audio/podnews231011.mp3.srt" type="application/srt" />
</item>
</channel>
</rss>

View File

@ -342,6 +342,10 @@ class DBUpgrader {
// Migration step applied to databases whose version is below 3050000.
if (oldVersion < 3050000) {
// Feeds get a state column that defaults to "subscribed".
db.execSQL("ALTER TABLE " + PodDBAdapter.TABLE_NAME_FEEDS
+ " ADD COLUMN " + PodDBAdapter.KEY_STATE + " INTEGER DEFAULT " + Feed.STATE_SUBSCRIBED);
// Feed items get two TEXT columns (no default, so existing rows hold NULL)
// for the transcript URL and the transcript type.
// NOTE(review): these share the version gate of the KEY_STATE column above; a database
// that already passed this gate would not receive them - confirm 3050000 was unreleased.
db.execSQL("ALTER TABLE " + PodDBAdapter.TABLE_NAME_FEED_ITEMS
+ " ADD COLUMN " + PodDBAdapter.KEY_PODCASTINDEX_TRANSCRIPT_URL + " TEXT");
db.execSQL("ALTER TABLE " + PodDBAdapter.TABLE_NAME_FEED_ITEMS
+ " ADD COLUMN " + PodDBAdapter.KEY_PODCASTINDEX_TRANSCRIPT_TYPE + " TEXT");
}
}

View File

@ -122,6 +122,8 @@ public class PodDBAdapter {
public static final String KEY_NEW_EPISODES_ACTION = "new_episodes_action";
public static final String KEY_PODCASTINDEX_CHAPTER_URL = "podcastindex_chapter_url";
public static final String KEY_STATE = "state";
public static final String KEY_PODCASTINDEX_TRANSCRIPT_URL = "podcastindex_transcript_url";
public static final String KEY_PODCASTINDEX_TRANSCRIPT_TYPE = "podcastindex_transcript_type";
// Table names
public static final String TABLE_NAME_FEEDS = "Feeds";
@ -184,7 +186,9 @@ public class PodDBAdapter {
+ KEY_HAS_CHAPTERS + " INTEGER," + KEY_ITEM_IDENTIFIER + " TEXT,"
+ KEY_IMAGE_URL + " TEXT,"
+ KEY_AUTO_DOWNLOAD_ENABLED + " INTEGER,"
+ KEY_PODCASTINDEX_CHAPTER_URL + " TEXT)";
+ KEY_PODCASTINDEX_CHAPTER_URL + " TEXT,"
+ KEY_PODCASTINDEX_TRANSCRIPT_TYPE + " TEXT,"
+ KEY_PODCASTINDEX_TRANSCRIPT_URL + " TEXT" + ")";
private static final String CREATE_TABLE_FEED_MEDIA = "CREATE TABLE "
+ TABLE_NAME_FEED_MEDIA + " (" + TABLE_PRIMARY_KEY + KEY_DURATION
@ -272,7 +276,9 @@ public class PodDBAdapter {
+ TABLE_NAME_FEED_ITEMS + "." + KEY_ITEM_IDENTIFIER + ", "
+ TABLE_NAME_FEED_ITEMS + "." + KEY_IMAGE_URL + ", "
+ TABLE_NAME_FEED_ITEMS + "." + KEY_AUTO_DOWNLOAD_ENABLED + ", "
+ TABLE_NAME_FEED_ITEMS + "." + KEY_PODCASTINDEX_CHAPTER_URL;
+ TABLE_NAME_FEED_ITEMS + "." + KEY_PODCASTINDEX_CHAPTER_URL + ", "
+ TABLE_NAME_FEED_ITEMS + "." + KEY_PODCASTINDEX_TRANSCRIPT_TYPE + ", "
+ TABLE_NAME_FEED_ITEMS + "." + KEY_PODCASTINDEX_TRANSCRIPT_URL;
private static final String KEYS_FEED_MEDIA =
TABLE_NAME_FEED_MEDIA + "." + KEY_ID + " AS " + SELECT_KEY_MEDIA_ID + ", "
@ -675,6 +681,14 @@ public class PodDBAdapter {
values.put(KEY_IMAGE_URL, item.getImageUrl());
values.put(KEY_PODCASTINDEX_CHAPTER_URL, item.getPodcastIndexChapterUrl());
// Only one transcript (URL plus its type) is stored per item; JSON is preferred if it exists.
// The two columns are added to `values` only when the item has a transcript URL;
// when it has none they are left out of this write entirely.
String type = item.getPodcastIndexTranscriptType();
String url = item.getPodcastIndexTranscriptUrl();
if (url != null) {
values.put(KEY_PODCASTINDEX_TRANSCRIPT_TYPE, type);
values.put(KEY_PODCASTINDEX_TRANSCRIPT_URL, url);
}
if (item.getId() == 0) {
item.setId(db.insert(TABLE_NAME_FEED_ITEMS, null, values));
} else {

View File

@ -26,6 +26,8 @@ public class FeedItemCursor extends CursorWrapper {
private final int indexImageUrl;
private final int indexPodcastIndexChapterUrl;
private final int indexMediaId;
private final int indexPodcastIndexTranscriptUrl;
private final int indexPodcastIndexTranscriptType;
public FeedItemCursor(Cursor cursor) {
super(new FeedMediaCursor(cursor));
@ -43,6 +45,8 @@ public class FeedItemCursor extends CursorWrapper {
indexImageUrl = cursor.getColumnIndexOrThrow(PodDBAdapter.KEY_IMAGE_URL);
indexPodcastIndexChapterUrl = cursor.getColumnIndexOrThrow(PodDBAdapter.KEY_PODCASTINDEX_CHAPTER_URL);
indexMediaId = cursor.getColumnIndexOrThrow(PodDBAdapter.SELECT_KEY_MEDIA_ID);
indexPodcastIndexTranscriptUrl = cursor.getColumnIndexOrThrow(PodDBAdapter.KEY_PODCASTINDEX_TRANSCRIPT_URL);
indexPodcastIndexTranscriptType = cursor.getColumnIndexOrThrow(PodDBAdapter.KEY_PODCASTINDEX_TRANSCRIPT_TYPE);
}
/**
@ -62,7 +66,9 @@ public class FeedItemCursor extends CursorWrapper {
getInt(indexRead),
getString(indexItemIdentifier),
getLong(indexAutoDownload) > 0,
getString(indexPodcastIndexChapterUrl));
getString(indexPodcastIndexChapterUrl),
// The FeedItem constructor declares (transcriptType, transcriptUrl) in that order;
// both are Strings, so passing them reversed compiles but swaps the stored values.
getString(indexPodcastIndexTranscriptType),
getString(indexPodcastIndexTranscriptUrl));
if (!isNull(indexMediaId)) {
item.setMedia(feedMediaCursor.getFeedMedia());
}