Parse podcast:transcript url and store in SQLite (#6739)
This commit is contained in:
parent
59c5042a65
commit
8adbad9b66
|
@ -43,6 +43,8 @@ public class FeedItem implements Serializable {
|
|||
private transient Feed feed;
|
||||
private long feedId;
|
||||
private String podcastIndexChapterUrl;
|
||||
private String podcastIndexTranscriptUrl;
|
||||
private String podcastIndexTranscriptType;
|
||||
|
||||
private int state;
|
||||
public static final int NEW = -1;
|
||||
|
@ -83,7 +85,8 @@ public class FeedItem implements Serializable {
|
|||
* */
|
||||
public FeedItem(long id, String title, String link, Date pubDate, String paymentLink, long feedId,
|
||||
boolean hasChapters, String imageUrl, int state,
|
||||
String itemIdentifier, boolean autoDownloadEnabled, String podcastIndexChapterUrl) {
|
||||
String itemIdentifier, boolean autoDownloadEnabled, String podcastIndexChapterUrl,
|
||||
String transcriptType, String transcriptUrl) {
|
||||
this.id = id;
|
||||
this.title = title;
|
||||
this.link = link;
|
||||
|
@ -96,6 +99,10 @@ public class FeedItem implements Serializable {
|
|||
this.itemIdentifier = itemIdentifier;
|
||||
this.autoDownloadEnabled = autoDownloadEnabled;
|
||||
this.podcastIndexChapterUrl = podcastIndexChapterUrl;
|
||||
if (transcriptUrl != null) {
|
||||
this.podcastIndexTranscriptUrl = transcriptUrl;
|
||||
this.podcastIndexTranscriptType = transcriptType;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -162,6 +169,9 @@ public class FeedItem implements Serializable {
|
|||
if (other.podcastIndexChapterUrl != null) {
|
||||
podcastIndexChapterUrl = other.podcastIndexChapterUrl;
|
||||
}
|
||||
if (other.getPodcastIndexTranscriptUrl() != null) {
|
||||
podcastIndexTranscriptUrl = other.podcastIndexTranscriptUrl;
|
||||
}
|
||||
}
|
||||
|
||||
public long getId() {
|
||||
|
@ -413,6 +423,45 @@ public class FeedItem implements Serializable {
|
|||
podcastIndexChapterUrl = url;
|
||||
}
|
||||
|
||||
public void setPodcastIndexTranscriptUrl(String type, String url) {
|
||||
updateTranscriptPreferredFormat(type, url);
|
||||
}
|
||||
|
||||
public String getPodcastIndexTranscriptUrl() {
|
||||
return podcastIndexTranscriptUrl;
|
||||
}
|
||||
|
||||
public String getPodcastIndexTranscriptType() {
|
||||
return podcastIndexTranscriptType;
|
||||
}
|
||||
|
||||
public void updateTranscriptPreferredFormat(String type, String url) {
|
||||
if (StringUtils.isEmpty(type) || StringUtils.isEmpty(url)) {
|
||||
return;
|
||||
}
|
||||
|
||||
String canonicalSrr = "application/srr";
|
||||
String jsonType = "application/json";
|
||||
|
||||
switch (type) {
|
||||
case "application/json":
|
||||
podcastIndexTranscriptUrl = url;
|
||||
podcastIndexTranscriptType = type;
|
||||
break;
|
||||
case "application/srr":
|
||||
case "application/srt":
|
||||
case "application/x-subrip":
|
||||
if (podcastIndexTranscriptUrl == null || !podcastIndexTranscriptType.equals(jsonType)) {
|
||||
podcastIndexTranscriptUrl = url;
|
||||
podcastIndexTranscriptType = canonicalSrr;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
System.out.println("Invalid format for transcript " + type);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@NonNull
|
||||
@Override
|
||||
public String toString() {
|
||||
|
|
|
@ -14,6 +14,8 @@ public class PodcastIndex extends Namespace {
|
|||
private static final String URL = "url";
|
||||
private static final String FUNDING = "funding";
|
||||
private static final String CHAPTERS = "chapters";
|
||||
private static final String TRANSCRIPT = "transcript";
|
||||
private static final String TYPE = "type";
|
||||
|
||||
@Override
|
||||
public SyndElement handleElementStart(String localName, HandlerState state,
|
||||
|
@ -28,6 +30,12 @@ public class PodcastIndex extends Namespace {
|
|||
if (!TextUtils.isEmpty(href)) {
|
||||
state.getCurrentItem().setPodcastIndexChapterUrl(href);
|
||||
}
|
||||
} else if (TRANSCRIPT.equals(localName)) {
|
||||
String href = attributes.getValue(URL);
|
||||
String type = attributes.getValue(TYPE);
|
||||
if (!TextUtils.isEmpty(href) && !TextUtils.isEmpty(type)) {
|
||||
state.getCurrentItem().setPodcastIndexTranscriptUrl(type, href);
|
||||
}
|
||||
}
|
||||
return new SyndElement(localName, this);
|
||||
}
|
||||
|
|
|
@ -97,6 +97,14 @@ public class RssParserTest {
|
|||
assertEquals("https://example.com/funding3", feed.getPaymentLinks().get(2).url);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPodcastIndexTranscript() throws Exception {
|
||||
File feedFile = FeedParserTestHelper.getFeedFile("feed-rss-testPodcastIndexTranscript.xml");
|
||||
Feed feed = FeedParserTestHelper.runFeedParser(feedFile);
|
||||
assertEquals("https://podnews.net/audio/podnews231011.mp3.json", feed.getItems().get(0).getPodcastIndexTranscriptUrl());
|
||||
assertEquals("application/json", feed.getItems().get(0).getPodcastIndexTranscriptType());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnsupportedElements() throws Exception {
|
||||
File feedFile = FeedParserTestHelper.getFeedFile("feed-rss-testUnsupportedElements.xml");
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
<?xml version='1.0' encoding='UTF-8' ?>
|
||||
<rss version="2.0" xmlns:podcast="https://podcastindex.org/namespace/1.0">
|
||||
<channel>
|
||||
<title>title</title>
|
||||
<item>
|
||||
<title>Podcasts in YouTube make it to the UK</title>
|
||||
<link>https://podnews.net/update/youtube-podcasts-uk</link>
|
||||
<pubDate>Tue, 10 Oct 2023 08:46:31 +0000</pubDate>
|
||||
<podcast:transcript url="https://podnews.net/audio/podnews231011.mp3.json" type="application/json" />
|
||||
<podcast:transcript url="https://podnews.net/audio/podnews231011.mp3.srt" type="application/srt" />
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
|
@ -342,6 +342,10 @@ class DBUpgrader {
|
|||
if (oldVersion < 3050000) {
|
||||
db.execSQL("ALTER TABLE " + PodDBAdapter.TABLE_NAME_FEEDS
|
||||
+ " ADD COLUMN " + PodDBAdapter.KEY_STATE + " INTEGER DEFAULT " + Feed.STATE_SUBSCRIBED);
|
||||
db.execSQL("ALTER TABLE " + PodDBAdapter.TABLE_NAME_FEED_ITEMS
|
||||
+ " ADD COLUMN " + PodDBAdapter.KEY_PODCASTINDEX_TRANSCRIPT_URL + " TEXT");
|
||||
db.execSQL("ALTER TABLE " + PodDBAdapter.TABLE_NAME_FEED_ITEMS
|
||||
+ " ADD COLUMN " + PodDBAdapter.KEY_PODCASTINDEX_TRANSCRIPT_TYPE + " TEXT");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -122,6 +122,8 @@ public class PodDBAdapter {
|
|||
public static final String KEY_NEW_EPISODES_ACTION = "new_episodes_action";
|
||||
public static final String KEY_PODCASTINDEX_CHAPTER_URL = "podcastindex_chapter_url";
|
||||
public static final String KEY_STATE = "state";
|
||||
public static final String KEY_PODCASTINDEX_TRANSCRIPT_URL = "podcastindex_transcript_url";
|
||||
public static final String KEY_PODCASTINDEX_TRANSCRIPT_TYPE = "podcastindex_transcript_type";
|
||||
|
||||
// Table names
|
||||
public static final String TABLE_NAME_FEEDS = "Feeds";
|
||||
|
@ -184,7 +186,9 @@ public class PodDBAdapter {
|
|||
+ KEY_HAS_CHAPTERS + " INTEGER," + KEY_ITEM_IDENTIFIER + " TEXT,"
|
||||
+ KEY_IMAGE_URL + " TEXT,"
|
||||
+ KEY_AUTO_DOWNLOAD_ENABLED + " INTEGER,"
|
||||
+ KEY_PODCASTINDEX_CHAPTER_URL + " TEXT)";
|
||||
+ KEY_PODCASTINDEX_CHAPTER_URL + " TEXT,"
|
||||
+ KEY_PODCASTINDEX_TRANSCRIPT_TYPE + " TEXT,"
|
||||
+ KEY_PODCASTINDEX_TRANSCRIPT_URL + " TEXT" + ")";
|
||||
|
||||
private static final String CREATE_TABLE_FEED_MEDIA = "CREATE TABLE "
|
||||
+ TABLE_NAME_FEED_MEDIA + " (" + TABLE_PRIMARY_KEY + KEY_DURATION
|
||||
|
@ -272,7 +276,9 @@ public class PodDBAdapter {
|
|||
+ TABLE_NAME_FEED_ITEMS + "." + KEY_ITEM_IDENTIFIER + ", "
|
||||
+ TABLE_NAME_FEED_ITEMS + "." + KEY_IMAGE_URL + ", "
|
||||
+ TABLE_NAME_FEED_ITEMS + "." + KEY_AUTO_DOWNLOAD_ENABLED + ", "
|
||||
+ TABLE_NAME_FEED_ITEMS + "." + KEY_PODCASTINDEX_CHAPTER_URL;
|
||||
+ TABLE_NAME_FEED_ITEMS + "." + KEY_PODCASTINDEX_CHAPTER_URL + ", "
|
||||
+ TABLE_NAME_FEED_ITEMS + "." + KEY_PODCASTINDEX_TRANSCRIPT_TYPE + ", "
|
||||
+ TABLE_NAME_FEED_ITEMS + "." + KEY_PODCASTINDEX_TRANSCRIPT_URL;
|
||||
|
||||
private static final String KEYS_FEED_MEDIA =
|
||||
TABLE_NAME_FEED_MEDIA + "." + KEY_ID + " AS " + SELECT_KEY_MEDIA_ID + ", "
|
||||
|
@ -675,6 +681,14 @@ public class PodDBAdapter {
|
|||
values.put(KEY_IMAGE_URL, item.getImageUrl());
|
||||
values.put(KEY_PODCASTINDEX_CHAPTER_URL, item.getPodcastIndexChapterUrl());
|
||||
|
||||
// We only store one transcript url, we prefer JSON if it exists
|
||||
String type = item.getPodcastIndexTranscriptType();
|
||||
String url = item.getPodcastIndexTranscriptUrl();
|
||||
if (url != null) {
|
||||
values.put(KEY_PODCASTINDEX_TRANSCRIPT_TYPE, type);
|
||||
values.put(KEY_PODCASTINDEX_TRANSCRIPT_URL, url);
|
||||
}
|
||||
|
||||
if (item.getId() == 0) {
|
||||
item.setId(db.insert(TABLE_NAME_FEED_ITEMS, null, values));
|
||||
} else {
|
||||
|
|
|
@ -26,6 +26,8 @@ public class FeedItemCursor extends CursorWrapper {
|
|||
private final int indexImageUrl;
|
||||
private final int indexPodcastIndexChapterUrl;
|
||||
private final int indexMediaId;
|
||||
private final int indexPodcastIndexTranscriptUrl;
|
||||
private final int indexPodcastIndexTranscriptType;
|
||||
|
||||
public FeedItemCursor(Cursor cursor) {
|
||||
super(new FeedMediaCursor(cursor));
|
||||
|
@ -43,6 +45,8 @@ public class FeedItemCursor extends CursorWrapper {
|
|||
indexImageUrl = cursor.getColumnIndexOrThrow(PodDBAdapter.KEY_IMAGE_URL);
|
||||
indexPodcastIndexChapterUrl = cursor.getColumnIndexOrThrow(PodDBAdapter.KEY_PODCASTINDEX_CHAPTER_URL);
|
||||
indexMediaId = cursor.getColumnIndexOrThrow(PodDBAdapter.SELECT_KEY_MEDIA_ID);
|
||||
indexPodcastIndexTranscriptUrl = cursor.getColumnIndexOrThrow(PodDBAdapter.KEY_PODCASTINDEX_TRANSCRIPT_URL);
|
||||
indexPodcastIndexTranscriptType = cursor.getColumnIndexOrThrow(PodDBAdapter.KEY_PODCASTINDEX_TRANSCRIPT_TYPE);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -62,7 +66,9 @@ public class FeedItemCursor extends CursorWrapper {
|
|||
getInt(indexRead),
|
||||
getString(indexItemIdentifier),
|
||||
getLong(indexAutoDownload) > 0,
|
||||
getString(indexPodcastIndexChapterUrl));
|
||||
getString(indexPodcastIndexChapterUrl),
|
||||
getString(indexPodcastIndexTranscriptUrl),
|
||||
getString(indexPodcastIndexTranscriptType));
|
||||
if (!isNull(indexMediaId)) {
|
||||
item.setMedia(feedMediaCursor.getFeedMedia());
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue