From 8adbad9b6692ea41136212f5a2e189a49e6169f0 Mon Sep 17 00:00:00 2001 From: Tony Tam <149837+tonytamsf@users.noreply.github.com> Date: Wed, 6 Dec 2023 10:26:20 -1000 Subject: [PATCH 1/4] Parse podcast:transcript url and store in SQLite (#6739) --- .../antennapod/model/feed/FeedItem.java | 51 ++++++++++++++++++- .../parser/feed/namespace/PodcastIndex.java | 8 +++ .../feed/element/namespace/RssParserTest.java | 8 +++ .../feed-rss-testPodcastIndexTranscript.xml | 13 +++++ .../storage/database/DBUpgrader.java | 4 ++ .../storage/database/PodDBAdapter.java | 18 ++++++- .../database/mapper/FeedItemCursor.java | 8 ++- 7 files changed, 106 insertions(+), 4 deletions(-) create mode 100644 parser/feed/src/test/resources/feed-rss-testPodcastIndexTranscript.xml diff --git a/model/src/main/java/de/danoeh/antennapod/model/feed/FeedItem.java b/model/src/main/java/de/danoeh/antennapod/model/feed/FeedItem.java index df4cc8f9c..bc95dc85f 100644 --- a/model/src/main/java/de/danoeh/antennapod/model/feed/FeedItem.java +++ b/model/src/main/java/de/danoeh/antennapod/model/feed/FeedItem.java @@ -43,6 +43,8 @@ public class FeedItem implements Serializable { private transient Feed feed; private long feedId; private String podcastIndexChapterUrl; + private String podcastIndexTranscriptUrl; + private String podcastIndexTranscriptType; private int state; public static final int NEW = -1; @@ -83,7 +85,8 @@ public class FeedItem implements Serializable { * */ public FeedItem(long id, String title, String link, Date pubDate, String paymentLink, long feedId, boolean hasChapters, String imageUrl, int state, - String itemIdentifier, boolean autoDownloadEnabled, String podcastIndexChapterUrl) { + String itemIdentifier, boolean autoDownloadEnabled, String podcastIndexChapterUrl, + String transcriptType, String transcriptUrl) { this.id = id; this.title = title; this.link = link; @@ -96,6 +99,10 @@ public class FeedItem implements Serializable { this.itemIdentifier = itemIdentifier; 
this.autoDownloadEnabled = autoDownloadEnabled; this.podcastIndexChapterUrl = podcastIndexChapterUrl; + if (transcriptUrl != null) { + this.podcastIndexTranscriptUrl = transcriptUrl; + this.podcastIndexTranscriptType = transcriptType; + } } /** @@ -162,6 +169,9 @@ public class FeedItem implements Serializable { if (other.podcastIndexChapterUrl != null) { podcastIndexChapterUrl = other.podcastIndexChapterUrl; } + if (other.getPodcastIndexTranscriptUrl() != null) { + podcastIndexTranscriptUrl = other.podcastIndexTranscriptUrl; + } } public long getId() { @@ -413,6 +423,45 @@ public class FeedItem implements Serializable { podcastIndexChapterUrl = url; } + public void setPodcastIndexTranscriptUrl(String type, String url) { + updateTranscriptPreferredFormat(type, url); + } + + public String getPodcastIndexTranscriptUrl() { + return podcastIndexTranscriptUrl; + } + + public String getPodcastIndexTranscriptType() { + return podcastIndexTranscriptType; + } + + public void updateTranscriptPreferredFormat(String type, String url) { + if (StringUtils.isEmpty(type) || StringUtils.isEmpty(url)) { + return; + } + + String canonicalSrr = "application/srr"; + String jsonType = "application/json"; + + switch (type) { + case "application/json": + podcastIndexTranscriptUrl = url; + podcastIndexTranscriptType = type; + break; + case "application/srr": + case "application/srt": + case "application/x-subrip": + if (podcastIndexTranscriptUrl == null || !jsonType.equals(podcastIndexTranscriptType)) { + podcastIndexTranscriptUrl = url; + podcastIndexTranscriptType = canonicalSrr; + } + break; + default: + System.out.println("Invalid format for transcript " + type); + break; + } + } + @NonNull @Override public String toString() { diff --git a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/PodcastIndex.java b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/PodcastIndex.java index 1f543a5ae..dd11fb4e7 100644 --- 
a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/PodcastIndex.java +++ b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/PodcastIndex.java @@ -14,6 +14,8 @@ public class PodcastIndex extends Namespace { private static final String URL = "url"; private static final String FUNDING = "funding"; private static final String CHAPTERS = "chapters"; + private static final String TRANSCRIPT = "transcript"; + private static final String TYPE = "type"; @Override public SyndElement handleElementStart(String localName, HandlerState state, @@ -28,6 +30,12 @@ public class PodcastIndex extends Namespace { if (!TextUtils.isEmpty(href)) { state.getCurrentItem().setPodcastIndexChapterUrl(href); } + } else if (TRANSCRIPT.equals(localName)) { + String href = attributes.getValue(URL); + String type = attributes.getValue(TYPE); + if (!TextUtils.isEmpty(href) && !TextUtils.isEmpty(type)) { + state.getCurrentItem().setPodcastIndexTranscriptUrl(type, href); + } } return new SyndElement(localName, this); } diff --git a/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/namespace/RssParserTest.java b/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/namespace/RssParserTest.java index bc30f2d7c..983766f12 100644 --- a/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/namespace/RssParserTest.java +++ b/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/namespace/RssParserTest.java @@ -97,6 +97,14 @@ public class RssParserTest { assertEquals("https://example.com/funding3", feed.getPaymentLinks().get(2).url); } + @Test + public void testPodcastIndexTranscript() throws Exception { + File feedFile = FeedParserTestHelper.getFeedFile("feed-rss-testPodcastIndexTranscript.xml"); + Feed feed = FeedParserTestHelper.runFeedParser(feedFile); + assertEquals("https://podnews.net/audio/podnews231011.mp3.json", feed.getItems().get(0).getPodcastIndexTranscriptUrl()); + assertEquals("application/json", 
feed.getItems().get(0).getPodcastIndexTranscriptType()); + } + @Test public void testUnsupportedElements() throws Exception { File feedFile = FeedParserTestHelper.getFeedFile("feed-rss-testUnsupportedElements.xml"); diff --git a/parser/feed/src/test/resources/feed-rss-testPodcastIndexTranscript.xml b/parser/feed/src/test/resources/feed-rss-testPodcastIndexTranscript.xml new file mode 100644 index 000000000..f972ee207 --- /dev/null +++ b/parser/feed/src/test/resources/feed-rss-testPodcastIndexTranscript.xml @@ -0,0 +1,13 @@ + + + + title + + Podcasts in YouTube make it to the UK + https://podnews.net/update/youtube-podcasts-uk + Tue, 10 Oct 2023 08:46:31 +0000 + + + + + diff --git a/storage/database/src/main/java/de/danoeh/antennapod/storage/database/DBUpgrader.java b/storage/database/src/main/java/de/danoeh/antennapod/storage/database/DBUpgrader.java index be09a5132..a3b86be74 100644 --- a/storage/database/src/main/java/de/danoeh/antennapod/storage/database/DBUpgrader.java +++ b/storage/database/src/main/java/de/danoeh/antennapod/storage/database/DBUpgrader.java @@ -342,6 +342,10 @@ class DBUpgrader { if (oldVersion < 3050000) { db.execSQL("ALTER TABLE " + PodDBAdapter.TABLE_NAME_FEEDS + " ADD COLUMN " + PodDBAdapter.KEY_STATE + " INTEGER DEFAULT " + Feed.STATE_SUBSCRIBED); + db.execSQL("ALTER TABLE " + PodDBAdapter.TABLE_NAME_FEED_ITEMS + + " ADD COLUMN " + PodDBAdapter.KEY_PODCASTINDEX_TRANSCRIPT_URL + " TEXT"); + db.execSQL("ALTER TABLE " + PodDBAdapter.TABLE_NAME_FEED_ITEMS + + " ADD COLUMN " + PodDBAdapter.KEY_PODCASTINDEX_TRANSCRIPT_TYPE + " TEXT"); } } diff --git a/storage/database/src/main/java/de/danoeh/antennapod/storage/database/PodDBAdapter.java b/storage/database/src/main/java/de/danoeh/antennapod/storage/database/PodDBAdapter.java index aec136457..c7204b567 100644 --- a/storage/database/src/main/java/de/danoeh/antennapod/storage/database/PodDBAdapter.java +++ b/storage/database/src/main/java/de/danoeh/antennapod/storage/database/PodDBAdapter.java @@ 
-122,6 +122,8 @@ public class PodDBAdapter { public static final String KEY_NEW_EPISODES_ACTION = "new_episodes_action"; public static final String KEY_PODCASTINDEX_CHAPTER_URL = "podcastindex_chapter_url"; public static final String KEY_STATE = "state"; + public static final String KEY_PODCASTINDEX_TRANSCRIPT_URL = "podcastindex_transcript_url"; + public static final String KEY_PODCASTINDEX_TRANSCRIPT_TYPE = "podcastindex_transcript_type"; // Table names public static final String TABLE_NAME_FEEDS = "Feeds"; @@ -184,7 +186,9 @@ public class PodDBAdapter { + KEY_HAS_CHAPTERS + " INTEGER," + KEY_ITEM_IDENTIFIER + " TEXT," + KEY_IMAGE_URL + " TEXT," + KEY_AUTO_DOWNLOAD_ENABLED + " INTEGER," - + KEY_PODCASTINDEX_CHAPTER_URL + " TEXT)"; + + KEY_PODCASTINDEX_CHAPTER_URL + " TEXT," + + KEY_PODCASTINDEX_TRANSCRIPT_TYPE + " TEXT," + + KEY_PODCASTINDEX_TRANSCRIPT_URL + " TEXT" + ")"; private static final String CREATE_TABLE_FEED_MEDIA = "CREATE TABLE " + TABLE_NAME_FEED_MEDIA + " (" + TABLE_PRIMARY_KEY + KEY_DURATION @@ -272,7 +276,9 @@ public class PodDBAdapter { + TABLE_NAME_FEED_ITEMS + "." + KEY_ITEM_IDENTIFIER + ", " + TABLE_NAME_FEED_ITEMS + "." + KEY_IMAGE_URL + ", " + TABLE_NAME_FEED_ITEMS + "." + KEY_AUTO_DOWNLOAD_ENABLED + ", " - + TABLE_NAME_FEED_ITEMS + "." + KEY_PODCASTINDEX_CHAPTER_URL; + + TABLE_NAME_FEED_ITEMS + "." + KEY_PODCASTINDEX_CHAPTER_URL + ", " + + TABLE_NAME_FEED_ITEMS + "." + KEY_PODCASTINDEX_TRANSCRIPT_TYPE + ", " + + TABLE_NAME_FEED_ITEMS + "." + KEY_PODCASTINDEX_TRANSCRIPT_URL; private static final String KEYS_FEED_MEDIA = TABLE_NAME_FEED_MEDIA + "." 
+ KEY_ID + " AS " + SELECT_KEY_MEDIA_ID + ", " @@ -675,6 +681,14 @@ public class PodDBAdapter { values.put(KEY_IMAGE_URL, item.getImageUrl()); values.put(KEY_PODCASTINDEX_CHAPTER_URL, item.getPodcastIndexChapterUrl()); + // We only store one transcript url, we prefer JSON if it exists + String type = item.getPodcastIndexTranscriptType(); + String url = item.getPodcastIndexTranscriptUrl(); + if (url != null) { + values.put(KEY_PODCASTINDEX_TRANSCRIPT_TYPE, type); + values.put(KEY_PODCASTINDEX_TRANSCRIPT_URL, url); + } + if (item.getId() == 0) { item.setId(db.insert(TABLE_NAME_FEED_ITEMS, null, values)); } else { diff --git a/storage/database/src/main/java/de/danoeh/antennapod/storage/database/mapper/FeedItemCursor.java b/storage/database/src/main/java/de/danoeh/antennapod/storage/database/mapper/FeedItemCursor.java index d526299e4..41e4d474a 100644 --- a/storage/database/src/main/java/de/danoeh/antennapod/storage/database/mapper/FeedItemCursor.java +++ b/storage/database/src/main/java/de/danoeh/antennapod/storage/database/mapper/FeedItemCursor.java @@ -26,6 +26,8 @@ public class FeedItemCursor extends CursorWrapper { private final int indexImageUrl; private final int indexPodcastIndexChapterUrl; private final int indexMediaId; + private final int indexPodcastIndexTranscriptUrl; + private final int indexPodcastIndexTranscriptType; public FeedItemCursor(Cursor cursor) { super(new FeedMediaCursor(cursor)); @@ -43,6 +45,8 @@ public class FeedItemCursor extends CursorWrapper { indexImageUrl = cursor.getColumnIndexOrThrow(PodDBAdapter.KEY_IMAGE_URL); indexPodcastIndexChapterUrl = cursor.getColumnIndexOrThrow(PodDBAdapter.KEY_PODCASTINDEX_CHAPTER_URL); indexMediaId = cursor.getColumnIndexOrThrow(PodDBAdapter.SELECT_KEY_MEDIA_ID); + indexPodcastIndexTranscriptUrl = cursor.getColumnIndexOrThrow(PodDBAdapter.KEY_PODCASTINDEX_TRANSCRIPT_URL); + indexPodcastIndexTranscriptType = cursor.getColumnIndexOrThrow(PodDBAdapter.KEY_PODCASTINDEX_TRANSCRIPT_TYPE); } /** @@ -62,7 +66,9 @@ 
public class FeedItemCursor extends CursorWrapper { getInt(indexRead), getString(indexItemIdentifier), getLong(indexAutoDownload) > 0, - getString(indexPodcastIndexChapterUrl)); + getString(indexPodcastIndexChapterUrl), + getString(indexPodcastIndexTranscriptType), + getString(indexPodcastIndexTranscriptUrl)); if (!isNull(indexMediaId)) { item.setMedia(feedMediaCursor.getFeedMedia()); } From 27e9bf36b1696ea2f35cc342964e9659087a5948 Mon Sep 17 00:00:00 2001 From: Tony Tam <149837+tonytamsf@users.noreply.github.com> Date: Mon, 1 Jan 2024 04:06:00 -0800 Subject: [PATCH 2/4] Download and store transcript text (#6797) --- .../antennapod/model/feed/FeedItem.java | 9 +++ .../antennapod/model/feed/FeedMedia.java | 7 +++ .../episode/MediaDownloadedHandler.java | 11 ++++ .../antennapod/storage/database/DBWriter.java | 8 +++ .../chapters/PodcastIndexTranscriptUtils.java | 55 +++++++++++++++++++ 5 files changed, 90 insertions(+) create mode 100644 ui/chapters/src/main/java/de/danoeh/antennapod/ui/chapters/PodcastIndexTranscriptUtils.java diff --git a/model/src/main/java/de/danoeh/antennapod/model/feed/FeedItem.java b/model/src/main/java/de/danoeh/antennapod/model/feed/FeedItem.java index bc95dc85f..0f5a3f4bb 100644 --- a/model/src/main/java/de/danoeh/antennapod/model/feed/FeedItem.java +++ b/model/src/main/java/de/danoeh/antennapod/model/feed/FeedItem.java @@ -45,6 +45,7 @@ public class FeedItem implements Serializable { private String podcastIndexChapterUrl; private String podcastIndexTranscriptUrl; private String podcastIndexTranscriptType; + private String podcastIndexTranscriptText; private int state; public static final int NEW = -1; @@ -462,6 +463,14 @@ public class FeedItem implements Serializable { } } + public String getPodcastIndexTranscriptText() { + return podcastIndexTranscriptText; + } + + public String setPodcastIndexTranscriptText(String str) { + return podcastIndexTranscriptText = str; + } + @NonNull @Override public String toString() { diff --git 
a/model/src/main/java/de/danoeh/antennapod/model/feed/FeedMedia.java b/model/src/main/java/de/danoeh/antennapod/model/feed/FeedMedia.java index 02c221611..76a891c15 100644 --- a/model/src/main/java/de/danoeh/antennapod/model/feed/FeedMedia.java +++ b/model/src/main/java/de/danoeh/antennapod/model/feed/FeedMedia.java @@ -513,4 +513,11 @@ public class FeedMedia implements Playable { } return super.equals(o); } + + public String getTranscriptFileUrl() { + if (getLocalFileUrl() == null) { + return null; + } + return getLocalFileUrl() + ".transcript"; + } } diff --git a/net/download/service/src/main/java/de/danoeh/antennapod/net/download/service/episode/MediaDownloadedHandler.java b/net/download/service/src/main/java/de/danoeh/antennapod/net/download/service/episode/MediaDownloadedHandler.java index cf9ec17e1..b856d1b67 100644 --- a/net/download/service/src/main/java/de/danoeh/antennapod/net/download/service/episode/MediaDownloadedHandler.java +++ b/net/download/service/src/main/java/de/danoeh/antennapod/net/download/service/episode/MediaDownloadedHandler.java @@ -10,6 +10,8 @@ import de.danoeh.antennapod.model.MediaMetadataRetrieverCompat; import de.danoeh.antennapod.model.feed.Feed; import de.danoeh.antennapod.net.sync.serviceinterface.SynchronizationQueueSink; import de.danoeh.antennapod.ui.chapters.ChapterUtils; +import de.danoeh.antennapod.ui.chapters.PodcastIndexTranscriptUtils; +import org.apache.commons.lang3.StringUtils; import org.greenrobot.eventbus.EventBus; import java.io.File; @@ -64,6 +66,15 @@ public class MediaDownloadedHandler implements Runnable { if (media.getItem() != null && media.getItem().getPodcastIndexChapterUrl() != null) { ChapterUtils.loadChaptersFromUrl(media.getItem().getPodcastIndexChapterUrl(), false); } + FeedItem item = media.getItem(); + if (item != null && item.getPodcastIndexTranscriptUrl() != null) { + String transcript = PodcastIndexTranscriptUtils.loadTranscriptFromUrl( + item.getPodcastIndexTranscriptType(), 
item.getPodcastIndexTranscriptUrl(), false); + if (!StringUtils.isEmpty(transcript)) { + item.setPodcastIndexTranscriptText(transcript); + PodcastIndexTranscriptUtils.storeTranscript(media, transcript); + } + } } catch (InterruptedIOException ignore) { // Ignore } diff --git a/storage/database/src/main/java/de/danoeh/antennapod/storage/database/DBWriter.java b/storage/database/src/main/java/de/danoeh/antennapod/storage/database/DBWriter.java index 11e1ad751..2106eae39 100644 --- a/storage/database/src/main/java/de/danoeh/antennapod/storage/database/DBWriter.java +++ b/storage/database/src/main/java/de/danoeh/antennapod/storage/database/DBWriter.java @@ -119,6 +119,14 @@ public class DBWriter { media.setLocalFileUrl(null); localDelete = true; } else if (media.getLocalFileUrl() != null) { + // delete transcript file before the media file because the fileurl is needed + if (media.getTranscriptFileUrl() != null) { + File transcriptFile = new File(media.getTranscriptFileUrl()); + if (transcriptFile.exists() && !transcriptFile.delete()) { + Log.d(TAG, "Deletion of transcript file failed."); + } + } + // delete downloaded media file File mediaFile = new File(media.getLocalFileUrl()); if (mediaFile.exists() && !mediaFile.delete()) { diff --git a/ui/chapters/src/main/java/de/danoeh/antennapod/ui/chapters/PodcastIndexTranscriptUtils.java b/ui/chapters/src/main/java/de/danoeh/antennapod/ui/chapters/PodcastIndexTranscriptUtils.java new file mode 100644 index 000000000..9add3db23 --- /dev/null +++ b/ui/chapters/src/main/java/de/danoeh/antennapod/ui/chapters/PodcastIndexTranscriptUtils.java @@ -0,0 +1,55 @@ +package de.danoeh.antennapod.ui.chapters; + +import android.util.Log; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.charset.Charset; + +import de.danoeh.antennapod.model.feed.FeedMedia; +import de.danoeh.antennapod.net.common.AntennapodHttpClient; +import okhttp3.Request; +import okhttp3.Response; +import 
org.apache.commons.io.IOUtils; + +public class PodcastIndexTranscriptUtils { + + private static final String TAG = "PodcastIndexTranscript"; + + public static String loadTranscriptFromUrl(String type, String url, boolean forceRefresh) { + StringBuilder str = new StringBuilder(); + Response response = null; + try { + Log.d(TAG, "Downloading transcript URL " + url.toString()); + Request request = new Request.Builder().url(url).build(); + response = AntennapodHttpClient.getHttpClient().newCall(request).execute(); + if (response.isSuccessful() && response.body() != null) { + str.append(response.body().string()); + } + } catch (IOException e) { + e.printStackTrace(); + } finally { + if (response != null) { + response.close(); + } + } + return str.toString(); + } + + public static void storeTranscript(FeedMedia media, String transcript) { + File transcriptFile = new File(media.getTranscriptFileUrl()); + FileOutputStream ostream = null; + try { + if (!transcriptFile.exists() && transcriptFile.createNewFile()) { + ostream = new FileOutputStream(transcriptFile); + ostream.write(transcript.getBytes(Charset.forName("UTF-8"))); + ostream.close(); + } + } catch (IOException e) { + e.printStackTrace(); + } finally { + IOUtils.closeQuietly(ostream); + } + } +} From 7c4f19c9798b7c2c0c059a17fdfe843731cba5b4 Mon Sep 17 00:00:00 2001 From: Tony Tam <149837+tonytamsf@users.noreply.github.com> Date: Mon, 5 Feb 2024 04:42:59 +0800 Subject: [PATCH 3/4] Transcript semantic parsing (#6852) --- .../antennapod/model/feed/FeedItem.java | 9 ++ .../antennapod/model/feed/Transcript.java | 28 +++++ .../model/feed/TranscriptSegment.java | 31 +++++ parser/transcript/README.md | 3 + parser/transcript/build.gradle | 23 ++++ .../transcript/JsonTranscriptParser.java | 65 ++++++++++ .../transcript/SrtTranscriptParser.java | 118 ++++++++++++++++++ .../parser/transcript/TranscriptParser.java | 24 ++++ .../transcript/JsonTranscriptParserTest.java | 84 +++++++++++++ 
.../transcript/SrtTranscriptParserTest.java | 93 ++++++++++++++ settings.gradle | 1 + 11 files changed, 479 insertions(+) create mode 100644 model/src/main/java/de/danoeh/antennapod/model/feed/Transcript.java create mode 100644 model/src/main/java/de/danoeh/antennapod/model/feed/TranscriptSegment.java create mode 100644 parser/transcript/README.md create mode 100644 parser/transcript/build.gradle create mode 100644 parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/JsonTranscriptParser.java create mode 100644 parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/SrtTranscriptParser.java create mode 100644 parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/TranscriptParser.java create mode 100644 parser/transcript/src/test/java/de/danoeh/antennapod/parser/transcript/JsonTranscriptParserTest.java create mode 100644 parser/transcript/src/test/java/de/danoeh/antennapod/parser/transcript/SrtTranscriptParserTest.java diff --git a/model/src/main/java/de/danoeh/antennapod/model/feed/FeedItem.java b/model/src/main/java/de/danoeh/antennapod/model/feed/FeedItem.java index 0f5a3f4bb..1e623fd8e 100644 --- a/model/src/main/java/de/danoeh/antennapod/model/feed/FeedItem.java +++ b/model/src/main/java/de/danoeh/antennapod/model/feed/FeedItem.java @@ -46,6 +46,7 @@ public class FeedItem implements Serializable { private String podcastIndexTranscriptUrl; private String podcastIndexTranscriptType; private String podcastIndexTranscriptText; + private Transcript transcript; private int state; public static final int NEW = -1; @@ -463,6 +464,14 @@ public class FeedItem implements Serializable { } } + public Transcript getTranscript() { + return transcript; + } + + public void setTranscript(Transcript t) { + transcript = t; + } + public String getPodcastIndexTranscriptText() { return podcastIndexTranscriptText; } diff --git a/model/src/main/java/de/danoeh/antennapod/model/feed/Transcript.java 
b/model/src/main/java/de/danoeh/antennapod/model/feed/Transcript.java new file mode 100644 index 000000000..da01c0e58 --- /dev/null +++ b/model/src/main/java/de/danoeh/antennapod/model/feed/Transcript.java @@ -0,0 +1,28 @@ +package de.danoeh.antennapod.model.feed; + +import java.util.Map; +import java.util.TreeMap; + +public class Transcript { + + private final TreeMap segmentsMap = new TreeMap<>(); + + public void addSegment(TranscriptSegment segment) { + segmentsMap.put(segment.getStartTime(), segment); + } + + public TranscriptSegment getSegmentAtTime(long time) { + if (segmentsMap.floorEntry(time) == null) { + return null; + } + return segmentsMap.floorEntry(time).getValue(); + } + + public int getSegmentCount() { + return segmentsMap.size(); + } + + public Map.Entry getEntryAfterTime(long time) { + return segmentsMap.ceilingEntry(time); + } +} diff --git a/model/src/main/java/de/danoeh/antennapod/model/feed/TranscriptSegment.java b/model/src/main/java/de/danoeh/antennapod/model/feed/TranscriptSegment.java new file mode 100644 index 000000000..0101bb8ed --- /dev/null +++ b/model/src/main/java/de/danoeh/antennapod/model/feed/TranscriptSegment.java @@ -0,0 +1,31 @@ +package de.danoeh.antennapod.model.feed; + +public class TranscriptSegment { + private final long startTime; + private final long endTime; + private final String words; + private final String speaker; + + public TranscriptSegment(long start, long end, String w, String s) { + startTime = start; + endTime = end; + words = w; + speaker = s; + } + + public long getStartTime() { + return startTime; + } + + public long getEndTime() { + return endTime; + } + + public String getWords() { + return words; + } + + public String getSpeaker() { + return speaker; + } +} \ No newline at end of file diff --git a/parser/transcript/README.md b/parser/transcript/README.md new file mode 100644 index 000000000..a6ca61612 --- /dev/null +++ b/parser/transcript/README.md @@ -0,0 +1,3 @@ +# :parser:transcript + +This module 
provides parsing for transcripts diff --git a/parser/transcript/build.gradle b/parser/transcript/build.gradle new file mode 100644 index 000000000..122c74025 --- /dev/null +++ b/parser/transcript/build.gradle @@ -0,0 +1,23 @@ +plugins { + id("com.android.library") +} +apply from: "../../common.gradle" + +android { + namespace "de.danoeh.antennapod.parser.transcript" +} + +dependencies { + implementation project(':model') + + annotationProcessor "androidx.annotation:annotation:$annotationVersion" + + implementation "androidx.core:core:$coreVersion" + + implementation "org.apache.commons:commons-lang3:$commonslangVersion" + implementation "commons-io:commons-io:$commonsioVersion" + implementation "org.jsoup:jsoup:$jsoupVersion" + + testImplementation "junit:junit:$junitVersion" + testImplementation "org.robolectric:robolectric:$robolectricVersion" +} diff --git a/parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/JsonTranscriptParser.java b/parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/JsonTranscriptParser.java new file mode 100644 index 000000000..78f3bf9c8 --- /dev/null +++ b/parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/JsonTranscriptParser.java @@ -0,0 +1,65 @@ +package de.danoeh.antennapod.parser.transcript; + +import org.apache.commons.lang3.StringUtils; +import org.json.JSONArray; +import org.json.JSONObject; +import org.jsoup.internal.StringUtil; + +import de.danoeh.antennapod.model.feed.Transcript; +import de.danoeh.antennapod.model.feed.TranscriptSegment; + +public class JsonTranscriptParser { + public static Transcript parse(String jsonStr) { + try { + Transcript transcript = new Transcript(); + long startTime = -1L; + long endTime = -1L; + long segmentStartTime = -1L; + long duration = 0L; + String speaker = ""; + String segmentBody = ""; + JSONObject obj = new JSONObject(jsonStr); + JSONArray objSegments = obj.getJSONArray("segments"); + + for (int i = 0; i < objSegments.length(); i++) 
{ + JSONObject jsonObject = objSegments.getJSONObject(i); + startTime = Double.valueOf(jsonObject.optDouble("startTime", -1) * 1000L).longValue(); + endTime = Double.valueOf(jsonObject.optDouble("endTime", -1) * 1000L).longValue(); + if (startTime < 0 || endTime < 0) { + continue; + } + if (segmentStartTime == -1L) { + segmentStartTime = startTime; + } + duration += endTime - startTime; + + speaker = jsonObject.optString("speaker"); + String body = jsonObject.optString("body"); + segmentBody += body + " "; + + if (duration >= TranscriptParser.MIN_SPAN) { + segmentBody = StringUtils.trim(segmentBody); + transcript.addSegment(new TranscriptSegment(segmentStartTime, endTime, segmentBody, speaker)); + duration = 0L; + segmentBody = ""; + segmentStartTime = -1L; + } + } + + if (!StringUtil.isBlank(segmentBody)) { + segmentBody = StringUtils.trim(segmentBody); + transcript.addSegment(new TranscriptSegment(segmentStartTime, endTime, segmentBody, speaker)); + } + + if (transcript.getSegmentCount() > 0) { + return transcript; + } else { + return null; + } + + } catch (org.json.JSONException e) { + e.printStackTrace(); + } + return null; + } +} diff --git a/parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/SrtTranscriptParser.java b/parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/SrtTranscriptParser.java new file mode 100644 index 000000000..098dadd99 --- /dev/null +++ b/parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/SrtTranscriptParser.java @@ -0,0 +1,118 @@ +package de.danoeh.antennapod.parser.transcript; + +import org.apache.commons.lang3.StringUtils; +import org.jsoup.internal.StringUtil; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import de.danoeh.antennapod.model.feed.Transcript; +import de.danoeh.antennapod.model.feed.TranscriptSegment; + +public class SrtTranscriptParser { + private static final 
Pattern TIMECODE_PATTERN = Pattern.compile("^([0-9]{2}):([0-9]{2}):([0-9]{2}),([0-9]{3})$"); + + public static Transcript parse(String str) { + if (StringUtils.isBlank(str)) { + return null; + } + str = str.replaceAll("\r\n", "\n"); + + Transcript transcript = new Transcript(); + List lines = Arrays.asList(str.split("\n")); + Iterator iter = lines.iterator(); + String speaker = ""; + StringBuilder body = new StringBuilder(); + String line; + String segmentBody = ""; + long startTimecode = -1L; + long spanStartTimecode = -1L; + long endTimecode = -1L; + long duration = 0L; + + while (iter.hasNext()) { + line = iter.next(); + + if (line.isEmpty()) { + continue; + } + + if (line.contains("-->")) { + String[] timecodes = line.split("-->"); + if (timecodes.length < 2) { + continue; + } + startTimecode = parseTimecode(timecodes[0].trim()); + endTimecode = parseTimecode(timecodes[1].trim()); + if (startTimecode == -1 || endTimecode == -1) { + continue; + } + + if (spanStartTimecode == -1) { + spanStartTimecode = startTimecode; + } + duration += endTimecode - startTimecode; + while (iter.hasNext()) { + line = iter.next(); + if (StringUtil.isBlank(line)) { + break; + } + body.append(line.strip()); + body.append(" "); + } + } + + if (body.indexOf(":") != -1) { + String [] parts = body.toString().trim().split(":", 2); + if (parts.length < 2) { + continue; + } + speaker = parts[0]; + body = new StringBuilder(parts[1].strip()); + } + if (!StringUtil.isBlank(body.toString())) { + segmentBody += " " + body; + segmentBody = StringUtils.trim(segmentBody); + if (duration >= TranscriptParser.MIN_SPAN && endTimecode > spanStartTimecode) { + transcript.addSegment(new TranscriptSegment(spanStartTimecode, + endTimecode, + segmentBody, + speaker)); + duration = 0L; + spanStartTimecode = -1L; + segmentBody = ""; + } + body = new StringBuilder(); + } + } + + if (!StringUtil.isBlank(segmentBody) && endTimecode > spanStartTimecode) { + segmentBody = StringUtils.trim(segmentBody); + 
transcript.addSegment(new TranscriptSegment(spanStartTimecode, + endTimecode, + segmentBody, + speaker)); + } + if (transcript.getSegmentCount() > 0) { + return transcript; + } else { + return null; + } + } + + // Time format 00:00:00,000 + static long parseTimecode(String timecode) { + Matcher matcher = TIMECODE_PATTERN.matcher(timecode); + if (!matcher.matches()) { + return -1; + } + long hours = Integer.parseInt(matcher.group(1)); + long minutes = Integer.parseInt(matcher.group(2)); + long seconds = Integer.parseInt(matcher.group(3)); + long milliseconds = Integer.parseInt(matcher.group(4)); + return (hours * 60 * 60 * 1000) + (minutes * 60 * 1000) + (seconds * 1000) + milliseconds; + } +} diff --git a/parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/TranscriptParser.java b/parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/TranscriptParser.java new file mode 100644 index 000000000..0a4025d96 --- /dev/null +++ b/parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/TranscriptParser.java @@ -0,0 +1,24 @@ +package de.danoeh.antennapod.parser.transcript; + +import org.apache.commons.lang3.StringUtils; + +import de.danoeh.antennapod.model.feed.Transcript; + +public class TranscriptParser { + static final long MIN_SPAN = 1000L; // merge short segments together to form a span of 1 second + + public static Transcript parse(String str, String type) { + if (str == null || StringUtils.isBlank(str)) { + return null; + } + + if ("application/json".equals(type)) { + return JsonTranscriptParser.parse(str); + } + + if ("application/srt".equals(type) || "application/srr".equals(type) || "application/x-subrip".equals(type)) { + return SrtTranscriptParser.parse(str); + } + return null; + } +} diff --git a/parser/transcript/src/test/java/de/danoeh/antennapod/parser/transcript/JsonTranscriptParserTest.java b/parser/transcript/src/test/java/de/danoeh/antennapod/parser/transcript/JsonTranscriptParserTest.java new file mode 
100644 index 000000000..48996f492 --- /dev/null +++ b/parser/transcript/src/test/java/de/danoeh/antennapod/parser/transcript/JsonTranscriptParserTest.java @@ -0,0 +1,84 @@ +package de.danoeh.antennapod.parser.transcript; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.robolectric.RobolectricTestRunner; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import de.danoeh.antennapod.model.feed.Transcript; + +@RunWith(RobolectricTestRunner.class) +public class JsonTranscriptParserTest { + private static String jsonStr = "{'version': '1.0.0', " + + "'segments': [ " + + "{ 'speaker' : 'John Doe', 'startTime': 0.8, 'endTime': 1.9, 'body': 'And' }," + + "{ 'speaker' : 'Sally Green', 'startTime': 1.91, 'endTime': 2.8, 'body': 'this merges' }," + + "{ 'startTime': 2.9, 'endTime': 3.4, 'body': 'the' }," + + "{ 'startTime': 3.5, 'endTime': 3.6, 'body': 'person' }]}"; + + @Test + public void testParseJson() { + Transcript result = JsonTranscriptParser.parse(jsonStr); + + assertEquals(result.getSegmentAtTime(0L), null); + assertEquals(result.getSegmentAtTime(800L).getSpeaker(), "John Doe"); + assertEquals(result.getSegmentAtTime(800L).getStartTime(), 800L); + assertEquals(result.getSegmentAtTime(800L).getEndTime(), 1900L); + assertEquals(1910L, (long) result.getEntryAfterTime(1800L).getKey()); + // 2 segments get merged into at least 1 second + assertEquals("this merges the", result.getEntryAfterTime(1800L).getValue().getWords()); + } + + @Test + public void testParse() { + String type = "application/json"; + Transcript result = TranscriptParser.parse(jsonStr, type); + // There isn't a segment at 900L, so go backwards and get the segment at 800L + assertEquals(result.getSegmentAtTime(900L).getSpeaker(), "John Doe"); + assertEquals(result.getSegmentAtTime(930L).getWords(), "And"); + + // blank string + String blankStr = ""; + result = TranscriptParser.parse(blankStr, type); + assertEquals(result, null); + + result 
= TranscriptParser.parse(null, type); + assertEquals(result, null); + + // All blank lines + String allNewlinesStr = "\r\n\r\n\r\n\r\n"; + result = TranscriptParser.parse(allNewlinesStr, type); + assertEquals(result, null); + + // segments is missing + String jsonStrBad1 = "{'version': '1.0.0', " + + "'segmentsX': [ " + + "{ 'speaker' : 'John Doe', 'startTime': 0.8, 'endTime': 1.9, 'body': 'And' }," + + "{ 'startTime': 2.9, 'endTime': 3.4, 'body': 'the' }," + + "{ 'startTime': 3.5, 'endTime': 3.6, 'body': 'person' }]}"; + result = TranscriptParser.parse(jsonStrBad1, type); + assertEquals(result, null); + + // invalid time formatting + String jsonStrBad2 = "{'version': '1.0.0', " + + "'segments': [ " + + "{ 'speaker' : 'XJohn Doe', 'startTime': stringTime, 'endTime': stringTime, 'body': 'And' }," + + "{ 'XstartTime': 2.9, 'XendTime': 3.4, 'body': 'the' }," + + "{ 'startTime': '-2.9', 'endTime': '-3.4', 'body': 'the' }," + + "{ 'startTime': 'bad_time', 'endTime': '-3.4', 'body': 'the' }]}"; + result = TranscriptParser.parse(jsonStrBad2, type); + assertNull(result); + + // Just plain text + String strBad3 = "John Doe: Promoting your podcast in a new\n\n" + + "way. 
The latest from PogNews."; + result = TranscriptParser.parse(strBad3, type); + assertNull(result); + + // passing the wrong type + type = "application/srt"; + result = TranscriptParser.parse(jsonStr, type); + assertEquals(result, null); + } +} diff --git a/parser/transcript/src/test/java/de/danoeh/antennapod/parser/transcript/SrtTranscriptParserTest.java b/parser/transcript/src/test/java/de/danoeh/antennapod/parser/transcript/SrtTranscriptParserTest.java new file mode 100644 index 000000000..f7854c5bf --- /dev/null +++ b/parser/transcript/src/test/java/de/danoeh/antennapod/parser/transcript/SrtTranscriptParserTest.java @@ -0,0 +1,93 @@ +package de.danoeh.antennapod.parser.transcript; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.robolectric.RobolectricTestRunner; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import de.danoeh.antennapod.model.feed.Transcript; + +@RunWith(RobolectricTestRunner.class) +public class SrtTranscriptParserTest { + private static String srtStr = "1\n" + + "00:00:00,000 --> 00:00:02,730\n" + + "John Doe: Promoting your podcast in a new\n\n" + + "2\n" + + "00:00:02,730 --> 00:00:04,600\n" + + "way. The latest from PogNews.\n\n" + + "00:00:04,730 --> 00:00:05,600\n" + + "way. The latest from PogNews."; + + @Test + public void testParseSrt() { + Transcript result = SrtTranscriptParser.parse(srtStr); + + assertEquals(result.getSegmentAtTime(0L).getWords(), "Promoting your podcast in a new"); + assertEquals(result.getSegmentAtTime(0L).getSpeaker(), "John Doe"); + assertEquals(result.getSegmentAtTime(0L).getStartTime(), 0L); + assertEquals(result.getSegmentAtTime(0L).getEndTime(), 2730L); + assertEquals((long) result.getEntryAfterTime(1000L).getKey(), 2730L); + assertEquals(result.getEntryAfterTime(1000L).getValue().getWords(), "way. 
The latest from PogNews."); + } + + @Test + public void testParse() { + String type = "application/srr"; + Transcript result; + + result = TranscriptParser.parse(srtStr, type); + // There isn't a segment at 800L, so go backwards and get the segment at 0L + assertEquals(result.getSegmentAtTime(800L).getWords(), "Promoting your podcast in a new"); + + result = TranscriptParser.parse(null, type); + assertEquals(result, null); + + // blank string + String blankStr = ""; + result = TranscriptParser.parse(blankStr, type); + assertNull(result); + + // All empty lines + String allNewlinesStr = "\r\n\r\n\r\n\r\n"; + result = TranscriptParser.parse(allNewlinesStr, type); + assertEquals(result, null); + + // first segment has invalid time formatting, so the entire segment will be thrown out + String srtStrBad1 = "00:0000,000 --> 00:00:02,730\n" + + "John Doe: Promoting your podcast in a new\n\n" + + "2\n" + + "00:00:02,730 --> 00:00:04,600\n" + + "way. The latest from PogNews."; + result = TranscriptParser.parse(srtStrBad1, type); + assertEquals(result.getSegmentAtTime(2730L).getWords(), "way. The latest from PogNews."); + + // first segment has invalid time in end time, 2nd segment has invalid time in both start time and end time + String srtStrBad2 = "00:00:00,000 --> 00:0002,730\n" + + "Jane Doe: Promoting your podcast in a new\n\n" + + "2\n" + + "badstarttime --> badendtime\n" + + "way. The latest from PogNews.\n" + + "badstarttime -->\n" + + "Jane Doe says something\n" + + "00:00:00,000 --> 00:00:02,730\n" + + "Jane Doe:"; + result = TranscriptParser.parse(srtStrBad2, type); + assertNull(result); + + // Just plain text + String strBad3 = "John Doe: Promoting your podcast in a new\n\n" + + "way. 
The latest from PogNews."; + result = TranscriptParser.parse(strBad3, type); + assertNull(result); + + // passing the wrong type + type = "application/json"; + result = TranscriptParser.parse(srtStr, type); + assertEquals(result, null); + + type = "unknown"; + result = TranscriptParser.parse(srtStr, type); + assertEquals(result, null); + } +} + diff --git a/settings.gradle b/settings.gradle index 8cf8baf3e..3b3df7ba8 100644 --- a/settings.gradle +++ b/settings.gradle @@ -30,6 +30,7 @@ include ':net:sync:service' include ':parser:feed' include ':parser:media' +include ':parser:transcript' include ':playback:base' include ':playback:cast' From e856a9f11813d181581a6935ea925e0c419696cd Mon Sep 17 00:00:00 2001 From: Tony Tam <149837+tonytamsf@users.noreply.github.com> Date: Sat, 18 May 2024 09:54:14 -0700 Subject: [PATCH 4/4] Display transcript text and follow along the audio (#7103) --- app/build.gradle | 2 + .../ui/episodeslist/FeedItemMenuHandler.java | 2 + .../ui/screen/playback/TranscriptAdapter.java | 145 ++++++++++++ .../playback/TranscriptDialogFragment.java | 220 ++++++++++++++++++ .../playback/audio/AudioPlayerFragment.java | 6 +- .../ui/transcript/TranscriptViewholder.java | 23 ++ app/src/main/res/layout/transcript_dialog.xml | 35 +++ app/src/main/res/layout/transcript_item.xml | 33 +++ app/src/main/res/menu/mediaplayer.xml | 6 + .../antennapod/model/feed/FeedItem.java | 13 +- .../antennapod/model/feed/FeedMedia.java | 22 ++ .../antennapod/model/feed/Transcript.java | 50 ++-- net/download/service/build.gradle | 1 + .../episode/MediaDownloadedHandler.java | 9 +- .../parser/feed/namespace/PodcastIndex.java | 2 +- .../feed/element/namespace/RssParserTest.java | 4 +- .../transcript/JsonTranscriptParser.java | 53 ++++- .../transcript/SrtTranscriptParser.java | 57 +++-- .../parser/transcript/TranscriptParser.java | 3 +- .../transcript/JsonTranscriptParserTest.java | 14 +- .../transcript/SrtTranscriptParserTest.java | 15 +- settings.gradle | 1 + 
.../storage/database/PodDBAdapter.java | 4 +- .../database/mapper/FeedItemCursor.java | 8 +- ui/chapters/build.gradle | 2 + .../chapters/PodcastIndexTranscriptUtils.java | 55 ----- .../src/main/res/drawable/transcript.xml | 9 + ui/i18n/src/main/res/values/strings.xml | 4 + ui/transcript/build.gradle | 20 ++ .../ui/transcript/TranscriptUtils.java | 111 +++++++++ 30 files changed, 800 insertions(+), 129 deletions(-) create mode 100644 app/src/main/java/de/danoeh/antennapod/ui/screen/playback/TranscriptAdapter.java create mode 100644 app/src/main/java/de/danoeh/antennapod/ui/screen/playback/TranscriptDialogFragment.java create mode 100644 app/src/main/java/de/danoeh/antennapod/ui/transcript/TranscriptViewholder.java create mode 100644 app/src/main/res/layout/transcript_dialog.xml create mode 100644 app/src/main/res/layout/transcript_item.xml delete mode 100644 ui/chapters/src/main/java/de/danoeh/antennapod/ui/chapters/PodcastIndexTranscriptUtils.java create mode 100644 ui/common/src/main/res/drawable/transcript.xml create mode 100644 ui/transcript/build.gradle create mode 100644 ui/transcript/src/main/java/de/danoeh/antennapod/ui/transcript/TranscriptUtils.java diff --git a/app/build.gradle b/app/build.gradle index a91891861..bfcfae8f4 100644 --- a/app/build.gradle +++ b/app/build.gradle @@ -69,6 +69,7 @@ dependencies { implementation project(':net:ssl') implementation project(':net:sync:service') implementation project(':parser:feed') + implementation project(':parser:transcript') implementation project(':playback:base') implementation project(':playback:cast') implementation project(':storage:database') @@ -88,6 +89,7 @@ dependencies { implementation project(':net:sync:service-interface') implementation project(':playback:service') implementation project(':ui:chapters') + implementation project(':ui:transcript') annotationProcessor "androidx.annotation:annotation:$annotationVersion" implementation "androidx.appcompat:appcompat:$appcompatVersion" diff --git 
a/app/src/main/java/de/danoeh/antennapod/ui/episodeslist/FeedItemMenuHandler.java b/app/src/main/java/de/danoeh/antennapod/ui/episodeslist/FeedItemMenuHandler.java index 720e0ccbc..b03a23d10 100644 --- a/app/src/main/java/de/danoeh/antennapod/ui/episodeslist/FeedItemMenuHandler.java +++ b/app/src/main/java/de/danoeh/antennapod/ui/episodeslist/FeedItemMenuHandler.java @@ -61,6 +61,7 @@ public class FeedItemMenuHandler { final boolean fileDownloaded = hasMedia && selectedItem.getMedia().fileExists(); final boolean isLocalFile = hasMedia && selectedItem.getFeed().isLocalFeed(); final boolean isFavorite = selectedItem.isTagged(FeedItem.TAG_FAVORITE); + final boolean hasTranscript = selectedItem.hasTranscript(); setItemVisibility(menu, R.id.skip_episode_item, isPlaying); setItemVisibility(menu, R.id.remove_from_queue_item, isInQueue); @@ -85,6 +86,7 @@ public class FeedItemMenuHandler { setItemVisibility(menu, R.id.add_to_favorites_item, !isFavorite); setItemVisibility(menu, R.id.remove_from_favorites_item, isFavorite); setItemVisibility(menu, R.id.remove_item, fileDownloaded || isLocalFile); + setItemVisibility(menu, R.id.transcript_item, hasTranscript); if (selectedItem.getFeed().getState() != Feed.STATE_SUBSCRIBED) { setItemVisibility(menu, R.id.mark_read_item, false); diff --git a/app/src/main/java/de/danoeh/antennapod/ui/screen/playback/TranscriptAdapter.java b/app/src/main/java/de/danoeh/antennapod/ui/screen/playback/TranscriptAdapter.java new file mode 100644 index 000000000..ffb3b7bda --- /dev/null +++ b/app/src/main/java/de/danoeh/antennapod/ui/screen/playback/TranscriptAdapter.java @@ -0,0 +1,145 @@ +package de.danoeh.antennapod.ui.screen.playback; + +import android.content.Context; +import android.view.LayoutInflater; +import android.view.View; +import android.view.ViewGroup; +import androidx.annotation.NonNull; +import androidx.core.content.ContextCompat; +import androidx.recyclerview.widget.RecyclerView; +import 
com.google.android.material.elevation.SurfaceColors; +import de.danoeh.antennapod.databinding.TranscriptItemBinding; +import de.danoeh.antennapod.event.playback.PlaybackPositionEvent; +import de.danoeh.antennapod.model.feed.FeedMedia; +import de.danoeh.antennapod.model.feed.TranscriptSegment; +import de.danoeh.antennapod.model.playback.Playable; +import de.danoeh.antennapod.ui.common.Converter; +import de.danoeh.antennapod.ui.transcript.TranscriptViewholder; +import java.util.Set; +import org.greenrobot.eventbus.EventBus; +import org.greenrobot.eventbus.Subscribe; +import org.greenrobot.eventbus.ThreadMode; +import org.jsoup.internal.StringUtil; + +public class TranscriptAdapter extends RecyclerView.Adapter { + + public String tag = "TranscriptAdapter"; + private final SegmentClickListener segmentClickListener; + private final Context context; + private FeedMedia media; + private int prevHighlightPosition = -1; + private int highlightPosition = -1; + + public TranscriptAdapter(Context context, SegmentClickListener segmentClickListener) { + this.context = context; + this.segmentClickListener = segmentClickListener; + } + + @NonNull + @Override + public TranscriptViewholder onCreateViewHolder(@NonNull ViewGroup viewGroup, int viewType) { + return new TranscriptViewholder(TranscriptItemBinding.inflate(LayoutInflater.from(context), viewGroup, false)); + } + + public void setMedia(Playable media) { + if (!(media instanceof FeedMedia)) { + return; + } + this.media = (FeedMedia) media; + notifyDataSetChanged(); + } + + @Override + public void onBindViewHolder(@NonNull TranscriptViewholder holder, int position) { + if (media == null || media.getTranscript() == null) { + return; + } + + TranscriptSegment seg = media.getTranscript().getSegmentAt(position); + holder.viewContent.setOnClickListener(v -> { + if (segmentClickListener != null) { + segmentClickListener.onTranscriptClicked(position, seg); + } + }); + + String timecode = Converter.getDurationStringLong((int) 
seg.getStartTime()); + if (!StringUtil.isBlank(seg.getSpeaker())) { + if (position > 0 && media.getTranscript() + .getSegmentAt(position - 1).getSpeaker().equals(seg.getSpeaker())) { + holder.viewTimecode.setVisibility(View.GONE); + holder.viewContent.setText(seg.getWords()); + } else { + holder.viewTimecode.setVisibility(View.VISIBLE); + holder.viewTimecode.setText(timecode + " • " + seg.getSpeaker()); + holder.viewContent.setText(seg.getWords()); + } + } else { + Set speakers = media.getTranscript().getSpeakers(); + if (speakers.isEmpty() && (position % 5 == 0)) { + holder.viewTimecode.setVisibility(View.VISIBLE); + holder.viewTimecode.setText(timecode); + } + holder.viewContent.setText(seg.getWords()); + } + + if (position == highlightPosition) { + float density = context.getResources().getDisplayMetrics().density; + holder.viewContent.setBackgroundColor(SurfaceColors.getColorForElevation(context, 32 * density)); + holder.viewContent.setAlpha(1.0f); + holder.viewTimecode.setAlpha(1.0f); + holder.viewContent.setAlpha(1.0f); + } else { + holder.viewContent.setBackgroundColor(ContextCompat.getColor(context, android.R.color.transparent)); + holder.viewContent.setAlpha(0.5f); + holder.viewTimecode.setAlpha(0.5f); + } + } + + @Subscribe(threadMode = ThreadMode.MAIN) + public void onEventMainThread(PlaybackPositionEvent event) { + if (media == null || media.getTranscript() == null) { + return; + } + int index = media.getTranscript().findSegmentIndexBefore(event.getPosition()); + if (index < 0 || index > media.getTranscript().getSegmentCount()) { + return; + } + if (prevHighlightPosition != highlightPosition) { + prevHighlightPosition = highlightPosition; + } + if (index != highlightPosition) { + highlightPosition = index; + notifyItemChanged(prevHighlightPosition); + notifyItemChanged(highlightPosition); + } + } + + @Override + public int getItemCount() { + if (media == null) { + return 0; + } + + if (media.getTranscript() == null) { + return 0; + } + return 
media.getTranscript().getSegmentCount(); + } + + @Override + public void onAttachedToRecyclerView(@NonNull RecyclerView recyclerView) { + super.onAttachedToRecyclerView(recyclerView); + EventBus.getDefault().register(this); + } + + @Override + public void onDetachedFromRecyclerView(@NonNull RecyclerView recyclerView) { + super.onDetachedFromRecyclerView(recyclerView); + EventBus.getDefault().unregister(this); + } + + + public interface SegmentClickListener { + void onTranscriptClicked(int position, TranscriptSegment seg); + } +} \ No newline at end of file diff --git a/app/src/main/java/de/danoeh/antennapod/ui/screen/playback/TranscriptDialogFragment.java b/app/src/main/java/de/danoeh/antennapod/ui/screen/playback/TranscriptDialogFragment.java new file mode 100644 index 000000000..c0c492d10 --- /dev/null +++ b/app/src/main/java/de/danoeh/antennapod/ui/screen/playback/TranscriptDialogFragment.java @@ -0,0 +1,220 @@ +package de.danoeh.antennapod.ui.screen.playback; + +import android.app.Dialog; +import android.content.DialogInterface; +import android.os.Bundle; +import android.text.TextUtils; +import android.util.DisplayMetrics; +import android.util.Log; +import android.view.View; +import android.view.ViewGroup; +import android.view.WindowManager; +import android.widget.Toast; +import androidx.annotation.NonNull; +import androidx.annotation.Nullable; +import androidx.appcompat.app.AlertDialog; +import androidx.fragment.app.DialogFragment; +import androidx.recyclerview.widget.LinearLayoutManager; +import androidx.recyclerview.widget.LinearSmoothScroller; +import androidx.recyclerview.widget.RecyclerView; +import com.google.android.material.dialog.MaterialAlertDialogBuilder; +import de.danoeh.antennapod.R; +import de.danoeh.antennapod.databinding.TranscriptDialogBinding; +import de.danoeh.antennapod.event.playback.PlaybackPositionEvent; +import de.danoeh.antennapod.model.feed.FeedMedia; +import de.danoeh.antennapod.model.feed.Transcript; +import 
de.danoeh.antennapod.model.feed.TranscriptSegment; +import de.danoeh.antennapod.model.playback.Playable; +import de.danoeh.antennapod.playback.service.PlaybackController; +import de.danoeh.antennapod.ui.transcript.TranscriptUtils; +import io.reactivex.Maybe; +import io.reactivex.android.schedulers.AndroidSchedulers; +import io.reactivex.disposables.Disposable; +import io.reactivex.schedulers.Schedulers; +import org.greenrobot.eventbus.EventBus; +import org.greenrobot.eventbus.Subscribe; +import org.greenrobot.eventbus.ThreadMode; + +public class TranscriptDialogFragment extends DialogFragment { + public static final String TAG = "TranscriptFragment"; + private TranscriptDialogBinding viewBinding; + private PlaybackController controller; + private Disposable disposable; + private Playable media; + private Transcript transcript; + private TranscriptAdapter adapter = null; + private boolean doInitialScroll = true; + private LinearLayoutManager layoutManager; + + @Override + public void onResume() { + ViewGroup.LayoutParams params; + params = getDialog().getWindow().getAttributes(); + params.width = WindowManager.LayoutParams.MATCH_PARENT; + getDialog().getWindow().setAttributes((WindowManager.LayoutParams) params); + super.onResume(); + } + + @NonNull + @Override + public Dialog onCreateDialog(@Nullable Bundle savedInstanceState) { + viewBinding = TranscriptDialogBinding.inflate(getLayoutInflater()); + layoutManager = new LinearLayoutManager(getContext()); + viewBinding.transcriptList.setLayoutManager(layoutManager); + + adapter = new TranscriptAdapter(getContext(), this::transcriptClicked); + viewBinding.transcriptList.setAdapter(adapter); + viewBinding.transcriptList.addOnScrollListener(new RecyclerView.OnScrollListener() { + @Override + public void onScrollStateChanged(@NonNull RecyclerView recyclerView, int newState) { + super.onScrollStateChanged(recyclerView, newState); + if (newState == RecyclerView.SCROLL_STATE_DRAGGING) { + 
viewBinding.followAudioCheckbox.setChecked(false); + } + } + }); + + AlertDialog dialog = new MaterialAlertDialogBuilder(requireContext()) + .setView(viewBinding.getRoot()) + .setPositiveButton(getString(R.string.close_label), null) + .setNegativeButton(getString(R.string.refresh_label), null) + .setTitle(R.string.transcript) + .create(); + viewBinding.followAudioCheckbox.setChecked(true); + dialog.setOnShowListener(dialog1 -> { + dialog.getButton(DialogInterface.BUTTON_NEGATIVE).setOnClickListener(v -> { + viewBinding.progLoading.setVisibility(View.VISIBLE); + v.setClickable(false); + v.setEnabled(false); + loadMediaInfo(true); + }); + }); + viewBinding.progLoading.setVisibility(View.VISIBLE); + doInitialScroll = true; + + + return dialog; + } + + private void transcriptClicked(int pos, TranscriptSegment segment) { + long startTime = segment.getStartTime(); + long endTime = segment.getEndTime(); + + scrollToPosition(pos); + if (!(controller.getPosition() >= startTime && controller.getPosition() <= endTime)) { + controller.seekTo((int) startTime); + } else { + controller.playPause(); + } + adapter.notifyItemChanged(pos); + viewBinding.followAudioCheckbox.setChecked(true); + } + + @Override + public void onStart() { + super.onStart(); + controller = new PlaybackController(getActivity()) { + @Override + public void loadMediaInfo() { + TranscriptDialogFragment.this.loadMediaInfo(false); + } + }; + controller.init(); + EventBus.getDefault().register(this); + loadMediaInfo(false); + } + + @Subscribe(threadMode = ThreadMode.MAIN) + public void onEventMainThread(PlaybackPositionEvent event) { + if (transcript == null) { + return; + } + int pos = transcript.findSegmentIndexBefore(event.getPosition()); + scrollToPosition(pos); + } + + private void loadMediaInfo(boolean forceRefresh) { + if (disposable != null) { + disposable.dispose(); + } + disposable = Maybe.create(emitter -> { + Playable media = controller.getMedia(); + if (media instanceof FeedMedia) { + this.media = 
media; + + transcript = TranscriptUtils.loadTranscript((FeedMedia) this.media, forceRefresh); + doInitialScroll = true; + ((FeedMedia) this.media).setTranscript(transcript); + emitter.onSuccess(this.media); + } else { + emitter.onComplete(); + } + }) + .subscribeOn(Schedulers.io()) + .observeOn(AndroidSchedulers.mainThread()) + .subscribe(media -> onMediaChanged((Playable) media), + error -> Log.e(TAG, Log.getStackTraceString(error))); + } + + private void onMediaChanged(Playable media) { + if (!(media instanceof FeedMedia)) { + return; + } + this.media = media; + + if (!((FeedMedia) media).hasTranscript()) { + dismiss(); + Toast.makeText(getContext(), R.string.no_transcript_label, Toast.LENGTH_LONG).show(); + return; + } + + viewBinding.progLoading.setVisibility(View.GONE); + adapter.setMedia(media); + ((AlertDialog) getDialog()).getButton(DialogInterface.BUTTON_NEGATIVE).setVisibility(View.INVISIBLE); + if (!TextUtils.isEmpty(((FeedMedia) media).getItem().getTranscriptUrl())) { + ((AlertDialog) getDialog()).getButton(DialogInterface.BUTTON_NEGATIVE).setVisibility(View.VISIBLE); + ((AlertDialog) getDialog()).getButton(DialogInterface.BUTTON_NEGATIVE).setEnabled(true); + ((AlertDialog) getDialog()).getButton(DialogInterface.BUTTON_NEGATIVE).setClickable(true); + } + } + + public void scrollToPosition(int pos) { + if (pos <= 0) { + return; + } + if (!viewBinding.followAudioCheckbox.isChecked() && !doInitialScroll) { + return; + } + doInitialScroll = false; + + boolean quickScroll = Math.abs(layoutManager.findFirstVisibleItemPosition() - pos) > 5; + if (quickScroll) { + viewBinding.transcriptList.scrollToPosition(pos - 1); + // Additionally, smooth scroll, so that currently active segment is on top of screen + } + LinearSmoothScroller smoothScroller = new LinearSmoothScroller(getContext()) { + @Override + protected int getVerticalSnapPreference() { + return LinearSmoothScroller.SNAP_TO_START; + } + + protected float calculateSpeedPerPixel(DisplayMetrics 
displayMetrics) { + return (quickScroll ? 200 : 1000) / (float) displayMetrics.densityDpi; + } + }; + smoothScroller.setTargetPosition(pos - 1); + layoutManager.startSmoothScroll(smoothScroller); + } + + @Override + public void onStop() { + super.onStop(); + if (disposable != null) { + disposable.dispose(); + } + controller.release(); + controller = null; + EventBus.getDefault().unregister(this); + } + +} \ No newline at end of file diff --git a/app/src/main/java/de/danoeh/antennapod/ui/screen/playback/audio/AudioPlayerFragment.java b/app/src/main/java/de/danoeh/antennapod/ui/screen/playback/audio/AudioPlayerFragment.java index 7aa9da503..634f1ecfe 100644 --- a/app/src/main/java/de/danoeh/antennapod/ui/screen/playback/audio/AudioPlayerFragment.java +++ b/app/src/main/java/de/danoeh/antennapod/ui/screen/playback/audio/AudioPlayerFragment.java @@ -35,6 +35,7 @@ import de.danoeh.antennapod.ui.episodes.TimeSpeedConverter; import de.danoeh.antennapod.ui.screen.playback.MediaPlayerErrorDialog; import de.danoeh.antennapod.ui.screen.playback.PlayButton; import de.danoeh.antennapod.ui.screen.playback.SleepTimerDialog; +import de.danoeh.antennapod.ui.screen.playback.TranscriptDialogFragment; import de.danoeh.antennapod.ui.screen.playback.VariableSpeedDialog; import org.greenrobot.eventbus.EventBus; import org.greenrobot.eventbus.Subscribe; @@ -163,7 +164,6 @@ public class AudioPlayerFragment extends Fragment implements } private void setChapterDividers(Playable media) { - if (media == null) { return; } @@ -497,6 +497,10 @@ public class AudioPlayerFragment extends Fragment implements if (itemId == R.id.disable_sleeptimer_item || itemId == R.id.set_sleeptimer_item) { new SleepTimerDialog().show(getChildFragmentManager(), "SleepTimerDialog"); return true; + } else if (itemId == R.id.transcript_item) { + new TranscriptDialogFragment().show( + getActivity().getSupportFragmentManager(), TranscriptDialogFragment.TAG); + return true; } else if (itemId == R.id.open_feed_item) { if 
(feedItem != null) { openFeed(feedItem.getFeed()); diff --git a/app/src/main/java/de/danoeh/antennapod/ui/transcript/TranscriptViewholder.java b/app/src/main/java/de/danoeh/antennapod/ui/transcript/TranscriptViewholder.java new file mode 100644 index 000000000..4e5e5f865 --- /dev/null +++ b/app/src/main/java/de/danoeh/antennapod/ui/transcript/TranscriptViewholder.java @@ -0,0 +1,23 @@ +package de.danoeh.antennapod.ui.transcript; + +import android.widget.TextView; + +import androidx.recyclerview.widget.RecyclerView; + +import de.danoeh.antennapod.databinding.TranscriptItemBinding; + +public class TranscriptViewholder extends RecyclerView.ViewHolder { + public final TextView viewTimecode; + public final TextView viewContent; + + public TranscriptViewholder(TranscriptItemBinding binding) { + super(binding.getRoot()); + viewTimecode = binding.speaker; + viewContent = binding.content; + } + + @Override + public String toString() { + return super.toString() + " '" + viewContent.getText() + "'"; + } +} \ No newline at end of file diff --git a/app/src/main/res/layout/transcript_dialog.xml b/app/src/main/res/layout/transcript_dialog.xml new file mode 100644 index 000000000..22ac9aa4f --- /dev/null +++ b/app/src/main/res/layout/transcript_dialog.xml @@ -0,0 +1,35 @@ + + + + + + + + + + diff --git a/app/src/main/res/layout/transcript_item.xml b/app/src/main/res/layout/transcript_item.xml new file mode 100644 index 000000000..548ae7574 --- /dev/null +++ b/app/src/main/res/layout/transcript_item.xml @@ -0,0 +1,33 @@ + + + + + + + + diff --git a/app/src/main/res/menu/mediaplayer.xml b/app/src/main/res/menu/mediaplayer.xml index a99151ac8..85eef565b 100644 --- a/app/src/main/res/menu/mediaplayer.xml +++ b/app/src/main/res/menu/mediaplayer.xml @@ -2,6 +2,12 @@ + + segmentsMap = new TreeMap<>(); + private Set speakers; + private final ArrayList segments = new ArrayList<>(); public void addSegment(TranscriptSegment segment) { - segmentsMap.put(segment.getStartTime(), segment); + if 
((!segments.isEmpty() && segments.get(segments.size() - 1).getStartTime() >= segment.getStartTime())) { + throw new IllegalArgumentException("Segments must be added in sorted order"); + } + segments.add(segment); + } + + public int findSegmentIndexBefore(long time) { + int a = 0; + int b = segments.size() - 1; + while (a < b) { + int pivot = (a + b + 1) / 2; + if (segments.get(pivot).getStartTime() > time) { + b = pivot - 1; + } else { + a = pivot; + } + } + return a; + } + + public TranscriptSegment getSegmentAt(int index) { + return segments.get(index); } public TranscriptSegment getSegmentAtTime(long time) { - if (segmentsMap.floorEntry(time) == null) { - return null; - } - return segmentsMap.floorEntry(time).getValue(); + return getSegmentAt(findSegmentIndexBefore(time)); + } + + public Set getSpeakers() { + return speakers; + } + + public void setSpeakers(Set speakers) { + this.speakers = speakers; } public int getSegmentCount() { - return segmentsMap.size(); - } - - public Map.Entry getEntryAfterTime(long time) { - return segmentsMap.ceilingEntry(time); + return segments.size(); } } diff --git a/net/download/service/build.gradle b/net/download/service/build.gradle index cebffc75c..4aee1acd3 100644 --- a/net/download/service/build.gradle +++ b/net/download/service/build.gradle @@ -22,6 +22,7 @@ dependencies { implementation project(':storage:preferences') implementation project(':ui:app-start-intent') implementation project(':ui:chapters') + implementation project(':ui:transcript') annotationProcessor "androidx.annotation:annotation:$annotationVersion" implementation "androidx.core:core:$coreVersion" diff --git a/net/download/service/src/main/java/de/danoeh/antennapod/net/download/service/episode/MediaDownloadedHandler.java b/net/download/service/src/main/java/de/danoeh/antennapod/net/download/service/episode/MediaDownloadedHandler.java index b856d1b67..a10a35037 100644 --- 
a/net/download/service/src/main/java/de/danoeh/antennapod/net/download/service/episode/MediaDownloadedHandler.java +++ b/net/download/service/src/main/java/de/danoeh/antennapod/net/download/service/episode/MediaDownloadedHandler.java @@ -10,7 +10,6 @@ import de.danoeh.antennapod.model.MediaMetadataRetrieverCompat; import de.danoeh.antennapod.model.feed.Feed; import de.danoeh.antennapod.net.sync.serviceinterface.SynchronizationQueueSink; import de.danoeh.antennapod.ui.chapters.ChapterUtils; -import de.danoeh.antennapod.ui.chapters.PodcastIndexTranscriptUtils; import org.apache.commons.lang3.StringUtils; import org.greenrobot.eventbus.EventBus; @@ -27,6 +26,7 @@ import de.danoeh.antennapod.model.download.DownloadError; import de.danoeh.antennapod.model.feed.FeedItem; import de.danoeh.antennapod.model.feed.FeedMedia; import de.danoeh.antennapod.net.sync.serviceinterface.EpisodeAction; +import de.danoeh.antennapod.ui.transcript.TranscriptUtils; /** * Handles a completed media download. @@ -67,12 +67,11 @@ public class MediaDownloadedHandler implements Runnable { ChapterUtils.loadChaptersFromUrl(media.getItem().getPodcastIndexChapterUrl(), false); } FeedItem item = media.getItem(); - if (item != null && item.getPodcastIndexTranscriptUrl() != null) { - String transcript = PodcastIndexTranscriptUtils.loadTranscriptFromUrl( - item.getPodcastIndexTranscriptType(), item.getPodcastIndexTranscriptUrl(), false); + if (item != null && item.getTranscriptUrl() != null) { + String transcript = TranscriptUtils.loadTranscriptFromUrl(item.getTranscriptUrl(), true); if (!StringUtils.isEmpty(transcript)) { item.setPodcastIndexTranscriptText(transcript); - PodcastIndexTranscriptUtils.storeTranscript(media, transcript); + TranscriptUtils.storeTranscript(media, transcript); } } } catch (InterruptedIOException ignore) { diff --git a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/PodcastIndex.java 
b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/PodcastIndex.java index dd11fb4e7..8b49831fe 100644 --- a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/PodcastIndex.java +++ b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/PodcastIndex.java @@ -34,7 +34,7 @@ public class PodcastIndex extends Namespace { String href = attributes.getValue(URL); String type = attributes.getValue(TYPE); if (!TextUtils.isEmpty(href) && !TextUtils.isEmpty(type)) { - state.getCurrentItem().setPodcastIndexTranscriptUrl(type, href); + state.getCurrentItem().setTranscriptUrl(type, href); } } return new SyndElement(localName, this); diff --git a/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/namespace/RssParserTest.java b/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/namespace/RssParserTest.java index 983766f12..027e843a8 100644 --- a/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/namespace/RssParserTest.java +++ b/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/namespace/RssParserTest.java @@ -101,8 +101,8 @@ public class RssParserTest { public void testPodcastIndexTranscript() throws Exception { File feedFile = FeedParserTestHelper.getFeedFile("feed-rss-testPodcastIndexTranscript.xml"); Feed feed = FeedParserTestHelper.runFeedParser(feedFile); - assertEquals("https://podnews.net/audio/podnews231011.mp3.json", feed.getItems().get(0).getPodcastIndexTranscriptUrl()); - assertEquals("application/json", feed.getItems().get(0).getPodcastIndexTranscriptType()); + assertEquals("https://podnews.net/audio/podnews231011.mp3.json", feed.getItems().get(0).getTranscriptUrl()); + assertEquals("application/json", feed.getItems().get(0).getTranscriptType()); } @Test diff --git a/parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/JsonTranscriptParser.java 
b/parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/JsonTranscriptParser.java index 78f3bf9c8..38c24f09b 100644 --- a/parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/JsonTranscriptParser.java +++ b/parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/JsonTranscriptParser.java @@ -2,9 +2,13 @@ package de.danoeh.antennapod.parser.transcript; import org.apache.commons.lang3.StringUtils; import org.json.JSONArray; +import org.json.JSONException; import org.json.JSONObject; import org.jsoup.internal.StringUtil; +import java.util.HashSet; +import java.util.Set; + import de.danoeh.antennapod.model.feed.Transcript; import de.danoeh.antennapod.model.feed.TranscriptSegment; @@ -15,14 +19,25 @@ public class JsonTranscriptParser { long startTime = -1L; long endTime = -1L; long segmentStartTime = -1L; + long segmentEndTime = -1L; long duration = 0L; String speaker = ""; + String prevSpeaker = ""; String segmentBody = ""; - JSONObject obj = new JSONObject(jsonStr); - JSONArray objSegments = obj.getJSONArray("segments"); + JSONArray objSegments; + Set speakers = new HashSet<>(); + + try { + JSONObject obj = new JSONObject(jsonStr); + objSegments = obj.getJSONArray("segments"); + } catch (JSONException e) { + e.printStackTrace(); + return null; + } for (int i = 0; i < objSegments.length(); i++) { JSONObject jsonObject = objSegments.getJSONObject(i); + segmentEndTime = endTime; startTime = Double.valueOf(jsonObject.optDouble("startTime", -1) * 1000L).longValue(); endTime = Double.valueOf(jsonObject.optDouble("endTime", -1) * 1000L).longValue(); if (startTime < 0 || endTime < 0) { @@ -33,11 +48,40 @@ public class JsonTranscriptParser { } duration += endTime - startTime; + prevSpeaker = speaker; speaker = jsonObject.optString("speaker"); + speakers.add(speaker); + if (StringUtils.isEmpty(speaker) && StringUtils.isNotEmpty(prevSpeaker)) { + speaker = prevSpeaker; + } String body = jsonObject.optString("body"); - segmentBody 
+= body + " "; + if (!prevSpeaker.equals(speaker)) { + if (StringUtils.isNotEmpty(segmentBody)) { + segmentBody = StringUtils.trim(segmentBody); + transcript.addSegment(new TranscriptSegment(segmentStartTime, + segmentEndTime, + segmentBody, + prevSpeaker)); + segmentStartTime = startTime; + segmentBody = body.toString(); + duration = 0L; + continue; + } + } + + segmentBody += " " + body; if (duration >= TranscriptParser.MIN_SPAN) { + // Look ahead and make sure the next segment does not start with an alphanumeric character + if ((i + 1) < objSegments.length()) { + String nextSegmentFirstChar = objSegments.getJSONObject(i + 1) + .optString("body") + .substring(0, 1); + if (!StringUtils.isAlphanumeric(nextSegmentFirstChar) + && (duration < TranscriptParser.MAX_SPAN)) { + continue; + } + } segmentBody = StringUtils.trim(segmentBody); transcript.addSegment(new TranscriptSegment(segmentStartTime, endTime, segmentBody, speaker)); duration = 0L; @@ -52,12 +96,13 @@ public class JsonTranscriptParser { } if (transcript.getSegmentCount() > 0) { + transcript.setSpeakers(speakers); return transcript; } else { return null; } - } catch (org.json.JSONException e) { + } catch (JSONException e) { e.printStackTrace(); } return null; diff --git a/parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/SrtTranscriptParser.java b/parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/SrtTranscriptParser.java index 098dadd99..5d80a7265 100644 --- a/parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/SrtTranscriptParser.java +++ b/parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/SrtTranscriptParser.java @@ -4,8 +4,10 @@ import org.apache.commons.lang3.StringUtils; import org.jsoup.internal.StringUtil; import java.util.Arrays; +import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -25,21 +27,26 @@ public 
class SrtTranscriptParser { List lines = Arrays.asList(str.split("\n")); Iterator iter = lines.iterator(); String speaker = ""; - StringBuilder body = new StringBuilder(); + String prevSpeaker = ""; + StringBuilder body; String line; String segmentBody = ""; long startTimecode = -1L; long spanStartTimecode = -1L; + long spanEndTimecode = -1L; long endTimecode = -1L; long duration = 0L; + Set speakers = new HashSet<>(); while (iter.hasNext()) { + body = new StringBuilder(); line = iter.next(); if (line.isEmpty()) { continue; } + spanEndTimecode = endTimecode; if (line.contains("-->")) { String[] timecodes = line.split("-->"); if (timecodes.length < 2) { @@ -65,45 +72,53 @@ public class SrtTranscriptParser { } while (iter.hasNext()); } - if (body.indexOf(":") != -1) { - String [] parts = body.toString().trim().split(":"); + if (body.indexOf(": ") != -1) { + String[] parts = body.toString().trim().split(":"); if (parts.length < 2) { continue; } + prevSpeaker = speaker; speaker = parts[0]; + speakers.add(speaker); body = new StringBuilder(parts[1].strip()); - } - if (!StringUtil.isBlank(body.toString())) { - segmentBody += " " + body; - segmentBody = StringUtils.trim(segmentBody); - if (duration >= TranscriptParser.MIN_SPAN && endTimecode > spanStartTimecode) { - transcript.addSegment(new TranscriptSegment(spanStartTimecode, - endTimecode, - segmentBody, - speaker)); - duration = 0L; - spanStartTimecode = -1L; - segmentBody = ""; + if (StringUtils.isNotEmpty(prevSpeaker) && !StringUtils.equals(speaker, prevSpeaker)) { + if (StringUtils.isNotEmpty(segmentBody)) { + transcript.addSegment(new TranscriptSegment(spanStartTimecode, + spanEndTimecode, segmentBody, prevSpeaker)); + duration = 0L; + spanStartTimecode = startTimecode; + segmentBody = body.toString(); + continue; + } } - body = new StringBuilder(); + } else { + if (StringUtils.isNotEmpty(prevSpeaker) && StringUtils.isEmpty(speaker)) { + speaker = prevSpeaker; + } + } + + segmentBody += " " + body; + segmentBody = 
StringUtils.trim(segmentBody); + if (duration >= TranscriptParser.MIN_SPAN && endTimecode > spanStartTimecode) { + transcript.addSegment(new TranscriptSegment(spanStartTimecode, endTimecode, segmentBody, speaker)); + duration = 0L; + spanStartTimecode = -1L; + segmentBody = ""; } } if (!StringUtil.isBlank(segmentBody) && endTimecode > spanStartTimecode) { segmentBody = StringUtils.trim(segmentBody); - transcript.addSegment(new TranscriptSegment(spanStartTimecode, - endTimecode, - segmentBody, - speaker)); + transcript.addSegment(new TranscriptSegment(spanStartTimecode, endTimecode, segmentBody, speaker)); } if (transcript.getSegmentCount() > 0) { + transcript.setSpeakers(speakers); return transcript; } else { return null; } } - // Time format 00:00:00,000 static long parseTimecode(String timecode) { Matcher matcher = TIMECODE_PATTERN.matcher(timecode); if (!matcher.matches()) { diff --git a/parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/TranscriptParser.java b/parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/TranscriptParser.java index 0a4025d96..4049b2c8f 100644 --- a/parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/TranscriptParser.java +++ b/parser/transcript/src/main/java/de/danoeh/antennapod/parser/transcript/TranscriptParser.java @@ -5,7 +5,8 @@ import org.apache.commons.lang3.StringUtils; import de.danoeh.antennapod.model.feed.Transcript; public class TranscriptParser { - static final long MIN_SPAN = 1000L; // merge short segments together to form a span of 1 second + static final long MIN_SPAN = 5000L; // Merge short segments together to form a span of 5 seconds + static final long MAX_SPAN = 8000L; // Don't go beyond 8 seconds when merging public static Transcript parse(String str, String type) { if (str == null || StringUtils.isBlank(str)) { diff --git a/parser/transcript/src/test/java/de/danoeh/antennapod/parser/transcript/JsonTranscriptParserTest.java 
b/parser/transcript/src/test/java/de/danoeh/antennapod/parser/transcript/JsonTranscriptParserTest.java index 48996f492..b795d88d7 100644 --- a/parser/transcript/src/test/java/de/danoeh/antennapod/parser/transcript/JsonTranscriptParserTest.java +++ b/parser/transcript/src/test/java/de/danoeh/antennapod/parser/transcript/JsonTranscriptParserTest.java @@ -14,20 +14,20 @@ public class JsonTranscriptParserTest { + "'segments': [ " + "{ 'speaker' : 'John Doe', 'startTime': 0.8, 'endTime': 1.9, 'body': 'And' }," + "{ 'speaker' : 'Sally Green', 'startTime': 1.91, 'endTime': 2.8, 'body': 'this merges' }," - + "{ 'startTime': 2.9, 'endTime': 3.4, 'body': 'the' }," - + "{ 'startTime': 3.5, 'endTime': 3.6, 'body': 'person' }]}"; + + "{ 'startTime': 2.9, 'endTime': 3.4, 'body': ' the' }," + + "{ 'startTime': 3.5, 'endTime': 3.6, 'body': ' person' }]}"; @Test public void testParseJson() { Transcript result = JsonTranscriptParser.parse(jsonStr); - - assertEquals(result.getSegmentAtTime(0L), null); + // TODO: for gaps in the transcript (ads, music) should we return null? 
+ assertEquals(result.getSegmentAtTime(0L).getStartTime(), 800L); assertEquals(result.getSegmentAtTime(800L).getSpeaker(), "John Doe"); assertEquals(result.getSegmentAtTime(800L).getStartTime(), 800L); assertEquals(result.getSegmentAtTime(800L).getEndTime(), 1900L); - assertEquals(1910L, (long) result.getEntryAfterTime(1800L).getKey()); - // 2 segments get merged into at least 1 second - assertEquals("this merges the", result.getEntryAfterTime(1800L).getValue().getWords()); + assertEquals(result.getSegmentAtTime(1800L).getStartTime(), 800L); + // 2 segments get merged into at least 5 second + assertEquals(result.getSegmentAtTime(1800L).getWords(), "And"); } @Test diff --git a/parser/transcript/src/test/java/de/danoeh/antennapod/parser/transcript/SrtTranscriptParserTest.java b/parser/transcript/src/test/java/de/danoeh/antennapod/parser/transcript/SrtTranscriptParserTest.java index f7854c5bf..e72ea8ebc 100644 --- a/parser/transcript/src/test/java/de/danoeh/antennapod/parser/transcript/SrtTranscriptParserTest.java +++ b/parser/transcript/src/test/java/de/danoeh/antennapod/parser/transcript/SrtTranscriptParserTest.java @@ -10,13 +10,13 @@ import de.danoeh.antennapod.model.feed.Transcript; @RunWith(RobolectricTestRunner.class) public class SrtTranscriptParserTest { private static String srtStr = "1\n" - + "00:00:00,000 --> 00:00:02,730\n" + + "00:00:00,000 --> 00:00:50,730\n" + "John Doe: Promoting your podcast in a new\n\n" + "2\n" - + "00:00:02,730 --> 00:00:04,600\n" + + "00:00:90,740 --> 00:00:91,600\n" + "way. The latest from PogNews.\n\n" - + "00:00:04,730 --> 00:00:05,600\n" - + "way. 
The latest from PogNews."; + + "00:00:91,730 --> 00:00:93,600\n" + + "We bring your favorite podcast."; @Test public void testParseSrt() { @@ -25,9 +25,10 @@ public class SrtTranscriptParserTest { assertEquals(result.getSegmentAtTime(0L).getWords(), "Promoting your podcast in a new"); assertEquals(result.getSegmentAtTime(0L).getSpeaker(), "John Doe"); assertEquals(result.getSegmentAtTime(0L).getStartTime(), 0L); - assertEquals(result.getSegmentAtTime(0L).getEndTime(), 2730L); - assertEquals((long) result.getEntryAfterTime(1000L).getKey(), 2730L); - assertEquals(result.getEntryAfterTime(1000L).getValue().getWords(), "way. The latest from PogNews."); + assertEquals(result.getSegmentAtTime(0L).getEndTime(), 50730L); + assertEquals(result.getSegmentAtTime(90740).getStartTime(), 90740); + assertEquals("way. The latest from PogNews. We bring your favorite podcast.", + result.getSegmentAtTime(90740).getWords()); } @Test diff --git a/settings.gradle b/settings.gradle index 3b3df7ba8..9f1d16fa4 100644 --- a/settings.gradle +++ b/settings.gradle @@ -52,3 +52,4 @@ include ':ui:notifications' include ':ui:preferences' include ':ui:statistics' include ':ui:widget' +include ':ui:transcript' diff --git a/storage/database/src/main/java/de/danoeh/antennapod/storage/database/PodDBAdapter.java b/storage/database/src/main/java/de/danoeh/antennapod/storage/database/PodDBAdapter.java index c7204b567..0b8d7da43 100644 --- a/storage/database/src/main/java/de/danoeh/antennapod/storage/database/PodDBAdapter.java +++ b/storage/database/src/main/java/de/danoeh/antennapod/storage/database/PodDBAdapter.java @@ -682,8 +682,8 @@ public class PodDBAdapter { values.put(KEY_PODCASTINDEX_CHAPTER_URL, item.getPodcastIndexChapterUrl()); // We only store one transcript url, we prefer JSON if it exists - String type = item.getPodcastIndexTranscriptType(); - String url = item.getPodcastIndexTranscriptUrl(); + String type = item.getTranscriptType(); + String url = item.getTranscriptUrl(); if (url != null) 
{ values.put(KEY_PODCASTINDEX_TRANSCRIPT_TYPE, type); values.put(KEY_PODCASTINDEX_TRANSCRIPT_URL, url); diff --git a/storage/database/src/main/java/de/danoeh/antennapod/storage/database/mapper/FeedItemCursor.java b/storage/database/src/main/java/de/danoeh/antennapod/storage/database/mapper/FeedItemCursor.java index 41e4d474a..e771133ff 100644 --- a/storage/database/src/main/java/de/danoeh/antennapod/storage/database/mapper/FeedItemCursor.java +++ b/storage/database/src/main/java/de/danoeh/antennapod/storage/database/mapper/FeedItemCursor.java @@ -26,8 +26,8 @@ public class FeedItemCursor extends CursorWrapper { private final int indexImageUrl; private final int indexPodcastIndexChapterUrl; private final int indexMediaId; - private final int indexPodcastIndexTranscriptUrl; private final int indexPodcastIndexTranscriptType; + private final int indexPodcastIndexTranscriptUrl; public FeedItemCursor(Cursor cursor) { super(new FeedMediaCursor(cursor)); @@ -45,8 +45,8 @@ public class FeedItemCursor extends CursorWrapper { indexImageUrl = cursor.getColumnIndexOrThrow(PodDBAdapter.KEY_IMAGE_URL); indexPodcastIndexChapterUrl = cursor.getColumnIndexOrThrow(PodDBAdapter.KEY_PODCASTINDEX_CHAPTER_URL); indexMediaId = cursor.getColumnIndexOrThrow(PodDBAdapter.SELECT_KEY_MEDIA_ID); - indexPodcastIndexTranscriptUrl = cursor.getColumnIndexOrThrow(PodDBAdapter.KEY_PODCASTINDEX_TRANSCRIPT_URL); indexPodcastIndexTranscriptType = cursor.getColumnIndexOrThrow(PodDBAdapter.KEY_PODCASTINDEX_TRANSCRIPT_TYPE); + indexPodcastIndexTranscriptUrl = cursor.getColumnIndexOrThrow(PodDBAdapter.KEY_PODCASTINDEX_TRANSCRIPT_URL); } /** @@ -67,8 +67,8 @@ public class FeedItemCursor extends CursorWrapper { getString(indexItemIdentifier), getLong(indexAutoDownload) > 0, getString(indexPodcastIndexChapterUrl), - getString(indexPodcastIndexTranscriptUrl), - getString(indexPodcastIndexTranscriptType)); + getString(indexPodcastIndexTranscriptType), + getString(indexPodcastIndexTranscriptUrl)); if 
(!isNull(indexMediaId)) { item.setMedia(feedMediaCursor.getFeedMedia()); } diff --git a/ui/chapters/build.gradle b/ui/chapters/build.gradle index a3cb1b677..a38802780 100644 --- a/ui/chapters/build.gradle +++ b/ui/chapters/build.gradle @@ -13,9 +13,11 @@ dependencies { implementation project(':net:common') implementation project(':parser:media') implementation project(':parser:feed') + implementation project(':parser:transcript') implementation project(':storage:database') annotationProcessor "androidx.annotation:annotation:$annotationVersion" implementation "commons-io:commons-io:$commonsioVersion" + implementation "org.apache.commons:commons-lang3:$commonslangVersion" implementation "com.squareup.okhttp3:okhttp:$okhttpVersion" } diff --git a/ui/chapters/src/main/java/de/danoeh/antennapod/ui/chapters/PodcastIndexTranscriptUtils.java b/ui/chapters/src/main/java/de/danoeh/antennapod/ui/chapters/PodcastIndexTranscriptUtils.java deleted file mode 100644 index 9add3db23..000000000 --- a/ui/chapters/src/main/java/de/danoeh/antennapod/ui/chapters/PodcastIndexTranscriptUtils.java +++ /dev/null @@ -1,55 +0,0 @@ -package de.danoeh.antennapod.ui.chapters; - -import android.util.Log; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.nio.charset.Charset; - -import de.danoeh.antennapod.model.feed.FeedMedia; -import de.danoeh.antennapod.net.common.AntennapodHttpClient; -import okhttp3.Request; -import okhttp3.Response; -import org.apache.commons.io.IOUtils; - -public class PodcastIndexTranscriptUtils { - - private static final String TAG = "PodcastIndexTranscript"; - - public static String loadTranscriptFromUrl(String type, String url, boolean forceRefresh) { - StringBuilder str = new StringBuilder(); - Response response = null; - try { - Log.d(TAG, "Downloading transcript URL " + url.toString()); - Request request = new Request.Builder().url(url).build(); - response = 
AntennapodHttpClient.getHttpClient().newCall(request).execute(); - if (response.isSuccessful() && response.body() != null) { - str.append(response.body().string()); - } - } catch (IOException e) { - e.printStackTrace(); - } finally { - if (response != null) { - response.close(); - } - } - return str.toString(); - } - - public static void storeTranscript(FeedMedia media, String transcript) { - File transcriptFile = new File(media.getTranscriptFileUrl()); - FileOutputStream ostream = null; - try { - if (!transcriptFile.exists() && transcriptFile.createNewFile()) { - ostream = new FileOutputStream(transcriptFile); - ostream.write(transcript.getBytes(Charset.forName("UTF-8"))); - ostream.close(); - } - } catch (IOException e) { - e.printStackTrace(); - } finally { - IOUtils.closeQuietly(ostream); - } - } -} diff --git a/ui/common/src/main/res/drawable/transcript.xml b/ui/common/src/main/res/drawable/transcript.xml new file mode 100644 index 000000000..435df4d7c --- /dev/null +++ b/ui/common/src/main/res/drawable/transcript.xml @@ -0,0 +1,9 @@ + + + diff --git a/ui/i18n/src/main/res/values/strings.xml b/ui/i18n/src/main/res/values/strings.xml index f5ac3c306..4f3a18f6d 100644 --- a/ui/i18n/src/main/res/values/strings.xml +++ b/ui/i18n/src/main/res/values/strings.xml @@ -258,6 +258,10 @@ %d episodes removed from inbox. 
Add to favorites + Show transcript + Transcript + Follow audio + No transcript Remove from favorites Visit website Skip episode diff --git a/ui/transcript/build.gradle b/ui/transcript/build.gradle new file mode 100644 index 000000000..c82639fa7 --- /dev/null +++ b/ui/transcript/build.gradle @@ -0,0 +1,20 @@ +plugins { + id("com.android.library") +} +apply from: "../../common.gradle" +apply from: "../../playFlavor.gradle" + +android { + namespace "de.danoeh.antennapod.ui.transcript" +} + +dependencies { + implementation project(':model') + implementation project(':net:common') + implementation project(':parser:media') + implementation project(':parser:transcript') + + implementation "commons-io:commons-io:$commonsioVersion" + implementation "org.apache.commons:commons-lang3:$commonslangVersion" + implementation "com.squareup.okhttp3:okhttp:$okhttpVersion" +} diff --git a/ui/transcript/src/main/java/de/danoeh/antennapod/ui/transcript/TranscriptUtils.java b/ui/transcript/src/main/java/de/danoeh/antennapod/ui/transcript/TranscriptUtils.java new file mode 100644 index 000000000..6f784457e --- /dev/null +++ b/ui/transcript/src/main/java/de/danoeh/antennapod/ui/transcript/TranscriptUtils.java @@ -0,0 +1,111 @@ +package de.danoeh.antennapod.ui.transcript; + +import android.util.Log; +import org.apache.commons.io.FileUtils; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InterruptedIOException; +import java.nio.charset.Charset; +import de.danoeh.antennapod.model.feed.FeedMedia; +import de.danoeh.antennapod.net.common.AntennapodHttpClient; +import de.danoeh.antennapod.model.feed.Transcript; +import de.danoeh.antennapod.parser.transcript.TranscriptParser; +import okhttp3.CacheControl; +import okhttp3.Request; +import okhttp3.Response; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; + +public class TranscriptUtils { + private static final String TAG = "Transcript"; + + public static 
String loadTranscriptFromUrl(String url, boolean forceRefresh) throws InterruptedIOException { + if (forceRefresh) { + return loadTranscriptFromUrl(url, CacheControl.FORCE_NETWORK); + } + String str = loadTranscriptFromUrl(url, CacheControl.FORCE_CACHE); + if (str == null || str.length() <= 1) { + // Some publishers serve a dummy transcript before the actual transcript is available, so retry over the network when the cached copy is missing or at most one character long + return loadTranscriptFromUrl(url, CacheControl.FORCE_NETWORK); + } + return str; + } + + private static String loadTranscriptFromUrl(String url, CacheControl cacheControl) throws InterruptedIOException { + StringBuilder str = new StringBuilder(); + Response response = null; + + try { + Log.d(TAG, "Downloading transcript URL " + url); + Request request = new Request.Builder().url(url).cacheControl(cacheControl).build(); + response = AntennapodHttpClient.getHttpClient().newCall(request).execute(); + if (response.isSuccessful() && response.body() != null) { + Log.d(TAG, "Done Downloading transcript URL " + url); + str.append(response.body().string()); + } else { + Log.d(TAG, "Error Downloading transcript URL " + url + ": " + response.message()); + } + } catch (InterruptedIOException e) { + Log.d(TAG, "InterruptedIOException while downloading transcript URL " + url); + throw e; + } catch (Exception e) { + e.printStackTrace(); + return null; + } finally { + if (response != null) { + response.close(); + } + } + return str.toString(); + } + + public static Transcript loadTranscript(FeedMedia media, Boolean forceRefresh) throws InterruptedIOException { + String transcriptType = media.getItem().getTranscriptType(); + + if (!forceRefresh && media.getItem().getTranscript() != null) { + return media.getTranscript(); + } + + if (!forceRefresh && media.getTranscriptFileUrl() != null) { + File transcriptFile = new File(media.getTranscriptFileUrl()); + try { + if (transcriptFile.exists()) { + String t = FileUtils.readFileToString(transcriptFile, (String) null); + if (StringUtils.isNotEmpty(t)) { + 
media.setTranscript(TranscriptParser.parse(t, transcriptType)); + return media.getTranscript(); + } + } + } catch (IOException e) { + e.printStackTrace(); + } + } + + String transcriptUrl = media.getItem().getTranscriptUrl(); + String t = TranscriptUtils.loadTranscriptFromUrl(transcriptUrl, forceRefresh); + if (StringUtils.isNotEmpty(t)) { + return TranscriptParser.parse(t, transcriptType); + } + return null; + } + + public static void storeTranscript(FeedMedia media, String transcript) { + File transcriptFile = new File(media.getTranscriptFileUrl()); + FileOutputStream ostream = null; + try { + if (transcriptFile.exists() && !transcriptFile.delete()) { + Log.e(TAG, "Failed to delete existing transcript file " + transcriptFile.getAbsolutePath()); + } + if (transcriptFile.createNewFile()) { + ostream = new FileOutputStream(transcriptFile); + ostream.write(transcript.getBytes(Charset.forName("UTF-8"))); + ostream.close(); + } + } catch (IOException e) { + e.printStackTrace(); + } finally { + IOUtils.closeQuietly(ostream); + } + } +}