From 52fda0f8d8be57b4515b3f78b34ed9bbcc047886 Mon Sep 17 00:00:00 2001 From: Shinokuni Date: Tue, 6 Oct 2020 21:43:09 +0200 Subject: [PATCH] Remove local RSS header image manual parsing --- .../app/repositories/LocalFeedRepository.java | 25 ++-------- .../com/readrops/app/utils/HtmlParser.java | 49 ------------------- 2 files changed, 4 insertions(+), 70 deletions(-) diff --git a/app/src/main/java/com/readrops/app/repositories/LocalFeedRepository.java b/app/src/main/java/com/readrops/app/repositories/LocalFeedRepository.java index f5159027..f6b05c3b 100644 --- a/app/src/main/java/com/readrops/app/repositories/LocalFeedRepository.java +++ b/app/src/main/java/com/readrops/app/repositories/LocalFeedRepository.java @@ -14,7 +14,6 @@ import com.readrops.api.utils.LibUtils; import com.readrops.api.utils.ParseException; import com.readrops.api.utils.UnknownFormatException; import com.readrops.app.utils.FeedInsertionResult; -import com.readrops.app.utils.HtmlParser; import com.readrops.app.utils.ParsingResult; import com.readrops.app.utils.SharedPreferencesManager; import com.readrops.app.utils.Utils; @@ -174,29 +173,13 @@ public class LocalFeedRepository extends ARepository { if (!database.itemDao().itemExists(dbItem.getGuid(), feed.getAccountId())) { if (dbItem.getDescription() != null) { dbItem.setCleanDescription(Jsoup.parse(dbItem.getDescription()).text()); - - if (dbItem.getImageLink() == null) { - String imageUrl = HtmlParser.getDescImageLink(dbItem.getDescription(), feed.getSiteUrl()); - - if (imageUrl != null) - dbItem.setImageLink(imageUrl); - } } - // we check a second time because imageLink could have been set earlier with media:content tag value - if (dbItem.getImageLink() != null) { - if (dbItem.getContent() != null) { - // removing cover image in content if found in description - dbItem.setContent(HtmlParser.deleteCoverImage(dbItem.getContent())); - - } else if (dbItem.getDescription() != null) - dbItem.setDescription(HtmlParser.deleteCoverImage(dbItem.getDescription())); - } - - if (dbItem.getContent() != null) - dbItem.setReadTime(Utils.readTimeFromString(Jsoup.parse(dbItem.getContent()).text())); - else if (dbItem.getDescription() != null) + if (dbItem.getContent() != null) { + dbItem.setReadTime(Utils.readTimeFromString(dbItem.getContent())); + } else if (dbItem.getDescription() != null) { dbItem.setReadTime(Utils.readTimeFromString(dbItem.getCleanDescription())); + } itemsToInsert.add(dbItem); } diff --git a/app/src/main/java/com/readrops/app/utils/HtmlParser.java b/app/src/main/java/com/readrops/app/utils/HtmlParser.java index 85c7965d..b408a08b 100644 --- a/app/src/main/java/com/readrops/app/utils/HtmlParser.java +++ b/app/src/main/java/com/readrops/app/utils/HtmlParser.java @@ -13,11 +13,9 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; -import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.regex.Pattern; import okhttp3.Request; import okhttp3.Response; @@ -26,8 +24,6 @@ public final class HtmlParser { private static final String TAG = HtmlParser.class.getSimpleName(); - public static final String COVER_IMAGE_REGEX = "^(

|())?"; - /** * Parse the html page to get all rss urls * @@ -68,27 +64,6 @@ public final class HtmlParser { type.equals(LibUtils.RSS_APPLICATION_CONTENT_TYPE); } - /** - * get the feed item image based on open graph metadata. - * Warning, This method is slow. - * - * @param url url to request - * @return the item image - */ - public static String getOGImageLink(String url) throws IOException { - String imageUrl = null; - - String head = getHTMLHeadFromUrl(url); - - Document document = Jsoup.parse(head); - Element element = document.select("meta[property=og:image]").first(); - - if (element != null) - imageUrl = element.attributes().get("content"); - - return imageUrl; - } - @Nullable public static String getFaviconLink(@NonNull String url) { String favUrl = null; @@ -134,28 +109,4 @@ public final class HtmlParser { } } - - public static String getDescImageLink(String description, String url) { - Document document = Jsoup.parse(description, url); - Elements elements = document.select("img"); - - if (!elements.isEmpty()) - return elements.first().absUrl("src"); - else - return null; - } - - public static String deleteCoverImage(String content) { - Document document = Jsoup.parse(content); - - if (Pattern.compile(COVER_IMAGE_REGEX).matcher(document.body().html()).find()) { - Elements elements = document.select("img"); - - if (!elements.isEmpty()) - elements.first().remove(); - - return document.toString(); - } else - return content; - } }