Remove local RSS header image manual parsing

This commit is contained in:
Shinokuni 2020-10-06 21:43:09 +02:00
parent fb2e1de03b
commit 52fda0f8d8
2 changed files with 4 additions and 70 deletions

View File

@ -14,7 +14,6 @@ import com.readrops.api.utils.LibUtils;
import com.readrops.api.utils.ParseException;
import com.readrops.api.utils.UnknownFormatException;
import com.readrops.app.utils.FeedInsertionResult;
import com.readrops.app.utils.HtmlParser;
import com.readrops.app.utils.ParsingResult;
import com.readrops.app.utils.SharedPreferencesManager;
import com.readrops.app.utils.Utils;
@ -174,29 +173,13 @@ public class LocalFeedRepository extends ARepository<Void> {
if (!database.itemDao().itemExists(dbItem.getGuid(), feed.getAccountId())) {
if (dbItem.getDescription() != null) {
dbItem.setCleanDescription(Jsoup.parse(dbItem.getDescription()).text());
if (dbItem.getImageLink() == null) {
String imageUrl = HtmlParser.getDescImageLink(dbItem.getDescription(), feed.getSiteUrl());
if (imageUrl != null)
dbItem.setImageLink(imageUrl);
}
}
// Check imageLink a second time: it may already have been set earlier from the media:content tag value.
if (dbItem.getImageLink() != null) {
if (dbItem.getContent() != null) {
// removing cover image in content if found in description
dbItem.setContent(HtmlParser.deleteCoverImage(dbItem.getContent()));
} else if (dbItem.getDescription() != null)
dbItem.setDescription(HtmlParser.deleteCoverImage(dbItem.getDescription()));
}
if (dbItem.getContent() != null)
dbItem.setReadTime(Utils.readTimeFromString(Jsoup.parse(dbItem.getContent()).text()));
else if (dbItem.getDescription() != null)
if (dbItem.getContent() != null) {
dbItem.setReadTime(Utils.readTimeFromString(dbItem.getContent()));
} else if (dbItem.getDescription() != null) {
dbItem.setReadTime(Utils.readTimeFromString(dbItem.getCleanDescription()));
}
itemsToInsert.add(dbItem);
}

View File

@ -13,11 +13,9 @@ import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import okhttp3.Request;
import okhttp3.Response;
@ -26,8 +24,6 @@ public final class HtmlParser {
// Simple class name of HtmlParser; presumably used as the logging tag (no usage visible here).
private static final String TAG = HtmlParser.class.getSimpleName();
// Matches text that begins with an <img ...> tag, optionally preceded by an opening <p> or <div ...> tag.
public static final String COVER_IMAGE_REGEX = "^(<p>|(<div.*>))?<img.*>";
/**
* Parse the html page to get all rss urls
*
@ -68,27 +64,6 @@ public final class HtmlParser {
type.equals(LibUtils.RSS_APPLICATION_CONTENT_TYPE);
}
/**
 * Looks up the Open Graph image declared by a web page.
 * Warning: this method is slow.
 *
 * @param url url to request
 * @return the {@code content} attribute value of the first {@code meta[property=og:image]}
 *         element found in the page head, or null when there is no such element
 * @throws IOException declared by this method; presumably raised while retrieving the page head
 */
public static String getOGImageLink(String url) throws IOException {
    Document headDocument = Jsoup.parse(getHTMLHeadFromUrl(url));
    Element ogImageMeta = headDocument.select("meta[property=og:image]").first();

    // No og:image meta tag: nothing to report.
    if (ogImageMeta == null) {
        return null;
    }

    return ogImageMeta.attributes().get("content");
}
@Nullable
public static String getFaviconLink(@NonNull String url) {
String favUrl = null;
@ -134,28 +109,4 @@ public final class HtmlParser {
}
}
/**
 * Finds the first image referenced in an item description.
 *
 * @param description html description of the item
 * @param url base url handed to the parser so that a relative {@code src} can be made absolute
 * @return the absolute url of the first {@code img} element, or null when the description
 *         contains no image or when its {@code src} is missing or cannot be made absolute
 */
public static String getDescImageLink(String description, String url) {
    Element firstImage = Jsoup.parse(description, url).select("img").first();
    if (firstImage == null) {
        return null;
    }

    // absUrl() yields an empty string (never null) when src is absent or cannot be resolved;
    // map that to null so callers that only test for null do not store an empty image link.
    String imageUrl = firstImage.absUrl("src");
    return imageUrl.isEmpty() ? null : imageUrl;
}
/** Compiled form of {@link #COVER_IMAGE_REGEX}, built once instead of on every call. */
private static final Pattern COVER_IMAGE_PATTERN = Pattern.compile(COVER_IMAGE_REGEX);

/**
 * Removes the first image of an html fragment when the fragment starts with a cover image.
 *
 * The fragment is parsed and the html of the resulting body is tested against
 * {@link #COVER_IMAGE_REGEX}. When it matches, the first {@code img} element is removed.
 *
 * @param content html content to inspect
 * @return the string form of the whole parsed document (not only the body) with its first
 *         image removed when the cover image pattern matched, otherwise {@code content} unchanged
 */
public static String deleteCoverImage(String content) {
    Document document = Jsoup.parse(content);

    // No leading cover image: hand back the original string untouched.
    if (!COVER_IMAGE_PATTERN.matcher(document.body().html()).find()) {
        return content;
    }

    Element coverImage = document.select("img").first();
    if (coverImage != null) {
        coverImage.remove();
    }

    return document.toString();
}
}