Delete the image from the content or description only if it is a cover image, i.e. an img tag, optionally wrapped in a p or div tag, at the beginning of the string

This commit is contained in:
Shinokuni 2019-02-10 21:36:38 +00:00
parent 1a78feec81
commit c9b3e3f09f
3 changed files with 25 additions and 15 deletions

View File

@ -201,22 +201,26 @@ public class LocalFeedRepository extends ARepository implements QueryCallback {
if (dbItem.getImageLink() == null) {
String imageUrl = HtmlParser.getDescImageLink(dbItem.getDescription(), feed.getSiteUrl());
if (imageUrl != null) {
if (imageUrl != null)
dbItem.setImageLink(imageUrl);
}
}
if (dbItem.getContent() != null) {
// removing cover image in content if found in description
dbItem.setContent(HtmlParser.deleteCoverImage(dbItem.getContent()));
// we check a second time because imageLink could have been set earlier with media:content tag value
if (dbItem.getImageLink() != null) {
if (dbItem.getContent() != null) {
// removing cover image in content if found in description
dbItem.setContent(HtmlParser.deleteCoverImage(dbItem.getContent()));
dbItem.setReadTime(Utils.readTimeFromString(dbItem.getContent()));
} else
dbItem.setReadTime(Utils.readTimeFromString(dbItem.getCleanDescription()));
}
dbItem.setReadTime(Utils.readTimeFromString(Jsoup.parse(dbItem.getContent()).text()));
} else if (dbItem.getDescription() != null) {
dbItem.setDescription(HtmlParser.deleteCoverImage(dbItem.getDescription()));
dbItem.setReadTime(Utils.readTimeFromString(dbItem.getCleanDescription()));
}
}
database.itemDao().insert(dbItem);
Log.d(TAG, "adding " + dbItem.getTitle());
}
}
}

View File

@ -13,11 +13,14 @@ import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
public final class HtmlParser {
private static final String TAG = HtmlParser.class.getSimpleName();
public static final String COVER_IMAGE_REGEX = "^(<p>|(<div.*>))?<img.*>";
/**
* Parse the html page to get all rss urls
* @param url url to request
@ -113,12 +116,15 @@ public final class HtmlParser {
}
/** Pre-compiled form of {@link #COVER_IMAGE_REGEX}, built once instead of on every call. */
private static final Pattern COVER_IMAGE_PATTERN = Pattern.compile(COVER_IMAGE_REGEX);

/**
 * Removes the cover image from an HTML string.
 *
 * <p>The input is only modified when it matches {@link #COVER_IMAGE_REGEX}; in that case
 * the HTML is parsed with Jsoup and the first {@code img} element is removed. Otherwise the
 * input is returned untouched.
 *
 * @param content HTML to inspect; may be {@code null}
 * @return the serialized Jsoup document without its first {@code img} element when a cover
 *         image was detected, otherwise {@code content} itself ({@code null} stays {@code null})
 */
public static String deleteCoverImage(String content) {
    // Guard clause: nothing to strip when there is no input or no leading cover image.
    if (content == null || !COVER_IMAGE_PATTERN.matcher(content).find()) {
        return content;
    }

    Document document = Jsoup.parse(content);
    Elements images = document.select("img");
    if (!images.isEmpty()) {
        images.first().remove();
    }

    // NOTE(review): this returns the whole parsed document, while the no-match branch returns
    // the raw input fragment. Confirm callers are fine with the two shapes.
    return document.toString();
}
}

View File

@ -29,6 +29,6 @@ public class RSSMediaContent {
}
/**
 * Tells whether this media content describes an image.
 *
 * @return {@code true} when {@code medium} is {@code "image"}, {@code "image/jpeg"} or
 *         {@code "image/png"}; {@code false} otherwise, including when {@code medium} is
 *         {@code null}
 */
public boolean isContentAnImage() {
    // Constant-first comparisons: a missing medium yields false instead of a NullPointerException.
    return "image".equals(medium)
            || "image/jpeg".equals(medium)
            || "image/png".equals(medium);
}
}