restructured search engine

This commit is contained in:
Christian Schabesberger 2016-02-29 15:59:06 +01:00
parent 737a41f45b
commit d7e75e6011
7 changed files with 261 additions and 132 deletions

View File

@ -17,6 +17,7 @@ import java.io.IOException;
import java.util.List; import java.util.List;
import org.schabi.newpipe.extractor.ExtractionException; import org.schabi.newpipe.extractor.ExtractionException;
import org.schabi.newpipe.extractor.SearchResult;
import org.schabi.newpipe.extractor.StreamPreviewInfo; import org.schabi.newpipe.extractor.StreamPreviewInfo;
import org.schabi.newpipe.extractor.SearchEngine; import org.schabi.newpipe.extractor.SearchEngine;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
@ -67,9 +68,9 @@ public class VideoItemListFragment extends ListFragment {
private boolean loadingNextPage = true; private boolean loadingNextPage = true;
private class ResultRunnable implements Runnable { private class ResultRunnable implements Runnable {
private final SearchEngine.Result result; private final SearchResult result;
private final int requestId; private final int requestId;
public ResultRunnable(SearchEngine.Result result, int requestId) { public ResultRunnable(SearchResult result, int requestId) {
this.result = result; this.result = result;
this.requestId = requestId; this.requestId = requestId;
} }
@ -105,8 +106,8 @@ public class VideoItemListFragment extends ListFragment {
String searchLanguageKey = getContext().getString(R.string.search_language_key); String searchLanguageKey = getContext().getString(R.string.search_language_key);
String searchLanguage = sp.getString(searchLanguageKey, String searchLanguage = sp.getString(searchLanguageKey,
getString(R.string.default_language_value)); getString(R.string.default_language_value));
SearchEngine.Result result = engine.search(query, page, searchLanguage, SearchResult result = SearchResult
new Downloader()); .getSearchResult(engine, query, page, searchLanguage, new Downloader());
//Log.i(TAG, "language code passed:\""+searchLanguage+"\""); //Log.i(TAG, "language code passed:\""+searchLanguage+"\"");
if(runs) { if(runs) {
@ -165,12 +166,10 @@ public class VideoItemListFragment extends ListFragment {
this.streamingService = streamingService; this.streamingService = streamingService;
} }
private void updateListOnResult(SearchEngine.Result result, int requestId) { private void updateListOnResult(SearchResult result, int requestId) {
if(requestId == currentRequestId) { if(requestId == currentRequestId) {
setListShown(true); setListShown(true);
if (result.resultList.isEmpty()) { if (!result.resultList.isEmpty()) {
Toast.makeText(getActivity(), result.errorMessage, Toast.LENGTH_LONG).show();
} else {
if (!result.suggestion.isEmpty()) { if (!result.suggestion.isEmpty()) {
Toast.makeText(getActivity(), getString(R.string.did_you_mean) + result.suggestion + " ?", Toast.makeText(getActivity(), getString(R.string.did_you_mean) + result.suggestion + " ?",
Toast.LENGTH_LONG).show(); Toast.LENGTH_LONG).show();

View File

@ -27,16 +27,10 @@ import java.util.Vector;
@SuppressWarnings("ALL") @SuppressWarnings("ALL")
public interface SearchEngine { public interface SearchEngine {
class Result {
public String errorMessage = "";
public String suggestion = "";
public final List<StreamPreviewInfo> resultList = new Vector<>();
}
ArrayList<String> suggestionList(String query,String contentCountry, Downloader dl) ArrayList<String> suggestionList(String query,String contentCountry, Downloader dl)
throws ExtractionException, IOException; throws ExtractionException, IOException;
//Result search(String query, int page); //Result search(String query, int page);
Result search(String query, int page, String contentCountry, Downloader dl) StreamPreviewInfoCollector search(String query, int page, String contentCountry, Downloader dl)
throws ExtractionException, IOException; throws ExtractionException, IOException;
} }

View File

@ -0,0 +1,41 @@
package org.schabi.newpipe.extractor;
import java.io.IOException;
import java.util.List;
import java.util.Vector;
/**
* Created by Christian Schabesberger on 29.02.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* SearchResult.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
/**
 * Outcome of a single search request: the collected stream previews, an
 * optional spelling suggestion and the non-fatal errors gathered along the way.
 */
public class SearchResult {
    /**
     * Runs a search on the given engine and returns the result it collected.
     *
     * Exceptions of the declared types are passed through unchanged so that
     * callers can still tell a download failure ({@link IOException}) or a
     * specific {@link ExtractionException} subclass apart. Anything else
     * (for example a runtime exception raised while parsing) is wrapped in an
     * {@link ExtractionException} with the original exception as its cause.
     *
     * @param engine       the search engine to query
     * @param query        the search term
     * @param page         the result page to request
     * @param languageCode passed to the engine as its content country/language argument
     * @param dl           the downloader handed to the engine
     * @return the search result collected by the engine
     * @throws ExtractionException if extraction fails, or if an unexpected exception occurs
     * @throws IOException         if the engine reports an I/O failure
     */
    public static SearchResult getSearchResult(SearchEngine engine, String query,
                                               int page, String languageCode, Downloader dl)
            throws ExtractionException, IOException {
        try {
            return engine.search(query, page, languageCode, dl).getSearchResult();
        } catch (ExtractionException e) {
            // Already the declared type; rewrapping would hide the concrete subclass.
            throw e;
        } catch (IOException e) {
            // Declared in the signature; let callers see the I/O failure as such.
            throw e;
        } catch (Exception e) {
            throw new ExtractionException("Could not get any search result", e);
        }
    }

    // Spelling suggestion reported by the engine; empty when there is none.
    public String suggestion = "";
    // Stream previews that were committed successfully.
    public final List<StreamPreviewInfo> resultList = new Vector<>();
    // Exceptions recorded while collecting results (see StreamPreviewInfoCollector.addError).
    public List<Exception> errors = new Vector<>();
}

View File

@ -1,9 +1,5 @@
package org.schabi.newpipe.extractor; package org.schabi.newpipe.extractor;
import android.graphics.Bitmap;
import android.os.Parcel;
import android.os.Parcelable;
/** /**
* Created by Christian Schabesberger on 26.08.15. * Created by Christian Schabesberger on 26.08.15.
* *

View File

@ -0,0 +1,91 @@
package org.schabi.newpipe.extractor;
import org.schabi.newpipe.extractor.services.youtube.YoutubeStreamUrlIdHandler;
/**
* Created by Christian Schabesberger on 28.02.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* StreamPreviewInfoCollector.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
/**
 * Accumulates stream previews, a search suggestion and extraction errors into
 * a single {@link SearchResult}.
 */
public class StreamPreviewInfoCollector {
    SearchResult result = new SearchResult();
    StreamUrlIdHandler urlIdHandler = null;

    /**
     * @param handler used to derive each item's id from its web page url
     */
    public StreamPreviewInfoCollector(StreamUrlIdHandler handler) {
        urlIdHandler = handler;
    }

    /** Stores the suggestion on the result being built. */
    public void setSuggestion(String suggestion) {
        result.suggestion = suggestion;
    }

    /** Records an error on the result being built. */
    public void addError(Exception e) {
        result.errors.add(e);
    }

    /** Returns the result built so far. */
    public SearchResult getSearchResult() {
        return result;
    }

    /**
     * Reads one preview item from the extractor and appends it to the result list.
     *
     * The web page url, id and title are mandatory: if any of them cannot be
     * obtained the item is dropped and the exception is recorded via
     * {@link #addError(Exception)}. Duration, uploader, upload date, view count
     * and thumbnail url are optional: a failure is recorded, the field keeps
     * its default value and the item is still added.
     *
     * @param extractor source of the item's fields
     * @throws ParsingException declared for callers; exceptions raised inside
     *                          this method are caught and recorded as errors
     */
    public void commit(StreamPreviewInfoExtractor extractor) throws ParsingException {
        try {
            StreamPreviewInfo resultItem = new StreamPreviewInfo();
            // important information
            resultItem.webpage_url = extractor.getWebPageUrl();
            if (urlIdHandler == null) {
                throw new ParsingException("Error: UrlIdHandler not set");
            } else {
                // Use the handler supplied to the constructor rather than a
                // hard-coded service specific one.
                resultItem.id = urlIdHandler.getVideoId(resultItem.webpage_url);
            }
            resultItem.title = extractor.getTitle();

            // optional information
            try {
                resultItem.duration = extractor.getDuration();
            } catch (Exception e) {
                addError(e);
            }
            try {
                resultItem.uploader = extractor.getUploader();
            } catch (Exception e) {
                addError(e);
            }
            try {
                resultItem.upload_date = extractor.getUploadDate();
            } catch (Exception e) {
                addError(e);
            }
            try {
                resultItem.view_count = extractor.getViewCount();
            } catch (Exception e) {
                addError(e);
            }
            try {
                resultItem.thumbnail_url = extractor.getThumbnailUrl();
            } catch (Exception e) {
                addError(e);
            }

            result.resultList.add(resultItem);
        } catch (Exception e) {
            // A mandatory field failed: skip this item but keep the error.
            addError(e);
        }
    }
}

View File

@ -0,0 +1,31 @@
package org.schabi.newpipe.extractor;
/**
* Created by Christian Schabesberger on 28.02.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* StreamPreviewInfoExtractor.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
/**
 * Supplies the individual fields of one stream preview item.
 * StreamPreviewInfoCollector.commit() reads every getter once; it treats the
 * web page url and title as mandatory and the remaining fields as optional.
 */
public interface StreamPreviewInfoExtractor {
/** Url of the item's web page; the collector also derives the item id from it. */
String getWebPageUrl() throws ParsingException;
/** Title of the item. */
String getTitle() throws ParsingException;
/** Duration of the item as text. NOTE(review): the exact format is not defined here - confirm. */
String getDuration() throws ParsingException;
/** Name of the uploader. */
String getUploader() throws ParsingException;
/** Upload date as text. NOTE(review): the exact format is not defined here - confirm. */
String getUploadDate() throws ParsingException;
/** View count of the item. */
long getViewCount() throws ParsingException;
/** Url of the item's thumbnail image. */
String getThumbnailUrl() throws ParsingException;
}

View File

@ -10,7 +10,9 @@ import org.schabi.newpipe.extractor.Downloader;
import org.schabi.newpipe.extractor.Parser; import org.schabi.newpipe.extractor.Parser;
import org.schabi.newpipe.extractor.ParsingException; import org.schabi.newpipe.extractor.ParsingException;
import org.schabi.newpipe.extractor.SearchEngine; import org.schabi.newpipe.extractor.SearchEngine;
import org.schabi.newpipe.extractor.StreamPreviewInfo; import org.schabi.newpipe.extractor.StreamExtractor;
import org.schabi.newpipe.extractor.StreamPreviewInfoCollector;
import org.schabi.newpipe.extractor.StreamPreviewInfoExtractor;
import org.w3c.dom.Node; import org.w3c.dom.Node;
import org.w3c.dom.NodeList; import org.w3c.dom.NodeList;
import org.xml.sax.InputSource; import org.xml.sax.InputSource;
@ -49,9 +51,10 @@ public class YoutubeSearchEngine implements SearchEngine {
private static final String TAG = YoutubeSearchEngine.class.toString(); private static final String TAG = YoutubeSearchEngine.class.toString();
@Override @Override
public Result search(String query, int page, String languageCode, Downloader downloader) public StreamPreviewInfoCollector search(String query, int page, String languageCode, Downloader downloader)
throws IOException, ParsingException { throws IOException, ParsingException {
Result result = new Result(); StreamPreviewInfoCollector collector = new StreamPreviewInfoCollector(
new YoutubeStreamUrlIdHandler());
Uri.Builder builder = new Uri.Builder(); Uri.Builder builder = new Uri.Builder();
builder.scheme("https") builder.scheme("https")
.authority("www.youtube.com") .authority("www.youtube.com")
@ -71,12 +74,11 @@ public class YoutubeSearchEngine implements SearchEngine {
site = downloader.download(url); site = downloader.download(url);
} }
try {
Document doc = Jsoup.parse(site, url); Document doc = Jsoup.parse(site, url);
Element list = doc.select("ol[class=\"item-section\"]").first(); Element list = doc.select("ol[class=\"item-section\"]").first();
for (Element item : list.children()) { for (Element item : list.children()) {
/* First we need to determine which kind of item we are working with. /* First we need to determine which kind of item we are working with.
Youtube depicts five different kinds of items on its search result page. These are Youtube depicts five different kinds of items on its search result page. These are
regular videos, playlists, channels, two types of video suggestions, and a "no video regular videos, playlists, channels, two types of video suggestions, and a "no video
@ -88,62 +90,26 @@ public class YoutubeSearchEngine implements SearchEngine {
playlists now. playlists now.
*/ */
Element el; Element el;
// both types of spell correction item // both types of spell correction item
if (!((el = item.select("div[class*=\"spell-correction\"]").first()) == null)) { if (!((el = item.select("div[class*=\"spell-correction\"]").first()) == null)) {
result.suggestion = el.select("a").first().text(); collector.setSuggestion(el.select("a").first().text());
// search message item // search message item
} else if (!((el = item.select("div[class*=\"search-message\"]").first()) == null)) { } else if (!((el = item.select("div[class*=\"search-message\"]").first()) == null)) {
result.errorMessage = el.text(); //result.errorMessage = el.text();
throw new StreamExtractor.ContentNotAvailableException(el.text());
// video item type // video item type
} else if (!((el = item.select("div[class*=\"yt-lockup-video\"").first()) == null)) { } else if (!((el = item.select("div[class*=\"yt-lockup-video\"").first()) == null)) {
StreamPreviewInfo resultItem = new StreamPreviewInfo(); collector.commit(extractPreviewInfo(el));
} else {
// importand information //noinspection ConstantConditions
resultItem.webpage_url = getWebpageUrl(item); collector.addError(new Exception("unexpected element found:\"" + el + "\""));
resultItem.id = (new YoutubeStreamUrlIdHandler()).getVideoId(resultItem.webpage_url);
resultItem.title = getTitle(item);
// optional iformation
//todo: make this a proper error handling
try {
resultItem.duration = getDuration(item);
} catch (Exception e) {
e.printStackTrace();
}
try {
resultItem.uploader = getUploader(item);
} catch (Exception e) {
e.printStackTrace();
}
try {
resultItem.upload_date = getUploadDate(item);
} catch (Exception e) {
e.printStackTrace();
}
try {
resultItem.view_count = getViewCount(item);
} catch (Exception e) {
e.printStackTrace();
}
try {
resultItem.thumbnail_url = getThumbnailUrl(item);
} catch (Exception e) {
e.printStackTrace();
}
result.resultList.add(resultItem);
} else {
//noinspection ConstantConditions
Log.e(TAG, "unexpected element found:\"" + el + "\"");
}
} }
} catch(Exception e) {
throw new ParsingException(e);
} }
return result;
return collector;
} }
@Override @Override
@ -203,67 +169,78 @@ public class YoutubeSearchEngine implements SearchEngine {
} }
} }
private String getWebpageUrl(Element item) { private StreamPreviewInfoExtractor extractPreviewInfo(final Element item) {
Element el = item.select("div[class*=\"yt-lockup-video\"").first(); return new StreamPreviewInfoExtractor() {
Element dl = el.select("h3").first().select("a").first(); @Override
return dl.attr("abs:href"); public String getWebPageUrl() throws ParsingException {
} Element el = item.select("div[class*=\"yt-lockup-video\"").first();
Element dl = el.select("h3").first().select("a").first();
return dl.attr("abs:href");
}
private String getTitle(Element item) { @Override
Element el = item.select("div[class*=\"yt-lockup-video\"").first(); public String getTitle() throws ParsingException {
Element dl = el.select("h3").first().select("a").first(); Element el = item.select("div[class*=\"yt-lockup-video\"").first();
return dl.text(); Element dl = el.select("h3").first().select("a").first();
} return dl.text();
}
private String getDuration(Element item) { @Override
try { public String getDuration() throws ParsingException {
return item.select("span[class=\"video-time\"]").first().text(); try {
} catch(Exception e) { return item.select("span[class=\"video-time\"]").first().text();
e.printStackTrace(); } catch(Exception e) {
} e.printStackTrace();
return ""; }
} return "";
}
private String getUploader(Element item) { @Override
return item.select("div[class=\"yt-lockup-byline\"]").first() public String getUploader() throws ParsingException {
.select("a").first() return item.select("div[class=\"yt-lockup-byline\"]").first()
.text(); .select("a").first()
} .text();
}
private String getUploadDate(Element item) { @Override
return item.select("div[class=\"yt-lockup-meta\"]").first() public String getUploadDate() throws ParsingException {
.select("li").first() return item.select("div[class=\"yt-lockup-meta\"]").first()
.text(); .select("li").first()
} .text();
}
private long getViewCount(Element item) throws Parser.RegexException{ @Override
String output; public long getViewCount() throws ParsingException {
String input = item.select("div[class=\"yt-lockup-meta\"]").first() String output;
.select("li").get(1) String input = item.select("div[class=\"yt-lockup-meta\"]").first()
.text(); .select("li").get(1)
output = Parser.matchGroup1("([0-9,\\. ]*)", input) .text();
.replace(" ", "") output = Parser.matchGroup1("([0-9,\\. ]*)", input)
.replace(".", "") .replace(" ", "")
.replace(",", ""); .replace(".", "")
.replace(",", "");
if(Long.parseLong(output) == 30) { if(Long.parseLong(output) == 30) {
Log.d(TAG, "bla"); Log.d(TAG, "bla");
} }
return Long.parseLong(output); return Long.parseLong(output);
} }
private String getThumbnailUrl(Element item) { @Override
String url; public String getThumbnailUrl() throws ParsingException {
Element te = item.select("div[class=\"yt-thumb video-thumb\"]").first() String url;
.select("img").first(); Element te = item.select("div[class=\"yt-thumb video-thumb\"]").first()
url = te.attr("abs:src"); .select("img").first();
// Sometimes youtube sends links to gif files which somehow seem to not exist url = te.attr("abs:src");
// anymore. Items with such gif also offer a secondary image source. So we are going // Sometimes youtube sends links to gif files which somehow seem to not exist
// to use that if we've caught such an item. // anymore. Items with such gif also offer a secondary image source. So we are going
if (url.contains(".gif")) { // to use that if we've caught such an item.
url = te.attr("abs:data-thumb"); if (url.contains(".gif")) {
} url = te.attr("abs:data-thumb");
}
return url; return url;
}
};
} }
} }