mirror of https://github.com/searx/searx
add time range search with yahoo
This commit is contained in:
parent
3a9c3fbd68
commit
93c0c49e9a
|
@ -42,7 +42,8 @@ engine_default_args = {'paging': False,
|
||||||
'shortcut': '-',
|
'shortcut': '-',
|
||||||
'disabled': False,
|
'disabled': False,
|
||||||
'suspend_end_time': 0,
|
'suspend_end_time': 0,
|
||||||
'continuous_errors': 0}
|
'continuous_errors': 0,
|
||||||
|
'time_range_support': False}
|
||||||
|
|
||||||
|
|
||||||
def load_module(filename):
|
def load_module(filename):
|
||||||
|
|
|
@ -20,10 +20,12 @@ from searx.engines.xpath import extract_text, extract_url
|
||||||
categories = ['general']
|
categories = ['general']
|
||||||
paging = True
|
paging = True
|
||||||
language_support = True
|
language_support = True
|
||||||
|
time_range_support = True
|
||||||
|
|
||||||
# search-url
|
# search-url
|
||||||
base_url = 'https://search.yahoo.com/'
|
base_url = 'https://search.yahoo.com/'
|
||||||
search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
|
search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
|
||||||
|
search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time'
|
||||||
|
|
||||||
# specific xpath variables
|
# specific xpath variables
|
||||||
results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
|
results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
|
||||||
|
@ -32,6 +34,9 @@ title_xpath = './/h3/a'
|
||||||
content_xpath = './/div[@class="compText aAbs"]'
|
content_xpath = './/div[@class="compText aAbs"]'
|
||||||
suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a"
|
suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a"
|
||||||
|
|
||||||
|
time_range_dict = {'day': ['1d', 'd'],
|
||||||
|
'week': ['1w', 'w'],
|
||||||
|
'month': ['1m', 'm']}
|
||||||
|
|
||||||
# remove yahoo-specific tracking-url
|
# remove yahoo-specific tracking-url
|
||||||
def parse_url(url_string):
|
def parse_url(url_string):
|
||||||
|
@ -51,18 +56,30 @@ def parse_url(url_string):
|
||||||
return unquote(url_string[start:end])
|
return unquote(url_string[start:end])
|
||||||
|
|
||||||
|
|
||||||
|
def _get_url(query, offset, language, time_range):
    """Build the complete Yahoo search URL for one request.

    query      -- search terms; url-encoded here as the ``p`` parameter
    offset     -- value substituted into the ``b`` (result offset) parameter
    language   -- language code substituted into ``vl=lang_<language>``
    time_range -- key of ``time_range_dict`` ('day', 'week' or 'month') to
                  restrict results by age; any other value, including None,
                  yields the unrestricted search URL

    Returns ``base_url`` joined with the formatted search path.
    """
    url_args = {'offset': offset,
                'query': urlencode({'p': query}),
                'lang': language}

    # Checking membership instead of truthiness keeps an unsupported value
    # (for example 'year') from raising KeyError on the lookup below; such
    # requests fall back to the plain search URL.
    if time_range in time_range_dict:
        age, btf = time_range_dict[time_range]
        return base_url + search_url_with_time.format(age=age,
                                                      btf=btf,
                                                      **url_args)

    return base_url + search_url.format(**url_args)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_language(params):
|
||||||
|
if params['language'] == 'all':
|
||||||
|
return 'en'
|
||||||
|
return params['language'].split('_')[0]
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
# do search-request
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
offset = (params['pageno'] - 1) * 10 + 1
|
offset = (params['pageno'] - 1) * 10 + 1
|
||||||
|
language = _get_language(params)
|
||||||
|
|
||||||
if params['language'] == 'all':
|
params['url'] = _get_url(query, offset, language, params['time_range'])
|
||||||
language = 'en'
|
|
||||||
else:
|
|
||||||
language = params['language'].split('_')[0]
|
|
||||||
|
|
||||||
params['url'] = base_url + search_url.format(offset=offset,
|
|
||||||
query=urlencode({'p': query}),
|
|
||||||
lang=language)
|
|
||||||
|
|
||||||
# TODO required?
|
# TODO required?
|
||||||
params['cookies']['sB'] = 'fl=1&vl=lang_{lang}&sh=1&rw=new&v=1'\
|
params['cookies']['sB'] = 'fl=1&vl=lang_{lang}&sh=1&rw=new&v=1'\
|
||||||
|
|
|
@ -138,6 +138,7 @@ class Search(object):
|
||||||
self.paging = False
|
self.paging = False
|
||||||
self.pageno = 1
|
self.pageno = 1
|
||||||
self.lang = 'all'
|
self.lang = 'all'
|
||||||
|
self.time_range = None
|
||||||
|
|
||||||
# set blocked engines
|
# set blocked engines
|
||||||
self.disabled_engines = request.preferences.engines.get_disabled()
|
self.disabled_engines = request.preferences.engines.get_disabled()
|
||||||
|
@ -178,9 +179,9 @@ class Search(object):
|
||||||
if len(query_obj.languages):
|
if len(query_obj.languages):
|
||||||
self.lang = query_obj.languages[-1]
|
self.lang = query_obj.languages[-1]
|
||||||
|
|
||||||
self.engines = query_obj.engines
|
self.time_range = self.request_data.get('time_range')
|
||||||
|
|
||||||
self.categories = []
|
self.engines = query_obj.engines
|
||||||
|
|
||||||
# if engines are calculated from query,
|
# if engines are calculated from query,
|
||||||
# set categories by using that informations
|
# set categories by using that informations
|
||||||
|
@ -279,6 +280,9 @@ class Search(object):
|
||||||
if self.lang != 'all' and not engine.language_support:
|
if self.lang != 'all' and not engine.language_support:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if self.time_range and not engine.time_range_support:
|
||||||
|
continue
|
||||||
|
|
||||||
# set default request parameters
|
# set default request parameters
|
||||||
request_params = default_request_params()
|
request_params = default_request_params()
|
||||||
request_params['headers']['User-Agent'] = user_agent
|
request_params['headers']['User-Agent'] = user_agent
|
||||||
|
@ -293,6 +297,7 @@ class Search(object):
|
||||||
|
|
||||||
# 0 = None, 1 = Moderate, 2 = Strict
|
# 0 = None, 1 = Moderate, 2 = Strict
|
||||||
request_params['safesearch'] = request.preferences.get_value('safesearch')
|
request_params['safesearch'] = request.preferences.get_value('safesearch')
|
||||||
|
request_params['time_range'] = self.time_range
|
||||||
|
|
||||||
# update request parameters dependent on
|
# update request parameters dependent on
|
||||||
# search-engine (contained in engines folder)
|
# search-engine (contained in engines folder)
|
||||||
|
|
|
@ -459,6 +459,7 @@ def index():
|
||||||
paging=search.paging,
|
paging=search.paging,
|
||||||
number_of_results=format_decimal(number_of_results),
|
number_of_results=format_decimal(number_of_results),
|
||||||
pageno=search.pageno,
|
pageno=search.pageno,
|
||||||
|
time_range=search.time_range,
|
||||||
base_url=get_base_url(),
|
base_url=get_base_url(),
|
||||||
suggestions=search.result_container.suggestions,
|
suggestions=search.result_container.suggestions,
|
||||||
answers=search.result_container.answers,
|
answers=search.result_container.answers,
|
||||||
|
|
Loading…
Reference in New Issue