From cfd21bc4751e48e0c27539012bed42aca8322a0d Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Fri, 9 Oct 2020 15:01:40 +0200 Subject: [PATCH] [fix] fix duckduckgo engine - remove paging support: a "vqd" parameter is required between each request. This parameter is unique for each request - update the URL (no redirect), use the POST method - language support: works if there is no more than one request per minute, otherwise it is ignored! --- searx/engines/duckduckgo.py | 43 +++++++++++-------------------------- 1 file changed, 13 insertions(+), 30 deletions(-) diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 4a42fe7f..a5397232 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -21,7 +21,7 @@ from searx.utils import extract_text, match_language, eval_xpath # engine dependent config categories = ['general'] -paging = True +paging = False language_support = True supported_languages_url = 'https://duckduckgo.com/util/u172.js' time_range_support = True @@ -37,9 +37,7 @@ language_aliases = { } # search-url -url = 'https://duckduckgo.com/html?{query}&s={offset}&dc={dc_param}' -time_range_url = '&df={range}' - +url = 'https://html.duckduckgo.com/html' time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm'} @@ -65,36 +63,21 @@ def get_region_code(lang, lang_list=[]): def request(query, params): - if params['time_range'] not in (None, 'None', '') and params['time_range'] not in time_range_dict: + if params['time_range'] is not None and params['time_range'] not in time_range_dict: return params - offset = (params['pageno'] - 1) * 30 + params['url'] = url + params['method'] = 'POST' + params['data']['b'] = '' + params['data']['q'] = query + params['data']['df'] = '' region_code = get_region_code(params['language'], supported_languages) - params['url'] = 'https://duckduckgo.com/html/' - if params['pageno'] > 1: - params['method'] = 'POST' - params['data']['q'] = query - params['data']['s'] = offset - params['data']['dc'] 
= 30 - params['data']['nextParams'] = '' - params['data']['v'] = 'l' - params['data']['o'] = 'json' - params['data']['api'] = '/d.js' - if params['time_range'] in time_range_dict: - params['data']['df'] = time_range_dict[params['time_range']] - if region_code: - params['data']['kl'] = region_code - else: - if region_code: - params['url'] = url.format( - query=urlencode({'q': query, 'kl': region_code}), offset=offset, dc_param=offset) - else: - params['url'] = url.format( - query=urlencode({'q': query}), offset=offset, dc_param=offset) - - if params['time_range'] in time_range_dict: - params['url'] += time_range_url.format(range=time_range_dict[params['time_range']]) + if region_code: + params['data']['kl'] = region_code + params['cookies']['kl'] = region_code + if params['time_range'] in time_range_dict: + params['data']['df'] = time_range_dict[params['time_range']] return params