1
0
mirror of https://github.com/searx/searx synced 2024-12-12 08:46:26 +01:00

[fix] startpage engine - avoid captcha

Startpage has introduced new anti-scraping measures that make SearXNG instances
run into captchas:

1. some arguments have been removed and a new `sc` argument has been added.
2. the search path changed from `do/search` to `sp/search`
3. a POST request is no longer needed

Closes: https://github.com/searxng/searxng/issues/692
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2022-01-05 13:00:52 +01:00 committed by Noémi Ványi
parent 99128537a8
commit 4750586fb0

View File

@ -3,6 +3,8 @@
Startpage (Web) Startpage (Web)
""" """
from urllib.parse import urlencode
from lxml import html from lxml import html
from dateutil import parser from dateutil import parser
from datetime import datetime, timedelta from datetime import datetime, timedelta
@ -33,7 +35,7 @@ supported_languages_url = 'https://www.startpage.com/do/settings'
# search-url # search-url
base_url = 'https://startpage.com/' base_url = 'https://startpage.com/'
search_url = base_url + 'do/search' search_url = base_url + 'sp/search?'
# specific xpath variables # specific xpath variables
# ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"] # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
@ -46,14 +48,12 @@ content_xpath = './/p[@class="w-gl__description"]'
# do search-request # do search-request
def request(query, params): def request(query, params):
params['url'] = search_url args = {
params['method'] = 'POST'
params['data'] = {
'query': query, 'query': query,
'page': params['pageno'], 'page': params['pageno'],
'cat': 'web', 'cat': 'web',
'cmd': 'process_search', # 'abp': "-1",
'engine0': 'v1all', 'sc': 'Mj4jZy61QETj20',
} }
# set language if specified # set language if specified
@ -61,9 +61,10 @@ def request(query, params):
lang_code = match_language(params['language'], supported_languages, fallback=None) lang_code = match_language(params['language'], supported_languages, fallback=None)
if lang_code: if lang_code:
language_name = supported_languages[lang_code]['alias'] language_name = supported_languages[lang_code]['alias']
params['data']['language'] = language_name args['language'] = language_name
params['data']['lui'] = language_name args['lui'] = language_name
params['url'] = search_url + urlencode(args)
return params return params