mirror of
https://github.com/searx/searx
synced 2024-12-12 08:46:26 +01:00
[fix] startpage engine - avoid captcha
Startpage has introduced new anti-scraping measures that make SearXNG instances run into captchas: 1. some arguments have been removed and a new `sc` argument has been added; 2. the search path changed from `do/search` to `sp/search`; 3. a POST request is no longer needed. Closes: https://github.com/searxng/searxng/issues/692 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
99128537a8
commit
4750586fb0
@ -3,6 +3,8 @@
|
|||||||
Startpage (Web)
|
Startpage (Web)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from dateutil import parser
|
from dateutil import parser
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
@ -33,7 +35,7 @@ supported_languages_url = 'https://www.startpage.com/do/settings'
|
|||||||
|
|
||||||
# search-url
|
# search-url
|
||||||
base_url = 'https://startpage.com/'
|
base_url = 'https://startpage.com/'
|
||||||
search_url = base_url + 'do/search'
|
search_url = base_url + 'sp/search?'
|
||||||
|
|
||||||
# specific xpath variables
|
# specific xpath variables
|
||||||
# ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
|
# ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
|
||||||
@ -46,14 +48,12 @@ content_xpath = './/p[@class="w-gl__description"]'
|
|||||||
# do search-request
|
# do search-request
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
|
|
||||||
params['url'] = search_url
|
args = {
|
||||||
params['method'] = 'POST'
|
|
||||||
params['data'] = {
|
|
||||||
'query': query,
|
'query': query,
|
||||||
'page': params['pageno'],
|
'page': params['pageno'],
|
||||||
'cat': 'web',
|
'cat': 'web',
|
||||||
'cmd': 'process_search',
|
# 'abp': "-1",
|
||||||
'engine0': 'v1all',
|
'sc': 'Mj4jZy61QETj20',
|
||||||
}
|
}
|
||||||
|
|
||||||
# set language if specified
|
# set language if specified
|
||||||
@ -61,9 +61,10 @@ def request(query, params):
|
|||||||
lang_code = match_language(params['language'], supported_languages, fallback=None)
|
lang_code = match_language(params['language'], supported_languages, fallback=None)
|
||||||
if lang_code:
|
if lang_code:
|
||||||
language_name = supported_languages[lang_code]['alias']
|
language_name = supported_languages[lang_code]['alias']
|
||||||
params['data']['language'] = language_name
|
args['language'] = language_name
|
||||||
params['data']['lui'] = language_name
|
args['lui'] = language_name
|
||||||
|
|
||||||
|
params['url'] = search_url + urlencode(args)
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user