mirror of https://github.com/searx/searx
Remove `httpx` and use `requests` instead (#3305)
## What does this PR do? This PR prepares for removing `httpx`, and reverts back to `requests`. ## Why is this change important? `httpx` hasn't proven itself to be faster or better than `requests`. On the other hand it has caused issues on Windows. ============================================= Please update your environment to use requests instead of httpx. =============================================
This commit is contained in:
parent
210e59c68c
commit
85034b49ef
|
@ -130,12 +130,14 @@ Global Settings
|
|||
request_timeout : 2.0 # default timeout in seconds, can be override by engine
|
||||
# max_request_timeout: 10.0 # the maximum timeout in seconds
|
||||
useragent_suffix : "" # informations like an email address to the administrator
|
||||
pool_connections : 100 # Maximum number of allowable connections, or None for no limits. The default is 100.
|
||||
pool_maxsize : 10 # Number of allowable keep-alive connections, or None to always allow. The default is 10.
|
||||
enable_http2: True # See https://www.python-httpx.org/http2/
|
||||
pool_connections : 100 # Number of different hosts
|
||||
pool_maxsize : 10 # Number of simultaneous requests by host
|
||||
# uncomment below section if you want to use a proxy
|
||||
# proxies:
|
||||
# all://:
|
||||
# http:
|
||||
# - http://proxy1:8080
|
||||
# - http://proxy2:8080
|
||||
# https:
|
||||
# - http://proxy1:8080
|
||||
# - http://proxy2:8080
|
||||
# uncomment below section only if you have more than one network interface
|
||||
|
@ -143,7 +145,6 @@ Global Settings
|
|||
# source_ips:
|
||||
# - 1.1.1.1
|
||||
# - 1.1.1.2
|
||||
# - fe80::/126
|
||||
|
||||
|
||||
``request_timeout`` :
|
||||
|
@ -156,46 +157,20 @@ Global Settings
|
|||
Suffix to the user-agent searx uses to send requests to others engines. If an
|
||||
engine wish to block you, a contact info here may be useful to avoid that.
|
||||
|
||||
``keepalive_expiry``:
|
||||
Number of seconds to keep a connection in the pool. By default 5.0 seconds.
|
||||
|
||||
.. _httpx proxies: https://www.python-httpx.org/advanced/#http-proxying
|
||||
.. _requests proxies: https://requests.readthedocs.io/en/latest/user/advanced/#proxies
|
||||
.. _PySocks: https://pypi.org/project/PySocks/
|
||||
|
||||
``proxies`` :
|
||||
Define one or more proxies you wish to use, see `httpx proxies`_.
|
||||
Define one or more proxies you wish to use, see `requests proxies`_.
|
||||
If there are more than one proxy for one protocol (http, https),
|
||||
requests to the engines are distributed in a round-robin fashion.
|
||||
|
||||
- Proxy: `see <https://2.python-requests.org/en/latest/user/advanced/#proxies>`__.
|
||||
- SOCKS proxies are also supported: `see <https://2.python-requests.org/en/latest/user/advanced/#socks>`__
|
||||
|
||||
``source_ips`` :
|
||||
If you use multiple network interfaces, define from which IP the requests must
|
||||
be made. Example:
|
||||
|
||||
* ``0.0.0.0`` any local IPv4 address.
|
||||
* ``::`` any local IPv6 address.
|
||||
* ``192.168.0.1``
|
||||
* ``[ 192.168.0.1, 192.168.0.2 ]`` these two specific IP addresses
|
||||
* ``fe80::60a2:1691:e5a2:ee1f``
|
||||
* ``fe80::60a2:1691:e5a2:ee1f/126`` all IP addresses in this network.
|
||||
* ``[ 192.168.0.1, fe80::/126 ]``
|
||||
|
||||
``retries`` :
|
||||
Number of retry in case of an HTTP error.
|
||||
On each retry, searx uses an different proxy and source ip.
|
||||
|
||||
``retry_on_http_error`` :
|
||||
Retry request on some HTTP status code.
|
||||
|
||||
Example:
|
||||
|
||||
* ``true`` : on HTTP status code between 400 and 599.
|
||||
* ``403`` : on HTTP status code 403.
|
||||
* ``[403, 429]``: on HTTP status code 403 and 429.
|
||||
|
||||
``enable_http2`` :
|
||||
Enable by default. Set to ``False`` to disable HTTP/2.
|
||||
|
||||
``max_redirects`` :
|
||||
30 by default. Maximum redirect before it is an error.
|
||||
be made. This parameter is ignored when ``proxies`` is set.
|
||||
|
||||
|
||||
``locales:``
|
||||
|
@ -241,13 +216,6 @@ Engine settings
|
|||
api_key : 'apikey'
|
||||
disabled : True
|
||||
language : en_US
|
||||
#enable_http: False
|
||||
#enable_http2: False
|
||||
#retries: 1
|
||||
#retry_on_http_error: True # or 403 or [404, 429]
|
||||
#max_connections: 100
|
||||
#max_keepalive_connections: 10
|
||||
#keepalive_expiry: 5.0
|
||||
#proxies:
|
||||
# http:
|
||||
# - http://proxy1:8080
|
||||
|
@ -302,12 +270,6 @@ Engine settings
|
|||
``display_error_messages`` : default ``True``
|
||||
When an engine returns an error, the message is displayed on the user interface.
|
||||
|
||||
``network``: optional
|
||||
Use the network configuration from another engine.
|
||||
In addition, there are two default networks:
|
||||
* ``ipv4`` set ``local_addresses`` to ``0.0.0.0`` (use only IPv4 local addresses)
|
||||
* ``ipv6`` set ``local_addresses`` to ``::`` (use only IPv6 local addresses)
|
||||
|
||||
.. note::
|
||||
|
||||
A few more options are possible, but they are pretty specific to some
|
||||
|
|
3
manage
3
manage
|
@ -107,8 +107,7 @@ fi
|
|||
export DOCS_BUILD
|
||||
|
||||
buildenv() {
|
||||
SEARX_DEBUG=1 pyenv.cmd python utils/build_env.py 2>&1 \
|
||||
| prefix_stdout "${_Blue}BUILDENV${_creset} "
|
||||
SEARX_DEBUG=1 pyenv.cmd python utils/build_env.py 2>&1
|
||||
return "${PIPESTATUS[0]}"
|
||||
}
|
||||
|
||||
|
|
|
@ -17,4 +17,3 @@ sphinx-tabs==3.2.0
|
|||
sphinxcontrib-programoutput==0.17
|
||||
sphinx-autobuild==2021.3.14
|
||||
linuxdoc==20211220
|
||||
aiounittest==1.4.1
|
||||
|
|
|
@ -1,16 +1,13 @@
|
|||
certifi==2022.5.18.1
|
||||
Brotli==1.0.9
|
||||
babel==2.9.1
|
||||
certifi==2022.5.18.1
|
||||
flask-babel==2.0.0
|
||||
flask==2.1.1
|
||||
jinja2==3.1.2
|
||||
langdetect==1.0.9
|
||||
lxml==4.9.0
|
||||
pygments==2.8.0
|
||||
python-dateutil==2.8.2
|
||||
pyyaml==6.0
|
||||
httpx[http2]==0.23.0
|
||||
Brotli==1.0.9
|
||||
uvloop==0.16.0; python_version >= '3.7'
|
||||
uvloop==0.14.0; python_version < '3.7'
|
||||
httpx-socks[asyncio]==0.7.4
|
||||
langdetect==1.0.9
|
||||
requests[socks]==2.28.1
|
||||
setproctitle==1.2.2
|
||||
|
|
|
@ -20,12 +20,10 @@ from lxml import etree
|
|||
from json import loads
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from httpx import HTTPError
|
||||
|
||||
from requests import RequestException
|
||||
|
||||
from searx import settings
|
||||
from searx.data import ENGINES_LANGUAGES
|
||||
from searx.network import get as http_get
|
||||
from searx.poolrequests import get as http_get
|
||||
from searx.exceptions import SearxEngineResponseException
|
||||
|
||||
|
||||
|
@ -154,5 +152,5 @@ def search_autocomplete(backend_name, query, lang):
|
|||
|
||||
try:
|
||||
return backend(query, lang)
|
||||
except (HTTPError, SearxEngineResponseException):
|
||||
except (RequestException, SearxEngineResponseException):
|
||||
return []
|
||||
|
|
|
@ -27,7 +27,7 @@ from searx import settings
|
|||
from searx import logger
|
||||
from searx.data import ENGINES_LANGUAGES
|
||||
from searx.exceptions import SearxEngineResponseException
|
||||
from searx.network import get, initialize as initialize_network, set_context_network_name
|
||||
from searx.poolrequests import get, get_proxy_cycles
|
||||
from searx.utils import load_module, match_language, get_engine_from_settings, gen_useragent
|
||||
|
||||
|
||||
|
@ -89,6 +89,8 @@ def load_engine(engine_data):
|
|||
engine.categories = []
|
||||
else:
|
||||
engine.categories = list(map(str.strip, param_value.split(',')))
|
||||
elif param_name == 'proxies':
|
||||
engine.proxies = get_proxy_cycles(param_value)
|
||||
else:
|
||||
setattr(engine, param_name, param_value)
|
||||
|
||||
|
@ -283,3 +285,24 @@ def load_engines(engine_list):
|
|||
if engine is not None:
|
||||
engines[engine.name] = engine
|
||||
return engines
|
||||
|
||||
|
||||
def initialize_engines(engine_list):
|
||||
load_engines(engine_list)
|
||||
|
||||
def engine_init(engine_name, init_fn):
|
||||
try:
|
||||
init_fn(get_engine_from_settings(engine_name))
|
||||
except SearxEngineResponseException as exc:
|
||||
logger.warn('%s engine: Fail to initialize // %s', engine_name, exc)
|
||||
except Exception:
|
||||
logger.exception('%s engine: Fail to initialize', engine_name)
|
||||
else:
|
||||
logger.debug('%s engine: Initialized', engine_name)
|
||||
|
||||
for engine_name, engine in engines.items():
|
||||
if hasattr(engine, 'init'):
|
||||
init_fn = getattr(engine, 'init')
|
||||
if init_fn:
|
||||
logger.debug('%s engine: Starting background initialization', engine_name)
|
||||
threading.Thread(target=engine_init, args=(engine_name, init_fn)).start()
|
||||
|
|
|
@ -52,7 +52,7 @@ def response(resp):
|
|||
to_results.append(to_result.text_content())
|
||||
|
||||
results.append({
|
||||
'url': urljoin(str(resp.url), '?%d' % k),
|
||||
'url': urljoin(resp.url, '?%d' % k),
|
||||
'title': from_result.text_content(),
|
||||
'content': '; '.join(to_results)
|
||||
})
|
||||
|
|
|
@ -1,18 +1,24 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""
|
||||
DuckDuckGo (Web)
|
||||
"""DuckDuckGo Lite
|
||||
"""
|
||||
|
||||
from json import loads
|
||||
from urllib.parse import urlencode
|
||||
from searx.utils import match_language, HTMLTextExtractor
|
||||
import re
|
||||
from searx.network import get
|
||||
|
||||
from lxml.html import fromstring
|
||||
|
||||
from searx.utils import (
|
||||
dict_subset,
|
||||
eval_xpath,
|
||||
eval_xpath_getindex,
|
||||
extract_text,
|
||||
match_language,
|
||||
)
|
||||
from searx.poolrequests import get
|
||||
|
||||
# about
|
||||
about = {
|
||||
"website": 'https://duckduckgo.com/',
|
||||
"website": 'https://lite.duckduckgo.com/lite',
|
||||
"wikidata_id": 'Q12805',
|
||||
"official_api_documentation": 'https://duckduckgo.com/api',
|
||||
"use_official_api": False,
|
||||
|
@ -21,13 +27,11 @@ about = {
|
|||
}
|
||||
|
||||
# engine dependent config
|
||||
categories = ['general']
|
||||
categories = ['general', 'web']
|
||||
paging = True
|
||||
supported_languages_url = 'https://duckduckgo.com/util/u172.js'
|
||||
number_of_results = 10
|
||||
supported_languages_url = 'https://duckduckgo.com/util/u588.js'
|
||||
time_range_support = True
|
||||
safesearch = True
|
||||
VQD_REGEX = r"vqd='(\d+-\d+-\d+)'"
|
||||
|
||||
language_aliases = {
|
||||
'ar-SA': 'ar-XA',
|
||||
'es-419': 'es-XL',
|
||||
|
@ -35,16 +39,14 @@ language_aliases = {
|
|||
'ko': 'kr-KR',
|
||||
'sl-SI': 'sl-SL',
|
||||
'zh-TW': 'tzh-TW',
|
||||
'zh-HK': 'tzh-HK'
|
||||
'zh-HK': 'tzh-HK',
|
||||
}
|
||||
|
||||
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
|
||||
|
||||
# search-url
|
||||
url = 'https://links.duckduckgo.com/d.js?'
|
||||
url_ping = 'https://duckduckgo.com/t/sl_h'
|
||||
time_range_dict = {'day': 'd',
|
||||
'week': 'w',
|
||||
'month': 'm',
|
||||
'year': 'y'}
|
||||
url = 'https://lite.duckduckgo.com/lite'
|
||||
url_ping = 'https://duckduckgo.com/t/sl_l'
|
||||
|
||||
|
||||
# match query's language to a region code that duckduckgo will accept
|
||||
|
@ -59,103 +61,111 @@ def get_region_code(lang, lang_list=None):
|
|||
return lang_parts[1].lower() + '-' + lang_parts[0].lower()
|
||||
|
||||
|
||||
def get_vqd(query, headers):
|
||||
resp = get(f"https://duckduckgo.com/?q={query}&ia=web", headers=headers)
|
||||
resp = re.findall(VQD_REGEX, resp.text)
|
||||
return resp[0]
|
||||
|
||||
|
||||
def request(query, params):
|
||||
|
||||
params['method'] = 'GET'
|
||||
params['url'] = url
|
||||
params['method'] = 'POST'
|
||||
|
||||
vqd = get_vqd(query, params['headers'])
|
||||
dl, ct = match_language(params['language'], supported_languages, language_aliases, 'wt-WT').split('-')
|
||||
query_dict = {
|
||||
'q': query,
|
||||
't': 'D',
|
||||
'l': params['language'],
|
||||
'kl': f'{ct}-{dl}',
|
||||
's': (params['pageno'] - 1) * number_of_results,
|
||||
'dl': dl,
|
||||
'ct': ct,
|
||||
'ss_mkt': get_region_code(params['language'], supported_languages),
|
||||
'df': params['time_range'],
|
||||
'vqd': vqd,
|
||||
'ex': -2,
|
||||
'sp': '1',
|
||||
'bpa': '1',
|
||||
'biaexp': 'b',
|
||||
'msvrtexp': 'b'
|
||||
}
|
||||
if params['safesearch'] == 2: # STRICT
|
||||
del query_dict['t']
|
||||
query_dict['p'] = 1
|
||||
query_dict.update({
|
||||
'videxp': 'a',
|
||||
'nadse': 'b',
|
||||
'eclsexp': 'a',
|
||||
'stiaexp': 'a',
|
||||
'tjsexp': 'b',
|
||||
'related': 'b',
|
||||
'msnexp': 'a'
|
||||
})
|
||||
elif params['safesearch'] == 1: # MODERATE
|
||||
query_dict['ex'] = -1
|
||||
query_dict.update({
|
||||
'nadse': 'b',
|
||||
'eclsexp': 'b',
|
||||
'tjsexp': 'b'
|
||||
})
|
||||
else: # OFF
|
||||
query_dict['ex'] = -2
|
||||
query_dict.update({
|
||||
'nadse': 'b',
|
||||
'eclsexp': 'b',
|
||||
'tjsexp': 'b'
|
||||
})
|
||||
params['data']['q'] = query
|
||||
|
||||
params['allow_redirects'] = False
|
||||
params['data'] = query_dict
|
||||
params['cookies']['kl'] = params['data']['kl']
|
||||
# The API is not documented, so we do some reverse engineering and emulate
|
||||
# what https://lite.duckduckgo.com/lite/ does when you press "next Page"
|
||||
# link again and again ..
|
||||
|
||||
params['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
|
||||
|
||||
# initial page does not have an offset
|
||||
if params['pageno'] == 2:
|
||||
# second page does have an offset of 30
|
||||
offset = (params['pageno'] - 1) * 30
|
||||
params['data']['s'] = offset
|
||||
params['data']['dc'] = offset + 1
|
||||
|
||||
elif params['pageno'] > 2:
|
||||
# third and following pages do have an offset of 30 + n*50
|
||||
offset = 30 + (params['pageno'] - 2) * 50
|
||||
params['data']['s'] = offset
|
||||
params['data']['dc'] = offset + 1
|
||||
|
||||
# initial page does not have additional data in the input form
|
||||
if params['pageno'] > 1:
|
||||
# request the second page (and more pages) needs 'o' and 'api' arguments
|
||||
params['data']['o'] = 'json'
|
||||
params['data']['api'] = 'd.js'
|
||||
|
||||
# initial page does not have additional data in the input form
|
||||
if params['pageno'] > 2:
|
||||
# request the third page (and more pages) some more arguments
|
||||
params['data']['nextParams'] = ''
|
||||
params['data']['v'] = ''
|
||||
params['data']['vqd'] = ''
|
||||
|
||||
region_code = get_region_code(params['language'], supported_languages)
|
||||
if region_code:
|
||||
params['data']['kl'] = region_code
|
||||
params['cookies']['kl'] = region_code
|
||||
|
||||
params['data']['df'] = ''
|
||||
if params['time_range'] in time_range_dict:
|
||||
params['data']['df'] = time_range_dict[params['time_range']]
|
||||
params['cookies']['df'] = time_range_dict[params['time_range']]
|
||||
params['url'] = url + urlencode(params['data'])
|
||||
|
||||
return params
|
||||
|
||||
|
||||
# get response from search-request
|
||||
def response(resp):
|
||||
|
||||
headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie'])
|
||||
get(url_ping, headers=headers_ping)
|
||||
|
||||
if resp.status_code == 303:
|
||||
return []
|
||||
|
||||
# parse the response
|
||||
results = []
|
||||
doc = fromstring(resp.text)
|
||||
|
||||
data = re.findall(r"DDG\.pageLayout\.load\('d',(\[.+\])\);DDG\.duckbar\.load\('images'", str(resp.text))
|
||||
try:
|
||||
search_data = loads(data[0].replace('/\t/g', ' '))
|
||||
except IndexError:
|
||||
return
|
||||
result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')
|
||||
if not len(result_table) >= 3:
|
||||
# no more results
|
||||
return []
|
||||
result_table = result_table[2]
|
||||
|
||||
if len(search_data) == 1 and ('n' not in search_data[0]):
|
||||
only_result = search_data[0]
|
||||
if ((only_result.get('da') is not None and only_result.get('t') == 'EOF') or
|
||||
only_result.get('a') is not None or only_result.get('d') == 'google.com search'):
|
||||
return
|
||||
tr_rows = eval_xpath(result_table, './/tr')
|
||||
|
||||
for search_result in search_data:
|
||||
if 'n' in search_result:
|
||||
# In the last <tr> is the form of the 'previous/next page' links
|
||||
tr_rows = tr_rows[:-1]
|
||||
|
||||
len_tr_rows = len(tr_rows)
|
||||
offset = 0
|
||||
|
||||
while len_tr_rows >= offset + 4:
|
||||
|
||||
# assemble table rows we need to scrap
|
||||
tr_title = tr_rows[offset]
|
||||
tr_content = tr_rows[offset + 1]
|
||||
offset += 4
|
||||
|
||||
# ignore sponsored Adds <tr class="result-sponsored">
|
||||
if tr_content.get('class') == 'result-sponsored':
|
||||
continue
|
||||
title = HTMLTextExtractor()
|
||||
title.feed(search_result.get('t'))
|
||||
content = HTMLTextExtractor()
|
||||
content.feed(search_result.get('a'))
|
||||
|
||||
results.append({'title': title.get_text(),
|
||||
'content': content.get_text(),
|
||||
'url': search_result.get('u')})
|
||||
a_tag = eval_xpath_getindex(tr_title, './/td//a[@class="result-link"]', 0, None)
|
||||
if a_tag is None:
|
||||
continue
|
||||
|
||||
td_content = eval_xpath_getindex(tr_content, './/td[@class="result-snippet"]', 0, None)
|
||||
if td_content is None:
|
||||
continue
|
||||
|
||||
results.append(
|
||||
{
|
||||
'title': a_tag.text_content(),
|
||||
'content': extract_text(td_content),
|
||||
'url': a_tag.get('href'),
|
||||
}
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
|
@ -165,7 +175,7 @@ def _fetch_supported_languages(resp):
|
|||
# response is a js file with regions as an embedded object
|
||||
response_page = resp.text
|
||||
response_page = response_page[response_page.find('regions:{') + 8:]
|
||||
response_page = response_page[:response_page.find('}') + 1]
|
||||
response_page = response_page[: response_page.find('}') + 1]
|
||||
|
||||
regions_json = loads(response_page)
|
||||
supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
|
||||
|
|
|
@ -8,7 +8,7 @@ from urllib.parse import urlencode
|
|||
from searx.exceptions import SearxEngineAPIException
|
||||
from searx.engines.duckduckgo import get_region_code
|
||||
from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import
|
||||
from searx.network import get
|
||||
from searx.poolrequests import get
|
||||
|
||||
# about
|
||||
about = {
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
"""
|
||||
|
||||
from json import loads, dumps
|
||||
from requests.auth import HTTPBasicAuth
|
||||
from searx.exceptions import SearxEngineAPIException
|
||||
|
||||
|
||||
|
@ -31,7 +32,7 @@ def request(query, params):
|
|||
return params
|
||||
|
||||
if username and password:
|
||||
params['auth'] = (username, password)
|
||||
params['auth'] = HTTPBasicAuth(username, password)
|
||||
|
||||
params['url'] = search_url
|
||||
params['method'] = 'GET'
|
||||
|
|
|
@ -7,8 +7,8 @@
|
|||
import re
|
||||
from json import loads, JSONDecodeError
|
||||
from urllib.parse import urlencode
|
||||
from searx.network import get
|
||||
from searx.exceptions import SearxEngineResponseException
|
||||
from searx.poolrequests import get
|
||||
|
||||
# about
|
||||
about = {
|
||||
|
|
|
@ -10,7 +10,7 @@ Definitions`_.
|
|||
|
||||
# pylint: disable=invalid-name, missing-function-docstring, too-many-branches
|
||||
|
||||
from urllib.parse import urlencode
|
||||
from urllib.parse import urlencode, urlparse
|
||||
from lxml import html
|
||||
from searx import logger
|
||||
from searx.utils import match_language, extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
|
||||
|
@ -194,7 +194,8 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
|
|||
return ret_val
|
||||
|
||||
def detect_google_sorry(resp):
|
||||
if resp.url.host == 'sorry.google.com' or resp.url.path.startswith('/sorry'):
|
||||
resp_url = urlparse(resp.url)
|
||||
if resp_url.netloc == 'sorry.google.com' or resp_url.path.startswith('/sorry'):
|
||||
raise SearxEngineCaptchaException()
|
||||
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ from flask_babel import gettext
|
|||
from lxml import etree
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlencode
|
||||
from searx.network import get
|
||||
from searx.poolrequests import get
|
||||
|
||||
# about
|
||||
about = {
|
||||
|
|
|
@ -33,7 +33,7 @@ from flask_babel import gettext
|
|||
|
||||
from searx.utils import match_language
|
||||
from searx.exceptions import SearxEngineAPIException
|
||||
from searx.network import raise_for_httperror
|
||||
from searx.raise_for_httperror import raise_for_httperror
|
||||
|
||||
|
||||
# about
|
||||
|
@ -86,15 +86,14 @@ def request(query, params):
|
|||
|
||||
# add language tag
|
||||
if params['language'] == 'all':
|
||||
params['url'] += '&locale=en_us'
|
||||
params['url'] += '&locale=en_US'
|
||||
else:
|
||||
language = match_language(
|
||||
params['language'],
|
||||
# pylint: disable=undefined-variable
|
||||
supported_languages,
|
||||
language_aliases,
|
||||
)
|
||||
params['url'] += '&locale=' + language.replace('-', '_').lower()
|
||||
params['url'] += '&locale=' + language.replace('-', '_')
|
||||
|
||||
params['raise_for_httperror'] = False
|
||||
return params
|
||||
|
@ -113,7 +112,14 @@ def response(resp):
|
|||
|
||||
# check for an API error
|
||||
if search_results.get('status') != 'success':
|
||||
msg = ",".join(data.get('message', ['unknown', ]))
|
||||
msg = ",".join(
|
||||
data.get(
|
||||
'message',
|
||||
[
|
||||
'unknown',
|
||||
],
|
||||
)
|
||||
)
|
||||
raise SearxEngineAPIException('API error::' + msg)
|
||||
|
||||
# raise for other errors
|
||||
|
@ -155,11 +161,13 @@ def response(resp):
|
|||
|
||||
if mainline_type == 'web':
|
||||
content = item['desc']
|
||||
results.append({
|
||||
'title': title,
|
||||
'url': res_url,
|
||||
'content': content,
|
||||
})
|
||||
results.append(
|
||||
{
|
||||
'title': title,
|
||||
'url': res_url,
|
||||
'content': content,
|
||||
}
|
||||
)
|
||||
|
||||
elif mainline_type == 'news':
|
||||
|
||||
|
@ -170,23 +178,27 @@ def response(resp):
|
|||
img_src = None
|
||||
if news_media:
|
||||
img_src = news_media[0].get('pict', {}).get('url', None)
|
||||
results.append({
|
||||
'title': title,
|
||||
'url': res_url,
|
||||
'publishedDate': pub_date,
|
||||
'img_src': img_src,
|
||||
})
|
||||
results.append(
|
||||
{
|
||||
'title': title,
|
||||
'url': res_url,
|
||||
'publishedDate': pub_date,
|
||||
'img_src': img_src,
|
||||
}
|
||||
)
|
||||
|
||||
elif mainline_type == 'images':
|
||||
thumbnail = item['thumbnail']
|
||||
img_src = item['media']
|
||||
results.append({
|
||||
'title': title,
|
||||
'url': res_url,
|
||||
'template': 'images.html',
|
||||
'thumbnail_src': thumbnail,
|
||||
'img_src': img_src,
|
||||
})
|
||||
results.append(
|
||||
{
|
||||
'title': title,
|
||||
'url': res_url,
|
||||
'template': 'images.html',
|
||||
'thumbnail_src': thumbnail,
|
||||
'img_src': img_src,
|
||||
}
|
||||
)
|
||||
|
||||
elif mainline_type == 'videos':
|
||||
# some videos do not have a description: while qwant-video
|
||||
|
@ -210,19 +222,18 @@ def response(resp):
|
|||
thumbnail = item['thumbnail']
|
||||
# from some locations (DE and others?) the s2 link do
|
||||
# response a 'Please wait ..' but does not deliver the thumbnail
|
||||
thumbnail = thumbnail.replace(
|
||||
'https://s2.qwant.com',
|
||||
'https://s1.qwant.com', 1
|
||||
thumbnail = thumbnail.replace('https://s2.qwant.com', 'https://s1.qwant.com', 1)
|
||||
results.append(
|
||||
{
|
||||
'title': title,
|
||||
'url': res_url,
|
||||
'content': content,
|
||||
'publishedDate': pub_date,
|
||||
'thumbnail': thumbnail,
|
||||
'template': 'videos.html',
|
||||
'length': length,
|
||||
}
|
||||
)
|
||||
results.append({
|
||||
'title': title,
|
||||
'url': res_url,
|
||||
'content': content,
|
||||
'publishedDate': pub_date,
|
||||
'thumbnail': thumbnail,
|
||||
'template': 'videos.html',
|
||||
'length': length,
|
||||
})
|
||||
|
||||
return results
|
||||
|
||||
|
@ -232,7 +243,7 @@ def _fetch_supported_languages(resp):
|
|||
# list of regions is embedded in page as a js object
|
||||
response_text = resp.text
|
||||
response_text = response_text[response_text.find('INITIAL_PROPS'):]
|
||||
response_text = response_text[response_text.find('{'):response_text.find('</script>')]
|
||||
response_text = response_text[response_text.find('{'): response_text.find('</script>')]
|
||||
|
||||
regions_json = loads(response_text)
|
||||
|
||||
|
|
|
@ -3,9 +3,9 @@
|
|||
Seznam
|
||||
"""
|
||||
|
||||
from urllib.parse import urlencode
|
||||
from urllib.parse import urlencode, urlparse
|
||||
from lxml import html
|
||||
from searx.network import get
|
||||
from searx.poolrequests import get
|
||||
from searx.exceptions import SearxEngineAccessDeniedException
|
||||
from searx.utils import (
|
||||
extract_text,
|
||||
|
@ -46,7 +46,8 @@ def request(query, params):
|
|||
|
||||
|
||||
def response(resp):
|
||||
if resp.url.path.startswith('/verify'):
|
||||
resp_url = urlparse(resp.url)
|
||||
if resp_url.path.startswith('/verify'):
|
||||
raise SearxEngineAccessDeniedException()
|
||||
|
||||
results = []
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
from lxml.html import fromstring
|
||||
from searx import logger
|
||||
from searx.utils import extract_text
|
||||
from searx.network import raise_for_httperror
|
||||
from searx.raise_for_httperror import raise_for_httperror
|
||||
|
||||
logger = logger.getChild('sjp engine')
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@ from lxml import html
|
|||
from dateutil import parser
|
||||
from urllib.parse import quote_plus, urlencode
|
||||
from searx import logger
|
||||
from searx.network import get as http_get
|
||||
from searx.poolrequests import get as http_get
|
||||
|
||||
# about
|
||||
about = {
|
||||
|
|
|
@ -5,10 +5,9 @@
|
|||
|
||||
from json import loads
|
||||
from urllib.parse import urlencode
|
||||
import requests
|
||||
import base64
|
||||
|
||||
from searx.network import post as http_post
|
||||
|
||||
# about
|
||||
about = {
|
||||
"website": 'https://www.spotify.com',
|
||||
|
@ -39,7 +38,7 @@ def request(query, params):
|
|||
|
||||
params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
|
||||
|
||||
r = http_post(
|
||||
r = requests.post(
|
||||
'https://accounts.spotify.com/api/token',
|
||||
data={'grant_type': 'client_credentials'},
|
||||
headers={'Authorization': 'Basic ' + base64.b64encode(
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""
|
||||
Stackoverflow (IT)
|
||||
"""
|
||||
|
||||
from urllib.parse import urlencode, urljoin, urlparse
|
||||
from lxml import html
|
||||
from searx.utils import extract_text
|
||||
from searx.exceptions import SearxEngineCaptchaException
|
||||
|
||||
# about
|
||||
about = {
|
||||
"website": 'https://stackoverflow.com/',
|
||||
"wikidata_id": 'Q549037',
|
||||
"official_api_documentation": 'https://api.stackexchange.com/docs',
|
||||
"use_official_api": False,
|
||||
"require_api_key": False,
|
||||
"results": 'HTML',
|
||||
}
|
||||
|
||||
# engine dependent config
|
||||
categories = ['it']
|
||||
paging = True
|
||||
|
||||
# search-url
|
||||
url = 'https://stackoverflow.com/'
|
||||
search_url = url + 'search?{query}&page={pageno}'
|
||||
|
||||
# specific xpath variables
|
||||
results_xpath = '//div[contains(@class,"question-summary")]'
|
||||
link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a'
|
||||
content_xpath = './/div[@class="excerpt"]'
|
||||
|
||||
|
||||
# do search-request
|
||||
def request(query, params):
|
||||
params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'])
|
||||
|
||||
return params
|
||||
|
||||
|
||||
# get response from search-request
|
||||
def response(resp):
|
||||
resp_url = urlparse(resp.url)
|
||||
if resp_url.path.startswith('/nocaptcha'):
|
||||
raise SearxEngineCaptchaException()
|
||||
|
||||
results = []
|
||||
|
||||
dom = html.fromstring(resp.text)
|
||||
|
||||
# parse results
|
||||
for result in dom.xpath(results_xpath):
|
||||
link = result.xpath(link_xpath)[0]
|
||||
href = urljoin(url, link.attrib.get('href'))
|
||||
title = extract_text(link)
|
||||
content = extract_text(result.xpath(content_xpath))
|
||||
|
||||
# append result
|
||||
results.append({'url': href,
|
||||
'title': title,
|
||||
'content': content})
|
||||
|
||||
# return results
|
||||
return results
|
|
@ -17,7 +17,7 @@ from babel import Locale
|
|||
from babel.localedata import locale_identifiers
|
||||
|
||||
from searx import logger
|
||||
from searx.network import get
|
||||
from searx.poolrequests import get
|
||||
from searx.utils import extract_text, eval_xpath, match_language
|
||||
from searx.exceptions import (
|
||||
SearxEngineResponseException,
|
||||
|
|
|
@ -12,7 +12,7 @@ from babel.dates import format_datetime, format_date, format_time, get_datetime_
|
|||
|
||||
from searx import logger
|
||||
from searx.data import WIKIDATA_UNITS
|
||||
from searx.network import post, get
|
||||
from searx.poolrequests import post, get
|
||||
from searx.utils import match_language, searx_useragent, get_string_replaces_function
|
||||
from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
|
||||
from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import
|
||||
|
|
|
@ -7,7 +7,7 @@ from urllib.parse import quote
|
|||
from json import loads
|
||||
from lxml.html import fromstring
|
||||
from searx.utils import match_language, searx_useragent
|
||||
from searx.network import raise_for_httperror
|
||||
from searx.raise_for_httperror import raise_for_httperror
|
||||
|
||||
# about
|
||||
about = {
|
||||
|
|
|
@ -7,7 +7,7 @@ from json import loads
|
|||
from time import time
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from searx.network import get as http_get
|
||||
from searx.poolrequests import get as http_get
|
||||
|
||||
# about
|
||||
about = {
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
from lxml.html import fromstring
|
||||
from searx import logger
|
||||
from searx.utils import extract_text
|
||||
from searx.network import raise_for_httperror
|
||||
from searx.raise_for_httperror import raise_for_httperror
|
||||
|
||||
logger = logger.getChild('Wordnik engine')
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ from json import loads
|
|||
from dateutil import parser
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from httpx import DigestAuth
|
||||
from requests.auth import HTTPDigestAuth
|
||||
|
||||
from searx.utils import html_to_text
|
||||
|
||||
|
@ -56,7 +56,7 @@ def request(query, params):
|
|||
search_type=search_type)
|
||||
|
||||
if http_digest_auth_user and http_digest_auth_pass:
|
||||
params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass)
|
||||
params['auth'] = HTTPDigestAuth(http_digest_auth_user, http_digest_auth_pass)
|
||||
|
||||
# add language tag if specified
|
||||
if params['language'] != 'all':
|
||||
|
|
|
@ -8,7 +8,7 @@ from operator import itemgetter
|
|||
from datetime import datetime
|
||||
from urllib.parse import quote
|
||||
from searx.utils import extract_text, get_torrent_size
|
||||
from searx.network import get as http_get
|
||||
from searx.poolrequests import get as http_get
|
||||
|
||||
# about
|
||||
about = {
|
||||
|
@ -39,7 +39,7 @@ cookies = dict()
|
|||
def init(engine_settings=None):
|
||||
global cookies # pylint: disable=global-variable-not-assigned
|
||||
# initial cookies
|
||||
resp = http_get(url, follow_redirects=False)
|
||||
resp = http_get(url)
|
||||
if resp.ok:
|
||||
for r in resp.history:
|
||||
cookies.update(r.cookies)
|
||||
|
|
|
@ -3,7 +3,7 @@ import inspect
|
|||
import logging
|
||||
from json import JSONDecodeError
|
||||
from urllib.parse import urlparse
|
||||
from httpx import HTTPError, HTTPStatusError
|
||||
from requests.exceptions import RequestException
|
||||
from searx.exceptions import (SearxXPathSyntaxException, SearxEngineXPathException, SearxEngineAPIException,
|
||||
SearxEngineAccessDeniedException)
|
||||
from searx import logger
|
||||
|
@ -60,28 +60,28 @@ def get_trace(traces):
|
|||
return traces[-1]
|
||||
|
||||
|
||||
def get_hostname(exc: HTTPError) -> typing.Optional[None]:
|
||||
def get_hostname(exc: RequestException) -> typing.Optional[None]:
|
||||
url = exc.request.url
|
||||
if url is None and exc.response is not None:
|
||||
url = exc.response.url
|
||||
return urlparse(url).netloc
|
||||
|
||||
|
||||
def get_request_exception_messages(exc: HTTPError)\
|
||||
def get_request_exception_messages(exc: RequestException)\
|
||||
-> typing.Tuple[typing.Optional[str], typing.Optional[str], typing.Optional[str]]:
|
||||
url = None
|
||||
status_code = None
|
||||
reason = None
|
||||
hostname = None
|
||||
if hasattr(exc, 'request') and exc.request is not None:
|
||||
if exc.request is not None:
|
||||
url = exc.request.url
|
||||
if url is None and hasattr(exc, 'response') and exc.respones is not None:
|
||||
if url is None and exc.response is not None:
|
||||
url = exc.response.url
|
||||
if url is not None:
|
||||
hostname = url.host
|
||||
if isinstance(exc, HTTPStatusError):
|
||||
hostname = str(urlparse(url).netloc)
|
||||
if exc.response is not None:
|
||||
status_code = str(exc.response.status_code)
|
||||
reason = exc.response.reason_phrase
|
||||
reason = exc.response.reason
|
||||
return (status_code, reason, hostname)
|
||||
|
||||
|
||||
|
@ -92,7 +92,7 @@ def get_messages(exc, filename) -> typing.Tuple:
|
|||
return (str(exc), )
|
||||
if isinstance(exc, ValueError) and 'lxml' in filename:
|
||||
return (str(exc), )
|
||||
if isinstance(exc, HTTPError):
|
||||
if isinstance(exc, RequestException):
|
||||
return get_request_exception_messages(exc)
|
||||
if isinstance(exc, SearxXPathSyntaxException):
|
||||
return (exc.xpath_str, exc.message)
|
||||
|
|
|
@ -1,188 +0,0 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
import asyncio
|
||||
import threading
|
||||
import concurrent.futures
|
||||
from time import time
|
||||
from queue import SimpleQueue
|
||||
from types import MethodType
|
||||
|
||||
import httpx
|
||||
import h2.exceptions
|
||||
|
||||
from .network import get_network, initialize, check_network_configuration
|
||||
from .client import get_loop
|
||||
from .raise_for_httperror import raise_for_httperror
|
||||
|
||||
|
||||
THREADLOCAL = threading.local()
|
||||
|
||||
|
||||
def reset_time_for_thread():
|
||||
THREADLOCAL.total_time = 0
|
||||
|
||||
|
||||
def get_time_for_thread():
|
||||
return THREADLOCAL.total_time
|
||||
|
||||
|
||||
def set_timeout_for_thread(timeout, start_time=None):
|
||||
THREADLOCAL.timeout = timeout
|
||||
THREADLOCAL.start_time = start_time
|
||||
|
||||
|
||||
def set_context_network_name(network_name):
|
||||
THREADLOCAL.network = get_network(network_name)
|
||||
|
||||
|
||||
def get_context_network():
|
||||
try:
|
||||
return THREADLOCAL.network
|
||||
except AttributeError:
|
||||
return get_network()
|
||||
|
||||
|
||||
def request(method, url, **kwargs):
|
||||
"""same as requests/requests/api.py request(...)"""
|
||||
time_before_request = time()
|
||||
|
||||
# timeout (httpx)
|
||||
if 'timeout' in kwargs:
|
||||
timeout = kwargs['timeout']
|
||||
else:
|
||||
timeout = getattr(THREADLOCAL, 'timeout', None)
|
||||
if timeout is not None:
|
||||
kwargs['timeout'] = timeout
|
||||
|
||||
# 2 minutes timeout for the requests without timeout
|
||||
timeout = timeout or 120
|
||||
|
||||
# ajdust actual timeout
|
||||
timeout += 0.2 # overhead
|
||||
start_time = getattr(THREADLOCAL, 'start_time', time_before_request)
|
||||
if start_time:
|
||||
timeout -= time() - start_time
|
||||
|
||||
# raise_for_error
|
||||
check_for_httperror = True
|
||||
if 'raise_for_httperror' in kwargs:
|
||||
check_for_httperror = kwargs['raise_for_httperror']
|
||||
del kwargs['raise_for_httperror']
|
||||
|
||||
# requests compatibility
|
||||
if isinstance(url, bytes):
|
||||
url = url.decode()
|
||||
|
||||
# network
|
||||
network = get_context_network()
|
||||
|
||||
# do request
|
||||
future = asyncio.run_coroutine_threadsafe(network.request(method, url, **kwargs), get_loop())
|
||||
try:
|
||||
response = future.result(timeout)
|
||||
except concurrent.futures.TimeoutError as e:
|
||||
raise httpx.TimeoutException('Timeout', request=None) from e
|
||||
|
||||
# requests compatibility
|
||||
# see also https://www.python-httpx.org/compatibility/#checking-for-4xx5xx-responses
|
||||
response.ok = not response.is_error
|
||||
|
||||
# update total_time.
|
||||
# See get_time_for_thread() and reset_time_for_thread()
|
||||
if hasattr(THREADLOCAL, 'total_time'):
|
||||
time_after_request = time()
|
||||
THREADLOCAL.total_time += time_after_request - time_before_request
|
||||
|
||||
# raise an exception
|
||||
if check_for_httperror:
|
||||
raise_for_httperror(response)
|
||||
|
||||
return response
|
||||
|
||||
|
||||
def get(url, **kwargs):
|
||||
kwargs.setdefault('follow_redirects', True)
|
||||
return request('get', url, **kwargs)
|
||||
|
||||
|
||||
def options(url, **kwargs):
|
||||
kwargs.setdefault('follow_redirects', True)
|
||||
return request('options', url, **kwargs)
|
||||
|
||||
|
||||
def head(url, **kwargs):
|
||||
kwargs.setdefault('follow_redirects', False)
|
||||
return request('head', url, **kwargs)
|
||||
|
||||
|
||||
def post(url, data=None, **kwargs):
|
||||
return request('post', url, data=data, **kwargs)
|
||||
|
||||
|
||||
def put(url, data=None, **kwargs):
|
||||
return request('put', url, data=data, **kwargs)
|
||||
|
||||
|
||||
def patch(url, data=None, **kwargs):
|
||||
return request('patch', url, data=data, **kwargs)
|
||||
|
||||
|
||||
def delete(url, **kwargs):
|
||||
return request('delete', url, **kwargs)
|
||||
|
||||
|
||||
async def stream_chunk_to_queue(network, q, method, url, **kwargs):
|
||||
try:
|
||||
async with await network.stream(method, url, **kwargs) as response:
|
||||
q.put(response)
|
||||
# aiter_raw: access the raw bytes on the response without applying any HTTP content decoding
|
||||
# https://www.python-httpx.org/quickstart/#streaming-responses
|
||||
async for chunk in response.aiter_bytes(65536):
|
||||
if len(chunk) > 0:
|
||||
q.put(chunk)
|
||||
except httpx.ResponseClosed as e:
|
||||
# the response was closed
|
||||
pass
|
||||
except (httpx.HTTPError, OSError, h2.exceptions.ProtocolError) as e:
|
||||
q.put(e)
|
||||
finally:
|
||||
q.put(None)
|
||||
|
||||
|
||||
def _close_response_method(self):
|
||||
asyncio.run_coroutine_threadsafe(
|
||||
self.aclose(),
|
||||
get_loop()
|
||||
)
|
||||
|
||||
|
||||
def stream(method, url, **kwargs):
|
||||
"""Replace httpx.stream.
|
||||
|
||||
Usage:
|
||||
stream = poolrequests.stream(...)
|
||||
response = next(stream)
|
||||
for chunk in stream:
|
||||
...
|
||||
|
||||
httpx.Client.stream requires to write the httpx.HTTPTransport version of the
|
||||
the httpx.AsyncHTTPTransport declared above.
|
||||
"""
|
||||
q = SimpleQueue()
|
||||
future = asyncio.run_coroutine_threadsafe(stream_chunk_to_queue(get_network(), q, method, url, **kwargs),
|
||||
get_loop())
|
||||
# yield response
|
||||
response = q.get()
|
||||
if isinstance(response, Exception):
|
||||
raise response
|
||||
response.close = MethodType(_close_response_method, response)
|
||||
yield response
|
||||
|
||||
# yield chunks
|
||||
chunk_or_exception = q.get()
|
||||
while chunk_or_exception is not None:
|
||||
if isinstance(chunk_or_exception, Exception):
|
||||
raise chunk_or_exception
|
||||
yield chunk_or_exception
|
||||
chunk_or_exception = q.get()
|
||||
future.result()
|
|
@ -1,167 +0,0 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import threading
|
||||
import uvloop
|
||||
|
||||
import httpx
|
||||
from httpx_socks import AsyncProxyTransport
|
||||
from python_socks import (
|
||||
parse_proxy_url,
|
||||
ProxyConnectionError,
|
||||
ProxyTimeoutError,
|
||||
ProxyError
|
||||
)
|
||||
import python_socks._errors
|
||||
|
||||
from searx import logger
|
||||
|
||||
|
||||
logger = logger.getChild('searx.http.client')
|
||||
LOOP = None
|
||||
SSLCONTEXTS = {}
|
||||
TRANSPORT_KWARGS = {
|
||||
'trust_env': False,
|
||||
}
|
||||
|
||||
|
||||
def get_sslcontexts(proxy_url=None, cert=None, verify=True, trust_env=True, http2=False):
|
||||
global SSLCONTEXTS
|
||||
key = (proxy_url, cert, verify, trust_env, http2)
|
||||
if key not in SSLCONTEXTS:
|
||||
SSLCONTEXTS[key] = httpx.create_ssl_context(cert, verify, trust_env, http2)
|
||||
return SSLCONTEXTS[key]
|
||||
|
||||
|
||||
class AsyncHTTPTransportNoHttp(httpx.AsyncHTTPTransport):
|
||||
"""Block HTTP request"""
|
||||
|
||||
async def handle_async_request(self, request):
|
||||
raise httpx.UnsupportedProtocol('HTTP protocol is disabled')
|
||||
|
||||
|
||||
class AsyncProxyTransportFixed(AsyncProxyTransport):
|
||||
"""Fix httpx_socks.AsyncProxyTransport
|
||||
|
||||
Map python_socks exceptions to httpx.ProxyError exceptions
|
||||
"""
|
||||
|
||||
async def handle_async_request(self, request):
|
||||
try:
|
||||
return await super().handle_async_request(request)
|
||||
except ProxyConnectionError as e:
|
||||
raise httpx.ProxyError("ProxyConnectionError: " + e.strerror, request=request) from e
|
||||
except ProxyTimeoutError as e:
|
||||
raise httpx.ProxyError("ProxyTimeoutError: " + e.args[0], request=request) from e
|
||||
except ProxyError as e:
|
||||
raise httpx.ProxyError("ProxyError: " + e.args[0], request=request) from e
|
||||
|
||||
|
||||
def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit, retries):
|
||||
# support socks5h (requests compatibility):
|
||||
# https://requests.readthedocs.io/en/master/user/advanced/#socks
|
||||
# socks5:// hostname is resolved on client side
|
||||
# socks5h:// hostname is resolved on proxy side
|
||||
rdns = False
|
||||
socks5h = 'socks5h://'
|
||||
if proxy_url.startswith(socks5h):
|
||||
proxy_url = 'socks5://' + proxy_url[len(socks5h):]
|
||||
rdns = True
|
||||
|
||||
proxy_type, proxy_host, proxy_port, proxy_username, proxy_password = parse_proxy_url(proxy_url)
|
||||
verify = get_sslcontexts(proxy_url, None, True, False, http2) if verify is True else verify
|
||||
return AsyncProxyTransportFixed(
|
||||
proxy_type=proxy_type,
|
||||
proxy_host=proxy_host,
|
||||
proxy_port=proxy_port,
|
||||
username=proxy_username,
|
||||
password=proxy_password,
|
||||
rdns=rdns,
|
||||
loop=get_loop(),
|
||||
verify=verify,
|
||||
http2=http2,
|
||||
local_address=local_address,
|
||||
limits=limit,
|
||||
retries=retries,
|
||||
**TRANSPORT_KWARGS,
|
||||
)
|
||||
|
||||
|
||||
def get_transport(verify, http2, local_address, proxy_url, limit, retries):
|
||||
verify = get_sslcontexts(None, None, True, False, http2) if verify is True else verify
|
||||
return httpx.AsyncHTTPTransport(
|
||||
# pylint: disable=protected-access
|
||||
verify=verify,
|
||||
http2=http2,
|
||||
limits=limit,
|
||||
proxy=httpx._config.Proxy(proxy_url) if proxy_url else None,
|
||||
local_address=local_address,
|
||||
retries=retries,
|
||||
**TRANSPORT_KWARGS,
|
||||
)
|
||||
|
||||
|
||||
def iter_proxies(proxies):
|
||||
# https://www.python-httpx.org/compatibility/#proxy-keys
|
||||
if isinstance(proxies, str):
|
||||
yield 'all://', proxies
|
||||
elif isinstance(proxies, dict):
|
||||
for pattern, proxy_url in proxies.items():
|
||||
yield pattern, proxy_url
|
||||
|
||||
|
||||
def new_client(enable_http, verify, enable_http2,
|
||||
max_connections, max_keepalive_connections, keepalive_expiry,
|
||||
proxies, local_address, retries, max_redirects, hook_log_response):
|
||||
limit = httpx.Limits(max_connections=max_connections,
|
||||
max_keepalive_connections=max_keepalive_connections,
|
||||
keepalive_expiry=keepalive_expiry)
|
||||
# See https://www.python-httpx.org/advanced/#routing
|
||||
mounts = {}
|
||||
for pattern, proxy_url in iter_proxies(proxies):
|
||||
if not enable_http and (pattern == 'http' or pattern.startswith('http://')):
|
||||
continue
|
||||
if proxy_url.startswith('socks4://') \
|
||||
or proxy_url.startswith('socks5://') \
|
||||
or proxy_url.startswith('socks5h://'):
|
||||
mounts[pattern] = get_transport_for_socks_proxy(verify, enable_http2, local_address, proxy_url, limit,
|
||||
retries)
|
||||
else:
|
||||
mounts[pattern] = get_transport(verify, enable_http2, local_address, proxy_url, limit, retries)
|
||||
|
||||
if not enable_http:
|
||||
mounts['http://'] = AsyncHTTPTransportNoHttp()
|
||||
|
||||
transport = get_transport(verify, enable_http2, local_address, None, limit, retries)
|
||||
event_hooks = None
|
||||
if hook_log_response:
|
||||
event_hooks = {'response': [hook_log_response]}
|
||||
return httpx.AsyncClient(transport=transport, mounts=mounts, max_redirects=max_redirects, event_hooks=event_hooks)
|
||||
|
||||
|
||||
def get_loop():
|
||||
global LOOP
|
||||
return LOOP
|
||||
|
||||
|
||||
def init():
|
||||
# log
|
||||
for logger_name in ('hpack.hpack', 'hpack.table'):
|
||||
logging.getLogger(logger_name).setLevel(logging.WARNING)
|
||||
|
||||
# loop
|
||||
def loop_thread():
|
||||
global LOOP
|
||||
LOOP = asyncio.new_event_loop()
|
||||
LOOP.run_forever()
|
||||
|
||||
th = threading.Thread(
|
||||
target=loop_thread,
|
||||
name='asyncio_loop',
|
||||
daemon=True,
|
||||
)
|
||||
th.start()
|
||||
|
||||
|
||||
init()
|
|
@ -1,402 +0,0 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
# pylint: disable=global-statement
|
||||
# pylint: disable=missing-module-docstring, missing-class-docstring
|
||||
|
||||
import atexit
|
||||
import asyncio
|
||||
import ipaddress
|
||||
from itertools import cycle
|
||||
from typing import Dict
|
||||
|
||||
import httpx
|
||||
|
||||
from searx import logger, searx_debug
|
||||
from .client import new_client, get_loop, AsyncHTTPTransportNoHttp
|
||||
|
||||
|
||||
logger = logger.getChild('network')
|
||||
DEFAULT_NAME = '__DEFAULT__'
|
||||
NETWORKS: Dict[str, 'Network'] = {}
|
||||
# requests compatibility when reading proxy settings from settings.yml
|
||||
PROXY_PATTERN_MAPPING = {
|
||||
'http': 'http://',
|
||||
'https': 'https://',
|
||||
'socks4': 'socks4://',
|
||||
'socks5': 'socks5://',
|
||||
'socks5h': 'socks5h://',
|
||||
'http:': 'http://',
|
||||
'https:': 'https://',
|
||||
'socks4:': 'socks4://',
|
||||
'socks5:': 'socks5://',
|
||||
'socks5h:': 'socks5h://',
|
||||
}
|
||||
|
||||
ADDRESS_MAPPING = {'ipv4': '0.0.0.0', 'ipv6': '::'}
|
||||
|
||||
|
||||
class Network:
|
||||
|
||||
__slots__ = (
|
||||
'enable_http',
|
||||
'verify',
|
||||
'enable_http2',
|
||||
'max_connections',
|
||||
'max_keepalive_connections',
|
||||
'keepalive_expiry',
|
||||
'local_addresses',
|
||||
'proxies',
|
||||
'using_tor_proxy',
|
||||
'max_redirects',
|
||||
'retries',
|
||||
'retry_on_http_error',
|
||||
'_local_addresses_cycle',
|
||||
'_proxies_cycle',
|
||||
'_clients',
|
||||
'_logger',
|
||||
)
|
||||
|
||||
_TOR_CHECK_RESULT = {}
|
||||
|
||||
def __init__(
|
||||
# pylint: disable=too-many-arguments
|
||||
self,
|
||||
enable_http=True,
|
||||
verify=True,
|
||||
enable_http2=False,
|
||||
max_connections=None,
|
||||
max_keepalive_connections=None,
|
||||
keepalive_expiry=None,
|
||||
proxies=None,
|
||||
using_tor_proxy=False,
|
||||
local_addresses=None,
|
||||
retries=0,
|
||||
retry_on_http_error=None,
|
||||
max_redirects=30,
|
||||
logger_name=None,
|
||||
):
|
||||
|
||||
self.enable_http = enable_http
|
||||
self.verify = verify
|
||||
self.enable_http2 = enable_http2
|
||||
self.max_connections = max_connections
|
||||
self.max_keepalive_connections = max_keepalive_connections
|
||||
self.keepalive_expiry = keepalive_expiry
|
||||
self.proxies = proxies
|
||||
self.using_tor_proxy = using_tor_proxy
|
||||
self.local_addresses = local_addresses
|
||||
self.retries = retries
|
||||
self.retry_on_http_error = retry_on_http_error
|
||||
self.max_redirects = max_redirects
|
||||
self._local_addresses_cycle = self.get_ipaddress_cycle()
|
||||
self._proxies_cycle = self.get_proxy_cycles()
|
||||
self._clients = {}
|
||||
self._logger = logger.getChild(logger_name) if logger_name else logger
|
||||
self.check_parameters()
|
||||
|
||||
def check_parameters(self):
|
||||
for address in self.iter_ipaddresses():
|
||||
if '/' in address:
|
||||
ipaddress.ip_network(address, False)
|
||||
else:
|
||||
ipaddress.ip_address(address)
|
||||
|
||||
if self.proxies is not None and not isinstance(self.proxies, (str, dict)):
|
||||
raise ValueError('proxies type has to be str, dict or None')
|
||||
|
||||
def iter_ipaddresses(self):
|
||||
local_addresses = self.local_addresses
|
||||
if not local_addresses:
|
||||
return
|
||||
if isinstance(local_addresses, str):
|
||||
local_addresses = [local_addresses]
|
||||
for address in local_addresses:
|
||||
yield address
|
||||
|
||||
def get_ipaddress_cycle(self):
|
||||
while True:
|
||||
count = 0
|
||||
for address in self.iter_ipaddresses():
|
||||
if '/' in address:
|
||||
for a in ipaddress.ip_network(address, False).hosts():
|
||||
yield str(a)
|
||||
count += 1
|
||||
else:
|
||||
a = ipaddress.ip_address(address)
|
||||
yield str(a)
|
||||
count += 1
|
||||
if count == 0:
|
||||
yield None
|
||||
|
||||
def iter_proxies(self):
|
||||
if not self.proxies:
|
||||
return
|
||||
# https://www.python-httpx.org/compatibility/#proxy-keys
|
||||
if isinstance(self.proxies, str):
|
||||
yield 'all://', [self.proxies]
|
||||
else:
|
||||
for pattern, proxy_url in self.proxies.items():
|
||||
pattern = PROXY_PATTERN_MAPPING.get(pattern, pattern)
|
||||
if isinstance(proxy_url, str):
|
||||
proxy_url = [proxy_url]
|
||||
yield pattern, proxy_url
|
||||
|
||||
def get_proxy_cycles(self):
|
||||
proxy_settings = {}
|
||||
for pattern, proxy_urls in self.iter_proxies():
|
||||
proxy_settings[pattern] = cycle(proxy_urls)
|
||||
while True:
|
||||
# pylint: disable=stop-iteration-return
|
||||
yield tuple((pattern, next(proxy_url_cycle)) for pattern, proxy_url_cycle in proxy_settings.items())
|
||||
|
||||
async def log_response(self, response: httpx.Response):
|
||||
request = response.request
|
||||
status = f"{response.status_code} {response.reason_phrase}"
|
||||
response_line = f"{response.http_version} {status}"
|
||||
content_type = response.headers.get("Content-Type")
|
||||
content_type = f' ({content_type})' if content_type else ''
|
||||
self._logger.debug(f'HTTP Request: {request.method} {request.url} "{response_line}"{content_type}')
|
||||
|
||||
@staticmethod
|
||||
async def check_tor_proxy(client: httpx.AsyncClient, proxies) -> bool:
|
||||
if proxies in Network._TOR_CHECK_RESULT:
|
||||
return Network._TOR_CHECK_RESULT[proxies]
|
||||
|
||||
result = True
|
||||
# ignore client._transport because it is not used with all://
|
||||
for transport in client._mounts.values(): # pylint: disable=protected-access
|
||||
if isinstance(transport, AsyncHTTPTransportNoHttp):
|
||||
continue
|
||||
if getattr(transport, '_pool') and getattr(transport._pool, '_rdns', False):
|
||||
continue
|
||||
return False
|
||||
response = await client.get("https://check.torproject.org/api/ip", timeout=10)
|
||||
if not response.json()["IsTor"]:
|
||||
result = False
|
||||
Network._TOR_CHECK_RESULT[proxies] = result
|
||||
return result
|
||||
|
||||
async def get_client(self, verify=None, max_redirects=None):
|
||||
verify = self.verify if verify is None else verify
|
||||
max_redirects = self.max_redirects if max_redirects is None else max_redirects
|
||||
local_address = next(self._local_addresses_cycle)
|
||||
proxies = next(self._proxies_cycle) # is a tuple so it can be part of the key
|
||||
key = (verify, max_redirects, local_address, proxies)
|
||||
hook_log_response = self.log_response if searx_debug else None
|
||||
if key not in self._clients or self._clients[key].is_closed:
|
||||
client = new_client(
|
||||
self.enable_http,
|
||||
verify,
|
||||
self.enable_http2,
|
||||
self.max_connections,
|
||||
self.max_keepalive_connections,
|
||||
self.keepalive_expiry,
|
||||
dict(proxies),
|
||||
local_address,
|
||||
0,
|
||||
max_redirects,
|
||||
hook_log_response,
|
||||
)
|
||||
if self.using_tor_proxy and not await self.check_tor_proxy(client, proxies):
|
||||
await client.aclose()
|
||||
raise httpx.ProxyError('Network configuration problem: not using Tor')
|
||||
self._clients[key] = client
|
||||
return self._clients[key]
|
||||
|
||||
async def aclose(self):
|
||||
async def close_client(client):
|
||||
try:
|
||||
await client.aclose()
|
||||
except httpx.HTTPError:
|
||||
pass
|
||||
|
||||
await asyncio.gather(*[close_client(client) for client in self._clients.values()], return_exceptions=False)
|
||||
|
||||
@staticmethod
|
||||
def extract_kwargs_clients(kwargs):
|
||||
kwargs_clients = {}
|
||||
if 'verify' in kwargs:
|
||||
kwargs_clients['verify'] = kwargs.pop('verify')
|
||||
if 'max_redirects' in kwargs:
|
||||
kwargs_clients['max_redirects'] = kwargs.pop('max_redirects')
|
||||
if 'allow_redirects' in kwargs:
|
||||
# see https://github.com/encode/httpx/pull/1808
|
||||
kwargs['follow_redirects'] = kwargs.pop('allow_redirects')
|
||||
return kwargs_clients
|
||||
|
||||
def is_valid_response(self, response):
|
||||
# pylint: disable=too-many-boolean-expressions
|
||||
if (
|
||||
(self.retry_on_http_error is True and 400 <= response.status_code <= 599)
|
||||
or (isinstance(self.retry_on_http_error, list) and response.status_code in self.retry_on_http_error)
|
||||
or (isinstance(self.retry_on_http_error, int) and response.status_code == self.retry_on_http_error)
|
||||
):
|
||||
return False
|
||||
return True
|
||||
|
||||
async def call_client(self, stream, method, url, **kwargs):
|
||||
retries = self.retries
|
||||
was_disconnected = False
|
||||
kwargs_clients = Network.extract_kwargs_clients(kwargs)
|
||||
while retries >= 0: # pragma: no cover
|
||||
client = await self.get_client(**kwargs_clients)
|
||||
try:
|
||||
if stream:
|
||||
response = client.stream(method, url, **kwargs)
|
||||
else:
|
||||
response = await client.request(method, url, **kwargs)
|
||||
if self.is_valid_response(response) or retries <= 0:
|
||||
return response
|
||||
except httpx.RemoteProtocolError as e:
|
||||
if not was_disconnected:
|
||||
# the server has closed the connection:
|
||||
# try again without decreasing the retries variable & with a new HTTP client
|
||||
was_disconnected = True
|
||||
await client.aclose()
|
||||
self._logger.warning('httpx.RemoteProtocolError: the server has disconnected, retrying')
|
||||
continue
|
||||
if retries <= 0:
|
||||
raise e
|
||||
except (httpx.RequestError, httpx.HTTPStatusError) as e:
|
||||
if retries <= 0:
|
||||
raise e
|
||||
retries -= 1
|
||||
|
||||
async def request(self, method, url, **kwargs):
|
||||
return await self.call_client(False, method, url, **kwargs)
|
||||
|
||||
async def stream(self, method, url, **kwargs):
|
||||
return await self.call_client(True, method, url, **kwargs)
|
||||
|
||||
@classmethod
|
||||
async def aclose_all(cls):
|
||||
await asyncio.gather(*[network.aclose() for network in NETWORKS.values()], return_exceptions=False)
|
||||
|
||||
|
||||
def get_network(name=None):
|
||||
return NETWORKS.get(name or DEFAULT_NAME)
|
||||
|
||||
|
||||
def check_network_configuration():
|
||||
async def check():
|
||||
exception_count = 0
|
||||
for network in NETWORKS.values():
|
||||
if network.using_tor_proxy:
|
||||
try:
|
||||
await network.get_client()
|
||||
except Exception: # pylint: disable=broad-except
|
||||
network._logger.exception('Error') # pylint: disable=protected-access
|
||||
exception_count += 1
|
||||
return exception_count
|
||||
|
||||
future = asyncio.run_coroutine_threadsafe(check(), get_loop())
|
||||
exception_count = future.result()
|
||||
if exception_count > 0:
|
||||
raise RuntimeError("Invalid network configuration")
|
||||
|
||||
|
||||
def initialize(settings_engines=None, settings_outgoing=None):
|
||||
# pylint: disable=import-outside-toplevel)
|
||||
from searx.engines import engines
|
||||
from searx import settings
|
||||
|
||||
# pylint: enable=import-outside-toplevel)
|
||||
|
||||
settings_engines = settings_engines or settings['engines']
|
||||
settings_outgoing = settings_outgoing or settings['outgoing']
|
||||
|
||||
# default parameters for AsyncHTTPTransport
|
||||
# see https://github.com/encode/httpx/blob/e05a5372eb6172287458b37447c30f650047e1b8/httpx/_transports/default.py#L108-L121 # nopep8
|
||||
default_params = {
|
||||
'enable_http': False,
|
||||
'verify': True,
|
||||
'enable_http2': settings_outgoing.get('enable_http2', True),
|
||||
'max_connections': settings_outgoing.get('pool_connections', 100),
|
||||
'max_keepalive_connections': settings_outgoing.get('pool_maxsize', 10),
|
||||
'keepalive_expiry': settings_outgoing.get('keepalive_expiry', 5.0),
|
||||
'local_addresses': settings_outgoing.get('source_ips', []),
|
||||
'using_tor_proxy': settings_outgoing.get('using_tor_proxy', False),
|
||||
'proxies': settings_outgoing.get('proxies', None),
|
||||
'max_redirects': settings_outgoing.get('max_redirects', 30),
|
||||
'retries': settings_outgoing.get('retries', 0),
|
||||
'retry_on_http_error': None,
|
||||
}
|
||||
|
||||
def new_network(params, logger_name=None):
|
||||
nonlocal default_params
|
||||
result = {}
|
||||
result.update(default_params)
|
||||
result.update(params)
|
||||
if logger_name:
|
||||
result['logger_name'] = logger_name
|
||||
return Network(**result)
|
||||
|
||||
def iter_networks():
|
||||
nonlocal settings_engines
|
||||
for engine_spec in settings_engines:
|
||||
engine_name = engine_spec['name']
|
||||
engine = engines.get(engine_name)
|
||||
if engine is None:
|
||||
continue
|
||||
network = getattr(engine, 'network', None)
|
||||
yield engine_name, engine, network
|
||||
|
||||
if NETWORKS:
|
||||
done()
|
||||
NETWORKS.clear()
|
||||
NETWORKS[DEFAULT_NAME] = new_network({}, logger_name='default')
|
||||
NETWORKS['ipv4'] = new_network({'local_addresses': '0.0.0.0'}, logger_name='ipv4')
|
||||
NETWORKS['ipv6'] = new_network({'local_addresses': '::'}, logger_name='ipv6')
|
||||
|
||||
# define networks from outgoing.networks
|
||||
for network_name, network in settings_outgoing.get('networks', {}).items():
|
||||
NETWORKS[network_name] = new_network(network, logger_name=network_name)
|
||||
|
||||
# define networks from engines.[i].network (except references)
|
||||
for engine_name, engine, network in iter_networks():
|
||||
if network is None:
|
||||
network = {}
|
||||
for attribute_name, attribute_value in default_params.items():
|
||||
if hasattr(engine, attribute_name):
|
||||
network[attribute_name] = getattr(engine, attribute_name)
|
||||
else:
|
||||
network[attribute_name] = attribute_value
|
||||
NETWORKS[engine_name] = new_network(network, logger_name=engine_name)
|
||||
elif isinstance(network, dict):
|
||||
NETWORKS[engine_name] = new_network(network, logger_name=engine_name)
|
||||
|
||||
# define networks from engines.[i].network (references)
|
||||
for engine_name, engine, network in iter_networks():
|
||||
if isinstance(network, str):
|
||||
NETWORKS[engine_name] = NETWORKS[network]
|
||||
|
||||
# the /image_proxy endpoint has a dedicated network.
|
||||
# same parameters than the default network, but HTTP/2 is disabled.
|
||||
# It decreases the CPU load average, and the total time is more or less the same
|
||||
if 'image_proxy' not in NETWORKS:
|
||||
image_proxy_params = default_params.copy()
|
||||
image_proxy_params['enable_http2'] = False
|
||||
NETWORKS['image_proxy'] = new_network(image_proxy_params, logger_name='image_proxy')
|
||||
|
||||
|
||||
@atexit.register
|
||||
def done():
|
||||
"""Close all HTTP client
|
||||
|
||||
Avoid a warning at exit
|
||||
see https://github.com/encode/httpx/blob/1a6e254f72d9fd5694a1c10a28927e193ab4f76b/httpx/_client.py#L1785
|
||||
|
||||
Note: since Network.aclose has to be async, it is not possible to call this method on Network.__del__
|
||||
So Network.aclose is called here using atexit.register
|
||||
"""
|
||||
try:
|
||||
loop = get_loop()
|
||||
if loop:
|
||||
future = asyncio.run_coroutine_threadsafe(Network.aclose_all(), loop)
|
||||
# wait 3 seconds to close the HTTP clients
|
||||
future.result(3)
|
||||
finally:
|
||||
NETWORKS.clear()
|
||||
|
||||
|
||||
NETWORKS[DEFAULT_NAME] = Network()
|
|
@ -0,0 +1,235 @@
|
|||
import sys
|
||||
from time import time
|
||||
from itertools import cycle
|
||||
from threading import local
|
||||
|
||||
import requests
|
||||
|
||||
from searx import settings
|
||||
from searx import logger
|
||||
from searx.raise_for_httperror import raise_for_httperror
|
||||
|
||||
|
||||
logger = logger.getChild('poolrequests')
|
||||
|
||||
|
||||
try:
|
||||
import ssl
|
||||
if ssl.OPENSSL_VERSION_INFO[0:3] < (1, 0, 2):
|
||||
# https://github.com/certifi/python-certifi#1024-bit-root-certificates
|
||||
logger.critical('You are using an old openssl version({0}), please upgrade above 1.0.2!'
|
||||
.format(ssl.OPENSSL_VERSION))
|
||||
sys.exit(1)
|
||||
except ImportError:
|
||||
ssl = None
|
||||
if not getattr(ssl, "HAS_SNI", False):
|
||||
try:
|
||||
import OpenSSL # pylint: disable=unused-import
|
||||
except ImportError:
|
||||
logger.critical("ssl doesn't support SNI and the pyopenssl module is not installed.\n"
|
||||
"Some HTTPS connections will fail")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
class HTTPAdapterWithConnParams(requests.adapters.HTTPAdapter):
|
||||
|
||||
def __init__(self, pool_connections=requests.adapters.DEFAULT_POOLSIZE,
|
||||
pool_maxsize=requests.adapters.DEFAULT_POOLSIZE,
|
||||
max_retries=requests.adapters.DEFAULT_RETRIES,
|
||||
pool_block=requests.adapters.DEFAULT_POOLBLOCK,
|
||||
**conn_params):
|
||||
if max_retries == requests.adapters.DEFAULT_RETRIES:
|
||||
self.max_retries = requests.adapters.Retry(0, read=False)
|
||||
else:
|
||||
self.max_retries = requests.adapters.Retry.from_int(max_retries)
|
||||
self.config = {}
|
||||
self.proxy_manager = {}
|
||||
|
||||
super().__init__()
|
||||
|
||||
self._pool_connections = pool_connections
|
||||
self._pool_maxsize = pool_maxsize
|
||||
self._pool_block = pool_block
|
||||
self._conn_params = conn_params
|
||||
|
||||
self.init_poolmanager(pool_connections, pool_maxsize, block=pool_block, **conn_params)
|
||||
|
||||
def __setstate__(self, state):
|
||||
# Can't handle by adding 'proxy_manager' to self.__attrs__ because
|
||||
# because self.poolmanager uses a lambda function, which isn't pickleable.
|
||||
self.proxy_manager = {}
|
||||
self.config = {}
|
||||
|
||||
for attr, value in state.items():
|
||||
setattr(self, attr, value)
|
||||
|
||||
self.init_poolmanager(self._pool_connections, self._pool_maxsize,
|
||||
block=self._pool_block, **self._conn_params)
|
||||
|
||||
|
||||
threadLocal = local()
|
||||
connect = settings['outgoing'].get('pool_connections', 100) # Magic number kept from previous code
|
||||
maxsize = settings['outgoing'].get('pool_maxsize', requests.adapters.DEFAULT_POOLSIZE) # Picked from constructor
|
||||
if settings['outgoing'].get('source_ips'):
|
||||
http_adapters = cycle(HTTPAdapterWithConnParams(pool_connections=connect, pool_maxsize=maxsize,
|
||||
source_address=(source_ip, 0))
|
||||
for source_ip in settings['outgoing']['source_ips'])
|
||||
https_adapters = cycle(HTTPAdapterWithConnParams(pool_connections=connect, pool_maxsize=maxsize,
|
||||
source_address=(source_ip, 0))
|
||||
for source_ip in settings['outgoing']['source_ips'])
|
||||
else:
|
||||
http_adapters = cycle((HTTPAdapterWithConnParams(pool_connections=connect, pool_maxsize=maxsize), ))
|
||||
https_adapters = cycle((HTTPAdapterWithConnParams(pool_connections=connect, pool_maxsize=maxsize), ))
|
||||
|
||||
|
||||
class SessionSinglePool(requests.Session):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
# reuse the same adapters
|
||||
self.adapters.clear()
|
||||
|
||||
https_adapter = threadLocal.__dict__.setdefault('https_adapter', next(https_adapters))
|
||||
self.mount('https://', https_adapter)
|
||||
if get_enable_http_protocol():
|
||||
http_adapter = threadLocal.__dict__.setdefault('http_adapter', next(http_adapters))
|
||||
self.mount('http://', http_adapter)
|
||||
|
||||
def close(self):
|
||||
"""Call super, but clear adapters since there are managed globaly"""
|
||||
self.adapters.clear()
|
||||
super().close()
|
||||
|
||||
|
||||
def set_timeout_for_thread(timeout, start_time=None):
|
||||
threadLocal.timeout = timeout
|
||||
threadLocal.start_time = start_time
|
||||
|
||||
|
||||
def set_enable_http_protocol(enable_http):
|
||||
threadLocal.enable_http = enable_http
|
||||
|
||||
|
||||
def get_enable_http_protocol():
|
||||
try:
|
||||
return threadLocal.enable_http
|
||||
except AttributeError:
|
||||
return False
|
||||
|
||||
|
||||
def reset_time_for_thread():
|
||||
threadLocal.total_time = 0
|
||||
|
||||
|
||||
def get_time_for_thread():
|
||||
return threadLocal.total_time
|
||||
|
||||
|
||||
def get_proxy_cycles(proxy_settings):
|
||||
if not proxy_settings:
|
||||
return None
|
||||
# Backwards compatibility for single proxy in settings.yml
|
||||
for protocol, proxy in proxy_settings.items():
|
||||
if isinstance(proxy, str):
|
||||
proxy_settings[protocol] = [proxy]
|
||||
|
||||
for protocol in proxy_settings:
|
||||
proxy_settings[protocol] = cycle(proxy_settings[protocol])
|
||||
return proxy_settings
|
||||
|
||||
|
||||
GLOBAL_PROXY_CYCLES = get_proxy_cycles(settings['outgoing'].get('proxies'))
|
||||
|
||||
|
||||
def get_proxies(proxy_cycles):
|
||||
if proxy_cycles:
|
||||
return {protocol: next(proxy_cycle) for protocol, proxy_cycle in proxy_cycles.items()}
|
||||
return None
|
||||
|
||||
|
||||
def get_global_proxies():
|
||||
return get_proxies(GLOBAL_PROXY_CYCLES)
|
||||
|
||||
|
||||
def request(method, url, **kwargs):
|
||||
"""same as requests/requests/api.py request(...)"""
|
||||
time_before_request = time()
|
||||
|
||||
# session start
|
||||
session = SessionSinglePool()
|
||||
|
||||
# proxies
|
||||
if not kwargs.get('proxies'):
|
||||
kwargs['proxies'] = get_global_proxies()
|
||||
|
||||
# timeout
|
||||
if 'timeout' in kwargs:
|
||||
timeout = kwargs['timeout']
|
||||
else:
|
||||
timeout = getattr(threadLocal, 'timeout', None)
|
||||
if timeout is not None:
|
||||
kwargs['timeout'] = timeout
|
||||
|
||||
# raise_for_error
|
||||
check_for_httperror = True
|
||||
if 'raise_for_httperror' in kwargs:
|
||||
check_for_httperror = kwargs['raise_for_httperror']
|
||||
del kwargs['raise_for_httperror']
|
||||
|
||||
# do request
|
||||
response = session.request(method=method, url=url, **kwargs)
|
||||
|
||||
time_after_request = time()
|
||||
|
||||
# is there a timeout for this engine ?
|
||||
if timeout is not None:
|
||||
timeout_overhead = 0.2 # seconds
|
||||
# start_time = when the user request started
|
||||
start_time = getattr(threadLocal, 'start_time', time_before_request)
|
||||
search_duration = time_after_request - start_time
|
||||
if search_duration > timeout + timeout_overhead:
|
||||
raise requests.exceptions.Timeout(response=response)
|
||||
|
||||
# session end
|
||||
session.close()
|
||||
|
||||
if hasattr(threadLocal, 'total_time'):
|
||||
threadLocal.total_time += time_after_request - time_before_request
|
||||
|
||||
# raise an exception
|
||||
if check_for_httperror:
|
||||
raise_for_httperror(response)
|
||||
|
||||
return response
|
||||
|
||||
|
||||
def get(url, **kwargs):
|
||||
kwargs.setdefault('allow_redirects', True)
|
||||
return request('get', url, **kwargs)
|
||||
|
||||
|
||||
def options(url, **kwargs):
|
||||
kwargs.setdefault('allow_redirects', True)
|
||||
return request('options', url, **kwargs)
|
||||
|
||||
|
||||
def head(url, **kwargs):
|
||||
kwargs.setdefault('allow_redirects', False)
|
||||
return request('head', url, **kwargs)
|
||||
|
||||
|
||||
def post(url, data=None, **kwargs):
|
||||
return request('post', url, data=data, **kwargs)
|
||||
|
||||
|
||||
def put(url, data=None, **kwargs):
|
||||
return request('put', url, data=data, **kwargs)
|
||||
|
||||
|
||||
def patch(url, data=None, **kwargs):
|
||||
return request('patch', url, data=data, **kwargs)
|
||||
|
||||
|
||||
def delete(url, **kwargs):
|
||||
return request('delete', url, **kwargs)
|
|
@ -25,13 +25,11 @@ from _thread import start_new_thread
|
|||
from searx import settings
|
||||
from searx.answerers import ask
|
||||
from searx.external_bang import get_bang_url
|
||||
from searx.engines import load_engines
|
||||
from searx.results import ResultContainer
|
||||
from searx import logger
|
||||
from searx.plugins import plugins
|
||||
from searx.search.models import EngineRef, SearchQuery
|
||||
from searx.search.processors import PROCESSORS, initialize as initialize_processors
|
||||
from searx.network import check_network_configuration, initialize as initialize_network
|
||||
from searx.search.processors import processors, initialize as initialize_processors
|
||||
from searx.search.checker import initialize as initialize_checker
|
||||
|
||||
|
||||
|
@ -49,14 +47,9 @@ else:
|
|||
sys.exit(1)
|
||||
|
||||
|
||||
def initialize(settings_engines=None, enable_checker=False, check_network=False):
|
||||
def initialize(settings_engines=None, enable_checker=False):
|
||||
settings_engines = settings_engines or settings['engines']
|
||||
load_engines(settings_engines)
|
||||
initialize_network(settings_engines, settings['outgoing'])
|
||||
if check_network:
|
||||
check_network_configuration()
|
||||
initialize_processors(settings_engines)
|
||||
|
||||
if enable_checker:
|
||||
initialize_checker()
|
||||
|
||||
|
@ -111,7 +104,7 @@ class Search:
|
|||
|
||||
# start search-reqest for all selected engines
|
||||
for engineref in self.search_query.engineref_list:
|
||||
processor = PROCESSORS[engineref.name]
|
||||
processor = processors[engineref.name]
|
||||
|
||||
# set default request parameters
|
||||
request_params = processor.get_params(self.search_query, engineref.category)
|
||||
|
@ -154,7 +147,7 @@ class Search:
|
|||
|
||||
for engine_name, query, request_params in requests:
|
||||
th = threading.Thread(
|
||||
target=PROCESSORS[engine_name].search,
|
||||
target=processors[engine_name].search,
|
||||
args=(query, request_params, self.result_container, self.start_time, self.actual_timeout),
|
||||
name=search_id,
|
||||
)
|
||||
|
|
|
@ -9,7 +9,7 @@ import signal
|
|||
|
||||
from searx import logger, settings, searx_debug
|
||||
from searx.exceptions import SearxSettingsException
|
||||
from searx.search.processors import PROCESSORS
|
||||
from searx.search.processors import processors
|
||||
from searx.search.checker import Checker
|
||||
from searx.shared import schedule, storage
|
||||
|
||||
|
@ -55,7 +55,7 @@ def run():
|
|||
'status': 'ok',
|
||||
'engines': {}
|
||||
}
|
||||
for name, processor in PROCESSORS.items():
|
||||
for name, processor in processors.items():
|
||||
logger.debug('Checking %s engine', name)
|
||||
checker = Checker(processor)
|
||||
checker.run()
|
||||
|
|
|
@ -11,9 +11,9 @@ from urllib.parse import urlparse
|
|||
import re
|
||||
from langdetect import detect_langs
|
||||
from langdetect.lang_detect_exception import LangDetectException
|
||||
import httpx
|
||||
import requests.exceptions
|
||||
|
||||
from searx import network, logger
|
||||
from searx import poolrequests, logger
|
||||
from searx.results import ResultContainer
|
||||
from searx.search.models import SearchQuery, EngineRef
|
||||
from searx.search.processors import EngineProcessor
|
||||
|
@ -75,8 +75,8 @@ def _is_url_image(image_url):
|
|||
while retry > 0:
|
||||
a = time()
|
||||
try:
|
||||
network.set_timeout_for_thread(10.0, time())
|
||||
r = network.get(image_url, timeout=10.0, follow_redirects=True, headers={
|
||||
poolrequests.set_timeout_for_thread(10.0, time())
|
||||
r = poolrequests.get(image_url, timeout=10.0, allow_redirects=True, headers={
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0',
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
||||
'Accept-Language': 'en-US;q=0.5,en;q=0.3',
|
||||
|
@ -90,10 +90,10 @@ def _is_url_image(image_url):
|
|||
if r.headers["content-type"].startswith('image/'):
|
||||
return True
|
||||
return False
|
||||
except httpx.TimeoutException:
|
||||
except requests.exceptions.Timeout:
|
||||
logger.error('Timeout for %s: %i', image_url, int(time() - a))
|
||||
retry -= 1
|
||||
except httpx.HTTPError:
|
||||
except requests.exceptions.RequestException:
|
||||
logger.exception('Exception for %s', image_url)
|
||||
return False
|
||||
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
import threading
|
||||
|
||||
from .online import OnlineProcessor
|
||||
from .offline import OfflineProcessor
|
||||
from .online_dictionary import OnlineDictionaryProcessor
|
||||
|
@ -12,9 +10,9 @@ import searx.engines as engines
|
|||
|
||||
|
||||
__all__ = ['EngineProcessor', 'OfflineProcessor', 'OnlineProcessor',
|
||||
'OnlineDictionaryProcessor', 'OnlineCurrencyProcessor', 'PROCESSORS']
|
||||
'OnlineDictionaryProcessor', 'OnlineCurrencyProcessor', 'processors']
|
||||
logger = logger.getChild('search.processors')
|
||||
PROCESSORS = {}
|
||||
processors = {}
|
||||
|
||||
|
||||
def get_processor_class(engine_type):
|
||||
|
@ -29,27 +27,15 @@ def get_processor(engine, engine_name):
|
|||
processor_class = get_processor_class(engine_type)
|
||||
if processor_class:
|
||||
return processor_class(engine, engine_name)
|
||||
return None
|
||||
|
||||
|
||||
def initialize_processor(processor):
|
||||
"""Initialize one processor
|
||||
Call the init function of the engine
|
||||
"""
|
||||
if processor.has_initialize_function:
|
||||
t = threading.Thread(target=processor.initialize, daemon=True)
|
||||
t.start()
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def initialize(engine_list):
|
||||
"""Initialize all engines and store a processor for each engine in :py:obj:`PROCESSORS`."""
|
||||
for engine_data in engine_list:
|
||||
engine_name = engine_data['name']
|
||||
engine = engines.engines.get(engine_name)
|
||||
if engine:
|
||||
processor = get_processor(engine, engine_name)
|
||||
initialize_processor(processor)
|
||||
if processor is None:
|
||||
engine.logger.error('Error get processor for engine %s', engine_name)
|
||||
else:
|
||||
PROCESSORS[engine_name] = processor
|
||||
engines.initialize_engines(engine_list)
|
||||
for engine_name, engine in engines.engines.items():
|
||||
processor = get_processor(engine, engine_name)
|
||||
if processor is None:
|
||||
logger.error('Error get processor for engine %s', engine_name)
|
||||
else:
|
||||
processors[engine_name] = processor
|
||||
|
|
|
@ -2,32 +2,17 @@
|
|||
|
||||
from abc import abstractmethod, ABC
|
||||
from searx import logger
|
||||
from searx.engines import engines
|
||||
from searx.utils import get_engine_from_settings
|
||||
|
||||
|
||||
logger = logger.getChild('searx.search.processor')
|
||||
|
||||
|
||||
class EngineProcessor(ABC):
|
||||
|
||||
def __init__(self, engine, engine_name):
|
||||
self.engine = engine
|
||||
self.engine_name = engine_name
|
||||
|
||||
def initialize(self):
|
||||
try:
|
||||
self.engine.init(get_engine_from_settings(self.engine_name))
|
||||
except SearxEngineResponseException as exc:
|
||||
logger.warn('Fail to initialize %s // %s', self.engine_name, exc)
|
||||
except Exception: # pylint: disable=broad-except
|
||||
logger.exception('Fail to initialize %s', self.engine_name)
|
||||
else:
|
||||
logger.debug('Initialized %s', self.engine_name)
|
||||
|
||||
@property
|
||||
def has_initialize_function(self):
|
||||
return hasattr(self.engine, 'init')
|
||||
|
||||
def get_params(self, search_query, engine_category):
|
||||
# if paging is not supported, skip
|
||||
if search_query.pageno > 1 and not self.engine.paging:
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
from urllib.parse import urlparse
|
||||
from time import time
|
||||
import threading
|
||||
import asyncio
|
||||
|
||||
import httpx
|
||||
import requests.exceptions
|
||||
|
||||
import searx.network
|
||||
import searx.poolrequests as poolrequests
|
||||
from searx.engines import settings
|
||||
from searx import logger
|
||||
from searx.utils import gen_useragent
|
||||
|
@ -64,6 +64,10 @@ class OnlineProcessor(EngineProcessor):
|
|||
auth=params['auth']
|
||||
)
|
||||
|
||||
# setting engine based proxies
|
||||
if hasattr(self.engine, 'proxies'):
|
||||
request_args['proxies'] = poolrequests.get_proxies(self.engine.proxies)
|
||||
|
||||
# max_redirects
|
||||
max_redirects = params.get('max_redirects')
|
||||
if max_redirects:
|
||||
|
@ -82,9 +86,9 @@ class OnlineProcessor(EngineProcessor):
|
|||
|
||||
# specific type of request (GET or POST)
|
||||
if params['method'] == 'GET':
|
||||
req = searx.network.get
|
||||
req = poolrequests.get
|
||||
else:
|
||||
req = searx.network.post
|
||||
req = poolrequests.post
|
||||
|
||||
request_args['data'] = params['data']
|
||||
|
||||
|
@ -96,8 +100,8 @@ class OnlineProcessor(EngineProcessor):
|
|||
# unexpected redirect : record an error
|
||||
# but the engine might still return valid results.
|
||||
status_code = str(response.status_code or '')
|
||||
reason = response.reason_phrase or ''
|
||||
hostname = response.url.host
|
||||
reason = response.reason or ''
|
||||
hostname = str(urlparse(response.url or '').netloc)
|
||||
record_error(self.engine_name,
|
||||
'{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects),
|
||||
(status_code, reason, hostname))
|
||||
|
@ -125,14 +129,14 @@ class OnlineProcessor(EngineProcessor):
|
|||
|
||||
def search(self, query, params, result_container, start_time, timeout_limit):
|
||||
# set timeout for all HTTP requests
|
||||
searx.network.set_timeout_for_thread(timeout_limit, start_time=start_time)
|
||||
poolrequests.set_timeout_for_thread(timeout_limit, start_time=start_time)
|
||||
# reset the HTTP total time
|
||||
searx.network.reset_time_for_thread()
|
||||
# set the network
|
||||
searx.network.set_context_network_name(self.engine_name)
|
||||
poolrequests.reset_time_for_thread()
|
||||
# enable HTTP only if explicitly enabled
|
||||
poolrequests.set_enable_http_protocol(self.engine.enable_http)
|
||||
|
||||
# suppose everything will be alright
|
||||
http_exception = False
|
||||
requests_exception = False
|
||||
suspended_time = None
|
||||
|
||||
try:
|
||||
|
@ -146,7 +150,7 @@ class OnlineProcessor(EngineProcessor):
|
|||
|
||||
# update engine time when there is no exception
|
||||
engine_time = time() - start_time
|
||||
page_load_time = searx.network.get_time_for_thread()
|
||||
page_load_time = poolrequests.get_time_for_thread()
|
||||
result_container.add_timing(self.engine_name, engine_time, page_load_time)
|
||||
with threading.RLock():
|
||||
self.engine.stats['engine_time'] += engine_time
|
||||
|
@ -159,27 +163,27 @@ class OnlineProcessor(EngineProcessor):
|
|||
|
||||
# Timing
|
||||
engine_time = time() - start_time
|
||||
page_load_time = searx.network.get_time_for_thread()
|
||||
page_load_time = poolrequests.get_time_for_thread()
|
||||
result_container.add_timing(self.engine_name, engine_time, page_load_time)
|
||||
|
||||
# Record the errors
|
||||
with threading.RLock():
|
||||
self.engine.stats['errors'] += 1
|
||||
|
||||
if (issubclass(e.__class__, (httpx.TimeoutException, asyncio.TimeoutError))):
|
||||
if (issubclass(e.__class__, requests.exceptions.Timeout)):
|
||||
result_container.add_unresponsive_engine(self.engine_name, 'HTTP timeout')
|
||||
# requests timeout (connect or read)
|
||||
logger.error("engine {0} : HTTP requests timeout"
|
||||
"(search duration : {1} s, timeout: {2} s) : {3}"
|
||||
.format(self.engine_name, engine_time, timeout_limit, e.__class__.__name__))
|
||||
http_exception = True
|
||||
elif (issubclass(e.__class__, (httpx.HTTPError, httpx.StreamError))):
|
||||
requests_exception = True
|
||||
elif (issubclass(e.__class__, requests.exceptions.RequestException)):
|
||||
result_container.add_unresponsive_engine(self.engine_name, 'HTTP error')
|
||||
# other requests exception
|
||||
logger.exception("engine {0} : requests exception"
|
||||
"(search duration : {1} s, timeout: {2} s) : {3}"
|
||||
.format(self.engine_name, engine_time, timeout_limit, e))
|
||||
http_exception = True
|
||||
requests_exception = True
|
||||
elif (issubclass(e.__class__, SearxEngineCaptchaException)):
|
||||
result_container.add_unresponsive_engine(self.engine_name, 'CAPTCHA required')
|
||||
logger.exception('engine {0} : CAPTCHA'.format(self.engine_name))
|
||||
|
@ -203,7 +207,7 @@ class OnlineProcessor(EngineProcessor):
|
|||
# suspend the engine if there is an HTTP error
|
||||
# or suspended_time is defined
|
||||
with threading.RLock():
|
||||
if http_exception or suspended_time:
|
||||
if requests_exception or suspended_time:
|
||||
# update continuous_errors / suspend_end_time
|
||||
self.engine.continuous_errors += 1
|
||||
if suspended_time is None:
|
||||
|
|
|
@ -69,17 +69,19 @@ ui:
|
|||
# key : !!binary "your_morty_proxy_key"
|
||||
|
||||
outgoing: # communication with search engines
|
||||
request_timeout : 3.0 # default timeout in seconds, can be override by engine
|
||||
request_timeout : 2.0 # default timeout in seconds, can be override by engine
|
||||
# max_request_timeout: 10.0 # the maximum timeout in seconds
|
||||
useragent_suffix : "" # suffix of searx_useragent, could contain informations like an email address to the administrator
|
||||
pool_connections : 100 # The maximum number of concurrent connections that may be established.
|
||||
pool_maxsize : 20 # Allow the connection pool to maintain keep-alive connections below this point.
|
||||
enable_http2: True # See https://www.python-httpx.org/http2/
|
||||
pool_connections : 100 # Number of different hosts
|
||||
pool_maxsize : 10 # Number of simultaneous requests by host
|
||||
# uncomment below section if you want to use a proxy
|
||||
# see https://2.python-requests.org/en/latest/user/advanced/#proxies
|
||||
# SOCKS proxies are also supported: see https://2.python-requests.org/en/latest/user/advanced/#socks
|
||||
# proxies:
|
||||
# all://:
|
||||
# http:
|
||||
# - http://proxy1:8080
|
||||
# - http://proxy2:8080
|
||||
# https:
|
||||
# - http://proxy1:8080
|
||||
# - http://proxy2:8080
|
||||
# using_tor_proxy : True
|
||||
|
@ -89,7 +91,6 @@ outgoing: # communication with search engines
|
|||
# source_ips:
|
||||
# - 1.1.1.1
|
||||
# - 1.1.1.2
|
||||
# - fe80::/126
|
||||
|
||||
# External plugin configuration
|
||||
# See https://searx.github.io/searx/dev/plugins.html for more details
|
||||
|
@ -1026,18 +1027,16 @@ engines:
|
|||
additional_tests:
|
||||
rosebud: *test_rosebud
|
||||
|
||||
- name : qwant images
|
||||
engine : qwant
|
||||
shortcut : qwi
|
||||
disabled: True
|
||||
categories : images
|
||||
|
||||
- name : qwant news
|
||||
engine : qwant
|
||||
shortcut : qwn
|
||||
categories : news
|
||||
network: qwant
|
||||
|
||||
- name: qwant images
|
||||
engine: qwant
|
||||
shortcut: qwi
|
||||
categories: images
|
||||
disabled: True
|
||||
network: qwant
|
||||
|
||||
- name: qwant videos
|
||||
engine: qwant
|
||||
|
|
|
@ -10,8 +10,8 @@ import traceback
|
|||
|
||||
from os.path import dirname, join, abspath, realpath
|
||||
|
||||
from unittest import TestCase
|
||||
from splinter import Browser
|
||||
import aiounittest
|
||||
|
||||
|
||||
class SearxTestLayer:
|
||||
|
@ -82,7 +82,7 @@ def run_robot_tests(tests):
|
|||
test(browser)
|
||||
|
||||
|
||||
class SearxTestCase(aiounittest.AsyncTestCase):
|
||||
class SearxTestCase(TestCase):
|
||||
"""Base test case for non-robot tests."""
|
||||
|
||||
layer = SearxTestLayer
|
||||
|
|
|
@ -45,7 +45,7 @@ def searx_useragent():
|
|||
"""Return the searx User Agent"""
|
||||
return 'searx/{searx_version} {suffix}'.format(
|
||||
searx_version=VERSION_STRING,
|
||||
suffix=settings['outgoing'].get('useragent_suffix', '')).strip()
|
||||
suffix=settings['outgoing'].get('useragent_suffix', ''))
|
||||
|
||||
|
||||
def gen_useragent(os=None):
|
||||
|
|
119
searx/webapp.py
119
searx/webapp.py
|
@ -26,26 +26,12 @@ if __name__ == '__main__':
|
|||
from os.path import realpath, dirname
|
||||
sys.path.append(realpath(dirname(realpath(__file__)) + '/../'))
|
||||
|
||||
# set Unix thread name
|
||||
try:
|
||||
import setproctitle
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
import threading
|
||||
old_thread_init = threading.Thread.__init__
|
||||
|
||||
def new_thread_init(self, *args, **kwargs):
|
||||
old_thread_init(self, *args, **kwargs)
|
||||
setproctitle.setthreadtitle(self._name)
|
||||
threading.Thread.__init__ = new_thread_init
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
import os
|
||||
|
||||
import httpx
|
||||
import requests
|
||||
|
||||
from searx import logger
|
||||
logger = logger.getChild('webapp')
|
||||
|
@ -94,7 +80,7 @@ from searx.plugins import plugins
|
|||
from searx.plugins.oa_doi_rewrite import get_doi_resolver
|
||||
from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
|
||||
from searx.answerers import answerers
|
||||
from searx.network import stream as http_stream, set_context_network_name
|
||||
from searx.poolrequests import get_global_proxies
|
||||
from searx.answerers import ask
|
||||
from searx.metrology.error_recorder import errors_per_engines
|
||||
from searx.settings_loader import get_default_settings_path
|
||||
|
@ -153,7 +139,7 @@ werkzeug_reloader = flask_run_development or (searx_debug and __name__ == "__mai
|
|||
# initialize the engines except on the first run of the werkzeug server.
|
||||
if not werkzeug_reloader\
|
||||
or (werkzeug_reloader and os.environ.get("WERKZEUG_RUN_MAIN") == "true"):
|
||||
search_initialize(enable_checker=True, check_network=True)
|
||||
search_initialize(enable_checker=True)
|
||||
|
||||
babel = Babel(app)
|
||||
|
||||
|
@ -921,84 +907,57 @@ def _is_selected_language_supported(engine, preferences):
|
|||
|
||||
@app.route('/image_proxy', methods=['GET'])
|
||||
def image_proxy():
|
||||
# pylint: disable=too-many-return-statements, too-many-branches
|
||||
|
||||
url = request.args.get('url')
|
||||
url = request.args.get('url').encode()
|
||||
|
||||
if not url:
|
||||
return '', 400
|
||||
|
||||
h = new_hmac(settings['server']['secret_key'], url.encode())
|
||||
h = new_hmac(settings['server']['secret_key'], url)
|
||||
|
||||
if h != request.args.get('h'):
|
||||
return '', 400
|
||||
|
||||
maximum_size = 5 * 1024 * 1024
|
||||
forward_resp = False
|
||||
resp = None
|
||||
try:
|
||||
request_headers = {
|
||||
'User-Agent': gen_useragent(),
|
||||
'Accept': 'image/webp,*/*',
|
||||
'Accept-Encoding': 'gzip, deflate',
|
||||
'Sec-GPC': '1',
|
||||
'DNT': '1',
|
||||
}
|
||||
set_context_network_name('image_proxy')
|
||||
stream = http_stream(
|
||||
method='GET',
|
||||
url=url,
|
||||
headers=request_headers,
|
||||
timeout=settings['outgoing']['request_timeout'],
|
||||
follow_redirects=True,
|
||||
max_redirects=20)
|
||||
headers = {
|
||||
'User-Agent': gen_useragent(),
|
||||
'Accept': 'image/webp,*/*',
|
||||
'Accept-Encoding': 'gzip, deflate',
|
||||
'Sec-GPC': '1',
|
||||
'DNT': '1',
|
||||
}
|
||||
headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'})
|
||||
|
||||
resp = next(stream)
|
||||
content_length = resp.headers.get('Content-Length')
|
||||
if content_length and content_length.isdigit() and int(content_length) > maximum_size:
|
||||
return 'Max size', 400
|
||||
resp = requests.get(url,
|
||||
stream=True,
|
||||
timeout=settings['outgoing']['request_timeout'],
|
||||
headers=headers,
|
||||
proxies=get_global_proxies())
|
||||
|
||||
if resp.status_code != 200:
|
||||
logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code))
|
||||
if resp.status_code >= 400:
|
||||
return '', resp.status_code
|
||||
return '', 400
|
||||
if resp.status_code == 304:
|
||||
return '', resp.status_code
|
||||
|
||||
if not resp.headers.get('Content-Type', '').startswith('image/'):
|
||||
logger.debug('image-proxy: wrong content-type: %s', resp.headers.get('Content-Type', ''))
|
||||
return '', 400
|
||||
|
||||
forward_resp = True
|
||||
except httpx.HTTPError:
|
||||
logger.exception('HTTP error')
|
||||
if resp.status_code != 200:
|
||||
logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code))
|
||||
if resp.status_code >= 400:
|
||||
return '', resp.status_code
|
||||
return '', 400
|
||||
finally:
|
||||
if resp and not forward_resp:
|
||||
# the code is about to return an HTTP 400 error to the browser
|
||||
# we make sure to close the response between searxng and the HTTP server
|
||||
try:
|
||||
resp.close()
|
||||
except httpx.HTTPError:
|
||||
logger.exception('HTTP error on closing')
|
||||
|
||||
try:
|
||||
headers = dict_subset(
|
||||
resp.headers,
|
||||
{'Content-Type', 'Content-Encoding', 'Content-Length', 'Length'}
|
||||
)
|
||||
|
||||
def forward_chunk():
|
||||
total_length = 0
|
||||
for chunk in stream:
|
||||
total_length += len(chunk)
|
||||
if total_length > maximum_size:
|
||||
break
|
||||
yield chunk
|
||||
|
||||
return Response(forward_chunk(), mimetype=resp.headers['Content-Type'], headers=headers)
|
||||
except httpx.HTTPError:
|
||||
if not resp.headers.get('content-type', '').startswith('image/'):
|
||||
logger.debug('image-proxy: wrong content-type: {0}'.format(resp.headers.get('content-type')))
|
||||
return '', 400
|
||||
|
||||
img = b''
|
||||
chunk_counter = 0
|
||||
|
||||
for chunk in resp.iter_content(1024 * 1024):
|
||||
chunk_counter += 1
|
||||
if chunk_counter > 5:
|
||||
return '', 502 # Bad gateway - file is too big (>5M)
|
||||
img += chunk
|
||||
|
||||
headers = dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'})
|
||||
|
||||
return Response(img, mimetype=resp.headers['content-type'], headers=headers)
|
||||
|
||||
|
||||
@app.route('/stats', methods=['GET'])
|
||||
def stats():
|
||||
|
|
|
@ -10,7 +10,7 @@ from searx.engines.wikidata import send_wikidata_query
|
|||
from searx.utils import extract_text
|
||||
import searx
|
||||
import searx.search
|
||||
import searx.network
|
||||
import searx.poolrequests
|
||||
|
||||
SPARQL_WIKIPEDIA_ARTICLE = """
|
||||
SELECT DISTINCT ?item ?name
|
||||
|
@ -59,7 +59,7 @@ def get_wikipedia_summary(language, pageid):
|
|||
search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
|
||||
url = search_url.format(title=quote(pageid), language=language)
|
||||
try:
|
||||
response = searx.network.get(url)
|
||||
response = searx.poolrequests.get(url)
|
||||
response.raise_for_status()
|
||||
api_result = json.loads(response.text)
|
||||
return api_result.get('extract')
|
||||
|
@ -89,7 +89,7 @@ def get_website_description(url, lang1, lang2=None):
|
|||
lang_list.append(lang2)
|
||||
headers['Accept-Language'] = f'{",".join(lang_list)};q=0.8'
|
||||
try:
|
||||
response = searx.network.get(url, headers=headers, timeout=10)
|
||||
response = searx.poolrequests.get(url, headers=headers, timeout=10)
|
||||
response.raise_for_status()
|
||||
except Exception:
|
||||
return (None, None)
|
||||
|
|
|
@ -17,7 +17,7 @@ import json
|
|||
import re
|
||||
from os.path import join
|
||||
|
||||
import httpx
|
||||
import requests
|
||||
|
||||
from searx import searx_dir # pylint: disable=E0401 C0413
|
||||
|
||||
|
@ -30,7 +30,7 @@ HTTP_COLON = 'http:'
|
|||
|
||||
|
||||
def get_bang_url():
|
||||
response = httpx.get(URL_BV1)
|
||||
response = requests.get(URL_BV1)
|
||||
response.raise_for_status()
|
||||
|
||||
r = RE_BANG_VERSION.findall(response.text)
|
||||
|
@ -38,7 +38,7 @@ def get_bang_url():
|
|||
|
||||
|
||||
def fetch_ddg_bangs(url):
|
||||
response = httpx.get(url)
|
||||
response = requests.get(url)
|
||||
response.raise_for_status()
|
||||
return json.loads(response.content.decode())
|
||||
|
||||
|
|
|
@ -45,7 +45,7 @@ import collections
|
|||
from pathlib import Path
|
||||
|
||||
from searx import searx_dir
|
||||
from searx.network import set_timeout_for_thread
|
||||
from searx.poolrequests import set_timeout_for_thread
|
||||
from searx.engines.wikidata import send_wikidata_query
|
||||
from searx.languages import language_codes
|
||||
from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK
|
||||
|
|
|
@ -1,241 +0,0 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
from mock import patch
|
||||
|
||||
import httpx
|
||||
|
||||
from searx.network.network import Network, NETWORKS, initialize
|
||||
from searx.testing import SearxTestCase
|
||||
|
||||
|
||||
class TestNetwork(SearxTestCase):
|
||||
|
||||
def setUp(self):
|
||||
initialize()
|
||||
|
||||
def test_simple(self):
|
||||
network = Network()
|
||||
|
||||
self.assertEqual(next(network._local_addresses_cycle), None)
|
||||
self.assertEqual(next(network._proxies_cycle), ())
|
||||
|
||||
def test_ipaddress_cycle(self):
|
||||
network = NETWORKS['ipv6']
|
||||
self.assertEqual(next(network._local_addresses_cycle), '::')
|
||||
self.assertEqual(next(network._local_addresses_cycle), '::')
|
||||
|
||||
network = NETWORKS['ipv4']
|
||||
self.assertEqual(next(network._local_addresses_cycle), '0.0.0.0')
|
||||
self.assertEqual(next(network._local_addresses_cycle), '0.0.0.0')
|
||||
|
||||
network = Network(local_addresses=['192.168.0.1', '192.168.0.2'])
|
||||
self.assertEqual(next(network._local_addresses_cycle), '192.168.0.1')
|
||||
self.assertEqual(next(network._local_addresses_cycle), '192.168.0.2')
|
||||
self.assertEqual(next(network._local_addresses_cycle), '192.168.0.1')
|
||||
|
||||
network = Network(local_addresses=['192.168.0.0/30'])
|
||||
self.assertEqual(next(network._local_addresses_cycle), '192.168.0.1')
|
||||
self.assertEqual(next(network._local_addresses_cycle), '192.168.0.2')
|
||||
self.assertEqual(next(network._local_addresses_cycle), '192.168.0.1')
|
||||
self.assertEqual(next(network._local_addresses_cycle), '192.168.0.2')
|
||||
|
||||
network = Network(local_addresses=['fe80::/10'])
|
||||
self.assertEqual(next(network._local_addresses_cycle), 'fe80::1')
|
||||
self.assertEqual(next(network._local_addresses_cycle), 'fe80::2')
|
||||
self.assertEqual(next(network._local_addresses_cycle), 'fe80::3')
|
||||
|
||||
with self.assertRaises(ValueError):
|
||||
Network(local_addresses=['not_an_ip_address'])
|
||||
|
||||
def test_proxy_cycles(self):
|
||||
network = Network(proxies='http://localhost:1337')
|
||||
self.assertEqual(next(network._proxies_cycle), (('all://', 'http://localhost:1337'),))
|
||||
|
||||
network = Network(proxies={
|
||||
'https': 'http://localhost:1337',
|
||||
'http': 'http://localhost:1338'
|
||||
})
|
||||
self.assertEqual(next(network._proxies_cycle),
|
||||
(('https://', 'http://localhost:1337'), ('http://', 'http://localhost:1338')))
|
||||
self.assertEqual(next(network._proxies_cycle),
|
||||
(('https://', 'http://localhost:1337'), ('http://', 'http://localhost:1338')))
|
||||
|
||||
network = Network(proxies={
|
||||
'https': ['http://localhost:1337', 'http://localhost:1339'],
|
||||
'http': 'http://localhost:1338'
|
||||
})
|
||||
self.assertEqual(next(network._proxies_cycle),
|
||||
(('https://', 'http://localhost:1337'), ('http://', 'http://localhost:1338')))
|
||||
self.assertEqual(next(network._proxies_cycle),
|
||||
(('https://', 'http://localhost:1339'), ('http://', 'http://localhost:1338')))
|
||||
|
||||
with self.assertRaises(ValueError):
|
||||
Network(proxies=1)
|
||||
|
||||
def test_get_kwargs_clients(self):
    """extract_kwargs_clients splits client-level kwargs out of a request kwargs dict.

    'verify' and 'max_redirects' are moved into the returned client kwargs,
    while 'allow_redirects' is renamed in place to 'follow_redirects'.
    """
    kwargs = {
        'verify': True,
        'max_redirects': 5,
        'timeout': 2,
        'allow_redirects': True,
    }
    kwargs_client = Network.extract_kwargs_clients(kwargs)

    # two entries were extracted, two are left behind
    self.assertEqual(len(kwargs_client), 2)
    self.assertEqual(len(kwargs), 2)

    # remaining request kwargs, with 'allow_redirects' renamed
    self.assertEqual(kwargs['timeout'], 2)
    self.assertEqual(kwargs['follow_redirects'], True)

    # extracted client kwargs keep their values
    self.assertTrue(kwargs_client['verify'])
    self.assertEqual(kwargs_client['max_redirects'], 5)
async def test_get_client(self):
    """Clients are cached per kwargs: identical kwargs share one client object."""
    network = Network(verify=True)
    client1 = await network.get_client()
    client2 = await network.get_client(verify=True)
    client3 = await network.get_client(max_redirects=10)
    client4 = await network.get_client(verify=True)
    client5 = await network.get_client(verify=False)
    client6 = await network.get_client(max_redirects=10)

    # identical kwargs -> the very same client is handed out
    self.assertEqual(client1, client2)
    self.assertEqual(client1, client4)
    self.assertEqual(client3, client6)
    # different kwargs -> a distinct client
    self.assertNotEqual(client1, client3)
    self.assertNotEqual(client1, client5)

    await network.aclose()
async def test_aclose(self):
    """aclose() must succeed after a client has been created."""
    network = Network(verify=True)
    await network.get_client()
    await network.aclose()
async def test_request(self):
    """Network.request forwards to the patched httpx client and returns its response."""
    a_text = 'Lorem Ipsum'
    response = httpx.Response(status_code=200, text=a_text)
    with patch.object(httpx.AsyncClient, 'request', return_value=response):
        network = Network(enable_http=True)
        response = await network.request('GET', 'https://example.com/')
        self.assertEqual(response.text, a_text)
        await network.aclose()
class TestNetworkRequestRetries(SearxTestCase):
    """Retry behaviour of Network.request (``retries`` / ``retry_on_http_error``)."""

    TEXT = 'Lorem Ipsum'

    @classmethod
    def get_response_404_then_200(cls):
        """Build a fake httpx request coroutine: first call answers 403, later calls 200."""
        first = True

        async def get_response(*args, **kwargs):
            nonlocal first
            if first:
                first = False
                return httpx.Response(status_code=403, text=TestNetworkRequestRetries.TEXT)
            return httpx.Response(status_code=200, text=TestNetworkRequestRetries.TEXT)
        return get_response

    async def test_retries_ok(self):
        # one retry is enough to get past the single 403 answer
        with patch.object(httpx.AsyncClient, 'request', new=TestNetworkRequestRetries.get_response_404_then_200()):
            network = Network(enable_http=True, retries=1, retry_on_http_error=403)
            response = await network.request('GET', 'https://example.com/')
            self.assertEqual(response.text, TestNetworkRequestRetries.TEXT)
            await network.aclose()

    async def test_retries_fail_int(self):
        # no retries: the 403 is returned as-is (retry_on_http_error given as int)
        with patch.object(httpx.AsyncClient, 'request', new=TestNetworkRequestRetries.get_response_404_then_200()):
            network = Network(enable_http=True, retries=0, retry_on_http_error=403)
            response = await network.request('GET', 'https://example.com/')
            self.assertEqual(response.status_code, 403)
            await network.aclose()

    async def test_retries_fail_list(self):
        # no retries: retry_on_http_error given as a list of status codes
        with patch.object(httpx.AsyncClient, 'request', new=TestNetworkRequestRetries.get_response_404_then_200()):
            network = Network(enable_http=True, retries=0, retry_on_http_error=[403, 429])
            response = await network.request('GET', 'https://example.com/')
            self.assertEqual(response.status_code, 403)
            await network.aclose()

    async def test_retries_fail_bool(self):
        # no retries: retry_on_http_error given as a bool
        with patch.object(httpx.AsyncClient, 'request', new=TestNetworkRequestRetries.get_response_404_then_200()):
            network = Network(enable_http=True, retries=0, retry_on_http_error=True)
            response = await network.request('GET', 'https://example.com/')
            self.assertEqual(response.status_code, 403)
            await network.aclose()

    async def test_retries_exception_then_200(self):
        # two transport errors followed by success: retries=2 must absorb both
        request_count = 0

        async def get_response(*args, **kwargs):
            nonlocal request_count
            request_count += 1
            if request_count < 3:
                raise httpx.RequestError('fake exception', request=None)
            return httpx.Response(status_code=200, text=TestNetworkRequestRetries.TEXT)

        with patch.object(httpx.AsyncClient, 'request', new=get_response):
            network = Network(enable_http=True, retries=2)
            response = await network.request('GET', 'https://example.com/')
            self.assertEqual(response.status_code, 200)
            self.assertEqual(response.text, TestNetworkRequestRetries.TEXT)
            await network.aclose()

    async def test_retries_exception(self):
        # without retries a transport error propagates to the caller
        async def get_response(*args, **kwargs):
            raise httpx.RequestError('fake exception', request=None)

        with patch.object(httpx.AsyncClient, 'request', new=get_response):
            network = Network(enable_http=True, retries=0)
            with self.assertRaises(httpx.RequestError):
                await network.request('GET', 'https://example.com/')
            await network.aclose()
class TestNetworkStreamRetries(SearxTestCase):
    """Retry behaviour of Network.stream."""

    TEXT = 'Lorem Ipsum'

    @classmethod
    def get_response_exception_then_200(cls):
        """Build a fake httpx stream: first call raises, later calls answer 200."""
        first = True

        def stream(*args, **kwargs):
            nonlocal first
            if first:
                first = False
                raise httpx.RequestError('fake exception', request=None)
            return httpx.Response(status_code=200, text=TestNetworkStreamRetries.TEXT)
        return stream

    async def test_retries_ok(self):
        # a single retry recovers from the first transport error
        with patch.object(httpx.AsyncClient, 'stream', new=TestNetworkStreamRetries.get_response_exception_then_200()):
            network = Network(enable_http=True, retries=1, retry_on_http_error=403)
            response = await network.stream('GET', 'https://example.com/')
            self.assertEqual(response.text, TestNetworkStreamRetries.TEXT)
            await network.aclose()

    async def test_retries_fail(self):
        # without retries the transport error propagates to the caller
        with patch.object(httpx.AsyncClient, 'stream', new=TestNetworkStreamRetries.get_response_exception_then_200()):
            network = Network(enable_http=True, retries=0, retry_on_http_error=403)
            with self.assertRaises(httpx.RequestError):
                await network.stream('GET', 'https://example.com/')
            await network.aclose()

    async def test_retries_exception(self):
        # retries=0: the first (403) response is returned without a retry
        first = True

        def stream(*args, **kwargs):
            nonlocal first
            if first:
                first = False
                return httpx.Response(status_code=403, text=TestNetworkRequestRetries.TEXT)
            return httpx.Response(status_code=200, text=TestNetworkRequestRetries.TEXT)

        with patch.object(httpx.AsyncClient, 'stream', new=stream):
            network = Network(enable_http=True, retries=0, retry_on_http_error=403)
            response = await network.stream('GET', 'https://example.com/')
            self.assertEqual(response.status_code, 403)
            await network.aclose()
@ -23,7 +23,7 @@ class TestEnginesInit(SearxTestCase):
|
|||
engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1', 'categories': 'general'},
|
||||
{'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2', 'categories': 'onions'}]
|
||||
|
||||
engines.load_engines(engine_list)
|
||||
engines.initialize_engines(engine_list)
|
||||
self.assertEqual(len(engines.engines), 1)
|
||||
self.assertIn('engine1', engines.engines)
|
||||
self.assertNotIn('onions', engines.categories)
|
||||
|
@ -35,7 +35,7 @@ class TestEnginesInit(SearxTestCase):
|
|||
'timeout': 20.0, 'onion_url': 'http://engine1.onion'},
|
||||
{'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2', 'categories': 'onions'}]
|
||||
|
||||
engines.load_engines(engine_list)
|
||||
engines.initialize_engines(engine_list)
|
||||
self.assertEqual(len(engines.engines), 2)
|
||||
self.assertIn('engine1', engines.engines)
|
||||
self.assertIn('engine2', engines.engines)
|
||||
|
|
|
@ -0,0 +1,89 @@
|
|||
from unittest.mock import patch
|
||||
from requests.models import Response
|
||||
|
||||
from searx.testing import SearxTestCase
|
||||
|
||||
import searx.poolrequests
|
||||
from searx.poolrequests import get_proxy_cycles, get_proxies
|
||||
|
||||
|
||||
# Proxy configuration shared by the tests below: two proxies per scheme,
# so that round-robin cycling can be observed.
CONFIG = {
    'http': ['http://localhost:9090', 'http://localhost:9092'],
    'https': ['http://localhost:9091', 'http://localhost:9093'],
}
|
||||
|
||||
|
||||
class TestProxy(SearxTestCase):
    """Tests for the proxy handling in searx.poolrequests."""

    def test_noconfig(self):
        # no proxy configuration at all -> no cycles
        cycles = get_proxy_cycles(None)
        self.assertIsNone(cycles)

        cycles = get_proxy_cycles(False)
        self.assertIsNone(cycles)

    def test_oldconfig(self):
        # legacy style: one proxy URL per scheme as a plain string
        config = {
            'http': 'http://localhost:9090',
            'https': 'http://localhost:9091',
        }
        cycles = get_proxy_cycles(config)
        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
        self.assertEqual(next(cycles['https']), 'http://localhost:9091')
        self.assertEqual(next(cycles['https']), 'http://localhost:9091')

    def test_one_proxy(self):
        # a single-entry list behaves like the plain-string form
        config = {
            'http': ['http://localhost:9090'],
            'https': ['http://localhost:9091'],
        }
        cycles = get_proxy_cycles(config)
        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
        self.assertEqual(next(cycles['https']), 'http://localhost:9091')
        self.assertEqual(next(cycles['https']), 'http://localhost:9091')

    def test_multiple_proxies(self):
        # two proxies per scheme are cycled round-robin
        cycles = get_proxy_cycles(CONFIG)
        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
        self.assertEqual(next(cycles['http']), 'http://localhost:9092')
        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
        self.assertEqual(next(cycles['https']), 'http://localhost:9091')
        self.assertEqual(next(cycles['https']), 'http://localhost:9093')
        self.assertEqual(next(cycles['https']), 'http://localhost:9091')

    def test_getproxies_none(self):
        self.assertIsNone(get_proxies(None))

    def test_getproxies_config(self):
        # get_proxies draws the next proxy of each scheme from the cycles
        cycles = get_proxy_cycles(CONFIG)
        self.assertEqual(get_proxies(cycles), {
            'http': 'http://localhost:9090',
            'https': 'http://localhost:9091'
        })
        self.assertEqual(get_proxies(cycles), {
            'http': 'http://localhost:9092',
            'https': 'http://localhost:9093'
        })

    @patch('searx.poolrequests.get_global_proxies')
    def test_request(self, mock_get_global_proxies):
        method = 'GET'
        url = 'http://localhost'
        custom_proxies = {
            'https': 'http://localhost:1080'
        }
        global_proxies = {
            'http': 'http://localhost:9092',
            'https': 'http://localhost:9093'
        }
        mock_get_global_proxies.return_value = global_proxies

        # check the global proxies usage
        with patch.object(searx.poolrequests.SessionSinglePool, 'request', return_value=Response()) as mock_method:
            searx.poolrequests.request(method, url)
            mock_method.assert_called_once_with(method=method, url=url, proxies=global_proxies)

        # check if the proxies parameter overrides the global proxies
        with patch.object(searx.poolrequests.SessionSinglePool, 'request', return_value=Response()) as mock_method:
            searx.poolrequests.request(method, url, proxies=custom_proxies)
            mock_method.assert_called_once_with(method=method, url=url, proxies=custom_proxies)
|
|
@ -1,9 +1,11 @@
|
|||
from searx import settings
|
||||
from searx.engines import load_engines
|
||||
from mock import patch
|
||||
|
||||
from searx.search import initialize
|
||||
from searx.query import RawTextQuery
|
||||
from searx.testing import SearxTestCase
|
||||
|
||||
import searx.engines
|
||||
|
||||
|
||||
TEST_ENGINES = [
|
||||
{
|
||||
|
@ -279,6 +281,10 @@ class TestBang(SearxTestCase):
|
|||
self.assertEqual(query.getQuery(), '!dum the query')
|
||||
|
||||
def test_bang_autocomplete_empty(self):
|
||||
load_engines(settings['engines'])
|
||||
query = RawTextQuery('the query !', [])
|
||||
self.assertEqual(query.autocomplete_list, ['!images', '!wikipedia', '!osm'])
|
||||
with patch.object(searx.engines, 'initialize_engines', searx.engines.load_engines):
|
||||
initialize()
|
||||
query = RawTextQuery('the query !', [])
|
||||
self.assertEqual(query.autocomplete_list, ['!images', '!wikipedia', '!osm'])
|
||||
|
||||
query = RawTextQuery('the query ?', ['osm'])
|
||||
self.assertEqual(query.autocomplete_list, ['?images', '?wikipedia'])
|
||||
|
|
|
@ -3,20 +3,16 @@
|
|||
import json
|
||||
from urllib.parse import ParseResult
|
||||
from mock import Mock
|
||||
|
||||
import searx.search.processors
|
||||
from searx.testing import SearxTestCase
|
||||
from searx.search import Search
|
||||
import searx.engines
|
||||
|
||||
|
||||
class ViewsTestCase(SearxTestCase):
|
||||
|
||||
def setUp(self):
|
||||
# skip init function (no external HTTP request)
|
||||
def dummy(*args, **kwargs):
|
||||
pass
|
||||
|
||||
self.setattr4test(searx.search.processors, 'initialize_processor', dummy)
|
||||
self.setattr4test(searx.engines, 'initialize_engines', searx.engines.load_engines)
|
||||
|
||||
from searx import webapp # pylint disable=import-outside-toplevel
|
||||
|
||||
|
|
Loading…
Reference in New Issue