# Mirror of https://github.com/searx/searx (Python source, 37 lines, 1.0 KiB)
'''
|
|
SPDX-License-Identifier: AGPL-3.0-or-later
|
|
'''
|
|
|
|
from hashlib import md5
|
|
from os.path import join
|
|
from urllib.parse import urlparse
|
|
from searx import searx_dir
|
|
|
|
# Plugin metadata (presumably consumed by searx's plugin loader -- confirm
# against the plugin framework).
name = "Ahmia blacklist"
description = (
    "Filter out onion results that appear in Ahmia's blacklist."
    " (See https://ahmia.fi/blacklist)"
)
default_on = True
preference_section = "onions"

# Module-level cache for the blacklist entries; stays None until
# get_ahmia_blacklist() fills it on first use.
ahmia_blacklist = None
|
|
|
|
|
|
def get_ahmia_blacklist():
    """Return the Ahmia blacklist entries, loading them from disk on first use.

    The entries are the whitespace-separated tokens of
    ``<searx_dir>/data/ahmia_blacklist.txt``.  The parsed list is stored in
    the module-level ``ahmia_blacklist`` variable and returned directly on
    subsequent calls.

    Returns:
        list[str]: the tokens read from the blacklist file (presumably MD5
        hex digests of onion hostnames, given how ``not_blacklisted`` uses
        them -- confirm against the data file).

    Raises:
        OSError: if the blacklist file cannot be opened.
    """
    global ahmia_blacklist
    # Test against None rather than truthiness: an empty blacklist is a valid
    # cached value and must not trigger a re-read of the file on every call.
    if ahmia_blacklist is None:
        blacklist_path = join(searx_dir, "data", "ahmia_blacklist.txt")
        # Explicit encoding so the result does not depend on the locale.
        with open(blacklist_path, 'r', encoding='utf-8') as f:
            ahmia_blacklist = f.read().split()
    return ahmia_blacklist
|
|
|
|
|
|
def not_blacklisted(result):
    """Tell whether *result* should be kept in the result list.

    Results whose ``'is_onion'`` entry is missing or falsy are always kept.
    For onion results, the MD5 hex digest of the URL's hostname is looked up
    in the list returned by ``get_ahmia_blacklist()``.

    Args:
        result: a mapping supporting ``.get()``; the ``'is_onion'`` and
            ``'url'`` keys are read.

    Returns:
        bool: ``False`` only when the result is an onion result whose
        hostname digest appears in the blacklist, ``True`` otherwise.
    """
    if not result.get('is_onion'):
        return True

    # A missing or scheme-less URL yields no hostname.  Such a result cannot
    # match a hostname digest, so keep it instead of raising AttributeError
    # on ``None.encode()`` and aborting the whole filter pass.
    hostname = urlparse(result.get('url') or '').hostname
    if not hostname:
        return True

    result_hash = md5(hostname.encode()).hexdigest()
    return result_hash not in get_ahmia_blacklist()
|
|
|
|
|
|
def post_search(request, search):
    """Drop blacklisted onion results from a finished search.

    Replaces ``search.result_container._merged_results`` with a new list
    holding only the entries for which ``not_blacklisted`` returns a truthy
    value.

    Args:
        request: not used by this function.
        search: object exposing ``result_container._merged_results``.

    Returns:
        bool: always ``True``.
    """
    container = search.result_container
    container._merged_results = [
        entry for entry in container._merged_results if not_blacklisted(entry)
    ]
    return True
|