Created new plugin type custom_results. Added new plugin bang_redirect (#2027)

* Made first attempt at the bangs redirects plugin.

* It redirects. But in a messy way via javascript.

* First version with custom plugin

* Added a help page and an operator to see all the bangs available.

* Changed to .format because of support

* Changed to .format because of support

* Removed : in params

* Fixed path to json file and changed bang operator

* Changed bang operator back to &

* Made first attempt at the bangs redirects plugin.

* It redirects. But in a messy way via javascript.

* First version with custom plugin

* Added a help page and an operator to see all the bangs available.

* Changed to .format because of support

* Changed to .format because of support

* Removed : in params

* Fixed path to json file and changed bang operator

* Changed bang operator back to &

* Refactored getting search query. Also changed bang operator to ! and is now working.

* Removed prints

* Removed temporary bangs_redirect.js file. Updated plugin documentation

* Added unit test for the bangs plugin

* Fixed a unit test and added 2 more for bangs plugin

* Changed back to default settings.yml

* Added myself to AUTHORS.rst

* Refactored working of custom plugin.

* Refactored _get_bangs_data from list to dict to improve search speed.

* Decoupled bangs plugin from webserver with redirect_url

* Refactored bangs unit tests

* Fixed unit test bangs. Removed double parsing in bangs.py

* Removed a dumb print statement

* Refactored bangs plugin to core engine.

* Removed bangs plugin.

* Refactored external bangs unit tests from plugin to core.

* Removed custom_results/bangs documentation from plugins.rst

* Added newline in settings.yml so the PR stays clean.

* Changed searx/plugins/__init__.py back to the old file

* Removed newline search.py

* Refactored get_external_bang_operator from utils to external_bang.py

* Removed unnecessary import from test_plugins.py

* Removed _parseExternalBang and _isExternalBang from query.py

* Removed get_external_bang_operator since it was not necessary

* Simplified external_bang.py

* Simplified external_bang.py

* Moved external_bangs unit tests to test_webapp.py. Fixed return in search with external_bang

* Refactored query parsing to unicode to support python2

* Refactored query parsing to unicode to support python2

* Refactored bangs plugin to core engine.

* Refactored search parameter to search_query in external_bang.py
This commit is contained in:
Lukas van den Berk 2020-07-03 15:25:04 +02:00 committed by GitHub
parent c21220c671
commit 4829a76aae
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 75458 additions and 6 deletions

View File

@ -124,3 +124,4 @@ generally made searx better:
- @CaffeinatedTech
- Robin Schneider @ypid
- @splintah
- Lukas van den Berk @lukasvdberk

View File

@ -30,6 +30,14 @@ Example plugin
ctx['search'].suggestions.add('example')
return True
Register your plugin
====================
To enable your plugin, register it in
``searx/plugins/__init__.py``:
at the bottom of that file, add a line like
``plugins.register(name_of_python_file)``
Plugin entry points
===================

75351
searx/data/bangs.json Normal file

File diff suppressed because it is too large Load Diff

43
searx/external_bang.py Normal file
View File

@ -0,0 +1,43 @@
import json
from os.path import join

try:
    from urllib.parse import quote
except ImportError:  # Python 2
    from urllib import quote

from searx import searx_dir
# The bang definitions are taken from the jivesearch project:
# https://raw.githubusercontent.com/jivesearch/jivesearch/master/bangs/bangs.toml
# and were converted from toml to json with:
# https://pseitz.github.io/toml-to-json-online-converter/
# NOTE: other modules should only use get_bang_url.

# Maps every trigger to the definition of the bang it belongs to; the
# "triggers" list itself is left out of the stored definition.
bangs_data = {}
with open(join(searx_dir, 'data/bangs.json')) as json_file:
    for bang in json.load(json_file)['bang']:
        # Each trigger gets its own copy of the definition.
        bangs_data.update(
            (trigger, {key: value for key, value in bang.items() if key != "triggers"})
            for trigger in bang["triggers"]
        )
def get_bang_url(search_query):
    """
    Build the redirect url for a search that carries an external bang.

    :param search_query: This is a search_query object which contains preferences and the submitted queries.
                         ``external_bang`` holds the parsed bang and ``query`` the utf-8 encoded search terms.
    :return: None if no bang was supplied, the bang is unknown or the query is empty,
             else a string of the redirect url.
    """
    if not search_query.external_bang:
        return None

    # Undecodable bytes are dropped rather than failing the whole search.
    query = search_query.query.decode('utf-8', 'ignore')
    bang = _get_bang(search_query.external_bang)
    if not bang or not query:
        return None

    # TODO add region support.
    bang_url = bang["regions"]["default"]

    # Percent-encode the search terms before inserting them: characters such as
    # '&', '#', '?', '%' or spaces would otherwise be interpreted as part of the
    # target url's structure and corrupt or truncate the redirect.
    # The terms are re-encoded to utf-8 bytes so quote() behaves the same on
    # Python 2 and Python 3; safe='' also escapes '/'.
    encoded_query = quote(query.encode('utf-8'), safe='')
    return bang_url.replace("{{{term}}}", encoded_query)
def _get_bang(user_bang):
    """
    Look up the bang definition registered for a trigger.

    :param user_bang: The parsed user bang. For example yt
    :return: The dict stored in bangs_data for this trigger, or None if the trigger is not registered.
    """
    try:
        return bangs_data[user_bang]
    except KeyError:
        return None

View File

@ -44,10 +44,11 @@ class RawTextQuery(object):
self.engines = []
self.languages = []
self.timeout_limit = None
self.external_bang = None
self.specific = False
# parse query, if tags are set, which
# change the serch engine or search-language
# change the search engine or search-language
def parse_query(self):
self.query_parts = []
@ -120,6 +121,11 @@ class RawTextQuery(object):
self.languages.append(lang)
parse_next = True
# external bang
if query_part[0:2] == "!!":
self.external_bang = query_part[2:]
parse_next = True
continue
# this force a engine or category
if query_part[0] == '!' or query_part[0] == '?':
prefix = query_part[1:].replace('-', ' ').replace('_', ' ')
@ -178,7 +184,7 @@ class SearchQuery(object):
"""container for all the search parameters (query, language, etc...)"""
def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range,
timeout_limit=None, preferences=None):
timeout_limit=None, preferences=None, external_bang=None):
self.query = query.encode('utf-8')
self.engines = engines
self.categories = categories
@ -188,6 +194,7 @@ class SearchQuery(object):
self.time_range = None if time_range in ('', 'None', None) else time_range
self.timeout_limit = timeout_limit
self.preferences = preferences
self.external_bang = external_bang
def __str__(self):
return str(self.query) + ";" + str(self.engines)

View File

@ -138,6 +138,7 @@ class ResultContainer(object):
self.paging = False
self.unresponsive_engines = set()
self.timings = []
self.redirect_url = None
def extend(self, engine_name, results):
for result in list(results):

View File

@ -20,6 +20,8 @@ import sys
import threading
from time import time
from uuid import uuid4
import six
from flask_babel import gettext
import requests.exceptions
import searx.poolrequests as requests_lib
@ -27,6 +29,7 @@ from searx.engines import (
categories, engines, settings
)
from searx.answerers import ask
from searx.external_bang import get_bang_url
from searx.utils import gen_useragent
from searx.query import RawTextQuery, SearchQuery, VALID_LANGUAGE_CODE
from searx.results import ResultContainer
@ -54,6 +57,7 @@ else:
else:
logger.critical('outgoing.max_request_timeout if defined has to be float')
from sys import exit
exit(1)
@ -397,15 +401,16 @@ def get_search_query_from_webapp(preferences, form):
if (engine.name, categ) not in disabled_engines)
query_engines = deduplicate_query_engines(query_engines)
external_bang = raw_text_query.external_bang
return (SearchQuery(query, query_engines, query_categories,
query_lang, query_safesearch, query_pageno,
query_time_range, query_timeout, preferences),
query_time_range, query_timeout, preferences,
external_bang=external_bang),
raw_text_query)
class Search(object):
"""Search information container"""
def __init__(self, search_query):
@ -419,6 +424,14 @@ class Search(object):
def search(self):
global number_of_searches
# Check whether there is an external bang. If it is valid, the search stops here and the caller redirects.
if self.search_query.external_bang:
self.result_container.redirect_url = get_bang_url(self.search_query)
# This means there was a valid bang and the
# rest of the search does not need to be continued
if isinstance(self.result_container.redirect_url, six.string_types):
return self.result_container
# start time
start_time = time()
@ -521,7 +534,6 @@ class Search(object):
class SearchWithPlugins(Search):
"""Similar to the Search class but call the plugins."""
def __init__(self, search_query, ordered_plugin_list, request):

View File

@ -575,7 +575,9 @@ def index():
search_query, raw_text_query = get_search_query_from_webapp(request.preferences, request.form)
# search = Search(search_query) # without plugins
search = SearchWithPlugins(search_query, request.user_plugins, request)
result_container = search.search()
except Exception as e:
# log exception
logger.exception('search error')
@ -592,6 +594,10 @@ def index():
if number_of_results < result_container.results_length():
number_of_results = 0
# check for an external bang
if result_container.redirect_url:
return redirect(result_container.redirect_url)
# UI
advanced_search = request.form.get('advanced_search', None)
@ -665,6 +671,7 @@ def index():
cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.decode('utf-8'))
response.headers.add('Content-Disposition', cont_disp)
return response
elif output_format == 'rss':
response_rss = render(
'opensearch_response_rss.xml',

View File

@ -110,3 +110,24 @@ class SearchTestCase(SearxTestCase):
search = searx.search.Search(search_query)
results = search.search()
self.assertEquals(results.results_length(), 1)
def test_external_bang(self):
    """A search with the "yt" external bang sets redirect_url; a plain search leaves it None."""
    bang_preferences = Preferences(['oscar'], ['general'], engines, [])
    bang_query = searx.query.SearchQuery(
        'yes yes',
        [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}],
        ['general'], 'en-US', SAFESEARCH, PAGENO, None, None,
        preferences=bang_preferences,
        external_bang="yt")
    bang_results = searx.search.Search(bang_query).search()
    # The youtube external bang must have produced a redirect
    self.assertIsNotNone(bang_results.redirect_url)

    plain_preferences = Preferences(['oscar'], ['general'], engines, [])
    plain_query = searx.query.SearchQuery(
        'youtube never gonna give you up',
        [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}],
        ['general'], 'en-US', SAFESEARCH, PAGENO, None, None,
        preferences=plain_preferences)
    plain_results = searx.search.Search(plain_query).search()
    # Without an external bang there must be no redirect
    self.assertIsNone(plain_results.redirect_url)

View File

@ -56,7 +56,8 @@ class ViewsTestCase(SearxTestCase):
results=test_results,
results_number=lambda: 3,
results_length=lambda: len(test_results),
get_timings=lambda: timings)
get_timings=lambda: timings,
redirect_url=None)
self.setattr4test(Search, 'search', search_mock)