Add tests for _fetch_supported_languages in engines

Also refactor the method so that it can be tested without making network requests.
This commit is contained in:
marc 2016-12-15 00:34:43 -06:00
parent e0c270bd72
commit af35eee10b
27 changed files with 387 additions and 3388 deletions

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@ -21,6 +21,7 @@ import sys
from flask_babel import gettext from flask_babel import gettext
from operator import itemgetter from operator import itemgetter
from json import loads from json import loads
from requests import get
from searx import settings from searx import settings
from searx import logger from searx import logger
from searx.utils import load_module from searx.utils import load_module
@ -79,9 +80,6 @@ def load_engine(engine_data):
if not hasattr(engine, arg_name): if not hasattr(engine, arg_name):
setattr(engine, arg_name, arg_value) setattr(engine, arg_name, arg_value)
if engine_data['name'] in languages:
setattr(engine, 'supported_languages', languages[engine_data['name']])
# checking required variables # checking required variables
for engine_attr in dir(engine): for engine_attr in dir(engine):
if engine_attr.startswith('_'): if engine_attr.startswith('_'):
@ -91,6 +89,15 @@ def load_engine(engine_data):
.format(engine.name, engine_attr)) .format(engine.name, engine_attr))
sys.exit(1) sys.exit(1)
# assign supported languages from json file
if engine_data['name'] in languages:
setattr(engine, 'supported_languages', languages[engine_data['name']])
# assign language fetching method if auxiliary method exists
if hasattr(engine, '_fetch_supported_languages'):
setattr(engine, 'fetch_supported_languages',
lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))
engine.stats = { engine.stats = {
'result_count': 0, 'result_count': 0,
'search_count': 0, 'search_count': 0,

View File

@ -15,7 +15,6 @@
from urllib import urlencode from urllib import urlencode
from lxml import html from lxml import html
from requests import get
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
# engine dependent config # engine dependent config
@ -86,10 +85,9 @@ def response(resp):
# get supported languages from their site # get supported languages from their site
def fetch_supported_languages(): def _fetch_supported_languages(resp):
supported_languages = [] supported_languages = []
response = get(supported_languages_url) dom = html.fromstring(resp.text)
dom = html.fromstring(response.text)
options = dom.xpath('//div[@id="limit-languages"]//input') options = dom.xpath('//div[@id="limit-languages"]//input')
for option in options: for option in options:
code = option.xpath('./@id')[0].replace('_', '-') code = option.xpath('./@id')[0].replace('_', '-')

View File

@ -19,7 +19,7 @@ from urllib import urlencode
from lxml import html from lxml import html
from json import loads from json import loads
import re import re
from searx.engines.bing import fetch_supported_languages from searx.engines.bing import _fetch_supported_languages, supported_languages_url
# engine dependent config # engine dependent config
categories = ['images'] categories = ['images']

View File

@ -17,7 +17,7 @@ from datetime import datetime
from dateutil import parser from dateutil import parser
from lxml import etree from lxml import etree
from searx.utils import list_get from searx.utils import list_get
from searx.engines.bing import fetch_supported_languages from searx.engines.bing import _fetch_supported_languages, supported_languages_url
# engine dependent config # engine dependent config
categories = ['news'] categories = ['news']

View File

@ -80,11 +80,10 @@ def response(resp):
# get supported languages from their site # get supported languages from their site
def fetch_supported_languages(): def _fetch_supported_languages(resp):
supported_languages = {} supported_languages = {}
response = get(supported_languages_url) response_json = loads(resp.text)
response_json = loads(response.text)
for language in response_json['list']: for language in response_json['list']:
supported_languages[language['code']] = {} supported_languages[language['code']] = {}

View File

@ -119,11 +119,10 @@ def response(resp):
# get supported languages from their site # get supported languages from their site
def fetch_supported_languages(): def _fetch_supported_languages(resp):
response = get(supported_languages_url)
# response is a js file with regions as an embedded object # response is a js file with regions as an embedded object
response_page = response.text response_page = resp.text
response_page = response_page[response_page.find('regions:{') + 8:] response_page = response_page[response_page.find('regions:{') + 8:]
response_page = response_page[:response_page.find('}') + 1] response_page = response_page[:response_page.find('}') + 1]

View File

@ -4,7 +4,7 @@ from re import compile, sub
from lxml import html from lxml import html
from searx.utils import html_to_text from searx.utils import html_to_text
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.engines.duckduckgo import fetch_supported_languages from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
url = 'https://api.duckduckgo.com/'\ url = 'https://api.duckduckgo.com/'\
+ '?{query}&format=json&pretty=0&no_redirect=1&d=1' + '?{query}&format=json&pretty=0&no_redirect=1&d=1'

View File

@ -14,7 +14,6 @@ from json import loads
from random import randint from random import randint
from time import time from time import time
from urllib import urlencode from urllib import urlencode
from requests import get
from lxml.html import fromstring from lxml.html import fromstring
# engine dependent config # engine dependent config
@ -91,10 +90,9 @@ def response(resp):
# get supported languages from their site # get supported languages from their site
def fetch_supported_languages(): def _fetch_supported_languages(resp):
supported_languages = [] supported_languages = []
response = get(supported_languages_url) dom = fromstring(resp.text)
dom = fromstring(response.text)
links = dom.xpath('//span[@id="menu2"]/a') links = dom.xpath('//span[@id="menu2"]/a')
for link in links: for link in links:
code = link.xpath('./@href')[0][-2:] code = link.xpath('./@href')[0][-2:]

View File

@ -12,7 +12,6 @@ import re
from urllib import urlencode from urllib import urlencode
from urlparse import urlparse, parse_qsl from urlparse import urlparse, parse_qsl
from lxml import html, etree from lxml import html, etree
from requests import get
from searx.engines.xpath import extract_text, extract_url from searx.engines.xpath import extract_text, extract_url
from searx.search import logger from searx.search import logger
@ -364,14 +363,13 @@ def attributes_to_html(attributes):
# get supported languages from their site # get supported languages from their site
def fetch_supported_languages(): def _fetch_supported_languages(resp):
supported_languages = {} supported_languages = {}
response = get(supported_languages_url) dom = html.fromstring(resp.text)
dom = html.fromstring(response.text) options = dom.xpath('//table//td/font/label/span')
options = dom.xpath('//select[@name="hl"]/option')
for option in options: for option in options:
code = option.xpath('./@value')[0].split('-')[0] code = option.xpath('./@id')[0][1:]
name = option.text[:-1].title() name = option.text.title()
supported_languages[code] = {"name": name} supported_languages[code] = {"name": name}
return supported_languages return supported_languages

View File

@ -13,7 +13,7 @@
from lxml import html from lxml import html
from urllib import urlencode from urllib import urlencode
from json import loads from json import loads
from searx.engines.google import fetch_supported_languages from searx.engines.google import _fetch_supported_languages, supported_languages_url
# search-url # search-url
categories = ['news'] categories = ['news']

View File

@ -13,7 +13,6 @@
from json import loads from json import loads
from urllib import urlencode, unquote from urllib import urlencode, unquote
import re import re
from requests import get
from lxml.html import fromstring from lxml.html import fromstring
# engine dependent config # engine dependent config
@ -25,6 +24,8 @@ language_support = True
base_url = 'https://swisscows.ch/' base_url = 'https://swisscows.ch/'
search_string = '?{query}&page={page}' search_string = '?{query}&page={page}'
supported_languages_url = base_url
# regex # regex
regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment') regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
regex_json_remove_start = re.compile(r'^initialData:\s*') regex_json_remove_start = re.compile(r'^initialData:\s*')
@ -113,10 +114,9 @@ def response(resp):
# get supported languages from their site # get supported languages from their site
def fetch_supported_languages(): def _fetch_supported_languages(resp):
supported_languages = [] supported_languages = []
response = get(base_url) dom = fromstring(resp.text)
dom = fromstring(response.text)
options = dom.xpath('//div[@id="regions-popup"]//ul/li/a') options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
for option in options: for option in options:
code = option.xpath('./@data-val')[0] code = option.xpath('./@data-val')[0]

View File

@ -15,7 +15,7 @@ from searx import logger
from searx.poolrequests import get from searx.poolrequests import get
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.utils import format_date_by_locale from searx.utils import format_date_by_locale
from searx.engines.wikipedia import fetch_supported_languages from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
from json import loads from json import loads
from lxml.html import fromstring from lxml.html import fromstring

View File

@ -12,7 +12,6 @@
from json import loads from json import loads
from urllib import urlencode, quote from urllib import urlencode, quote
from requests import get
from lxml.html import fromstring from lxml.html import fromstring
@ -119,10 +118,9 @@ def response(resp):
# get supported languages from their site # get supported languages from their site
def fetch_supported_languages(): def _fetch_supported_languages(resp):
supported_languages = {} supported_languages = {}
response = get(supported_languages_url) dom = fromstring(resp.text)
dom = fromstring(response.text)
tables = dom.xpath('//table[contains(@class,"sortable")]') tables = dom.xpath('//table[contains(@class,"sortable")]')
for table in tables: for table in tables:
# exclude header row # exclude header row

View File

@ -14,7 +14,6 @@
from urllib import urlencode from urllib import urlencode
from urlparse import unquote from urlparse import unquote
from lxml import html from lxml import html
from requests import get
from searx.engines.xpath import extract_text, extract_url from searx.engines.xpath import extract_text, extract_url
# engine dependent config # engine dependent config
@ -144,13 +143,12 @@ def response(resp):
# get supported languages from their site # get supported languages from their site
def fetch_supported_languages(): def _fetch_supported_languages(resp):
supported_languages = [] supported_languages = []
response = get(supported_languages_url) dom = html.fromstring(resp.text)
dom = html.fromstring(response.text)
options = dom.xpath('//div[@id="yschlang"]/span/label/input') options = dom.xpath('//div[@id="yschlang"]/span/label/input')
for option in options: for option in options:
code = option.xpath('./@value')[0][5:] code = option.xpath('./@value')[0][5:].replace('_', '-')
supported_languages.append(code) supported_languages.append(code)
return supported_languages return supported_languages

View File

@ -12,7 +12,7 @@
from urllib import urlencode from urllib import urlencode
from lxml import html from lxml import html
from searx.engines.xpath import extract_text, extract_url from searx.engines.xpath import extract_text, extract_url
from searx.engines.yahoo import parse_url, fetch_supported_languages from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
from datetime import datetime, timedelta from datetime import datetime, timedelta
import re import re
from dateutil import parser from dateutil import parser

View File

@ -3,36 +3,27 @@
# this file is generated automatically by utils/update_search_languages.py # this file is generated automatically by utils/update_search_languages.py
language_codes = ( language_codes = (
(u"ach", u"Acoli", u"", u""),
(u"af", u"Afrikaans", u"", u""), (u"af", u"Afrikaans", u"", u""),
(u"ak", u"Akan", u"", u""), (u"am", u"አማርኛ", u"", u"Amharic"),
(u"am", u"አማርኛ", u"", u""),
(u"ar-SA", u"العربية", u"المملكة العربية السعودية", u"Arabic"), (u"ar-SA", u"العربية", u"المملكة العربية السعودية", u"Arabic"),
(u"az", u"Azərbaycanca", u"", u"Azerbaijani"), (u"az", u"Azərbaycanca", u"", u"Azerbaijani"),
(u"ban", u"Balinese", u"", u""),
(u"be", u"Беларуская", u"", u"Belarusian"), (u"be", u"Беларуская", u"", u"Belarusian"),
(u"bem", u"Ichibemba", u"", u""),
(u"bg-BG", u"Български", u"България", u"Bulgarian"), (u"bg-BG", u"Български", u"България", u"Bulgarian"),
(u"bn", u"বাংলা", u"", u""), (u"bn", u"বাংলা", u"", u"Bengali"),
(u"br", u"Brezhoneg", u"", u""), (u"br", u"Brezhoneg", u"", u"Breton"),
(u"bs", u"Bosanski", u"", u""), (u"bs", u"Bosnian", u"", u"Bosnian"),
(u"ca", u"Català", u"", u"Catalan"), (u"ca", u"Català", u"", u"Catalan"),
(u"ca-CT", u"Català", u"", u"Catalan"), (u"ca-CT", u"Català", u"", u"Catalan"),
(u"ca-ES", u"Català", u"Espanya", u"Catalan"), (u"ca-ES", u"Català", u"Espanya", u"Catalan"),
(u"ce", u"Нохчийн", u"", u"Chechen"), (u"ce", u"Нохчийн", u"", u"Chechen"),
(u"ceb", u"Sinugboanong Binisaya", u"", u"Cebuano"), (u"ceb", u"Sinugboanong Binisaya", u"", u"Cebuano"),
(u"chr", u"ᏣᎳᎩ", u"", u""),
(u"ckb", u"Central Kurdish", u"", u""),
(u"co", u"Corsican", u"", u""),
(u"crs", u"Seychellois Creole", u"", u""),
(u"cs-CZ", u"Čeština", u"Česko", u"Czech"), (u"cs-CZ", u"Čeština", u"Česko", u"Czech"),
(u"cy", u"Cymraeg", u"", u""), (u"cy", u"Cymraeg", u"", u"Welsh"),
(u"da-DK", u"Dansk", u"Danmark", u"Danish"), (u"da-DK", u"Dansk", u"Danmark", u"Danish"),
(u"de", u"Deutsch", u"", u"German"), (u"de", u"Deutsch", u"", u"German"),
(u"de-AT", u"Deutsch", u"Österreich", u"German"), (u"de-AT", u"Deutsch", u"Österreich", u"German"),
(u"de-CH", u"Deutsch", u"Schweiz", u"German"), (u"de-CH", u"Deutsch", u"Schweiz", u"German"),
(u"de-DE", u"Deutsch", u"Deutschland", u"German"), (u"de-DE", u"Deutsch", u"Deutschland", u"German"),
(u"ee", u"Eʋegbe", u"", u""),
(u"el-GR", u"Ελληνικά", u"Ελλάδα", u"Greek"), (u"el-GR", u"Ελληνικά", u"Ελλάδα", u"Greek"),
(u"en", u"English", u"", u"English"), (u"en", u"English", u"", u"English"),
(u"en-AU", u"English", u"Australia", u"English"), (u"en-AU", u"English", u"Australia", u"English"),
@ -60,30 +51,20 @@ language_codes = (
(u"eu", u"Euskara", u"", u"Basque"), (u"eu", u"Euskara", u"", u"Basque"),
(u"fa", u"فارسی", u"", u"Persian"), (u"fa", u"فارسی", u"", u"Persian"),
(u"fi-FI", u"Suomi", u"Suomi", u"Finnish"), (u"fi-FI", u"Suomi", u"Suomi", u"Finnish"),
(u"fo", u"Føroyskt", u"", u""),
(u"fr", u"Français", u"", u"French"), (u"fr", u"Français", u"", u"French"),
(u"fr-BE", u"Français", u"Belgique", u"French"), (u"fr-BE", u"Français", u"Belgique", u"French"),
(u"fr-CA", u"Français", u"Canada", u"French"), (u"fr-CA", u"Français", u"Canada", u"French"),
(u"fr-CH", u"Français", u"Suisse", u"French"), (u"fr-CH", u"Français", u"Suisse", u"French"),
(u"fr-FR", u"Français", u"France", u"French"), (u"fr-FR", u"Français", u"France", u"French"),
(u"fy", u"West-Frysk", u"", u""), (u"ga", u"Gaeilge", u"", u"Irish"),
(u"ga", u"Gaeilge", u"", u""),
(u"gaa", u"Ga", u"", u""),
(u"gd", u"Gàidhlig", u"", u""),
(u"gl", u"Galego", u"", u"Galician"), (u"gl", u"Galego", u"", u"Galician"),
(u"gn", u"Guarani", u"", u""), (u"gu", u"ગુજરાતી", u"", u"Gujarati"),
(u"gu", u"ગુજરાતી", u"", u""),
(u"ha", u"Hausa", u"", u""),
(u"haw", u"ʻŌlelo HawaiʻI", u"", u""),
(u"he-IL", u"עברית", u"ישראל", u"Hebrew"), (u"he-IL", u"עברית", u"ישראל", u"Hebrew"),
(u"hi", u"हिन्दी", u"", u"Hindi"), (u"hi", u"हिन्दी", u"", u"Hindi"),
(u"hr-HR", u"Hrvatski", u"Hrvatska", u"Croatian"), (u"hr-HR", u"Hrvatski", u"Hrvatska", u"Croatian"),
(u"ht", u"Haitian Creole", u"", u""),
(u"hu-HU", u"Magyar", u"Magyarország", u"Hungarian"), (u"hu-HU", u"Magyar", u"Magyarország", u"Hungarian"),
(u"hy", u"Հայերեն", u"", u"Armenian"), (u"hy", u"Հայերեն", u"", u"Armenian"),
(u"ia", u"Interlingua", u"", u""),
(u"id-ID", u"Bahasa Indonesia", u"Indonesia", u"Indonesian"), (u"id-ID", u"Bahasa Indonesia", u"Indonesia", u"Indonesian"),
(u"ig", u"Igbo", u"", u""),
(u"is", u"Íslenska", u"", u""), (u"is", u"Íslenska", u"", u""),
(u"it", u"Italiano", u"", u"Italian"), (u"it", u"Italiano", u"", u"Italian"),
(u"it-CH", u"Italiano", u"Svizzera", u"Italian"), (u"it-CH", u"Italiano", u"Svizzera", u"Italian"),
@ -91,86 +72,48 @@ language_codes = (
(u"iw", u"עברית", u"", u""), (u"iw", u"עברית", u"", u""),
(u"ja-JP", u"日本語", u"日本", u"Japanese"), (u"ja-JP", u"日本語", u"日本", u"Japanese"),
(u"ka", u"ქართული", u"", u"Georgian"), (u"ka", u"ქართული", u"", u"Georgian"),
(u"kg", u"Kongo", u"", u""),
(u"kk", u"Қазақша", u"", u"Kazakh"), (u"kk", u"Қазақша", u"", u"Kazakh"),
(u"km", u"ខ្មែរ", u"", u""), (u"kn", u"ಕನ್ನಡ", u"", u"Kannada"),
(u"kn", u"ಕನ್ನಡ", u"", u""),
(u"ko-KR", u"한국어", u"대한민국", u"Korean"), (u"ko-KR", u"한국어", u"대한민국", u"Korean"),
(u"kri", u"Krio", u"", u""),
(u"ky", u"Кыргызча", u"", u""),
(u"la", u"Latina", u"", u"Latin"), (u"la", u"Latina", u"", u"Latin"),
(u"lg", u"Luganda", u"", u""),
(u"ln", u"Lingála", u"", u""),
(u"lo", u"ລາວ", u"", u""),
(u"loz", u"Lozi", u"", u""),
(u"lt-LT", u"Lietuvių", u"Lietuva", u"Lithuanian"), (u"lt-LT", u"Lietuvių", u"Lietuva", u"Lithuanian"),
(u"lua", u"Luba-Lulua", u"", u""),
(u"lv-LV", u"Latviešu", u"Latvijas Republika", u""), (u"lv-LV", u"Latviešu", u"Latvijas Republika", u""),
(u"mfe", u"Kreol Morisien", u"", u""), (u"mi", u"Reo Māori", u"", u"Maori"),
(u"mg", u"Malagasy", u"", u""),
(u"mi", u"Maori", u"", u""),
(u"min", u"Minangkabau", u"", u"Minangkabau"), (u"min", u"Minangkabau", u"", u"Minangkabau"),
(u"mk", u"Македонски", u"", u""), (u"mk", u"Македонски", u"", u"Macedonian"),
(u"ml", u"മലയാളം", u"", u""), (u"mn", u"Монгол", u"", u"Mongolian"),
(u"mn", u"Монгол", u"", u""), (u"mr", u"मराठी", u"", u"Marathi"),
(u"mr", u"मराठी", u"", u""),
(u"ms-MY", u"Bahasa Melayu", u"Malaysia", u"Malay"), (u"ms-MY", u"Bahasa Melayu", u"Malaysia", u"Malay"),
(u"mt", u"Malti", u"", u""), (u"mt", u"Malti", u"", u"Maltese"),
(u"my", u"ဗမာ", u"", u""),
(u"nb-NO", u"Norwegian Bokmål", u"Norge", u"Norwegian Bokmål"), (u"nb-NO", u"Norwegian Bokmål", u"Norge", u"Norwegian Bokmål"),
(u"ne", u"नेपाली", u"", u""),
(u"nl", u"Nederlands", u"", u"Dutch"), (u"nl", u"Nederlands", u"", u"Dutch"),
(u"nl-BE", u"Nederlands", u"België", u"Dutch"), (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
(u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"), (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
(u"nn", u"Nynorsk", u"", u"Norwegian"), (u"nn", u"Nynorsk", u"", u"Norwegian"),
(u"no-NO", u"Norsk", u"Norge", u"Norwegian"), (u"no-NO", u"Norsk", u"Norge", u"Norwegian"),
(u"nso", u"Northern Sotho", u"", u""), (u"oc", u"Occitan", u"", u"Occitan"),
(u"ny", u"Nyanja", u"", u""), (u"or", u"Oriya", u"", u"Oriya"),
(u"nyn", u"Runyankore", u"", u""), (u"pa", u"ਪੰਜਾਬੀ", u"", u"Panjabi"),
(u"oc", u"Occitan", u"", u""),
(u"om", u"Oromoo", u"", u""),
(u"or", u"ଓଡ଼ିଆ", u"", u""),
(u"pa", u"ਪੰਜਾਬੀ", u"", u""),
(u"pcm", u"Nigerian Pidgin", u"", u""),
(u"pl-PL", u"Polski", u"Rzeczpospolita Polska", u"Polish"), (u"pl-PL", u"Polski", u"Rzeczpospolita Polska", u"Polish"),
(u"ps", u"پښتو", u"", u""), (u"ps", u"Pushto", u"", u"Pushto"),
(u"pt", u"Português", u"", u"Portuguese"), (u"pt", u"Português", u"", u"Portuguese"),
(u"pt-BR", u"Português", u"Brasil", u"Portuguese"), (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
(u"pt-PT", u"Português", u"Portugal", u"Portuguese"), (u"pt-PT", u"Português", u"Portugal", u"Portuguese"),
(u"qu", u"Runasimi", u"", u""),
(u"rm", u"Rumantsch", u"", u""),
(u"rn", u"Ikirundi", u"", u""),
(u"ro-RO", u"Română", u"România", u"Romanian"), (u"ro-RO", u"Română", u"România", u"Romanian"),
(u"ru-RU", u"Русский", u"Россия", u"Russian"), (u"ru-RU", u"Русский", u"Россия", u"Russian"),
(u"rw", u"Kinyarwanda", u"", u""), (u"rw", u"Ikinyarwanda", u"", u"Kinyarwanda"),
(u"sd", u"Sindhi", u"", u""),
(u"sh", u"Srpskohrvatski / Српскохрватски", u"", u"Serbo-Croatian"), (u"sh", u"Srpskohrvatski / Српскохрватски", u"", u"Serbo-Croatian"),
(u"si", u"සිංහල", u"", u""),
(u"sk-SK", u"Slovenčina", u"Slovenská republika", u"Slovak"), (u"sk-SK", u"Slovenčina", u"Slovenská republika", u"Slovak"),
(u"sl", u"Slovenščina", u"", u"Slovenian"), (u"sl", u"Slovenščina", u"", u"Slovenian"),
(u"sn", u"Chishona", u"", u""),
(u"so", u"Soomaali", u"", u""),
(u"sq", u"Shqip", u"", u""),
(u"sr", u"Српски / Srpski", u"", u"Serbian"), (u"sr", u"Српски / Srpski", u"", u"Serbian"),
(u"st", u"Southern Sotho", u"", u""),
(u"su", u"Sundanese", u"", u""),
(u"sv-SE", u"Svenska", u"Sverige", u"Swedish"), (u"sv-SE", u"Svenska", u"Sverige", u"Swedish"),
(u"sw", u"Kiswahili", u"", u""), (u"sw", u"Kiswahili", u"", u""),
(u"ta", u"தமிழ்", u"", u""), (u"ta", u"தமிழ்", u"", u"Tamil"),
(u"te", u"తెలుగు", u"", u""),
(u"tg", u"Tajik", u"", u""),
(u"th-TH", u"ไทย", u"ไทย", u"Thai"), (u"th-TH", u"ไทย", u"ไทย", u"Thai"),
(u"ti", u"ትግርኛ", u"", u""), (u"ti", u"ትግርኛ", u"", u"Tigrinya"),
(u"tk", u"Turkmen", u"", u""),
(u"tl-PH", u"Filipino", u"Pilipinas", u""), (u"tl-PH", u"Filipino", u"Pilipinas", u""),
(u"tlh", u"Klingon", u"", u""),
(u"tn", u"Tswana", u"", u""),
(u"to", u"Lea Fakatonga", u"", u""),
(u"tr-TR", u"Türkçe", u"Türkiye", u"Turkish"), (u"tr-TR", u"Türkçe", u"Türkiye", u"Turkish"),
(u"tt", u"Tatar", u"", u""), (u"tt", u"Татарча", u"", u"Tatar"),
(u"tum", u"Tumbuka", u"", u""),
(u"tw", u"Twi", u"", u""),
(u"ug", u"ئۇيغۇرچە", u"", u""),
(u"uk-UA", u"Українська", u"Україна", u"Ukrainian"), (u"uk-UA", u"Українська", u"Україна", u"Ukrainian"),
(u"ur", u"اردو", u"", u"Urdu"), (u"ur", u"اردو", u"", u"Urdu"),
(u"uz", u"Ozbek", u"", u"Uzbek"), (u"uz", u"Ozbek", u"", u"Uzbek"),
@ -179,13 +122,10 @@ language_codes = (
(u"vo", u"Volapük", u"", u"Volapük"), (u"vo", u"Volapük", u"", u"Volapük"),
(u"wa", u"Walon", u"", u"Walloon"), (u"wa", u"Walon", u"", u"Walloon"),
(u"war", u"Winaray", u"", u"Waray-Waray"), (u"war", u"Winaray", u"", u"Waray-Waray"),
(u"wo", u"Wolof", u"", u""), (u"xh", u"Xhosa", u"", u"Xhosa"),
(u"xh", u"Xhosa", u"", u""),
(u"yi", u"ייִדיש", u"", u""),
(u"yo", u"Èdè Yorùbá", u"", u""),
(u"zh", u"中文", u"", u"Chinese"), (u"zh", u"中文", u"", u"Chinese"),
(u"zh-CN", u"中文", u"中国", u"Chinese"), (u"zh-CN", u"中文", u"中国", u""),
(u"zh-HK", u"中文", u"香港", u"Chinese"), (u"zh-HK", u"中文", u"香港", u"Chinese"),
(u"zh-TW", u"中文", u"台湾", u"Chinese"), (u"zh-TW", u"中文", u"台湾", u""),
(u"zu", u"Isizulu", u"", u"") (u"zu", u"Isi-Zulu", u"", u"Zulu")
) )

View File

@ -86,3 +86,35 @@ class TestBingEngine(SearxTestCase):
self.assertEqual(results[0]['title'], 'This should be the title') self.assertEqual(results[0]['title'], 'This should be the title')
self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/') self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
self.assertEqual(results[0]['content'], 'This should be the content.') self.assertEqual(results[0]['content'], 'This should be the content.')
def test_fetch_supported_languages(self):
html = """<html></html>"""
response = mock.Mock(text=html)
results = bing._fetch_supported_languages(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
html = """
<html>
<body>
<form>
<div id="limit-languages">
<div>
<div><input id="es" value="es"></input></div>
</div>
<div>
<div><input id="pt_BR" value="pt_BR"></input></div>
<div><input id="pt_PT" value="pt_PT"></input></div>
</div>
</div>
</form>
</body>
</html>
"""
response = mock.Mock(text=html)
languages = bing._fetch_supported_languages(response)
self.assertEqual(type(languages), list)
self.assertEqual(len(languages), 3)
self.assertIn('es', languages)
self.assertIn('pt-BR', languages)
self.assertIn('pt-PT', languages)

View File

@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
from collections import defaultdict from collections import defaultdict
import mock import mock
from searx.engines import dailymotion from searx.engines import dailymotion
@ -72,3 +73,39 @@ class TestDailymotionEngine(SearxTestCase):
results = dailymotion.response(response) results = dailymotion.response(response)
self.assertEqual(type(results), list) self.assertEqual(type(results), list)
self.assertEqual(len(results), 0) self.assertEqual(len(results), 0)
def test_fetch_supported_languages(self):
json = r"""
{"list":[{"code":"af","name":"Afrikaans","native_name":"Afrikaans",
"localized_name":"Afrikaans","display_name":"Afrikaans"},
{"code":"ar","name":"Arabic","native_name":"\u0627\u0644\u0639\u0631\u0628\u064a\u0629",
"localized_name":"Arabic","display_name":"Arabic"},
{"code":"la","name":"Latin","native_name":null,
"localized_name":"Latin","display_name":"Latin"}
]}
"""
response = mock.Mock(text=json)
languages = dailymotion._fetch_supported_languages(response)
self.assertEqual(type(languages), dict)
self.assertEqual(len(languages), 3)
self.assertIn('af', languages)
self.assertIn('ar', languages)
self.assertIn('la', languages)
self.assertEqual(type(languages['af']), dict)
self.assertEqual(type(languages['ar']), dict)
self.assertEqual(type(languages['la']), dict)
self.assertIn('name', languages['af'])
self.assertIn('name', languages['ar'])
self.assertNotIn('name', languages['la'])
self.assertIn('english_name', languages['af'])
self.assertIn('english_name', languages['ar'])
self.assertIn('english_name', languages['la'])
self.assertEqual(languages['af']['name'], 'Afrikaans')
self.assertEqual(languages['af']['english_name'], 'Afrikaans')
self.assertEqual(languages['ar']['name'], u'العربية')
self.assertEqual(languages['ar']['english_name'], 'Arabic')
self.assertEqual(languages['la']['english_name'], 'Latin')

View File

@ -84,3 +84,17 @@ class TestDuckduckgoEngine(SearxTestCase):
self.assertEqual(results[0]['title'], 'This is the title') self.assertEqual(results[0]['title'], 'This is the title')
self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű') self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű')
self.assertEqual(results[0]['content'], 'This should be the content.') self.assertEqual(results[0]['content'], 'This should be the content.')
def test_fetch_supported_languages(self):
js = """some code...regions:{
"wt-wt":"All Results","ar-es":"Argentina","au-en":"Australia","at-de":"Austria","be-fr":"Belgium (fr)"
}some more code..."""
response = mock.Mock(text=js)
languages = duckduckgo._fetch_supported_languages(response)
self.assertEqual(type(languages), list)
self.assertEqual(len(languages), 5)
self.assertIn('wt-WT', languages)
self.assertIn('es-AR', languages)
self.assertIn('en-AU', languages)
self.assertIn('de-AT', languages)
self.assertIn('fr-BE', languages)

View File

@ -89,3 +89,28 @@ class TestGigablastEngine(SearxTestCase):
self.assertEqual(results[0]['title'], 'South by Southwest 2016') self.assertEqual(results[0]['title'], 'South by Southwest 2016')
self.assertEqual(results[0]['url'], 'www.sxsw.com') self.assertEqual(results[0]['url'], 'www.sxsw.com')
self.assertEqual(results[0]['content'], 'This should be the content.') self.assertEqual(results[0]['content'], 'This should be the content.')
def test_fetch_supported_languages(self):
html = """<html></html>"""
response = mock.Mock(text=html)
results = gigablast._fetch_supported_languages(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
html = """
<html>
<body>
<span id="menu2">
<a href="/search?&rxikd=1&qlang=xx"></a>
<a href="/search?&rxikd=1&qlang=en"></a>
<a href="/search?&rxikd=1&qlang=fr"></a>
</span>
</body>
</html>
"""
response = mock.Mock(text=html)
languages = gigablast._fetch_supported_languages(response)
self.assertEqual(type(languages), list)
self.assertEqual(len(languages), 2)
self.assertIn('en', languages)
self.assertIn('fr', languages)

View File

@ -177,3 +177,60 @@ class TestGoogleEngine(SearxTestCase):
self.assertEqual(results[0]['title'], '') self.assertEqual(results[0]['title'], '')
self.assertEqual(results[0]['content'], '') self.assertEqual(results[0]['content'], '')
self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg') self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')
def test_fetch_supported_languages(self):
html = """<html></html>"""
response = mock.Mock(text=html)
languages = google._fetch_supported_languages(response)
self.assertEqual(type(languages), dict)
self.assertEqual(len(languages), 0)
html = u"""
<html>
<body>
<table>
<tbody>
<tr>
<td>
<font>
<label>
<span id="ten">English</span>
</label>
</font>
</td>
<td>
<font>
<label>
<span id="tzh-CN">中文 (简体)</span>
</label>
<label>
<span id="tzh-TW">中文 (繁體)</span>
</label>
</font>
</td>
</tr>
</tbody>
</table>
</body>
</html>
"""
response = mock.Mock(text=html)
languages = google._fetch_supported_languages(response)
self.assertEqual(type(languages), dict)
self.assertEqual(len(languages), 3)
self.assertIn('en', languages)
self.assertIn('zh-CN', languages)
self.assertIn('zh-TW', languages)
self.assertEquals(type(languages['en']), dict)
self.assertEquals(type(languages['zh-CN']), dict)
self.assertEquals(type(languages['zh-TW']), dict)
self.assertIn('name', languages['en'])
self.assertIn('name', languages['zh-CN'])
self.assertIn('name', languages['zh-TW'])
self.assertEquals(languages['en']['name'], 'English')
self.assertEquals(languages['zh-CN']['name'], u'中文 (简体)')
self.assertEquals(languages['zh-TW']['name'], u'中文 (繁體)')

View File

@ -126,3 +126,30 @@ class TestSwisscowsEngine(SearxTestCase):
self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg') self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg')
self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png') self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png')
self.assertEqual(results[2]['template'], 'images.html') self.assertEqual(results[2]['template'], 'images.html')
def test_fetch_supported_languages(self):
html = """<html></html>"""
response = mock.Mock(text=html)
languages = swisscows._fetch_supported_languages(response)
self.assertEqual(type(languages), list)
self.assertEqual(len(languages), 0)
html = """
<html>
<div id="regions-popup">
<div>
<ul>
<li><a data-val="browser"></a></li>
<li><a data-val="de-CH"></a></li>
<li><a data-val="fr-CH"></a></li>
</ul>
</div>
</div>
</html>
"""
response = mock.Mock(text=html)
languages = swisscows._fetch_supported_languages(response)
self.assertEqual(type(languages), list)
self.assertEqual(len(languages), 3)
self.assertIn('de-CH', languages)
self.assertIn('fr-CH', languages)

View File

@ -164,3 +164,96 @@ class TestWikipediaEngine(SearxTestCase):
self.assertEqual(len(results), 2) self.assertEqual(len(results), 2)
self.assertEqual(results[1]['infobox'], u'披頭四樂隊') self.assertEqual(results[1]['infobox'], u'披頭四樂隊')
self.assertIn(u'披头士乐队...', results[1]['content']) self.assertIn(u'披头士乐队...', results[1]['content'])
def test_fetch_supported_languages(self):
    """Check wikipedia._fetch_supported_languages against canned pages.

    A page without tables must produce an empty dict; the fixture with
    two sortable tables (three rows in total) must produce a dict keyed
    by wiki code whose entries carry ``name``, ``english_name`` and
    ``articles``.
    """
    # page with no language tables
    empty_page = u"""<html></html>"""
    response = mock.Mock(text=empty_page)
    languages = wikipedia._fetch_supported_languages(response)
    self.assertIsInstance(languages, dict)
    self.assertEqual(len(languages), 0)

    # page with two language tables
    tables_page = u"""
<html>
<body>
<div>
<div>
<h3>Table header</h3>
<table class="sortable jquery-tablesorter">
<thead>
<tr>
<th>N</th>
<th>Language</th>
<th>Language (local)</th>
<th>Wiki</th>
<th>Articles</th>
</tr>
</thead>
<tbody>
<tr>
<td>2</td>
<td><a>Swedish</a></td>
<td><a>Svenska</a></td>
<td><a>sv</a></td>
<td><a><b>3000000</b></a></td>
</tr>
<tr>
<td>3</td>
<td><a>Cebuano</a></td>
<td><a>Sinugboanong Binisaya</a></td>
<td><a>ceb</a></td>
<td><a><b>3000000</b></a></td>
</tr>
</tbody>
</table>
<h3>Table header</h3>
<table class="sortable jquery-tablesorter">
<thead>
<tr>
<th>N</th>
<th>Language</th>
<th>Language (local)</th>
<th>Wiki</th>
<th>Articles</th>
</tr>
</thead>
<tbody>
<tr>
<td>2</td>
<td><a>Norwegian (Bokmål)</a></td>
<td><a>Norsk (Bokmål)</a></td>
<td><a>no</a></td>
<td><a><b>100000</b></a></td>
</tr>
</tbody>
</table>
</div>
</div>
</body>
</html>
"""
    response = mock.Mock(text=tables_page)
    languages = wikipedia._fetch_supported_languages(response)
    self.assertIsInstance(languages, dict)
    self.assertEqual(len(languages), 3)

    # expected entry for every wiki code present in the fixture
    expected = {
        'sv': {
            'name': 'Svenska',
            'english_name': 'Swedish',
            'articles': 3000000,
        },
        'ceb': {
            'name': 'Sinugboanong Binisaya',
            'english_name': 'Cebuano',
            'articles': 3000000,
        },
        'no': {
            'name': u'Norsk (Bokmål)',
            'english_name': u'Norwegian (Bokmål)',
            'articles': 100000,
        },
    }
    for code in ('sv', 'ceb', 'no'):
        self.assertIn(code, languages)
        self.assertIsInstance(languages[code], dict)
        for field in ('name', 'english_name', 'articles'):
            self.assertIn(field, languages[code])
            self.assertEqual(languages[code][field], expected[code][field])

View File

@ -147,3 +147,33 @@ class TestYahooEngine(SearxTestCase):
results = yahoo.response(response) results = yahoo.response(response)
self.assertEqual(type(results), list) self.assertEqual(type(results), list)
self.assertEqual(len(results), 0) self.assertEqual(len(results), 0)
def test_fetch_supported_languages(self):
    """Check yahoo._fetch_supported_languages against canned pages.

    A page without language inputs must produce an empty list; the
    fixture with three ``lang_*`` inputs inside ``yschlang`` must
    produce a list of three codes without the ``lang_`` prefix and with
    ``_`` replaced by ``-``.
    """
    # page with no language selector
    empty_page = """<html></html>"""
    response = mock.Mock(text=empty_page)
    languages = yahoo._fetch_supported_languages(response)
    self.assertIsInstance(languages, list)
    self.assertEqual(len(languages), 0)

    # page with a populated language selector
    selector_page = """
<html>
<div>
<div id="yschlang">
<span>
<label><input value="lang_ar"></input></label>
</span>
<span>
<label><input value="lang_zh_chs"></input></label>
<label><input value="lang_zh_cht"></input></label>
</span>
</div>
</div>
</html>
"""
    response = mock.Mock(text=selector_page)
    languages = yahoo._fetch_supported_languages(response)
    self.assertIsInstance(languages, list)
    self.assertEqual(len(languages), 3)
    for code in ('ar', 'zh-chs', 'zh-cht'):
        self.assertIn(code, languages)

View File

@ -84,7 +84,7 @@ def fetch_supported_languages():
# write json file # write json file
f = io.open(engines_languages_file, "w", encoding="utf-8") f = io.open(engines_languages_file, "w", encoding="utf-8")
f.write(unicode(dumps(engines_languages, indent=4, ensure_ascii=False, encoding="utf-8"))) f.write(unicode(dumps(engines_languages, ensure_ascii=False, encoding="utf-8")))
f.close() f.close()
@ -110,18 +110,22 @@ def join_language_lists():
else: else:
languages[locale] = {} languages[locale] = {}
# get locales that have no name yet # get locales that have no name or country yet
for locale in languages.keys(): for locale in languages.keys():
if not languages[locale].get('name'): if not languages[locale].get('name'):
# try to get language and country names # try to get language names
name = languages.get(locale.split('-')[0], {}).get('name', None) name = languages.get(locale.split('-')[0], {}).get('name', None)
if name: if name:
languages[locale]['name'] = name languages[locale]['name'] = name
languages[locale]['country'] = get_country_name(locale) or ''
languages[locale]['english_name'] = languages.get(locale.split('-')[0], {}).get('english_name', '') languages[locale]['english_name'] = languages.get(locale.split('-')[0], {}).get('english_name', '')
else: else:
# filter out locales with no name # filter out locales with no name
del languages[locale] del languages[locale]
continue
# try to get country name
if locale.find('-') > 0 and not languages[locale].get('country'):
languages[locale]['country'] = get_country_name(locale) or ''
# Remove countryless language if language is featured in only one country. # Remove countryless language if language is featured in only one country.