mirror of https://github.com/searx/searx
Merge branch 'master' into onesearch-engine
This commit is contained in:
commit
0d28fd2efe
|
@ -61,7 +61,7 @@ our homepage_.
|
|||
.. _homepage: https://searx.github.io/searx
|
||||
|
||||
contact:
|
||||
openhub_ // twitter_ // IRC: #searx @ freenode
|
||||
openhub_ // twitter_ // IRC: #searx @ Libera (irc.libera.chat)
|
||||
|
||||
.. _openhub: https://www.openhub.net/p/searx
|
||||
.. _twitter: https://twitter.com/Searx_engine
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -664,7 +664,7 @@
|
|||
"ruble bielorús": "BYN",
|
||||
"běloruský rubl": "BYN",
|
||||
"hviderussiske rubler": "BYN",
|
||||
"weißrussischer rubel": "BYN",
|
||||
"belarussischer rubel": "BYN",
|
||||
"λευκορωσικό ρούβλι": "BYN",
|
||||
"belarusian ruble": "BYN",
|
||||
"belorusia rublo": "BYN",
|
||||
|
@ -1230,6 +1230,7 @@
|
|||
"birr etíope": "ETB",
|
||||
"بیر اتیوپی": "ETB",
|
||||
"etiopian birr": "ETB",
|
||||
"ביר אתיופי": "ETB",
|
||||
"etiopski bir": "ETB",
|
||||
"etióp birr": "ETB",
|
||||
"birr etiope": "ETB",
|
||||
|
@ -2911,6 +2912,7 @@
|
|||
"rupee nepal": "NPR",
|
||||
"尼泊尔卢比": "NPR",
|
||||
"niue dollar": "NUD",
|
||||
"dólar de niue": "NUD",
|
||||
"دلار نیووی": "NUD",
|
||||
"niuen dollari": "NUD",
|
||||
"доллар ниуэ": "NUD",
|
||||
|
@ -4168,7 +4170,7 @@
|
|||
"franc cfa d'africa centrala": "XAF",
|
||||
"franco cfa da áfrica central": "XAF",
|
||||
"franc cfa beac": "XAF",
|
||||
"франк кфа beac": "XAF",
|
||||
"центральноафриканский франк кфа": "XAF",
|
||||
"stredoafrický frank": "XAF",
|
||||
"orta afrika cfa frangı": "XAF",
|
||||
"центральноафриканський франк": "XAF",
|
||||
|
@ -4347,7 +4349,7 @@
|
|||
"franc cfa d'africa occidentala": "XOF",
|
||||
"franco cfa da áfrica ocidental": "XOF",
|
||||
"franc cfa bceao": "XOF",
|
||||
"франк кфа bceao": "XOF",
|
||||
"западноафриканский франк кфа": "XOF",
|
||||
"západoafrický frank": "XOF",
|
||||
"batı afrika cfa frangı": "XOF",
|
||||
"західноафриканський франк": "XOF",
|
||||
|
@ -5245,49 +5247,14 @@
|
|||
"валюта ботсваны": "BWP",
|
||||
"botswansk pula": "BWP",
|
||||
"போட்ஸ்வானா பூலா": "BWP",
|
||||
"الروبل البلاروسي": "BYN",
|
||||
"Br": [
|
||||
"ETB",
|
||||
"BYN"
|
||||
],
|
||||
"ruble de bielorússia": "BYN",
|
||||
"byr": "BYN",
|
||||
"hviderussisk rubel": "BYN",
|
||||
"belarus rubel": "BYN",
|
||||
"belarussischer rubel": "BYN",
|
||||
"weissrussischer rubel": "BYN",
|
||||
"ρούβλι λευκορωσίας": "BYN",
|
||||
"byn": "BYN",
|
||||
"belarusia rublo": "BYN",
|
||||
"belorusa rublo": "BYN",
|
||||
"belarusa rublo": "BYN",
|
||||
"rublo de belarus": "BYN",
|
||||
"rublo de belarús": "BYN",
|
||||
"rublo de bielorrusia": "BYN",
|
||||
"rouble bielorusse": "BYN",
|
||||
"רובל בלרוסי": "BYN",
|
||||
"bjeloruska rublja": "BYN",
|
||||
"rublo bielorusse": "BYN",
|
||||
"rublo": "BYN",
|
||||
"robla bielorussa": "BYN",
|
||||
"rublo da bielorrússia": "BYN",
|
||||
"rubla belarusă": "BYN",
|
||||
"rubla bielorusă": "BYN",
|
||||
"rublă bielorusă": "BYN",
|
||||
"byb": "BYN",
|
||||
"byl": "BYN",
|
||||
"валюта белоруссии": "BYN",
|
||||
"расчётный рубль белоруссии": "BYN",
|
||||
"бел руб": "BYN",
|
||||
"зайчик": "BYN",
|
||||
"рубль": [
|
||||
"RUB",
|
||||
"BYN"
|
||||
],
|
||||
"беларускі рубель": "BYN",
|
||||
"бјелоруска рубља": "BYN",
|
||||
"belarus rubel": "BYN",
|
||||
"br": "BYN",
|
||||
"beyaz rusya rublesi": "BYN",
|
||||
"nuevo rublo bierlorruso": "BYN",
|
||||
"білоруський рубль": "BYN",
|
||||
"Bz$": "BZD",
|
||||
"bz$": "BZD",
|
||||
|
@ -5795,8 +5762,9 @@
|
|||
"EGP"
|
||||
],
|
||||
"£e": "EGP",
|
||||
"e£": "EGP",
|
||||
"egp": "EGP",
|
||||
"e£": "EGP",
|
||||
"ج.م.": "EGP",
|
||||
"egiptoar libera": "EGP",
|
||||
"livre egyptienne": "EGP",
|
||||
"מטבע מצרים": "EGP",
|
||||
|
@ -5843,7 +5811,6 @@
|
|||
"etb": "ETB",
|
||||
"ethiopian dollar": "ETB",
|
||||
"birr éthiopien": "ETB",
|
||||
"ביר אתיופי": "ETB",
|
||||
"エチオピア・ブル": "ETB",
|
||||
"biras": "ETB",
|
||||
"birr da etiópia": "ETB",
|
||||
|
@ -7705,6 +7672,7 @@
|
|||
"rub": "RUB",
|
||||
"rubel": "RUB",
|
||||
"руб": "RUB",
|
||||
"ruble": "RUB",
|
||||
"rusa rublo": "RUB",
|
||||
"vene rubla": "RUB",
|
||||
"errusiar errublo": "RUB",
|
||||
|
@ -7722,6 +7690,7 @@
|
|||
"rubla rusă": "RUB",
|
||||
"rublă rusească": "RUB",
|
||||
"рр": "RUB",
|
||||
"рубль": "RUB",
|
||||
"валюта абхазии": "RUB",
|
||||
"валюта днр": "RUB",
|
||||
"валюта лнр": "RUB",
|
||||
|
@ -7735,7 +7704,6 @@
|
|||
"ryska rubler": "RUB",
|
||||
"sovjetisk rubel": "RUB",
|
||||
"ரஷ்ய ரூபிள்": "RUB",
|
||||
"ruble": "RUB",
|
||||
"ruble nga": "RUB",
|
||||
"rúp": "RUB",
|
||||
"俄國盧布": "RUB",
|
||||
|
@ -7835,6 +7803,7 @@
|
|||
"دينار سوداني": "SDG",
|
||||
"суданска лира": "SDG",
|
||||
"lliura del sudan": "SDG",
|
||||
"punt swdan": "SDG",
|
||||
"λίρα σουδάν": "SDG",
|
||||
"sdg": "SDG",
|
||||
"libra de sudán": "SDG",
|
||||
|
@ -8441,6 +8410,7 @@
|
|||
"u.s. dollar": "USD",
|
||||
"us$": "USD",
|
||||
"dolar estadounidense": "USD",
|
||||
"$ us": "USD",
|
||||
"dollar des états unis": "USD",
|
||||
"dollar étatsunien": "USD",
|
||||
"דולר אמריקני": "USD",
|
||||
|
@ -8609,6 +8579,7 @@
|
|||
],
|
||||
"franc centrafrican cfa": "XAF",
|
||||
"franc central african cfa": "XAF",
|
||||
"франк кфа beac": "XAF",
|
||||
"валюта габона": "XAF",
|
||||
"валюта камеруна": "XAF",
|
||||
"валюта республики конго": "XAF",
|
||||
|
@ -8616,7 +8587,6 @@
|
|||
"валюта чада": "XAF",
|
||||
"валюта экваториальной гвинеи": "XAF",
|
||||
"франк кфа веас": "XAF",
|
||||
"центральноафриканский франк кфа": "XAF",
|
||||
"centralafrikansk cfa franc": "XAF",
|
||||
"fcfa": "XAF",
|
||||
"மத்திய ஆப்பிரிக்க சி.எஃப்.ஏ பிராங்க்": "XAF",
|
||||
|
@ -8639,10 +8609,6 @@
|
|||
"χρυσάφι": "XAU",
|
||||
"element 79": "XAU",
|
||||
"elemento 79": "XAU",
|
||||
"aurifera": "XAU",
|
||||
"aurifero": "XAU",
|
||||
"aurífera": "XAU",
|
||||
"aurífero": "XAU",
|
||||
"زر": "XAU",
|
||||
"grundämne 79": "XAU",
|
||||
"பொன்": "XAU",
|
||||
|
@ -8659,7 +8625,6 @@
|
|||
"بیت کوین": "XBT",
|
||||
"cryptografisch geld": "XBT",
|
||||
"биткоин": "XBT",
|
||||
"bitkoin": "XBT",
|
||||
"比特幣": "XBT",
|
||||
"位元幣": "XBT",
|
||||
"EC$": "XCD",
|
||||
|
@ -8755,6 +8720,7 @@
|
|||
"frank cfa": "XOF",
|
||||
"frank zachodnioafrykański": "XOF",
|
||||
"franc cfa vest african": "XOF",
|
||||
"франк кфа bceao": "XOF",
|
||||
"валюта бенина": "XOF",
|
||||
"валюта буркина фасо": "XOF",
|
||||
"валюта гвинеи бисау": "XOF",
|
||||
|
@ -8764,7 +8730,6 @@
|
|||
"валюта сенегала": "XOF",
|
||||
"валюта того": "XOF",
|
||||
"западно африканский франк кфа": "XOF",
|
||||
"западноафриканский франк кфа": "XOF",
|
||||
"франк африканского финансового сообщества": "XOF",
|
||||
"франк кфа всеао": "XOF",
|
||||
"frank bceao/cfa": "XOF",
|
||||
|
@ -9728,41 +9693,39 @@
|
|||
"BYN": {
|
||||
"ar": "روبل بلاروسي",
|
||||
"bg": "Беларуска рубла",
|
||||
"ca": "ruble bielorús",
|
||||
"ca": "Ruble bielorús",
|
||||
"cs": "Běloruský rubl",
|
||||
"da": "Hviderussiske rubler",
|
||||
"de": "weißrussischer Rubel",
|
||||
"de": "Belarussischer Rubel",
|
||||
"el": "Λευκορωσικό ρούβλι",
|
||||
"en": "Belarusian ruble",
|
||||
"eo": "Belorusia rublo",
|
||||
"es": "rublo bielorruso",
|
||||
"es": "Nuevo rublo bierlorruso",
|
||||
"et": "Valgevene rubla",
|
||||
"fa": "روبل بلاروس",
|
||||
"fi": "Valko-Venäjän rupla",
|
||||
"fr": "rouble biélorusse",
|
||||
"fr": "Rouble biélorusse",
|
||||
"gl": "Rublo belaruso",
|
||||
"he": "רובל בלארוסי",
|
||||
"hr": "Bjeloruska rublja",
|
||||
"hu": "belarusz rubel",
|
||||
"it": "rublo bielorusso",
|
||||
"hr": "Bjeloruski rubalj",
|
||||
"hu": "Belarusz rubel",
|
||||
"it": "Rublo bielorusso",
|
||||
"ja": "ベラルーシ・ルーブル",
|
||||
"lt": "Baltarusijos rublis",
|
||||
"nl": "Wit-Russische roebel",
|
||||
"pl": "rubel białoruski",
|
||||
"pt": "rublo bielorrusso",
|
||||
"pl": "Rubel białoruski",
|
||||
"pt": "Rublo bielorrusso",
|
||||
"ro": "Rublă belarusă",
|
||||
"ru": "белорусский рубль",
|
||||
"sk": "Bieloruský rubeľ",
|
||||
"sl": "beloruski rubelj",
|
||||
"sr": "белоруска рубља",
|
||||
"sl": "Beloruski rubelj",
|
||||
"sr": "Белоруска рубља",
|
||||
"sv": "Belarusisk rubel",
|
||||
"ta": "பெலருசிய ரூபிள்",
|
||||
"tr": "Beyaz Rusya rublesi",
|
||||
"uk": "білоруський рубль",
|
||||
"tr": "Belarus rublesi",
|
||||
"uk": "білоруський рубель",
|
||||
"vi": "Rúp Belarus",
|
||||
"zh": "白俄羅斯盧布",
|
||||
"ia": "rublo bielorusse",
|
||||
"oc": "Robla bielorussa"
|
||||
"zh": "白俄羅斯盧布"
|
||||
},
|
||||
"BZD": {
|
||||
"ar": "دولار بليزي",
|
||||
|
@ -10415,6 +10378,7 @@
|
|||
"fa": "بیر اتیوپی",
|
||||
"fi": "Etiopian birr",
|
||||
"fr": "Birr",
|
||||
"he": "ביר אתיופי",
|
||||
"hr": "Etiopski bir",
|
||||
"hu": "etióp birr",
|
||||
"it": "birr etiope",
|
||||
|
@ -10428,8 +10392,7 @@
|
|||
"sv": "Etiopisk Birr",
|
||||
"tr": "Birr",
|
||||
"uk": "Ефіопський бир",
|
||||
"zh": "衣索比亞比爾",
|
||||
"he": "ביר אתיופי"
|
||||
"zh": "衣索比亞比爾"
|
||||
},
|
||||
"EUR": {
|
||||
"ar": "يورو",
|
||||
|
@ -10968,7 +10931,7 @@
|
|||
"da": "Gourde",
|
||||
"de": "Gourde",
|
||||
"el": "Γκουρντ",
|
||||
"en": "gourde",
|
||||
"en": "Gourde",
|
||||
"eo": "haitia gurdo",
|
||||
"es": "gourde",
|
||||
"eu": "Gourde",
|
||||
|
@ -12608,6 +12571,7 @@
|
|||
},
|
||||
"NUD": {
|
||||
"en": "Niue dollar",
|
||||
"es": "Dólar de Niue",
|
||||
"fa": "دلار نیوئه",
|
||||
"fi": "Niuen dollari",
|
||||
"ru": "доллар Ниуэ",
|
||||
|
@ -12645,7 +12609,7 @@
|
|||
"sr": "новозеландски долар",
|
||||
"sv": "Nyzeeländsk dollar",
|
||||
"tr": "Yeni Zelanda doları",
|
||||
"uk": "Новозеландський долар",
|
||||
"uk": "новозеландський долар",
|
||||
"vi": "Đô la New Zealand",
|
||||
"zh": "紐西蘭元",
|
||||
"oc": "Dolar neozelandés"
|
||||
|
@ -13230,6 +13194,7 @@
|
|||
"uk": "Суданський фунт",
|
||||
"zh": "蘇丹鎊",
|
||||
"bg": "Суданска лира",
|
||||
"cy": "punt Swdan",
|
||||
"he": "לירה סודאנית"
|
||||
},
|
||||
"SEK": {
|
||||
|
@ -13826,7 +13791,7 @@
|
|||
"fi": "Uusi Taiwanin dollari",
|
||||
"fr": "nouveau dollar de Taïwan",
|
||||
"hr": "Novotajvanski dolar",
|
||||
"hu": "Tajvani új dollár",
|
||||
"hu": "tajvani új dollár",
|
||||
"it": "Dollaro taiwanese",
|
||||
"ja": "新台湾ドル",
|
||||
"lt": "Naujasis Taivano doleris",
|
||||
|
@ -14309,7 +14274,7 @@
|
|||
"sv": "Bitcoin",
|
||||
"ta": "பிட்காயின்",
|
||||
"te": "బిట్ కాయిన్",
|
||||
"tr": "Bitkoin",
|
||||
"tr": "Bitcoin",
|
||||
"uk": "біткоїн",
|
||||
"vi": "Bitcoin",
|
||||
"zh": "比特幣"
|
||||
|
|
|
@ -28582,7 +28582,7 @@
|
|||
"sv",
|
||||
"th",
|
||||
"tr",
|
||||
"zh-CHS",
|
||||
"zh-CHT"
|
||||
"zh_chs",
|
||||
"zh_cht"
|
||||
]
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -113,6 +113,7 @@
|
|||
"Q106645257": "MN m",
|
||||
"Q106645261": "kN m",
|
||||
"Q106645290": "dN m",
|
||||
"Q106647058": "u",
|
||||
"Q1067722": "Fg",
|
||||
"Q106777906": "μS/m",
|
||||
"Q106777917": "S/cm",
|
||||
|
@ -173,8 +174,37 @@
|
|||
"Q107361180": "μW/m²",
|
||||
"Q107361187": "pW/m²",
|
||||
"Q107378499": "kN/m²",
|
||||
"Q107410680": "cN m",
|
||||
"Q107410689": "N cm",
|
||||
"Q107410785": "g/mm²",
|
||||
"Q107410801": "g/(cm s)",
|
||||
"Q107410895": "kJ/hg",
|
||||
"Q107440604": "W/cm²",
|
||||
"Q107440662": "mmol/kg",
|
||||
"Q107440685": "mmol/g",
|
||||
"Q107440698": "kmol/kg",
|
||||
"Q107440839": "g/g",
|
||||
"Q107440910": "g/hg",
|
||||
"Q107441004": "mg/hg",
|
||||
"Q107460729": "mm³/mm³",
|
||||
"Q107460790": "kg/GJ",
|
||||
"Q107460866": "g/m",
|
||||
"Q107460882": "mg/km",
|
||||
"Q107461064": "MJ/m²",
|
||||
"Q107461092": "g/(m² s)",
|
||||
"Q107461119": "μg/(m² s)",
|
||||
"Q107461139": "μg/J",
|
||||
"Q107461146": "g/MJ",
|
||||
"Q107538710": "μH/m",
|
||||
"Q107538724": "nH/m",
|
||||
"Q107970291": "mol/dm³",
|
||||
"Q1084321": "Tb/s",
|
||||
"Q1086691": "fg",
|
||||
"Q108730765": "kW a",
|
||||
"Q108888186": "eV/c²",
|
||||
"Q108888198": "keV/c²",
|
||||
"Q108888206": "MeV/c²",
|
||||
"Q108888224": "GeV/c²",
|
||||
"Q1091257": "tex",
|
||||
"Q1092296": "a",
|
||||
"Q1104069": "$",
|
||||
|
@ -218,6 +248,7 @@
|
|||
"Q130964": "cal",
|
||||
"Q131255": "F",
|
||||
"Q13147228": "g/cm³",
|
||||
"Q131723": "₿",
|
||||
"Q1322380": "Ts",
|
||||
"Q1323615": "oz t",
|
||||
"Q132643": "kr",
|
||||
|
@ -256,7 +287,6 @@
|
|||
"Q15551713": "Sh",
|
||||
"Q1569733": "St",
|
||||
"Q15784325": "apc",
|
||||
"Q160680": "Br",
|
||||
"Q160857": "hp",
|
||||
"Q162525": "°E",
|
||||
"Q1628990": "hph",
|
||||
|
@ -303,6 +333,7 @@
|
|||
"Q1872619": "zs",
|
||||
"Q189097": "₧",
|
||||
"Q190095": "Gy",
|
||||
"Q19017495": "mm²",
|
||||
"Q190951": "S$",
|
||||
"Q191118": "t",
|
||||
"Q1913097": "fg",
|
||||
|
@ -635,8 +666,6 @@
|
|||
"Q53449045": "EW",
|
||||
"Q53449049": "ZW",
|
||||
"Q53449054": "YW",
|
||||
"Q53561461": "wf",
|
||||
"Q53561822": "wf",
|
||||
"Q53651160": "zm³",
|
||||
"Q53651201": "Ym³",
|
||||
"Q53651356": "ym³",
|
||||
|
@ -751,7 +780,6 @@
|
|||
"Q70444514": "Ymol",
|
||||
"Q70444609": "Pmol",
|
||||
"Q712226": "km²",
|
||||
"Q717310": "Mg",
|
||||
"Q72081071": "MeV",
|
||||
"Q723733": "ms",
|
||||
"Q730251": "ft·lbf",
|
||||
|
@ -1148,7 +1176,6 @@
|
|||
"Q11570": "kg",
|
||||
"Q11573": "m",
|
||||
"Q11574": "s",
|
||||
"Q11579": "K",
|
||||
"Q11582": "L",
|
||||
"Q12129": "pc",
|
||||
"Q12438": "N",
|
||||
|
|
|
@ -182,9 +182,9 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
|
|||
# https://developers.google.com/custom-search/docs/xml_results#lrsp
|
||||
# Language Collection Values:
|
||||
# https://developers.google.com/custom-search/docs/xml_results_appendices#languageCollections
|
||||
ret_val['params']['lr'] = "lang_" + lang_list.get(lang_country, language)
|
||||
ret_val['params']['lr'] = "lang_" + lang_country if lang_country in lang_list else language
|
||||
|
||||
ret_val['params']['hl'] = lang_list.get(lang_country, language)
|
||||
ret_val['params']['hl'] = lang_country if lang_country in lang_list else language
|
||||
|
||||
# hl parameter:
|
||||
# https://developers.google.com/custom-search/docs/xml_results#hlsp The
|
||||
|
|
|
@ -3,9 +3,18 @@
|
|||
Yahoo (Web)
|
||||
"""
|
||||
|
||||
from urllib.parse import unquote, urlencode
|
||||
from urllib.parse import (
|
||||
unquote,
|
||||
urlencode,
|
||||
)
|
||||
from lxml import html
|
||||
from searx.utils import extract_text, extract_url, match_language, eval_xpath
|
||||
|
||||
from searx.utils import (
|
||||
eval_xpath_getindex,
|
||||
eval_xpath_list,
|
||||
extract_text,
|
||||
match_language,
|
||||
)
|
||||
|
||||
# about
|
||||
about = {
|
||||
|
@ -21,29 +30,75 @@ about = {
|
|||
categories = ['general']
|
||||
paging = True
|
||||
time_range_support = True
|
||||
supported_languages_url = 'https://search.yahoo.com/preferences/languages'
|
||||
"""Supported languages are read from Yahoo preference page."""
|
||||
|
||||
# search-url
|
||||
base_url = 'https://search.yahoo.com/'
|
||||
search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
|
||||
search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time'
|
||||
time_range_dict = {
|
||||
'day': ('1d', 'd'),
|
||||
'week': ('1w', 'w'),
|
||||
'month': ('1m', 'm'),
|
||||
}
|
||||
|
||||
supported_languages_url = 'https://search.yahoo.com/web/advanced'
|
||||
language_aliases = {
|
||||
'zh-HK': 'zh_chs',
|
||||
'zh-CN': 'zh_chs', # dead since 2015 / routed to hk.search.yahoo.com
|
||||
'zh-TW': 'zh_cht',
|
||||
}
|
||||
|
||||
# specific xpath variables
|
||||
results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
|
||||
url_xpath = './/h3/a/@href'
|
||||
title_xpath = './/h3/a'
|
||||
content_xpath = './/div[contains(@class, "compText")]'
|
||||
suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a"
|
||||
lang2domain = {
|
||||
'zh_chs': 'hk.search.yahoo.com',
|
||||
'zh_cht': 'tw.search.yahoo.com',
|
||||
'en': 'search.yahoo.com',
|
||||
|
||||
time_range_dict = {'day': ['1d', 'd'],
|
||||
'week': ['1w', 'w'],
|
||||
'month': ['1m', 'm']}
|
||||
'bg': 'search.yahoo.com',
|
||||
'cs': 'search.yahoo.com',
|
||||
'da': 'search.yahoo.com',
|
||||
'el': 'search.yahoo.com',
|
||||
'et': 'search.yahoo.com',
|
||||
'he': 'search.yahoo.com',
|
||||
'hr': 'search.yahoo.com',
|
||||
'ja': 'search.yahoo.com',
|
||||
'ko': 'search.yahoo.com',
|
||||
'sk': 'search.yahoo.com',
|
||||
'sl': 'search.yahoo.com',
|
||||
|
||||
language_aliases = {'zh-CN': 'zh-CHS', 'zh-TW': 'zh-CHT', 'zh-HK': 'zh-CHT'}
|
||||
}
|
||||
|
||||
|
||||
def _get_language(params):
|
||||
|
||||
lang = language_aliases.get(params['language'])
|
||||
if lang is None:
|
||||
lang = match_language(
|
||||
params['language'], supported_languages, language_aliases
|
||||
)
|
||||
lang = lang.split('-')[0]
|
||||
return lang
|
||||
|
||||
|
||||
def request(query, params):
|
||||
"""build request"""
|
||||
offset = (params['pageno'] - 1) * 7 + 1
|
||||
lang = _get_language(params)
|
||||
age, btf = time_range_dict.get(params['time_range'], ('', ''))
|
||||
|
||||
args = urlencode({
|
||||
'p': query,
|
||||
'ei': 'UTF-8',
|
||||
'fl': 1,
|
||||
'vl': 'lang_' + lang,
|
||||
'btf': btf,
|
||||
'fr2': 'time',
|
||||
'age': age,
|
||||
'b': offset,
|
||||
'xargs': 0,
|
||||
})
|
||||
|
||||
domain = lang2domain.get(lang, '%s.search.yahoo.com' % lang)
|
||||
params['url'] = 'https://%s/search?%s' % (domain, args)
|
||||
return params
|
||||
|
||||
|
||||
# remove yahoo-specific tracking-url
|
||||
def parse_url(url_string):
|
||||
endings = ['/RS', '/RK']
|
||||
endpositions = []
|
||||
|
@ -61,86 +116,37 @@ def parse_url(url_string):
|
|||
return unquote(url_string[start:end])
|
||||
|
||||
|
||||
def _get_url(query, offset, language, time_range):
|
||||
if time_range in time_range_dict:
|
||||
return base_url + search_url_with_time.format(offset=offset,
|
||||
query=urlencode({'p': query}),
|
||||
lang=language,
|
||||
age=time_range_dict[time_range][0],
|
||||
btf=time_range_dict[time_range][1])
|
||||
return base_url + search_url.format(offset=offset,
|
||||
query=urlencode({'p': query}),
|
||||
lang=language)
|
||||
|
||||
|
||||
def _get_language(params):
|
||||
if params['language'] == 'all':
|
||||
return 'en'
|
||||
|
||||
language = match_language(params['language'], supported_languages, language_aliases)
|
||||
if language not in language_aliases.values():
|
||||
language = language.split('-')[0]
|
||||
language = language.replace('-', '_').lower()
|
||||
|
||||
return language
|
||||
|
||||
|
||||
# do search-request
|
||||
def request(query, params):
|
||||
if params['time_range'] and params['time_range'] not in time_range_dict:
|
||||
return params
|
||||
|
||||
offset = (params['pageno'] - 1) * 10 + 1
|
||||
language = _get_language(params)
|
||||
|
||||
params['url'] = _get_url(query, offset, language, params['time_range'])
|
||||
|
||||
# TODO required?
|
||||
params['cookies']['sB'] = 'fl=1&vl=lang_{lang}&sh=1&rw=new&v=1'\
|
||||
.format(lang=language)
|
||||
|
||||
return params
|
||||
|
||||
|
||||
# get response from search-request
|
||||
def response(resp):
|
||||
results = []
|
||||
|
||||
dom = html.fromstring(resp.text)
|
||||
|
||||
try:
|
||||
results_num = int(eval_xpath(dom, '//div[@class="compPagination"]/span[last()]/text()')[0]
|
||||
.split()[0].replace(',', ''))
|
||||
results.append({'number_of_results': results_num})
|
||||
except:
|
||||
pass
|
||||
|
||||
# parse results
|
||||
for result in eval_xpath(dom, results_xpath):
|
||||
try:
|
||||
url = parse_url(extract_url(eval_xpath(result, url_xpath), search_url))
|
||||
title = extract_text(eval_xpath(result, title_xpath)[0])
|
||||
except:
|
||||
for result in eval_xpath_list(dom, '//div[contains(@class,"algo-sr")]'):
|
||||
url = eval_xpath_getindex(result, './/h3/a/@href', 0, default=None)
|
||||
if url is None:
|
||||
continue
|
||||
url = parse_url(url)
|
||||
|
||||
content = extract_text(eval_xpath(result, content_xpath)[0])
|
||||
title = eval_xpath_getindex(result, './/h3/a', 0, default=None)
|
||||
if title is None:
|
||||
continue
|
||||
offset = len(extract_text(title.xpath('span')))
|
||||
title = extract_text(title)[offset:]
|
||||
|
||||
content = eval_xpath_getindex(
|
||||
result, './/div[contains(@class, "compText")]', 0, default=''
|
||||
)
|
||||
if content:
|
||||
content = extract_text(content)
|
||||
|
||||
# append result
|
||||
results.append({'url': url,
|
||||
'title': title,
|
||||
'content': content})
|
||||
|
||||
# if no suggestion found, return results
|
||||
suggestions = eval_xpath(dom, suggestion_xpath)
|
||||
if not suggestions:
|
||||
return results
|
||||
|
||||
# parse suggestion
|
||||
for suggestion in suggestions:
|
||||
for suggestion in eval_xpath_list(dom, '//div[contains(@class, "AlsoTry")]'):
|
||||
# append suggestion
|
||||
results.append({'suggestion': extract_text(suggestion)})
|
||||
|
||||
# return results
|
||||
return results
|
||||
|
||||
|
||||
|
@ -148,13 +154,9 @@ def response(resp):
|
|||
def _fetch_supported_languages(resp):
|
||||
supported_languages = []
|
||||
dom = html.fromstring(resp.text)
|
||||
options = eval_xpath(dom, '//div[@id="yschlang"]/span/label/input')
|
||||
for option in options:
|
||||
code_parts = eval_xpath(option, './@value')[0][5:].split('_')
|
||||
if len(code_parts) == 2:
|
||||
code = code_parts[0] + '-' + code_parts[1].upper()
|
||||
else:
|
||||
code = code_parts[0]
|
||||
supported_languages.append(code)
|
||||
offset = len('lang_')
|
||||
|
||||
for val in eval_xpath_list(dom, '//div[contains(@class, "lang-item")]/input/@value'):
|
||||
supported_languages.append(val[offset:])
|
||||
|
||||
return supported_languages
|
||||
|
|
|
@ -6,6 +6,7 @@ from urllib.parse import urlparse, unquote
|
|||
from searx import logger
|
||||
from searx.engines import engines
|
||||
from searx.metrology.error_recorder import record_error
|
||||
from searx import settings
|
||||
|
||||
|
||||
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
|
||||
|
@ -129,13 +130,18 @@ def merge_two_infoboxes(infobox1, infobox2):
|
|||
infobox1['content'] = content2
|
||||
|
||||
|
||||
def result_score(result):
|
||||
def result_score(result, language):
|
||||
weight = 1.0
|
||||
|
||||
for result_engine in result['engines']:
|
||||
if hasattr(engines[result_engine], 'weight'):
|
||||
weight *= float(engines[result_engine].weight)
|
||||
|
||||
if settings['search'].get('prefer_configured_language', False):
|
||||
domain_parts = result['parsed_url'].netloc.split('.')
|
||||
if language in domain_parts:
|
||||
weight *= 1.1
|
||||
|
||||
occurences = len(result['positions'])
|
||||
|
||||
return sum((occurences * weight) / position for position in result['positions'])
|
||||
|
@ -145,9 +151,10 @@ class ResultContainer:
|
|||
"""docstring for ResultContainer"""
|
||||
|
||||
__slots__ = '_merged_results', 'infoboxes', 'suggestions', 'answers', 'corrections', '_number_of_results',\
|
||||
'_ordered', 'paging', 'unresponsive_engines', 'timings', 'redirect_url', 'engine_data'
|
||||
'_ordered', 'paging', 'unresponsive_engines', 'timings', 'redirect_url', 'engine_data',\
|
||||
'_language'
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, language):
|
||||
super().__init__()
|
||||
self._merged_results = []
|
||||
self.infoboxes = []
|
||||
|
@ -161,6 +168,7 @@ class ResultContainer:
|
|||
self.unresponsive_engines = set()
|
||||
self.timings = []
|
||||
self.redirect_url = None
|
||||
self._language = language.lower().split('-')[0]
|
||||
|
||||
def extend(self, engine_name, results):
|
||||
standard_result_count = 0
|
||||
|
@ -299,7 +307,7 @@ class ResultContainer:
|
|||
|
||||
def order_results(self):
|
||||
for result in self._merged_results:
|
||||
score = result_score(result)
|
||||
score = result_score(result, self._language)
|
||||
result['score'] = score
|
||||
with RLock():
|
||||
for result_engine in result['engines']:
|
||||
|
|
|
@ -66,7 +66,7 @@ class Search:
|
|||
# init vars
|
||||
super().__init__()
|
||||
self.search_query = search_query
|
||||
self.result_container = ResultContainer()
|
||||
self.result_container = ResultContainer(search_query.lang)
|
||||
self.start_time = None
|
||||
self.actual_timeout = None
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@ search:
|
|||
default_lang : "" # Default search language - leave blank to detect from browser information or use codes from 'languages.py'
|
||||
ban_time_on_fail : 5 # ban time in seconds after engine errors
|
||||
max_ban_time_on_fail : 120 # max ban time in seconds after engine errors
|
||||
prefer_configured_language: False # increase weight of results in configured language in ranking
|
||||
|
||||
server:
|
||||
port : 8888
|
||||
|
|
|
@ -40,8 +40,8 @@ def get_user_settings_path():
|
|||
# environment variable SEARX_SETTINGS_PATH
|
||||
return check_settings_yml(environ['SEARX_SETTINGS_PATH'])
|
||||
|
||||
# if not, get it from searx code base or last solution from /etc/searx
|
||||
return check_settings_yml('settings.yml') or check_settings_yml('/etc/searx/settings.yml')
|
||||
# if not, get it from /etc/searx, or last resort the codebase
|
||||
return check_settings_yml('/etc/searx/settings.yml') or check_settings_yml('settings.yml')
|
||||
|
||||
|
||||
def update_dict(default_dict, user_dict):
|
||||
|
|
|
@ -20,22 +20,22 @@ def fake_result(url='https://aa.bb/cc?dd=ee#ff',
|
|||
class ResultContainerTestCase(SearxTestCase):
|
||||
|
||||
def test_empty(self):
|
||||
c = ResultContainer()
|
||||
c = ResultContainer("en-US")
|
||||
self.assertEqual(c.get_ordered_results(), [])
|
||||
|
||||
def test_one_result(self):
|
||||
c = ResultContainer()
|
||||
c = ResultContainer("en-US")
|
||||
c.extend('wikipedia', [fake_result()])
|
||||
self.assertEqual(c.results_length(), 1)
|
||||
|
||||
def test_one_suggestion(self):
|
||||
c = ResultContainer()
|
||||
c = ResultContainer("en-US")
|
||||
c.extend('wikipedia', [fake_result(suggestion=True)])
|
||||
self.assertEqual(len(c.suggestions), 1)
|
||||
self.assertEqual(c.results_length(), 0)
|
||||
|
||||
def test_result_merge(self):
|
||||
c = ResultContainer()
|
||||
c = ResultContainer("en-US")
|
||||
c.extend('wikipedia', [fake_result()])
|
||||
c.extend('wikidata', [fake_result(), fake_result(url='https://example.com/')])
|
||||
self.assertEqual(c.results_length(), 2)
|
||||
|
|
Loading…
Reference in New Issue