From f62ce21f50b540315a708ebfbf36878ddec9d1c4 Mon Sep 17 00:00:00 2001 From: marc Date: Sat, 5 Nov 2016 20:51:38 -0600 Subject: [PATCH] [mod] fetch supported languages for several engines utils/fetch_languages.py gets languages supported by each engine and generates engines_languages.json with each engine's supported language. --- searx/data/engines_languages.json | 3256 +++++++++++++++++++++ searx/engines/__init__.py | 6 + searx/engines/bing.py | 15 + searx/engines/bing_images.py | 2 +- searx/engines/bing_news.py | 2 +- searx/engines/dailymotion.py | 41 +- searx/engines/duckduckgo.py | 27 +- searx/engines/duckduckgo_definitions.py | 2 +- searx/engines/gigablast.py | 22 +- searx/engines/google.py | 30 +- searx/engines/google_news.py | 2 +- searx/engines/mediawiki.py | 1 - searx/engines/qwant.py | 15 +- searx/engines/startpage.py | 5 - searx/engines/subtitleseeker.py | 5 +- searx/engines/swisscows.py | 21 +- searx/engines/wikidata.py | 6 +- searx/engines/wikipedia.py | 53 +- searx/engines/yahoo.py | 20 +- searx/engines/yahoo_news.py | 2 +- searx/languages.py | 123 +- searx/webapp.py | 2 +- tests/unit/engines/test_subtitleseeker.py | 2 +- tests/unit/engines/test_wikipedia.py | 2 + utils/fetch_languages.py | 164 ++ utils/update_languages.py | 169 -- 26 files changed, 3633 insertions(+), 362 deletions(-) create mode 100644 searx/data/engines_languages.json create mode 100644 utils/fetch_languages.py delete mode 100644 utils/update_languages.py diff --git a/searx/data/engines_languages.json b/searx/data/engines_languages.json new file mode 100644 index 00000000..f94eb3aa --- /dev/null +++ b/searx/data/engines_languages.json @@ -0,0 +1,3256 @@ +{ + "google news": { + "gu": { + "name": "ગુજરાતી" + }, + "mfe": { + "name": "Kreol Morisien" + }, + "gd": { + "name": "Gàidhlig" + }, + "ga": { + "name": "Gaeilge" + }, + "gn": { + "name": "Guarani" + }, + "gl": { + "name": "Galego" + }, + "lg": { + "name": "Luganda" + }, + "la": { + "name": "Latin" + }, + "ln": { + "name": "Lingála" + }, + "tw": { + "name": "Twi" + }, + "tt": { + "name": "Tatar" + }, + "tr": { + "name": "Türkçe" + }, + "lv": { + "name": "Latviešu" + }, + "to": { + "name": "Lea Fakatonga" + }, + "lt": { + "name": "Lietuvių" + }, + "tk": { + "name": "Turkmen" + }, + "th": { + "name": "ไทย" + }, + "pcm": { + "name": "Nigerian Pidgin" + }, + "tg": { + "name": "Tajik" + }, + "te": { + "name": "తెలుగు" + }, + "haw": { + "name": "ʻŌlelo HawaiʻI" + }, + "yi": { + "name": "ייִדיש" + }, + "ceb": { + "name": "Cebuano" + }, + "yo": { + "name": "Èdè Yorùbá" + }, + "de": { + "name": "Deutsch" + }, + "ko": { + "name": "한국어" + }, + "da": { + "name": "Dansk" + }, + "crs": { + "name": "Seychellois Creole" + }, + "qu": { + "name": "Runasimi" + }, + "bem": { + "name": "Ichibemba" + }, + "xx": { + "name": "Pirate" + }, + "ban": { + "name": "Balinese" + }, + "el": { + "name": "Ελληνικά" + }, + "eo": { + "name": "Esperanto" + }, + "en": { + "name": "English" + }, + "zh": { + "name": "中文 (繁體)" + }, + "ee": { + "name": "Eʋegbe" + }, + "eu": { + "name": "Euskara" + }, + "et": { + "name": "Eesti" + }, + "es": { + "name": "Español (Latinoamérica)" + }, + "ru": { + "name": "Русский" + }, + "rw": { + "name": "Kinyarwanda" + }, + "lua": { + "name": "Luba-Lulua" + }, + "rm": { + "name": "Rumantsch" + }, + "rn": { + "name": "Ikirundi" + }, + "ro": { + "name": "Română" + }, + "be": { + "name": "Беларуская" + }, + "bg": { + "name": "Български" + }, + "uk": { + "name": "Українська" + }, + "ps": { + "name": "پښتو" + }, + "wo": { + "name": "Wolof" + }, + "bn": { + "name": "বাংলা" + }, + "jw": { + "name": "Javanese" + }, + "tum": { + "name": "Tumbuka" + }, + "br": { + "name": "Brezhoneg" + }, + "bs": { + "name": "Bosanski" + }, + "ja": { + "name": "日本語" + }, + "om": { + "name": "Oromoo" + }, + "ach": { + "name": "Acoli" + }, + "oc": { + "name": "Occitan" + }, + "kri": { + "name": "Krio (Sierra Leone)" + }, + "lo": { + "name": "ລາວ" + }, + "tlh": { + "name": "Klingon" + }, + "or": { + "name": "ଓଡ଼ିଆ" + }, + "xh": { + "name": "Xhosa" + }, + "co": { + "name": "Corsican" + }, + "nso": { + "name": "Northern Sotho" + }, + "ca": { + "name": "Català" + }, + "cy": { + "name": "Cymraeg" + }, + "cs": { + "name": "Čeština" + }, + "tn": { + "name": "Tswana" + }, + "pt": { + "name": "Português (Portugal)" + }, + "tl": { + "name": "Filipino" + }, + "chr": { + "name": "ᏣᎳᎩ" + }, + "pa": { + "name": "ਪੰਜਾਬੀ" + }, + "loz": { + "name": "Lozi" + }, + "is": { + "name": "Íslenska" + }, + "pl": { + "name": "Polski" + }, + "hy": { + "name": "Հայերեն" + }, + "hr": { + "name": "Hrvatski" + }, + "ti": { + "name": "ትግርኛ" + }, + "ht": { + "name": "Haitian Creole" + }, + "hu": { + "name": "Magyar" + }, + "hi": { + "name": "हिन्दी" + }, + "ha": { + "name": "Hausa" + }, + "gaa": { + "name": "Ga" + }, + "mg": { + "name": "Malagasy" + }, + "uz": { + "name": "O‘Zbek" + }, + "ml": { + "name": "മലയാളം" + }, + "mn": { + "name": "Монгол" + }, + "mi": { + "name": "Maori" + }, + "mk": { + "name": "Македонски" + }, + "ur": { + "name": "اردو" + }, + "mt": { + "name": "Malti" + }, + "ms": { + "name": "Bahasa Melayu" + }, + "mr": { + "name": "मराठी" + }, + "ug": { + "name": "ئۇيغۇرچە" + }, + "ta": { + "name": "தமிழ்" + }, + "my": { + "name": "ဗမာ" + }, + "af": { + "name": "Afrikaans" + }, + "vi": { + "name": "Tiếng Việt" + }, + "ak": { + "name": "Akan" + }, + "am": { + "name": "አማርኛ" + }, + "it": { + "name": "Italiano" + }, + "iw": { + "name": "עברית" + }, + "kn": { + "name": "ಕನ್ನಡ" + }, + "ar": { + "name": "العربية" + }, + "km": { + "name": "ខ្មែរ" + }, + "zu": { + "name": "Isizulu" + }, + "ia": { + "name": "Interlingua" + }, + "az": { + "name": "Azərbaycan Dili" + }, + "id": { + "name": "Indonesia" + }, + "ig": { + "name": "Igbo" + }, + "nl": { + "name": "Nederlands" + }, + "nn": { + "name": "Nynorsk" + }, + "no": { + "name": "Norsk" + }, + "ne": { + "name": "नेपाली" + }, + "ny": { + "name": "Nyanja" + }, + "nyn": { + "name": "Runyankore" + }, + "fr": { + "name": "Français" + }, + "fy": { + "name": "West-Frysk" + }, + "fa": { + "name": "فارسی" + }, + "fi": { + "name": "Suomi" + }, + "fo": { + "name": "Føroyskt" + }, + "ka": { + "name": "ქართული" + }, + "kg": { + "name": "Kongo" + }, + "ckb": { + "name": "Central Kurdish" + }, + "kk": { + "name": "Қазақ Тілі" + }, + "sr": { + "name": "Српски" + }, + "sq": { + "name": "Shqip" + }, + "sw": { + "name": "Kiswahili" + }, + "sv": { + "name": "Svenska" + }, + "su": { + "name": "Sundanese" + }, + "st": { + "name": "Southern Sotho" + }, + "sk": { + "name": "Slovenčina" + }, + "si": { + "name": "සිංහල" + }, + "so": { + "name": "Soomaali" + }, + "sn": { + "name": "Chishona" + }, + "sl": { + "name": "Slovenščina" + }, + "ky": { + "name": "Кыргызча" + }, + "sd": { + "name": "Sindhi" + } + }, + "dailymotion": { + "gv": { + "english_name": "Manx" + }, + "gu": { + "name": "ગુજરાતી", + "english_name": "Gujarati" + }, + "gd": { + "english_name": "Gaelic, Scottish" + }, + "ga": { + "name": "Gaeilge", + "english_name": "Irish" + }, + "gn": { + "english_name": "Guarani" + }, + "gl": { + "name": "Galego", + "english_name": "Galician" + }, + "lg": { + "english_name": "Ganda" + }, + "lb": { + "english_name": "Luxembourgish" + }, + "la": { + "english_name": "Latin" + }, + "ln": { + "english_name": "Lingala" + }, + "lo": { + "english_name": "Lao" + }, + "tt": { + "name": "Татарча", + "english_name": "Tatar" + }, + "tr": { + "name": "Türkçe", + "english_name": "Turkish" + }, + "ts": { + "english_name": "Tsonga" + }, + "li": { + "english_name": "Limburgan" + }, + "lv": { + "name": "Latviešu", + "english_name": "Latvian" + }, + "to": { + "english_name": "Tonga (Tonga Islands)" + }, + "lt": { + "name": "Lietuvių", + "english_name": "Lithuanian" + }, + "lu": { + "english_name": "Luba-Katanga" + }, + "tk": { + "english_name": "Turkmen" + }, + "th": { + "name": "ไทย", + "english_name": "Thai" + }, + "ti": { + "name": "ትግርኛ", + "english_name": "Tigrinya" + }, + "tg": { + "english_name": "Tajik" + }, + "te": { + "english_name": "Telugu" + }, + "ta": { + "name": "தமிழ்", + "english_name": "Tamil" + }, + "yi": { + "english_name": "Yiddish" + }, + "yo": { + "english_name": "Yoruba" + }, + "de": { + "name": "Deutsch", + "english_name": "German" + }, + "da": { + "name": "Dansk", + "english_name": "Danish" + }, + "dz": { + "english_name": "Dzongkha" + }, + "st": { + "english_name": "Sotho, Southern" + }, + "dv": { + "english_name": "Dhivehi" + }, + "qu": { + "english_name": "Quechua" + }, + "el": { + "name": "Ελληνικά", + "english_name": "Greek, Modern (1453-)" + }, + "eo": { + "name": "Esperanto", + "english_name": "Esperanto" + }, + "en": { + "english_name": "English" + }, + "zh": { + "name": "中文", + "english_name": "Chinese" + }, + "ee": { + "english_name": "Ewe" + }, + "za": { + "english_name": "Zhuang" + }, + "mh": { + "english_name": "Marshallese" + }, + "uk": { + "name": "українська", + "english_name": "Ukrainian" + }, + "eu": { + "name": "Euskara", + "english_name": "Basque" + }, + "et": { + "name": "Eesti", + "english_name": "Estonian" + }, + "es": { + "name": "Español", + "english_name": "Spanish" + }, + "ru": { + "name": "русский", + "english_name": "Russian" + }, + "rw": { + "name": "Ikinyarwanda", + "english_name": "Kinyarwanda" + }, + "rm": { + "english_name": "Romansh" + }, + "rn": { + "english_name": "Rundi" + }, + "ro": { + "name": "Română", + "english_name": "Romanian" + }, + "bn": { + "name": "বাংলা", + "english_name": "Bengali" + }, + "be": { + "english_name": "Belarusian" + }, + "bg": { + "name": "Български", + "english_name": "Bulgarian" + }, + "ba": { + "english_name": "Bashkir" + }, + "wa": { + "name": "Walon", + "english_name": "Walloon" + }, + "wo": { + "english_name": "Wolof" + }, + "bm": { + "english_name": "Bambara" + }, + "jv": { + "english_name": "Javanese" + }, + "bo": { + "english_name": "Tibetan" + }, + "bi": { + "english_name": "Bislama" + }, + "br": { + "name": "Brezhoneg", + "english_name": "Breton" + }, + "bs": { + "name": "Bosnian", + "english_name": "Bosnian" + }, + "ja": { + "name": "日本語", + "english_name": "Japanese" + }, + "om": { + "english_name": "Oromo" + }, + "oj": { + "english_name": "Ojibwa" + }, + "ty": { + "english_name": "Tahitian" + }, + "oc": { + "name": "Occitan", + "english_name": "Occitan" + }, + "tw": { + "english_name": "Twi" + }, + "os": { + "english_name": "Ossetian" + }, + "or": { + "name": "Oriya", + "english_name": "Oriya" + }, + "xh": { + "name": "Xhosa", + "english_name": "Xhosa" + }, + "ch": { + "english_name": "Chamorro" + }, + "co": { + "english_name": "Corsican" + }, + "ca": { + "name": "Català", + "english_name": "Catalan" + }, + "ce": { + "english_name": "Chechen" + }, + "cy": { + "name": "Cymraeg", + "english_name": "Welsh" + }, + "cs": { + "name": "čeština", + "english_name": "Czech" + }, + "cr": { + "english_name": "Cree" + }, + "cv": { + "english_name": "Chuvash" + }, + "cu": { + "english_name": "Slavic, Church" + }, + "ve": { + "name": "Venda", + "english_name": "Venda" + }, + "ps": { + "name": "Pushto", + "english_name": "Pushto" + }, + "pt": { + "name": "Português", + "english_name": "Portuguese" + }, + "tl": { + "english_name": "Tagalog" + }, + "pa": { + "name": "ਪੰਜਾਬੀ", + "english_name": "Panjabi" + }, + "vi": { + "name": "Tiếng Việt", + "english_name": "Vietnamese" + }, + "pi": { + "english_name": "Pali" + }, + "is": { + "name": "Íslenska", + "english_name": "Icelandic" + }, + "pl": { + "name": "polski", + "english_name": "Polish" + }, + "hz": { + "english_name": "Herero" + }, + "hy": { + "english_name": "Armenian" + }, + "hr": { + "name": "hrvatski", + "english_name": "Croatian" + }, + "iu": { + "english_name": "Inuktitut" + }, + "ht": { + "english_name": "Haitian" + }, + "hu": { + "name": "magyar", + "english_name": "Hungarian" + }, + "hi": { + "name": "हिंदी", + "english_name": "Hindi" + }, + "ho": { + "english_name": "Hiri Motu" + }, + "ha": { + "english_name": "Hausa" + }, + "he": { + "name": "עברית", + "english_name": "Hebrew" + }, + "mg": { + "english_name": "Malagasy" + }, + "uz": { + "english_name": "Uzbek" + }, + "ml": { + "english_name": "Malayalam" + }, + "mn": { + "name": "Монгол", + "english_name": "Mongolian" + }, + "mi": { + "name": "Reo Māori", + "english_name": "Maori" + }, + "ik": { + "english_name": "Inupiaq" + }, + "mk": { + "name": "Македонски", + "english_name": "Macedonian" + }, + "ur": { + "english_name": "Urdu" + }, + "mt": { + "name": "Malti", + "english_name": "Maltese" + }, + "ms": { + "name": "Malay", + "english_name": "Malay" + }, + "mr": { + "name": "मराठी", + "english_name": "Marathi" + }, + "ug": { + "english_name": "Uighur" + }, + "my": { + "english_name": "Burmese" + }, + "sq": { + "english_name": "Albanian" + }, + "ae": { + "english_name": "Avestan" + }, + "ss": { + "english_name": "Swati" + }, + "af": { + "name": "Afrikaans", + "english_name": "Afrikaans" + }, + "tn": { + "english_name": "Tswana" + }, + "sw": { + "english_name": "Swahili (macrolanguage)" + }, + "ak": { + "english_name": "Akan" + }, + "am": { + "name": "አማርኛ", + "english_name": "Amharic" + }, + "it": { + "name": "Italiano", + "english_name": "Italian" + }, + "an": { + "english_name": "Aragonese" + }, + "ii": { + "english_name": "Yi, Sichuan" + }, + "ia": { + "english_name": "Interlingua" + }, + "as": { + "english_name": "Assamese" + }, + "ar": { + "name": "العربية", + "english_name": "Arabic" + }, + "su": { + "english_name": "Sundanese" + }, + "io": { + "english_name": "Ido" + }, + "av": { + "english_name": "Avaric" + }, + "ay": { + "english_name": "Aymara" + }, + "az": { + "name": "Azerbaijani", + "english_name": "Azerbaijani" + }, + "ie": { + "english_name": "Interlingue" + }, + "id": { + "name": "Indonesian", + "english_name": "Indonesian" + }, + "ig": { + "english_name": "Igbo" + }, + "sk": { + "name": "Slovenský", + "english_name": "Slovak" + }, + "sr": { + "name": "српски", + "english_name": "Serbian" + }, + "nl": { + "name": "Nederlands", + "english_name": "Dutch" + }, + "nn": { + "name": "Norwegian Nynorsk", + "english_name": "Norwegian Nynorsk" + }, + "no": { + "english_name": "Norwegian" + }, + "na": { + "english_name": "Nauru" + }, + "nb": { + "name": "Norwegian Bokmål", + "english_name": "Norwegian Bokmål" + }, + "nd": { + "english_name": "Ndebele, North" + }, + "ne": { + "english_name": "Nepali (macrolanguage)" + }, + "ng": { + "english_name": "Ndonga" + }, + "ny": { + "english_name": "Nyanja" + }, + "vo": { + "english_name": "Volapük" + }, + "zu": { + "name": "Isi-Zulu", + "english_name": "Zulu" + }, + "so": { + "english_name": "Somali" + }, + "nr": { + "english_name": "Ndebele, South" + }, + "nv": { + "english_name": "Navajo" + }, + "sn": { + "english_name": "Shona" + }, + "fr": { + "name": "français", + "english_name": "French" + }, + "sm": { + "english_name": "Samoan" + }, + "fy": { + "english_name": "Frisian, Western" + }, + "sv": { + "name": "Svenska", + "english_name": "Swedish" + }, + "fa": { + "name": "فارسی", + "english_name": "Persian" + }, + "ff": { + "english_name": "Fulah" + }, + "fi": { + "name": "suomi", + "english_name": "Finnish" + }, + "fj": { + "english_name": "Fijian" + }, + "sa": { + "english_name": "Sanskrit" + }, + "fo": { + "english_name": "Faroese" + }, + "ka": { + "english_name": "Georgian" + }, + "kg": { + "english_name": "Kongo" + }, + "kk": { + "english_name": "Kazakh" + }, + "kj": { + "english_name": "Kuanyama" + }, + "ki": { + "english_name": "Kikuyu" + }, + "ko": { + "name": "한국어", + "english_name": "Korean" + }, + "kn": { + "name": "ಕನ್ನಡ", + "english_name": "Kannada" + }, + "km": { + "english_name": "Khmer, Central" + }, + "kl": { + "english_name": "Kalaallisut" + }, + "ks": { + "english_name": "Kashmiri" + }, + "kr": { + "english_name": "Kanuri" + }, + "si": { + "english_name": "Sinhala" + }, + "sh": { + "name": "Serbo-Croatian", + "english_name": "Serbo-Croatian" + }, + "kw": { + "english_name": "Cornish" + }, + "kv": { + "english_name": "Komi" + }, + "ku": { + "english_name": "Kurdish" + }, + "sl": { + "name": "slovenščina", + "english_name": "Slovenian" + }, + "sc": { + "english_name": "Sardinian" + }, + "ky": { + "english_name": "Kirghiz" + }, + "sg": { + "english_name": "Sango" + }, + "se": { + "english_name": "Sami, Northern" + }, + "sd": { + "english_name": "Sindhi" + } + }, + "google": { + "gu": { + "name": "ગુજરાતી" + }, + "mfe": { + "name": "Kreol Morisien" + }, + "gd": { + "name": "Gàidhlig" + }, + "ga": { + "name": "Gaeilge" + }, + "gn": { + "name": "Guarani" + }, + "gl": { + "name": "Galego" + }, + "lg": { + "name": "Luganda" + }, + "la": { + "name": "Latin" + }, + "ln": { + "name": "Lingála" + }, + "tw": { + "name": "Twi" + }, + "tt": { + "name": "Tatar" + }, + "tr": { + "name": "Türkçe" + }, + "lv": { + "name": "Latviešu" + }, + "to": { + "name": "Lea Fakatonga" + }, + "lt": { + "name": "Lietuvių" + }, + "tk": { + "name": "Turkmen" + }, + "th": { + "name": "ไทย" + }, + "pcm": { + "name": "Nigerian Pidgin" + }, + "tg": { + "name": "Tajik" + }, + "te": { + "name": "తెలుగు" + }, + "haw": { + "name": "ʻŌlelo HawaiʻI" + }, + "yi": { + "name": "ייִדיש" + }, + "ceb": { + "name": "Cebuano" + }, + "yo": { + "name": "Èdè Yorùbá" + }, + "de": { + "name": "Deutsch" + }, + "ko": { + "name": "한국어" + }, + "da": { + "name": "Dansk" + }, + "crs": { + "name": "Seychellois Creole" + }, + "qu": { + "name": "Runasimi" + }, + "bem": { + "name": "Ichibemba" + }, + "xx": { + "name": "Pirate" + }, + "ban": { + "name": "Balinese" + }, + "el": { + "name": "Ελληνικά" + }, + "eo": { + "name": "Esperanto" + }, + "en": { + "name": "English" + }, + "zh": { + "name": "中文 (繁體)" + }, + "ee": { + "name": "Eʋegbe" + }, + "eu": { + "name": "Euskara" + }, + "et": { + "name": "Eesti" + }, + "es": { + "name": "Español (Latinoamérica)" + }, + "ru": { + "name": "Русский" + }, + "rw": { + "name": "Kinyarwanda" + }, + "lua": { + "name": "Luba-Lulua" + }, + "rm": { + "name": "Rumantsch" + }, + "rn": { + "name": "Ikirundi" + }, + "ro": { + "name": "Română" + }, + "be": { + "name": "Беларуская" + }, + "bg": { + "name": "Български" + }, + "uk": { + "name": "Українська" + }, + "ps": { + "name": "پښتو" + }, + "wo": { + "name": "Wolof" + }, + "bn": { + "name": "বাংলা" + }, + "jw": { + "name": "Javanese" + }, + "tum": { + "name": "Tumbuka" + }, + "br": { + "name": "Brezhoneg" + }, + "bs": { + "name": "Bosanski" + }, + "ja": { + "name": "日本語" + }, + "om": { + "name": "Oromoo" + }, + "ach": { + "name": "Acoli" + }, + "oc": { + "name": "Occitan" + }, + "kri": { + "name": "Krio (Sierra Leone)" + }, + "lo": { + "name": "ລາວ" + }, + "tlh": { + "name": "Klingon" + }, + "or": { + "name": "ଓଡ଼ିଆ" + }, + "xh": { + "name": "Xhosa" + }, + "co": { + "name": "Corsican" + }, + "nso": { + "name": "Northern Sotho" + }, + "ca": { + "name": "Català" + }, + "cy": { + "name": "Cymraeg" + }, + "cs": { + "name": "Čeština" + }, + "tn": { + "name": "Tswana" + }, + "pt": { + "name": "Português (Portugal)" + }, + "tl": { + "name": "Filipino" + }, + "chr": { + "name": "ᏣᎳᎩ" + }, + "pa": { + "name": "ਪੰਜਾਬੀ" + }, + "loz": { + "name": "Lozi" + }, + "is": { + "name": "Íslenska" + }, + "pl": { + "name": "Polski" + }, + "hy": { + "name": "Հայերեն" + }, + "hr": { + "name": "Hrvatski" + }, + "ti": { + "name": "ትግርኛ" + }, + "ht": { + "name": "Haitian Creole" + }, + "hu": { + "name": "Magyar" + }, + "hi": { + "name": "हिन्दी" + }, + "ha": { + "name": "Hausa" + }, + "gaa": { + "name": "Ga" + }, + "mg": { + "name": "Malagasy" + }, + "uz": { + "name": "O‘Zbek" + }, + "ml": { + "name": "മലയാളം" + }, + "mn": { + "name": "Монгол" + }, + "mi": { + "name": "Maori" + }, + "mk": { + "name": "Македонски" + }, + "ur": { + "name": "اردو" + }, + "mt": { + "name": "Malti" + }, + "ms": { + "name": "Bahasa Melayu" + }, + "mr": { + "name": "मराठी" + }, + "ug": { + "name": "ئۇيغۇرچە" + }, + "ta": { + "name": "தமிழ்" + }, + "my": { + "name": "ဗမာ" + }, + "af": { + "name": "Afrikaans" + }, + "vi": { + "name": "Tiếng Việt" + }, + "ak": { + "name": "Akan" + }, + "am": { + "name": "አማርኛ" + }, + "it": { + "name": "Italiano" + }, + "iw": { + "name": "עברית" + }, + "kn": { + "name": "ಕನ್ನಡ" + }, + "ar": { + "name": "العربية" + }, + "km": { + "name": "ខ្មែរ" + }, + "zu": { + "name": "Isizulu" + }, + "ia": { + "name": "Interlingua" + }, + "az": { + "name": "Azərbaycan Dili" + }, + "id": { + "name": "Indonesia" + }, + "ig": { + "name": "Igbo" + }, + "nl": { + "name": "Nederlands" + }, + "nn": { + "name": "Nynorsk" + }, + "no": { + "name": "Norsk" + }, + "ne": { + "name": "नेपाली" + }, + "ny": { + "name": "Nyanja" + }, + "nyn": { + "name": "Runyankore" + }, + "fr": { + "name": "Français" + }, + "fy": { + "name": "West-Frysk" + }, + "fa": { + "name": "فارسی" + }, + "fi": { + "name": "Suomi" + }, + "fo": { + "name": "Føroyskt" + }, + "ka": { + "name": "ქართული" + }, + "kg": { + "name": "Kongo" + }, + "ckb": { + "name": "Central Kurdish" + }, + "kk": { + "name": "Қазақ Тілі" + }, + "sr": { + "name": "Српски" + }, + "sq": { + "name": "Shqip" + }, + "sw": { + "name": "Kiswahili" + }, + "sv": { + "name": "Svenska" + }, + "su": { + "name": "Sundanese" + }, + "st": { + "name": "Southern Sotho" + }, + "sk": { + "name": "Slovenčina" + }, + "si": { + "name": "සිංහල" + }, + "so": { + "name": "Soomaali" + }, + "sn": { + "name": "Chishona" + }, + "sl": { + "name": "Slovenščina" + }, + "ky": { + "name": "Кыргызча" + }, + "sd": { + "name": "Sindhi" + } + }, + "duckduckgo": [ + "da-DK", + "vi-VN", + "en-SG", + "sl-SL", + "en-XA", + "tzh-HK", + "en-UK", + "ro-RO", + "en-MY", + "el-GR", + "it-CH", + "hu-HU", + "fr-FR", + "en-PH", + "tl-PH", + "fr-CA", + "fi-FI", + "et-EE", + "sv-SE", + "es-XL", + "th-TH", + "sk-SK", + "es-ES", + "en-IE", + "es-US", + "es-PE", + "nl-NL", + "en-US", + "de-DE", + "de-AT", + "wt-WT", + "no-NO", + "tr-TR", + "ca-ES", + "it-IT", + "es-CO", + "ru-RU", + "ca-CT", + "en-ZA", + "en-CA", + "jp-JP", + "es-MX", + "id-ID", + "es-AR", + "he-IL", + "kr-KR", + "en-AU", + "ms-MY", + "pl-PL", + "lv-LV", + "bg-BG", + "zh-CN", + "en-NZ", + "lt-LT", + "tzh-TW", + "hr-HR", + "pt-PT", + "fr-BE", + "de-CH", + "cs-CZ", + "en-IN", + "nl-BE", + "fr-CH", + "en-ID", + "ar-XA", + "pt-BR", + "uk-UA", + "es-CL" + ], + "bing": [ + "sq", + "de", + "ar", + "bg", + "ca", + "cs", + "zh-CHS", + "zh-CHT", + "ko", + "hr", + "da", + "sk", + "sl", + "es", + "et", + "fi", + "fr", + "el", + "he", + "nl", + "hu", + "id", + "en", + "is", + "it", + "ja", + "lv", + "lt", + "ms", + "nb", + "fa", + "pl", + "pt-BR", + "pt-PT", + "ro", + "ru", + "sr", + "sv", + "th", + "tr", + "uk", + "vi" + ], + "wikipedia": { + "sco": { + "articles": 41754, + "name": "Scots", + "english_name": "Scots" + }, + "scn": { + "articles": 25373, + "name": "Sicilianu", + "english_name": "Sicilian" + }, + "gu": { + "articles": 26690, + "name": "ગુજરાતી", + "english_name": "Gujarati" + }, + "gd": { + "articles": 14246, + "name": "Gàidhlig", + "english_name": "Scottish Gaelic" + }, + "ga": { + "articles": 38828, + "name": "Gaeilge", + "english_name": "Irish" + }, + "gl": { + "articles": 134667, + "name": "Galego", + "english_name": "Galician" + }, + "als": { + "articles": 22337, + "name": "Alemannisch", + "english_name": "Alemannic" + }, + "lb": { + "articles": 47306, + "name": "Lëtzebuergesch", + "english_name": "Luxembourgish" + }, + "la": { + "articles": 125689, + "name": "Latina", + "english_name": "Latin" + }, + "tt": { + "articles": 69902, + "name": "Tatarça / Татарча", + "english_name": "Tatar" + }, + "tr": { + "articles": 287403, + "name": "Türkçe", + "english_name": "Turkish" + }, + "li": { + "articles": 11552, + "name": "Limburgs", + "english_name": "Limburgish" + }, + "lv": { + "articles": 74333, + "name": "Latviešu", + "english_name": "Latvian" + }, + "tl": { + "articles": 65681, + "name": "Tagalog", + "english_name": "Tagalog" + }, + "vec": { + "articles": 10855, + "name": "Vèneto", + "english_name": "Venetian" + }, + "th": { + "articles": 113214, + "name": "ไทย", + "english_name": "Thai" + }, + "tg": { + "articles": 67389, + "name": "Тоҷикӣ", + "english_name": "Tajik" + }, + "te": { + "articles": 66207, + "name": "తెలుగు", + "english_name": "Telugu" + }, + "ta": { + "articles": 89565, + "name": "தமிழ்", + "english_name": "Tamil" + }, + "yi": { + "articles": 13590, + "name": "ייִדיש", + "english_name": "Yiddish" + }, + "ceb": { + "articles": 3525383, + "name": "Sinugboanong Binisaya", + "english_name": "Cebuano" + }, + "yo": { + "articles": 31493, + "name": "Yorùbá", + "english_name": "Yoruba" + }, + "de": { + "articles": 2008971, + "name": "Deutsch", + "english_name": "German" + }, + "da": { + "articles": 221798, + "name": "Dansk", + "english_name": "Danish" + }, + "qu": { + "articles": 19808, + "name": "Runa Simi", + "english_name": "Quechua" + }, + "bar": { + "articles": 21966, + "name": "Boarisch", + "english_name": "Bavarian" + }, + "kn": { + "articles": 21617, + "name": "ಕನ್ನಡ", + "english_name": "Kannada" + }, + "bpy": { + "articles": 25067, + "name": "ইমার ঠার/বিষ্ণুপ্রিয়া মণিপুরী", + "english_name": "Bishnupriya Manipuri" + }, + "el": { + "articles": 124348, + "name": "Ελληνικά", + "english_name": "Greek" + }, + "eo": { + "articles": 235567, + "name": "Esperanto", + "english_name": "Esperanto" + }, + "en": { + "articles": 5307436, + "name": "English", + "english_name": "English" + }, + "zh": { + "articles": 915298, + "name": "中文", + "english_name": "Chinese" + }, + "pms": { + "articles": 63988, + "name": "Piemontèis", + "english_name": "Piedmontese" + }, + "arz": { + "articles": 16098, + "name": "مصرى (Maṣri)", + "english_name": "Egyptian Arabic" + }, + "eu": { + "articles": 261846, + "name": "Euskara", + "english_name": "Basque" + }, + "et": { + "articles": 151580, + "name": "Eesti", + "english_name": "Estonian" + }, + "es": { + "articles": 1301725, + "name": "Español", + "english_name": "Spanish" + }, + "ba": { + "articles": 36610, + "name": "Башҡорт", + "english_name": "Bashkir" + }, + "ru": { + "articles": 1359173, + "name": "Русский", + "english_name": "Russian" + }, + "new": { + "articles": 72175, + "name": "नेपाल भाषा", + "english_name": "Newar" + }, + "ro": { + "articles": 373067, + "name": "Română", + "english_name": "Romanian" + }, + "jv": { + "articles": 49675, + "name": "Basa Jawa", + "english_name": "Javanese" + }, + "hsb": { + "articles": 10908, + "name": "Hornjoserbsce", + "english_name": "Upper Sorbian" + }, + "be": { + "articles": 123470, + "name": "Беларуская", + "english_name": "Belarusian" + }, + "bg": { + "articles": 223701, + "name": "Български", + "english_name": "Bulgarian" + }, + "uk": { + "articles": 666877, + "name": "Українська", + "english_name": "Ukrainian" + }, + "wa": { + "articles": 14312, + "name": "Walon", + "english_name": "Walloon" + }, + "ast": { + "articles": 47712, + "name": "Asturianu", + "english_name": "Asturian" + }, + "bn": { + "articles": 46038, + "name": "বাংলা", + "english_name": "Bengali" + }, + "map-bms": { + "articles": 13275, + "name": "Basa Banyumasan", + "english_name": "Banyumasan" + }, + "br": { + "articles": 60624, + "name": "Brezhoneg", + "english_name": "Breton" + }, + "bs": { + "articles": 72057, + "name": "Bosanski", + "english_name": "Bosnian" + }, + "ja": { + "articles": 1041538, + "name": "日本語", + "english_name": "Japanese" + }, + "oc": { + "articles": 84521, + "name": "Occitan", + "english_name": "Occitan" + }, + "be-tarask": { + "articles": 59872, + "name": "Беларуская (тарашкевіца)", + "english_name": "Belarusian (Taraškievica)" + }, + "nds": { + "articles": 25732, + "name": "Plattdüütsch", + "english_name": "Low Saxon" + }, + "os": { + "articles": 10293, + "name": "Иронау", + "english_name": "Ossetian" + }, + "or": { + "articles": 11703, + "name": "ଓଡ଼ିଆ", + "english_name": "Oriya" + }, + "simple": { + "articles": 121809, + "name": "Simple English", + "english_name": "Simple English" + }, + "ca": { + "articles": 528658, + "name": "Català", + "english_name": "Catalan" + }, + "lmo": { + "articles": 34556, + "name": "Lumbaart", + "english_name": "Lombard" + }, + "ce": { + "articles": 158845, + "name": "Нохчийн", + "english_name": "Chechen" + }, + "cy": { + "articles": 89271, + "name": "Cymraeg", + "english_name": "Welsh" + }, + "cs": { + "articles": 369023, + "name": "Čeština", + "english_name": "Czech" + }, + "cv": { + "articles": 36500, + "name": "Чăваш", + "english_name": "Chuvash" + }, + "pt": { + "articles": 949039, + "name": "Português", + "english_name": "Portuguese" + }, + "lt": { + "articles": 180372, + "name": "Lietuvių", + "english_name": "Lithuanian" + }, + "zh-min-nan": { + "articles": 201851, + "name": "Bân-lâm-gú", + "english_name": "Min Nan" + }, + "pa": { + "articles": 24065, + "name": "ਪੰਜਾਬੀ", + "english_name": "Punjabi" + }, + "war": { + "articles": 1261969, + "name": "Winaray", + "english_name": "Waray-Waray" + }, + "pl": { + "articles": 1197444, + "name": "Polski", + "english_name": "Polish" + }, + "hy": { + "articles": 212704, + "name": "Հայերեն", + "english_name": "Armenian" + }, + "an": { + "articles": 31832, + "name": "Aragonés", + "english_name": "Aragonese" + }, + "hr": { + "articles": 171042, + "name": "Hrvatski", + "english_name": "Croatian" + }, + "ht": { + "articles": 51108, + "name": "Krèyol ayisyen", + "english_name": "Haitian" + }, + "hu": { + "articles": 399859, + "name": "Magyar", + "english_name": "Hungarian" + }, + "bat-smg": { + "articles": 15940, + "name": "Žemaitėška", + "english_name": "Samogitian" + }, + "hi": { + "articles": 114388, + "name": "हिन्दी", + "english_name": "Hindi" + }, + "pnb": { + "articles": 42659, + "name": "شاہ مکھی پنجابی (Shāhmukhī Pañjābī)", + "english_name": "Western Punjabi" + }, + "bug": { + "articles": 14116, + "name": "Basa Ugi", + "english_name": "Buginese" + }, + "he": { + "articles": 199202, + "name": "עברית", + "english_name": "Hebrew" + }, + "mg": { + "articles": 82826, + "name": "Malagasy", + "english_name": "Malagasy" + }, + "uz": { + "articles": 128742, + "name": "O‘zbek", + "english_name": "Uzbek" + }, + "ml": { + "articles": 46792, + "name": "മലയാളം", + "english_name": "Malayalam" + }, + "azb": { + "articles": 11813, + "name": "تۆرکجه", + "english_name": "South Azerbaijani" + }, + "mn": { + "articles": 16281, + "name": "Монгол", + "english_name": "Mongolian" + }, + "mk": { + "articles": 87527, + "name": "Македонски", + "english_name": "Macedonian" + }, + "ur": { + "articles": 110767, + "name": "اردو", + "english_name": "Urdu" + }, + "ms": { + "articles": 286177, + "name": "Bahasa Melayu", + "english_name": "Malay" + }, + "mr": { + "articles": 45049, + "name": "मराठी", + "english_name": "Marathi" + }, + "my": { + "articles": 33571, + "name": "မြန်မာဘာသာ", + "english_name": "Burmese" + }, + "sah": { + "articles": 10965, + "name": "Саха тыла (Saxa Tyla)", + "english_name": "Sakha" + }, + "af": { + "articles": 42949, + "name": "Afrikaans", + "english_name": "Afrikaans" + }, + "vi": { + "articles": 1151564, + "name": "Tiếng Việt", + "english_name": "Vietnamese" + }, + "is": { + "articles": 41500, + "name": "Íslenska", + "english_name": "Icelandic" + }, + "am": { + "articles": 13291, + "name": "አማርኛ", + "english_name": "Amharic" + }, + "it": { + "articles": 1317506, + "name": "Italiano", + "english_name": "Italian" + }, + "vo": { + "articles": 120413, + "name": "Volapük", + "english_name": "Volapük" + }, + "ar": { + "articles": 453499, + "name": "العربية", + "english_name": "Arabic" + }, + "io": { + "articles": 26845, + "name": "Ido", + "english_name": "Ido" + }, + "ia": { + "articles": 19784, + "name": "Interlingua", + "english_name": "Interlingua" + }, + "az": { + "articles": 111474, + "name": "Azərbaycanca", + "english_name": "Azerbaijani" + }, + "id": { + "articles": 390200, + "name": "Bahasa Indonesia", + "english_name": "Indonesian" + }, + "nl": { + "articles": 1885741, + "name": "Nederlands", + "english_name": "Dutch" + }, + "nn": { + "articles": 131696, + "name": "Nynorsk", + "english_name": "Norwegian (Nynorsk)" + }, + "no": { + "articles": 458147, + "name": "Norsk (Bokmål)", + "english_name": "Norwegian (Bokmål)" + }, + "nah": { + "articles": 10428, + "name": "Nāhuatl", + "english_name": "Nahuatl" + }, + "ne": { + "articles": 29164, + "name": "नेपाली", + "english_name": "Nepali" + }, + "nap": { + "articles": 14400, + "name": "Nnapulitano", + "english_name": "Neapolitan" + }, + "fr": { + "articles": 1822985, + "name": "Français", + "english_name": "French" + }, + "mrj": { + "articles": 10164, + "name": "Кырык Мары (Kyryk Mary)", + "english_name": "Hill Mari" + }, + "zh-yue": { + "articles": 49352, + "name": "粵語", + "english_name": "Cantonese" + }, + "fy": { + "articles": 36464, + "name": "Frysk", + "english_name": "West Frisian" + }, + "fa": { + "articles": 516569, + "name": "فارسی", + "english_name": "Persian" + }, + "fi": { + "articles": 405166, + "name": "Suomi", + "english_name": "Finnish" + }, + "mzn": { + "articles": 12362, + "name": "مَزِروني", + "english_name": "Mazandarani" + }, + "sa": { + "articles": 10198, + "name": "संस्कृतम्", + "english_name": "Sanskrit" + }, + "fo": { + "articles": 12370, + "name": "Føroyskt", + "english_name": "Faroese" + }, + "ka": { + "articles": 111155, + "name": "ქართული", + "english_name": "Georgian" + }, + "ckb": { + "articles": 18217, + "name": "Soranî / کوردی", + "english_name": "Sorani" + }, + "kk": { + "articles": 217477, + "name": "Қазақша", + "english_name": "Kazakh" + }, + "sr": { + "articles": 342497, + "name": "Српски / Srpski", + "english_name": "Serbian" + }, + "sq": { + "articles": 62437, + "name": "Shqip", + "english_name": "Albanian" + }, + "min": { + "articles": 221961, + "name": "Minangkabau", + "english_name": "Minangkabau" + }, + "ko": { + "articles": 367127, + "name": "한국어", + "english_name": "Korean" + }, + "sv": { + "articles": 3783326, + "name": "Svenska", + "english_name": "Swedish" + }, + "su": { + "articles": 19163, + "name": "Basa Sunda", + "english_name": "Sundanese" + }, + "sk": { + "articles": 215360, + "name": "Slovenčina", + "english_name": "Slovak" + }, + "si": { + "articles": 12832, + "name": "සිංහල", + "english_name": "Sinhalese" + }, + "sh": { + "articles": 436526, + "name": "Srpskohrvatski / Српскохрватски", + "english_name": "Serbo-Croatian" + }, + "ku": { + "articles": 22367, + "name": "Kurdî / كوردی", + "english_name": "Kurdish" + }, + "sl": { + "articles": 153978, + "name": "Slovenščina", + "english_name": "Slovenian" + }, + "ky": { + "articles": 59677, + "name": "Кыргызча", + "english_name": "Kirghiz" + }, + "sw": { + "articles": 34773, + "name": "Kiswahili", + "english_name": "Swahili" + } + }, + "bing news": [ + "sq", + "de", + "ar", + "bg", + "ca", + "cs", + "zh-CHS", + "zh-CHT", + "ko", + "hr", + "da", + "sk", + "sl", + "es", + "et", + "fi", + "fr", + "el", + "he", + "nl", + "hu", + "id", + "en", + "is", + "it", + "ja", + "lv", + "lt", + "ms", + "nb", + "fa", + "pl", + "pt-BR", + "pt-PT", + "ro", + "ru", + "sr", + "sv", + "th", + "tr", + "uk", + "vi" + ], + "yahoo news": [ + "ar", + "bg", + "zh_chs", + "zh_cht", + "hr", + "cs", + "da", + "nl", + "en", + "et", + "fi", + "fr", + "de", + "el", + "he", + "hu", + "it", + "ja", + "ko", + "lv", + "lt", + "no", + "pl", + "pt", + "ro", + "ru", + "sk", + "sl", + "es", + "sv", + "th", + "tr" + ], + "swisscows": [ + "browser", + "ar-SA", + "es-AR", + "en-AU", + "de-AT", + "fr-BE", + "nl-BE", + "pt-BR", + "en-CA", + "fr-CA", + "es-CL", + "zh-CN", + "da-DK", + "fi-FI", + "fr-FR", + "de-DE", + "zh-HK", + "en-IN", + "en-IE", + "it-IT", + "ja-JP", + "ko-KR", + "en-MY", + "es-MX", + "nl-NL", + "en-NZ", + "nb-NO", + "en-PH", + "pl-PL", + "pt-PT", + "ru-RU", + "en-ZA", + "es-ES", + "sv-SE", + "de-CH", + "fr-CH", + "zh-TW", + "tr-TR", + "en-GB", + "en-US", + "es-US" + ], + "wikidata": { + "sco": { + "articles": 41754, + "name": "Scots", + "english_name": "Scots" + }, + "scn": { + "articles": 25373, + "name": "Sicilianu", + "english_name": "Sicilian" + }, + "gu": { + "articles": 26690, + "name": "ગુજરાતી", + "english_name": "Gujarati" + }, + "gd": { + "articles": 14246, + "name": "Gàidhlig", + "english_name": "Scottish Gaelic" + }, + "ga": { + "articles": 38828, + "name": "Gaeilge", + "english_name": "Irish" + }, + "gl": { + "articles": 134667, + "name": "Galego", + "english_name": "Galician" + }, + "als": { + "articles": 22337, + "name": "Alemannisch", + "english_name": "Alemannic" + }, + "lb": { + "articles": 47306, + "name": "Lëtzebuergesch", + "english_name": "Luxembourgish" + }, + "la": { + "articles": 125689, + "name": "Latina", + "english_name": "Latin" + }, + "tt": { + "articles": 69902, + "name": "Tatarça / Татарча", + "english_name": "Tatar" + }, + "tr": { + "articles": 287403, + "name": "Türkçe", + "english_name": "Turkish" + }, + "li": { + "articles": 11552, + "name": "Limburgs", + "english_name": "Limburgish" + }, + "lv": { + "articles": 74333, + "name": "Latviešu", + "english_name": "Latvian" + }, + "tl": { + "articles": 65681, + "name": "Tagalog", + "english_name": "Tagalog" + }, + "vec": { + "articles": 10855, + "name": "Vèneto", + "english_name": "Venetian" + }, + "th": { + "articles": 113214, + "name": "ไทย", + "english_name": "Thai" + }, + "tg": { + "articles": 67389, + "name": "Тоҷикӣ", + "english_name": "Tajik" + }, + "te": { + "articles": 66207, + "name": "తెలుగు", + "english_name": "Telugu" + }, + "ta": { + "articles": 89565, + "name": "தமிழ்", + "english_name": "Tamil" + }, + "yi": { + "articles": 13590, + "name": "ייִדיש", + "english_name": "Yiddish" + }, + "ceb": { + "articles": 3525383, + "name": "Sinugboanong Binisaya", + "english_name": "Cebuano" + }, + "yo": { + "articles": 31493, + "name": "Yorùbá", + "english_name": "Yoruba" + }, + "de": { + "articles": 2008971, + "name": "Deutsch", + "english_name": "German" + }, + "da": { + "articles": 221798, + "name": "Dansk", + "english_name": "Danish" + }, + "qu": { + "articles": 19808, + "name": "Runa Simi", + "english_name": "Quechua" + }, + "bar": { + "articles": 21966, + "name": "Boarisch", + "english_name": "Bavarian" + }, + "kn": { + "articles": 21617, + "name": "ಕನ್ನಡ", + "english_name": "Kannada" + }, + "bpy": { + "articles": 25067, + "name": "ইমার ঠার/বিষ্ণুপ্রিয়া মণিপুরী", + "english_name": "Bishnupriya Manipuri" + }, + "el": { + "articles": 124348, + "name": "Ελληνικά", + "english_name": "Greek" + }, + "eo": { + "articles": 235567, + "name": "Esperanto", + "english_name": "Esperanto" + }, + "en": { + "articles": 5307436, + "name": "English", + "english_name": "English" + }, + "zh": { + "articles": 915298, + "name": "中文", + "english_name": "Chinese" + }, + "pms": { + "articles": 63988, + "name": "Piemontèis", + "english_name": "Piedmontese" + }, + "arz": { + "articles": 16098, + "name": "مصرى (Maṣri)", + "english_name": "Egyptian Arabic" + }, + "eu": { + "articles": 261846, + "name": "Euskara", + "english_name": "Basque" + }, + "et": { + "articles": 151580, + "name": "Eesti", + "english_name": "Estonian" + }, + "es": { + "articles": 1301725, + "name": "Español", + "english_name": "Spanish" + }, + "ba": { + "articles": 36610, + "name": "Башҡорт", + "english_name": "Bashkir" + }, + "ru": { + "articles": 1359173, + "name": "Русский", + "english_name": "Russian" + }, + "new": { + "articles": 72175, + "name": "नेपाल भाषा", + "english_name": "Newar" + }, + "ro": { + "articles": 373067, + "name": "Română", + "english_name": "Romanian" + }, + "jv": { + "articles": 49675, + "name": "Basa Jawa", + "english_name": "Javanese" + }, + "hsb": { + "articles": 10908, + "name": "Hornjoserbsce", + "english_name": "Upper Sorbian" + }, + "be": { + "articles": 123470, + "name": "Беларуская", + "english_name": "Belarusian" + }, + "bg": { + "articles": 223701, + "name": "Български", + "english_name": "Bulgarian" + }, + "uk": { + "articles": 666877, + "name": "Українська", + "english_name": "Ukrainian" + }, + "wa": { + "articles": 14312, + "name": "Walon", + "english_name": "Walloon" + }, + "ast": { + "articles": 47712, + "name": "Asturianu", + "english_name": "Asturian" + }, + "bn": { + "articles": 46038, + "name": "বাংলা", + "english_name": "Bengali" + }, + "map-bms": { + "articles": 13275, + "name": "Basa Banyumasan", + "english_name": "Banyumasan" + }, + "br": { + "articles": 60624, + "name": "Brezhoneg", + "english_name": "Breton" + }, + "bs": { + "articles": 72057, + "name": "Bosanski", + "english_name": "Bosnian" + }, + "ja": { + "articles": 1041538, + "name": "日本語", + "english_name": "Japanese" + }, + "oc": { + "articles": 84521, + "name": "Occitan", + "english_name": "Occitan" + }, + "be-tarask": { + "articles": 59872, + "name": "Беларуская (тарашкевіца)", + "english_name": "Belarusian (Taraškievica)" + }, + "nds": { + "articles": 25732, + "name": "Plattdüütsch", + "english_name": "Low Saxon" + }, + "os": { + "articles": 10293, + "name": "Иронау", + "english_name": "Ossetian" + }, + "or": { + "articles": 11703, + "name": "ଓଡ଼ିଆ", + "english_name": "Oriya" + }, + "simple": { + "articles": 121809, + "name": "Simple English", + "english_name": "Simple English" + }, + "ca": { + "articles": 528658, + "name": "Català", + "english_name": "Catalan" + }, + "lmo": { + "articles": 34556, + "name": "Lumbaart", + "english_name": "Lombard" + }, + "ce": { + "articles": 158845, + "name": "Нохчийн", + "english_name": "Chechen" + }, + "cy": { + "articles": 89271, + "name": "Cymraeg", + "english_name": "Welsh" + }, + "cs": { + "articles": 369023, + "name": "Čeština", + "english_name": "Czech" + }, + "cv": { + "articles": 36500, + "name": "Чăваш", + "english_name": "Chuvash" + }, + "pt": { + "articles": 949039, + "name": "Português", + "english_name": "Portuguese" + }, + "lt": { + "articles": 180372, + "name": "Lietuvių", + "english_name": "Lithuanian" + }, + "zh-min-nan": { + "articles": 201851, + "name": "Bân-lâm-gú", + "english_name": "Min Nan" + }, + "pa": { + "articles": 24065, + "name": "ਪੰਜਾਬੀ", + "english_name": "Punjabi" + }, + "war": { + "articles": 1261969, + "name": "Winaray", + "english_name": "Waray-Waray" + }, + "pl": { + "articles": 1197444, + "name": "Polski", + "english_name": "Polish" + }, + "hy": { + "articles": 212704, + "name": "Հայերեն", + "english_name": "Armenian" + }, + "an": { + "articles": 31832, + "name": "Aragonés", + "english_name": "Aragonese" + }, + "hr": { + "articles": 171042, + "name": "Hrvatski", + "english_name": "Croatian" + }, + "ht": { + "articles": 51108, + "name": "Krèyol ayisyen", + "english_name": "Haitian" + }, + "hu": { + "articles": 399859, + "name": "Magyar", + "english_name": "Hungarian" + }, + "bat-smg": { + "articles": 15940, + "name": "Žemaitėška", + "english_name": "Samogitian" + }, + "hi": { + "articles": 114388, + "name": "हिन्दी", + "english_name": "Hindi" + }, + "pnb": { + "articles": 42659, + "name": "شاہ مکھی پنجابی (Shāhmukhī Pañjābī)", + "english_name": "Western Punjabi" + }, + "bug": { + "articles": 14116, + "name": "Basa Ugi", + "english_name": "Buginese" + }, + "he": { + "articles": 199202, + "name": "עברית", + "english_name": "Hebrew" + }, + "mg": { + "articles": 82826, + "name": "Malagasy", + "english_name": "Malagasy" + }, + "uz": { + "articles": 128742, + "name": "O‘zbek", + "english_name": "Uzbek" + }, + "ml": { + "articles": 46792, + "name": "മലയാളം", + "english_name": "Malayalam" + }, + "azb": { + "articles": 11813, + "name": "تۆرکجه", + "english_name": "South Azerbaijani" + }, + "mn": { + "articles": 16281, + "name": "Монгол", + "english_name": "Mongolian" + }, + "mk": { + "articles": 87527, + "name": "Македонски", + "english_name": "Macedonian" + }, + "ur": { + "articles": 110767, + "name": "اردو", + "english_name": "Urdu" + }, + "ms": { + "articles": 286177, + "name": "Bahasa Melayu", + "english_name": "Malay" + }, + "mr": { + "articles": 45049, + "name": "मराठी", + "english_name": "Marathi" + }, + "my": { + "articles": 33571, + "name": "မြန်မာဘာသာ", + "english_name": "Burmese" + }, + "sah": { + "articles": 10965, + "name": "Саха тыла (Saxa Tyla)", + "english_name": "Sakha" + }, + "af": { + "articles": 42949, + "name": "Afrikaans", + "english_name": "Afrikaans" + }, + "vi": { + "articles": 1151564, + "name": "Tiếng Việt", + "english_name": "Vietnamese" + }, + "is": { + "articles": 41500, + "name": "Íslenska", + "english_name": "Icelandic" + }, + "am": { + "articles": 13291, + "name": "አማርኛ", + "english_name": "Amharic" + }, + "it": { + "articles": 1317506, + "name": "Italiano", + "english_name": "Italian" + }, + "vo": { + "articles": 120413, + "name": "Volapük", + "english_name": "Volapük" + }, + "ar": { + "articles": 453499, + "name": "العربية", + "english_name": "Arabic" + }, + "io": { + "articles": 26845, + "name": "Ido", + "english_name": "Ido" + }, + "ia": { + "articles": 19784, + "name": "Interlingua", + "english_name": "Interlingua" + }, + "az": { + "articles": 111474, + "name": "Azərbaycanca", + "english_name": "Azerbaijani" + }, + "id": { + "articles": 390200, + "name": "Bahasa Indonesia", + "english_name": "Indonesian" + }, + "nl": { + "articles": 1885741, + "name": "Nederlands", + "english_name": "Dutch" + }, + "nn": { + "articles": 131696, + "name": "Nynorsk", + "english_name": "Norwegian (Nynorsk)" + }, + "no": { + "articles": 458147, + "name": "Norsk (Bokmål)", + "english_name": "Norwegian (Bokmål)" + }, + "nah": { + "articles": 10428, + "name": "Nāhuatl", + "english_name": "Nahuatl" + }, + "ne": { + "articles": 29164, + "name": "नेपाली", + "english_name": "Nepali" + }, + "nap": { + "articles": 14400, + "name": "Nnapulitano", + "english_name": "Neapolitan" + }, + "fr": { + "articles": 1822985, + "name": "Français", + "english_name": "French" + }, + "mrj": { + "articles": 10164, + "name": "Кырык Мары (Kyryk Mary)", + "english_name": "Hill Mari" + }, + "zh-yue": { + "articles": 49352, + "name": "粵語", + "english_name": "Cantonese" + }, + "fy": { + "articles": 36464, + "name": "Frysk", + "english_name": "West Frisian" + }, + "fa": { + "articles": 516569, + "name": "فارسی", + "english_name": "Persian" + }, + "fi": { + "articles": 405166, + "name": "Suomi", + "english_name": "Finnish" + }, + "mzn": { + "articles": 12362, + "name": "مَزِروني", + "english_name": "Mazandarani" + }, + "sa": { + "articles": 10198, + "name": "संस्कृतम्", + "english_name": "Sanskrit" + }, + "fo": { + "articles": 12370, + "name": "Føroyskt", + "english_name": "Faroese" + }, + "ka": { + "articles": 111155, + "name": "ქართული", + "english_name": "Georgian" + }, + "ckb": { + "articles": 18217, + "name": "Soranî / کوردی", + "english_name": "Sorani" + }, + "kk": { + "articles": 217477, + "name": "Қазақша", + "english_name": "Kazakh" + }, + "sr": { + "articles": 342497, + "name": "Српски / Srpski", + "english_name": "Serbian" + }, + "sq": { + "articles": 62437, + "name": "Shqip", + "english_name": "Albanian" + }, + "min": { + "articles": 221961, + "name": "Minangkabau", + "english_name": "Minangkabau" + }, + "ko": { + "articles": 367127, + "name": "한국어", + "english_name": "Korean" + }, + "sv": { + "articles": 3783326, + "name": "Svenska", + "english_name": "Swedish" + }, + "su": { + "articles": 19163, + "name": "Basa Sunda", + "english_name": "Sundanese" + }, + "sk": { + "articles": 215360, + "name": "Slovenčina", + "english_name": "Slovak" + }, + "si": { + "articles": 12832, + "name": "සිංහල", + "english_name": "Sinhalese" + }, + "sh": { + "articles": 436526, + "name": "Srpskohrvatski / Српскохрватски", + "english_name": "Serbo-Croatian" + }, + "ku": { + "articles": 22367, + "name": "Kurdî / كوردی", + "english_name": "Kurdish" + }, + "sl": { + "articles": 153978, + "name": "Slovenščina", + "english_name": "Slovenian" + }, + "ky": { + "articles": 59677, + "name": "Кыргызча", + "english_name": "Kirghiz" + }, + "sw": { + "articles": 34773, + "name": "Kiswahili", + "english_name": "Swahili" + } + }, + "ddg definitions": [ + "da-DK", + "vi-VN", + "en-SG", + "sl-SL", + "en-XA", + "tzh-HK", + "en-UK", + "ro-RO", + "en-MY", + "el-GR", + "it-CH", + "hu-HU", + "fr-FR", + "en-PH", + "tl-PH", + "fr-CA", + "fi-FI", + "et-EE", + "sv-SE", + "es-XL", + "th-TH", + "sk-SK", + "es-ES", + "en-IE", + "es-US", + "es-PE", + "nl-NL", + "en-US", + "de-DE", + "de-AT", + "wt-WT", + "no-NO", + "tr-TR", + "ca-ES", + "it-IT", + "es-CO", + "ru-RU", + "ca-CT", + "en-ZA", + "en-CA", + "jp-JP", + "es-MX", + "id-ID", + "es-AR", + "he-IL", + "kr-KR", + "en-AU", + "ms-MY", + "pl-PL", + "lv-LV", + "bg-BG", + "zh-CN", + "en-NZ", + "lt-LT", + "tzh-TW", + "hr-HR", + "pt-PT", + "fr-BE", + "de-CH", + "cs-CZ", + "en-IN", + "nl-BE", + "fr-CH", + "en-ID", + "ar-XA", + "pt-BR", + "uk-UA", + "es-CL" + ], + "bing images": [ + "sq", + "de", + "ar", + "bg", + "ca", + "cs", + "zh-CHS", + "zh-CHT", + "ko", + "hr", + "da", + "sk", + "sl", + "es", + "et", + "fi", + "fr", + "el", + "he", + "nl", + "hu", + "id", + "en", + "is", + "it", + "ja", + "lv", + "lt", + "ms", + "nb", + "fa", + "pl", + "pt-BR", + "pt-PT", + "ro", + "ru", + "sr", + "sv", + "th", + "tr", + "uk", + "vi" + ], + "yahoo": [ + "ar", + "bg", + "zh_chs", + "zh_cht", + "hr", + "cs", + "da", + "nl", + "en", + "et", + "fi", + "fr", + "de", + "el", + "he", + "hu", + "it", + "ja", + "ko", + "lv", + "lt", + "no", + "pl", + "pt", + "ro", + "ru", + "sk", + "sl", + "es", + "sv", + "th", + "tr" + ], + "gigablast": [ + "en", + "fr", + "es", + "ru", + "tr", + "ja", + "h_", + "tw", + "cn", + "ko", + "de", + "nl", + "it", + "fi", + "sv", + "no", + "pt", + "vi", + "ar", + "he", + "id", + "el", + "th", + "hi", + "bn", + "pl", + "tl", + "la", + "eo", + "ca", + "bg", + "tx", + "sr", + "hu", + "da", + "lt", + "cs", + "gl", + "ka", + "gd", + "go", + "ro", + "ga", + "lv", + "hy", + "is", + "ag", + "gv", + "io", + "fa", + "te", + "vv", + "mg", + "ku", + "lb", + "et" + ] +} \ No newline at end of file diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index ab367798..7a64fd25 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -20,6 +20,7 @@ from os.path import realpath, dirname import sys from flask_babel import gettext from operator import itemgetter +from json import loads from searx import settings from searx import logger from searx.utils import load_module @@ -78,6 +79,9 @@ def load_engine(engine_data): if not hasattr(engine, arg_name): setattr(engine, arg_name, arg_value) + if engine_data['name'] in languages: + setattr(engine, 'supported_languages', languages[engine_data['name']]) + # checking required variables for engine_attr in dir(engine): if engine_attr.startswith('_'): @@ -207,6 +211,8 @@ if 'engines' not in settings or not settings['engines']: logger.error('No engines found. Edit your settings.yml') exit(2) +languages = loads(open(engine_dir + '/../data/engines_languages.json').read()) + for engine_data in settings['engines']: engine = load_engine(engine_data) if engine is not None: diff --git a/searx/engines/bing.py b/searx/engines/bing.py index 052b6644..35400339 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -15,12 +15,14 @@ from urllib import urlencode from lxml import html +from requests import get from searx.engines.xpath import extract_text # engine dependent config categories = ['general'] paging = True language_support = True +supported_languages_url = 'https://www.bing.com/account/general' # search-url base_url = 'https://www.bing.com/' @@ -81,3 +83,16 @@ def response(resp): # return results return results + + +# get supported languages from their site +def fetch_supported_languages(): + supported_languages = [] + response = get(supported_languages_url) + dom = html.fromstring(response.text) + options = dom.xpath('//div[@id="limit-languages"]//input') + for option in options: + code = option.xpath('./@id')[0].replace('_', '-') + supported_languages.append(code) + + return supported_languages diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index c0deaf6b..746d3abc 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -19,7 +19,7 @@ from urllib import urlencode from lxml import html from json import loads import re -from searx.engines.bing import supported_languages +from searx.engines.bing import fetch_supported_languages # engine dependent config categories = ['images'] diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index 4bac5bbc..2d936fa5 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -17,7 +17,7 @@ from datetime import datetime from dateutil import parser from lxml import etree from searx.utils import list_get -from searx.engines.bing import supported_languages +from searx.engines.bing import fetch_supported_languages # engine dependent config categories = ['news'] diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index 4a7d7b6a..813dd951 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -15,29 +15,12 @@ from urllib import urlencode from json import loads from datetime import datetime +from requests import get # engine dependent config categories = ['videos'] paging = True language_support = True -supported_languages = ["af", "ak", "am", "ar", "an", "as", "av", "ae", "ay", "az", - "ba", "bm", "be", "bn", "bi", "bo", "bs", "br", "bg", "ca", - "cs", "ch", "ce", "cu", "cv", "kw", "co", "cr", "cy", "da", - "de", "dv", "dz", "el", "en", "eo", "et", "eu", "ee", "fo", - "fa", "fj", "fi", "fr", "fy", "ff", "gd", "ga", "gl", "gv", - "gn", "gu", "ht", "ha", "sh", "he", "hz", "hi", "ho", "hr", - "hu", "hy", "ig", "io", "ii", "iu", "ie", "ia", "id", "ik", - "is", "it", "jv", "ja", "kl", "kn", "ks", "ka", "kr", "kk", - "km", "ki", "rw", "ky", "kv", "kg", "ko", "kj", "ku", "lo", - "la", "lv", "li", "ln", "lt", "lb", "lu", "lg", "mh", "ml", - "mr", "mk", "mg", "mt", "mn", "mi", "ms", "my", "na", "nv", - "nr", "nd", "ng", "ne", "nl", "nn", "nb", "no", "ny", "oc", - "oj", "or", "om", "os", "pa", "pi", "pl", "pt", "ps", "qu", - "rm", "ro", "rn", "ru", "sg", "sa", "si", "sk", "sl", "se", - "sm", "sn", "sd", "so", "st", "es", "sq", "sc", "sr", "ss", - "su", "sw", "sv", "ty", "ta", "tt", "te", "tg", "tl", "th", - "ti", "to", "tn", "ts", "tk", "tr", "tw", "ug", "uk", "ur", - "uz", "ve", "vi", "vo", "wa", "wo", "xh", "yi", "yo", "za", "zh", "zu"] # search-url # see http://www.dailymotion.com/doc/api/obj-video.html @@ -45,6 +28,8 @@ search_url = 'https://api.dailymotion.com/videos?fields=created_time,title,descr embedded_url = '' +supported_languages_url = 'https://api.dailymotion.com/languages' + # do search-request def request(query, params): @@ -92,3 +77,23 @@ def response(resp): # return results return results + + +# get supported languages from their site +def fetch_supported_languages(): + supported_languages = {} + + response = get(supported_languages_url) + response_json = loads(response.text) + + for language in response_json['list']: + supported_languages[language['code']] = {} + + name = language['native_name'] + if name: + supported_languages[language['code']]['name'] = name + english_name = language['name'] + if english_name: + supported_languages[language['code']]['english_name'] = english_name + + return supported_languages diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 3e1752dd..d37d2778 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -15,19 +15,15 @@ from urllib import urlencode from lxml.html import fromstring +from requests import get +from json import loads from searx.engines.xpath import extract_text # engine dependent config categories = ['general'] paging = True language_support = True -supported_languages = ["es-AR", "en-AU", "de-AT", "fr-BE", "nl-BE", "pt-BR", "bg-BG", "en-CA", "fr-CA", "ca-CT", - "es-CL", "zh-CN", "es-CO", "hr-HR", "cs-CZ", "da-DK", "et-EE", "fi-FI", "fr-FR", "de-DE", - "el-GR", "tzh-HK", "hu-HU", "en-IN", "id-ID", "en-ID", "en-IE", "he-IL", "it-IT", "jp-JP", - "kr-KR", "es-XL", "lv-LV", "lt-LT", "ms-MY", "en-MY", "es-MX", "nl-NL", "en-NZ", "no-NO", - "es-PE", "en-PH", "tl-PH", "pl-PL", "pt-PT", "ro-RO", "ru-RU", "ar-XA", "en-XA", "en-SG", - "sk-SK", "sl-SL", "en-ZA", "es-ES", "ca-ES", "sv-SE", "de-CH", "fr-CH", "it-CH", "tzh-TW", - "th-TH", "tr-TR", "uk-UA", "en-UK", "en-US", "es-US", "vi-VN"] +supported_languages_url = 'https://duckduckgo.com/d2030.js' time_range_support = True # search-url @@ -65,8 +61,6 @@ def request(query, params): locale = 'xa' + params['language'].split('-')[0] elif params['language'][-2:] == 'GB': locale = 'uk' + params['language'].split('-')[0] - elif params['language'] == 'es-419': - locale = 'xl-es' else: locale = params['language'].split('-') if len(locale) == 2: @@ -120,3 +114,18 @@ def response(resp): # return results return results + + +# get supported languages from their site +def fetch_supported_languages(): + response = get(supported_languages_url) + + # response is a js file with regions as an embedded object + response_page = response.text + response_page = response_page[response_page.find('regions:{') + 8:] + response_page = response_page[:response_page.find('}') + 1] + + regions_json = loads(response_page) + supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys()) + + return supported_languages diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py index 23a2f3be..b965c02e 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -4,7 +4,7 @@ from re import compile, sub from lxml import html from searx.utils import html_to_text from searx.engines.xpath import extract_text -from searx.engines.duckduckgo import supported_languages +from searx.engines.duckduckgo import fetch_supported_languages url = 'https://api.duckduckgo.com/'\ + '?{query}&format=json&pretty=0&no_redirect=1&d=1' diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index f012e1df..e598e55c 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -14,6 +14,8 @@ from json import loads from random import randint from time import time from urllib import urlencode +from requests import get +from lxml.html import fromstring # engine dependent config categories = ['general'] @@ -40,11 +42,7 @@ url_xpath = './/url' title_xpath = './/title' content_xpath = './/sum' -supported_languages = ["en", "fr", "es", "ru", "tr", "ja", "zh-CN", "zh-TW", "ko", "de", - "nl", "it", "fi", "sv", "no", "pt", "vi", "ar", "he", "id", "el", - "th", "hi", "bn", "pl", "tl", "la", "eo", "ca", "bg", "tx", "sr", - "hu", "da", "lt", "cs", "gl", "ka", "gd", "go", "ro", "ga", "lv", - "hy", "is", "ag", "gv", "io", "fa", "te", "vv", "mg", "ku", "lb", "et"] +supported_languages_url = 'https://gigablast.com/search?&rxikd=1' # do search-request @@ -90,3 +88,17 @@ def response(resp): # return results return results + + +# get supported languages from their site +def fetch_supported_languages(): + supported_languages = [] + response = get(supported_languages_url) + dom = fromstring(response.text) + links = dom.xpath('//span[@id="menu2"]/a') + for link in links: + code = link.xpath('./@href')[0][-2:] + if code != 'xx' and code not in supported_languages: + supported_languages.append(code) + + return supported_languages diff --git a/searx/engines/google.py b/searx/engines/google.py index 31035be6..a82a0b5a 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -12,6 +12,7 @@ import re from urllib import urlencode from urlparse import urlparse, parse_qsl from lxml import html, etree +from requests import get from searx.engines.xpath import extract_text, extract_url from searx.search import logger @@ -23,20 +24,6 @@ categories = ['general'] paging = True language_support = True use_locale_domain = True -supported_languages = ["ach", "af", "ak", "az", "ms", "ban", "xx-bork", "bs", "br", "ca", - "ceb", "ckb", "cs", "sn", "co", "cy", "da", "de", "yo", "et", - "xx-elmer", "en", "es", "es-419", "eo", "eu", "ee", "tl", "fo", "fr", - "gaa", "ga", "gd", "gl", "gn", "xx-hacker", "ht", "ha", "hr", "haw", - "bem", "ig", "rn", "id", "ia", "zu", "is", "it", "jw", "rw", "sw", - "tlh", "kg", "mfe", "kri", "la", "lv", "to", "lt", "ln", "loz", - "lua", "lg", "hu", "mg", "mt", "mi", "nl", "pcm", "no", "nso", - "ny", "nn", "uz", "oc", "om", "xx-pirate", "pl", "pt-BR", "pt-PT", - "ro", "rm", "qu", "nyn", "crs", "sq", "sd", "sk", "sl", "so", "st", - "sr-ME", "sr-Latn", "su", "fi", "sv", "tg", "tt", "vi", "tn", "tum", - "tr", "tk", "tw", "fy", "wo", "xh", "el", "be", "bg", "ky", "kk", "mk", - "mn", "ru", "sr", "uk", "ka", "hy", "yi", "iw", "ug", "ur", "ar", "ps", - "fa", "ti", "am", "ne", "mr", "hi", "bn", "pa", "gu", "or", "ta", "te", - "kn", "ml", "si", "th", "lo", "my", "km", "chr", "ko", "zh-CN", "zh-TW", "ja"] time_range_support = True # based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests @@ -117,6 +104,7 @@ map_hostname_start = 'maps.google.' maps_path = '/maps' redirect_path = '/url' images_path = '/images' +supported_languages_url = 'https://www.google.com/preferences?#languages' # specific xpath variables results_xpath = '//div[@class="g"]' @@ -373,3 +361,17 @@ def attributes_to_html(attributes): retval = retval + '' + a.get('label') + '' + value + '' retval = retval + '' return retval + + +# get supported languages from their site +def fetch_supported_languages(): + supported_languages = {} + response = get(supported_languages_url) + dom = html.fromstring(response.text) + options = dom.xpath('//select[@name="hl"]/option') + for option in options: + code = option.xpath('./@value')[0].split('-')[0] + name = option.text[:-1].title() + supported_languages[code] = {"name": name} + + return supported_languages diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index 6d143024..d138f99f 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -13,7 +13,7 @@ from lxml import html from urllib import urlencode from json import loads -from searx.engines.google import supported_languages +from searx.engines.google import fetch_supported_languages # search-url categories = ['news'] diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py index ea607dd6..93d98d3a 100644 --- a/searx/engines/mediawiki.py +++ b/searx/engines/mediawiki.py @@ -15,7 +15,6 @@ from json import loads from string import Formatter from urllib import urlencode, quote -from searx.engines.wikipedia import supported_languages # engine dependent config categories = ['general'] diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py index 200e9ada..67803fa9 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -20,11 +20,6 @@ from searx.utils import html_to_text categories = None paging = True language_support = True -supported_languages = ["fr-FR", "de-DE", "en-GB", "it-IT", "es-ES", "pt-PT", "de-CH", "fr-CH", "it-CH", "de-AT", - "fr-BE", "nl-BE", "nl-NL", "da-DK", "fi-FI", "sv-SE", "en-IE", "no-NO", "pl-PL", "ru-RU", - "el-GR", "bg-BG", "cs-CZ", "et-EE", "hu-HU", "ro-RO", "en-US", "en-CA", "fr-CA", "pt-BR", - "es-AR", "es-CL", "es-MX", "ja-JP", "en-SG", "en-IN", "en-MY", "ms-MY", "ko-KR", "tl-PH", - "th-TH", "he-IL", "tr-TR", "en-AU", "en-NZ"] category_to_keyword = {'general': 'web', 'images': 'images', @@ -51,15 +46,7 @@ def request(query, params): # add language tag if specified if params['language'] != 'all': - locale = params['language'].split('-') - if len(locale) == 2 and params['language'] in supported_languages: - params['url'] += '&locale=' + params['language'].replace('-', '_').lower() - else: - # try to get a country code for language - for lang in supported_languages: - if locale[0] == lang.split('-')[0]: - params['url'] += '&locale=' + lang.replace('-', '_').lower() - break + params['url'] += '&locale=' + params['language'].replace('-', '_').lower() return params diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index 3814d994..54aafdee 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -24,11 +24,6 @@ categories = ['general'] # paging = False language_support = True -supported_languages = ["af", "de", "ar", "hy", "be", "bg", "ca", "cs", "zh-CN", "zh-TW", - "ko", "hr", "da", "sk", "sl", "es", "eo", "et", "fi", "fr", - "el", "iw", "hi", "nl", "hu", "id", "en", "is", "it", "ja", - "lv", "lt", "no", "fa", "pl", "pt", "ro", "ru", "sr", "sw", - "sv", "tl", "th", "tr", "uk", "vi"] # search-url base_url = 'https://startpage.com/' diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py index 2c0a94f0..f979d014 100644 --- a/searx/engines/subtitleseeker.py +++ b/searx/engines/subtitleseeker.py @@ -22,7 +22,7 @@ language = "" # search-url url = 'http://www.subtitleseeker.com/' -search_url = url + 'search/TITLES/{query}&p={pageno}' +search_url = url + 'search/TITLES/{query}?p={pageno}' # specific xpath variables results_xpath = '//div[@class="boxRows"]' @@ -51,7 +51,8 @@ def response(resp): elif resp.search_params['language'] != 'all': search_lang = [lc[3] for lc in language_codes - if lc[0][:2] == resp.search_params['language'].split('_')[0]][0] + if lc[0].split('-')[0] == resp.search_params['language'].split('-')[0]] + search_lang = search_lang[0].split(' (')[0] # parse results for result in dom.xpath(results_xpath): diff --git a/searx/engines/swisscows.py b/searx/engines/swisscows.py index 5c6b051a..7f85019a 100644 --- a/searx/engines/swisscows.py +++ b/searx/engines/swisscows.py @@ -13,17 +13,13 @@ from json import loads from urllib import urlencode, unquote import re +from requests import get +from lxml.html import fromstring # engine dependent config categories = ['general', 'images'] paging = True language_support = True -supported_languages = ["ar-SA", "es-AR", "en-AU", "de-AT", "fr-BE", "nl-BE", "pt-BR", "bg-BG", "en-CA", "fr-CA", - "es-CL", "zh-CN", "hr-HR", "cs-CZ", "da-DK", "et-EE", "fi-FI", "fr-FR", "de-DE", "el-GR", - "zh-HK", "hu-HU", "en-IN", "en-IE", "he-IL", "it-IT", "ja-JP", "ko-KR", "lv-LV", "lt-LT", - "en-MY", "es-MX", "nl-NL", "en-NZ", "nb-NO", "en-PH", "pl-PL", "pt-PT", "ro-RO", "ru-RU", - "en-SG", "sk-SK", "sl-SI", "en-ZA", "es-ES", "sv-SE", "de-CH", "fr-CH", "zh-TW", "th-TH", - "tr-TR", "uk-UA", "en-GB", "en-US", "es-US"] # search-url base_url = 'https://swisscows.ch/' @@ -114,3 +110,16 @@ def response(resp): # return results return results + + +# get supported languages from their site +def fetch_supported_languages(): + supported_languages = [] + response = get(base_url) + dom = fromstring(response.text) + options = dom.xpath('//div[@id="regions-popup"]//ul/li/a') + for option in options: + code = option.xpath('./@data-val')[0] + supported_languages.append(code) + + return supported_languages diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index edb6d75f..9c0a768e 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -15,7 +15,7 @@ from searx import logger from searx.poolrequests import get from searx.engines.xpath import extract_text from searx.utils import format_date_by_locale -from searx.engines.wikipedia import supported_languages +from searx.engines.wikipedia import fetch_supported_languages from json import loads from lxml.html import fromstring @@ -57,7 +57,7 @@ calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]' def request(query, params): - language = params['language'].split('_')[0] + language = params['language'].split('-')[0] if language == 'all': language = 'en' @@ -72,7 +72,7 @@ def response(resp): html = fromstring(resp.content) wikidata_ids = html.xpath(wikidata_ids_xpath) - language = resp.search_params['language'].split('_')[0] + language = resp.search_params['language'].split('-')[0] if language == 'all': language = 'en' diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index fdba5ed6..0dee325a 100644 --- a/searx/engines/wikipedia.py +++ b/searx/engines/wikipedia.py @@ -12,36 +12,9 @@ from json import loads from urllib import urlencode, quote +from requests import get +from lxml.html import fromstring -supported_languages = ["en", "sv", "ceb", "de", "nl", "fr", "ru", "it", "es", "war", - "pl", "vi", "ja", "pt", "zh", "uk", "ca", "fa", "no", "sh", - "ar", "fi", "hu", "id", "ro", "cs", "ko", "sr", "ms", "tr", - "eu", "eo", "min", "bg", "da", "kk", "sk", "hy", "he", "zh-min-nan", - "lt", "hr", "sl", "et", "ce", "gl", "nn", "uz", "la", "vo", - "el", "simple", "be", "az", "th", "ur", "ka", "hi", "oc", "ta", - "mk", "mg", "new", "lv", "cy", "bs", "tt", "tl", "te", "pms", - "be-tarask", "br", "sq", "ky", "ht", "jv", "tg", "ast", "zh-yue", "lb", - "mr", "ml", "bn", "pnb", "is", "af", "sco", "ga", "ba", "fy", - "cv", "lmo", "sw", "my", "an", "yo", "ne", "io", "gu", "nds", - "scn", "bpy", "pa", "ku", "als", "kn", "bar", "ia", "qu", "su", - "ckb", "bat-smg", "mn", "arz", "nap", "wa", "bug", "gd", "yi", "map-bms", - "am", "mzn", "fo", "si", "nah", "li", "sah", "vec", "hsb", "or", - "os", "mrj", "sa", "hif", "mhr", "roa-tara", "azb", "pam", "ilo", - "sd", "ps", "se", "mi", "bh", "eml", "bcl", "xmf", "diq", "hak", - "gan", "glk", "vls", "nds-nl", "rue", "bo", "fiu-vro", "co", "sc", - "tk", "csb", "lrc", "vep", "wuu", "km", "szl", "gv", "crh", "kv", - "zh-classical", "frr", "zea", "as", "so", "kw", "nso", "ay", "stq", - "udm", "cdo", "nrm", "ie", "koi", "rm", "pcd", "myv", "mt", "fur", - "ace", "lad", "gn", "lij", "dsb", "dv", "cbk-zam", "ext", "gom", - "kab", "ksh", "ang", "mai", "mwl", "lez", "gag", "ln", "ug", "pi", - "pag", "frp", "sn", "nv", "av", "pfl", "haw", "xal", "krc", "kaa", - "rw", "bxr", "pdc", "to", "kl", "nov", "arc", "kbd", "lo", "bjn", - "pap", "ha", "tet", "ki", "tyv", "tpi", "na", "lbe", "ig", "jbo", - "roa-rup", "ty", "jam", "za", "kg", "mdf", "lg", "wo", "srn", "ab", - "ltg", "zu", "sm", "chr", "om", "tn", "chy", "rmy", "cu", "tw", "tum", - "xh", "bi", "rn", "pih", "got", "ss", "pnt", "bm", "ch", "mo", "ts", - "ady", "iu", "st", "ee", "ny", "fj", "ks", "ak", "ik", "sg", "ve", - "dz", "ff", "ti", "cr", "ng", "cho", "kj", "mh", "ho", "ii", "aa", "mus", "hz", "kr"] # search-url base_url = 'https://{language}.wikipedia.org/' @@ -54,6 +27,7 @@ search_postfix = 'w/api.php?'\ '&explaintext'\ '&pithumbsize=300'\ '&redirects' +supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias' # set language in base_url @@ -142,3 +116,24 @@ def response(resp): 'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}]}) return results + + +# get supported languages from their site +def fetch_supported_languages(): + supported_languages = {} + response = get(supported_languages_url) + dom = fromstring(response.text) + tables = dom.xpath('//table[contains(@class,"sortable")]') + for table in tables: + # exclude header row + trs = table.xpath('.//tr')[1:] + for tr in trs: + td = tr.xpath('./td') + code = td[3].xpath('./a')[0].text + name = td[2].xpath('./a')[0].text + english_name = td[1].xpath('./a')[0].text + articles = int(td[4].xpath('./a/b')[0].text.replace(',', '')) + if articles >= 10000: + supported_languages[code] = {"name": name, "english_name": english_name, "articles": articles} + + return supported_languages diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index c00e4236..db10c893 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -14,16 +14,13 @@ from urllib import urlencode from urlparse import unquote from lxml import html +from requests import get from searx.engines.xpath import extract_text, extract_url # engine dependent config categories = ['general'] paging = True language_support = True -supported_languages = ["ar", "bg", "ca", "szh", "tzh", "hr", "cs", "da", "nl", "en", - "et", "fi", "fr", "de", "el", "he", "hu", "is", "id", "it", "ja", - "ko", "lv", "lt", "no", "fa", "pl", "pt", "ro", "ru", "sk", "sr", - "sl", "es", "sv", "th", "tr"] time_range_support = True # search-url @@ -31,6 +28,8 @@ base_url = 'https://search.yahoo.com/' search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}' search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time' +supported_languages_url = 'https://search.yahoo.com/web/advanced' + # specific xpath variables results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]" url_xpath = './/h3/a/@href' @@ -142,3 +141,16 @@ def response(resp): # return results return results + + +# get supported languages from their site +def fetch_supported_languages(): + supported_languages = [] + response = get(supported_languages_url) + dom = html.fromstring(response.text) + options = dom.xpath('//div[@id="yschlang"]/span/label/input') + for option in options: + code = option.xpath('./@value')[0][5:] + supported_languages.append(code) + + return supported_languages diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py index 613513e5..bc7b5c36 100644 --- a/searx/engines/yahoo_news.py +++ b/searx/engines/yahoo_news.py @@ -12,7 +12,7 @@ from urllib import urlencode from lxml import html from searx.engines.xpath import extract_text, extract_url -from searx.engines.yahoo import parse_url, supported_languages +from searx.engines.yahoo import parse_url, fetch_supported_languages from datetime import datetime, timedelta import re from dateutil import parser diff --git a/searx/languages.py b/searx/languages.py index b07d296e..0337947f 100644 --- a/searx/languages.py +++ b/searx/languages.py @@ -4,39 +4,29 @@ language_codes = ( (u"ach", u"Acoli", u"", u""), - (u"af", u"Afrikaans", u"", u"Afrikaans"), + (u"af", u"Afrikaans", u"", u""), (u"ak", u"Akan", u"", u""), - (u"als", u"Alemannisch", u"", u"Alemannic"), - (u"am", u"አማርኛ", u"", u"Amharic"), - (u"an", u"Aragonés", u"", u"Aragonese"), + (u"am", u"አማርኛ", u"", u""), (u"ar-SA", u"العربية", u"المملكة العربية السعودية", u"Arabic"), - (u"arz", u"مصرى (Maṣri)", u"", u"Egyptian Arabic"), - (u"ast", u"Asturianu", u"", u"Asturian"), (u"az", u"Azərbaycanca", u"", u"Azerbaijani"), - (u"azb", u"تۆرکجه", u"", u"South Azerbaijani"), - (u"ba", u"Башҡорт", u"", u"Bashkir"), (u"ban", u"Balinese", u"", u""), - (u"bar", u"Boarisch", u"", u"Bavarian"), (u"be", u"Беларуская", u"", u"Belarusian"), (u"bem", u"Ichibemba", u"", u""), (u"bg-BG", u"Български", u"България", u"Bulgarian"), - (u"bn", u"বাংলা", u"", u"Bengali"), - (u"bpy", u"ইমার ঠার/বিষ্ণুপ্রিয়া মণিপুরী", u"", u"Bishnupriya Manipuri"), - (u"br", u"Brezhoneg", u"", u"Breton"), - (u"bs", u"Bosanski", u"", u"Bosnian"), - (u"bug", u"Basa Ugi", u"", u"Buginese"), + (u"bn", u"বাংলা", u"", u""), + (u"br", u"Brezhoneg", u"", u""), + (u"bs", u"Bosanski", u"", u""), (u"ca", u"Català", u"", u"Catalan"), (u"ca-CT", u"Català", u"", u"Catalan"), (u"ca-ES", u"Català", u"Espanya", u"Catalan"), (u"ce", u"Нохчийн", u"", u"Chechen"), (u"ceb", u"Sinugboanong Binisaya", u"", u"Cebuano"), (u"chr", u"ᏣᎳᎩ", u"", u""), - (u"ckb", u"Soranî / کوردی", u"", u"Sorani"), + (u"ckb", u"Central Kurdish", u"", u""), (u"co", u"Corsican", u"", u""), (u"crs", u"Seychellois Creole", u"", u""), (u"cs-CZ", u"Čeština", u"Česko", u"Czech"), - (u"cv", u"Чăваш", u"", u"Chuvash"), - (u"cy", u"Cymraeg", u"", u"Welsh"), + (u"cy", u"Cymraeg", u"", u""), (u"da-DK", u"Dansk", u"Danmark", u"Danish"), (u"de", u"Deutsch", u"", u"German"), (u"de-AT", u"Deutsch", u"Österreich", u"German"), @@ -70,148 +60,129 @@ language_codes = ( (u"eu", u"Euskara", u"", u"Basque"), (u"fa", u"فارسی", u"", u"Persian"), (u"fi-FI", u"Suomi", u"Suomi", u"Finnish"), - (u"fo", u"Føroyskt", u"", u"Faroese"), + (u"fo", u"Føroyskt", u"", u""), (u"fr", u"Français", u"", u"French"), (u"fr-BE", u"Français", u"Belgique", u"French"), (u"fr-CA", u"Français", u"Canada", u"French"), (u"fr-CH", u"Français", u"Suisse", u"French"), (u"fr-FR", u"Français", u"France", u"French"), - (u"fy", u"Frysk", u"", u"West Frisian"), - (u"ga", u"Gaeilge", u"", u"Irish"), + (u"fy", u"West-Frysk", u"", u""), + (u"ga", u"Gaeilge", u"", u""), (u"gaa", u"Ga", u"", u""), - (u"gd", u"Gàidhlig", u"", u"Scottish Gaelic"), + (u"gd", u"Gàidhlig", u"", u""), (u"gl", u"Galego", u"", u"Galician"), (u"gn", u"Guarani", u"", u""), - (u"gu", u"ગુજરાતી", u"", u"Gujarati"), + (u"gu", u"ગુજરાતી", u"", u""), (u"ha", u"Hausa", u"", u""), (u"haw", u"ʻŌlelo HawaiʻI", u"", u""), (u"he-IL", u"עברית", u"ישראל", u"Hebrew"), (u"hi", u"हिन्दी", u"", u"Hindi"), (u"hr-HR", u"Hrvatski", u"Hrvatska", u"Croatian"), - (u"hsb", u"Hornjoserbsce", u"", u"Upper Sorbian"), - (u"ht", u"Krèyol ayisyen", u"", u"Haitian"), + (u"ht", u"Haitian Creole", u"", u""), (u"hu-HU", u"Magyar", u"Magyarország", u"Hungarian"), (u"hy", u"Հայերեն", u"", u"Armenian"), - (u"ia", u"Interlingua", u"", u"Interlingua"), + (u"ia", u"Interlingua", u"", u""), (u"id-ID", u"Bahasa Indonesia", u"Indonesia", u"Indonesian"), (u"ig", u"Igbo", u"", u""), - (u"io", u"Ido", u"", u"Ido"), - (u"is", u"Íslenska", u"", u"Icelandic"), + (u"is", u"Íslenska", u"", u""), (u"it", u"Italiano", u"", u"Italian"), (u"it-CH", u"Italiano", u"Svizzera", u"Italian"), (u"it-IT", u"Italiano", u"Italia", u"Italian"), (u"iw", u"עברית", u"", u""), (u"ja-JP", u"日本語", u"日本", u"Japanese"), - (u"jv", u"Basa Jawa", u"", u"Javanese"), (u"ka", u"ქართული", u"", u"Georgian"), (u"kg", u"Kongo", u"", u""), (u"kk", u"Қазақша", u"", u"Kazakh"), (u"km", u"ខ្មែរ", u"", u""), - (u"kn", u"ಕನ್ನಡ", u"", u"Kannada"), + (u"kn", u"ಕನ್ನಡ", u"", u""), (u"ko-KR", u"한국어", u"대한민국", u"Korean"), - (u"kri", u"Krio (Sierra Leone)", u"", u""), - (u"ku", u"Kurdî / كوردی", u"", u"Kurdish"), - (u"ky", u"Кыргызча", u"", u"Kirghiz"), + (u"kri", u"Krio", u"", u""), + (u"ky", u"Кыргызча", u"", u""), (u"la", u"Latina", u"", u"Latin"), - (u"lb", u"Lëtzebuergesch", u"", u"Luxembourgish"), (u"lg", u"Luganda", u"", u""), - (u"li", u"Limburgs", u"", u"Limburgish"), - (u"lmo", u"Lumbaart", u"", u"Lombard"), (u"ln", u"Lingála", u"", u""), (u"lo", u"ລາວ", u"", u""), (u"loz", u"Lozi", u"", u""), (u"lt-LT", u"Lietuvių", u"Lietuva", u"Lithuanian"), (u"lua", u"Luba-Lulua", u"", u""), - (u"lv-LV", u"Latviešu", u"Latvijas Republika", u"Latvian"), + (u"lv-LV", u"Latviešu", u"Latvijas Republika", u""), (u"mfe", u"Kreol Morisien", u"", u""), - (u"mg", u"Malagasy", u"", u"Malagasy"), + (u"mg", u"Malagasy", u"", u""), (u"mi", u"Maori", u"", u""), (u"min", u"Minangkabau", u"", u"Minangkabau"), - (u"mk", u"Македонски", u"", u"Macedonian"), - (u"ml", u"മലയാളം", u"", u"Malayalam"), - (u"mn", u"Монгол", u"", u"Mongolian"), - (u"mr", u"मराठी", u"", u"Marathi"), - (u"mrj", u"Кырык Мары (Kyryk Mary)", u"", u"Hill Mari"), + (u"mk", u"Македонски", u"", u""), + (u"ml", u"മലയാളം", u"", u""), + (u"mn", u"Монгол", u"", u""), + (u"mr", u"मराठी", u"", u""), (u"ms-MY", u"Bahasa Melayu", u"Malaysia", u"Malay"), (u"mt", u"Malti", u"", u""), - (u"my", u"မြန်မာဘာသာ", u"", u"Burmese"), - (u"mzn", u"مَزِروني", u"", u"Mazandarani"), - (u"nah", u"Nāhuatl", u"", u"Nahuatl"), - (u"nap", u"Nnapulitano", u"", u"Neapolitan"), - (u"nds-nl", u"Plattdüütsch", u"Nedderlannen", u"Low Saxon"), - (u"ne", u"नेपाली", u"", u"Nepali"), - (u"new", u"नेपाल भाषा", u"", u"Newar"), + (u"my", u"ဗမာ", u"", u""), + (u"nb-NO", u"Norwegian Bokmål", u"Norge", u"Norwegian Bokmål"), + (u"ne", u"नेपाली", u"", u""), (u"nl", u"Nederlands", u"", u"Dutch"), (u"nl-BE", u"Nederlands", u"België", u"Dutch"), (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"), - (u"nn", u"Nynorsk", u"", u"Norwegian (Nynorsk)"), - (u"no-NO", u"Norsk (Bokmål)", u"Norge", u"Norwegian (Bokmål)"), + (u"nn", u"Nynorsk", u"", u"Norwegian"), + (u"no-NO", u"Norsk", u"Norge", u"Norwegian"), (u"nso", u"Northern Sotho", u"", u""), (u"ny", u"Nyanja", u"", u""), (u"nyn", u"Runyankore", u"", u""), - (u"oc", u"Occitan", u"", u"Occitan"), + (u"oc", u"Occitan", u"", u""), (u"om", u"Oromoo", u"", u""), - (u"or", u"ଓଡ଼ିଆ", u"", u"Oriya"), - (u"os", u"Иронау", u"", u"Ossetian"), - (u"pa", u"ਪੰਜਾਬੀ", u"", u"Punjabi"), + (u"or", u"ଓଡ଼ିଆ", u"", u""), + (u"pa", u"ਪੰਜਾਬੀ", u"", u""), (u"pcm", u"Nigerian Pidgin", u"", u""), (u"pl-PL", u"Polski", u"Rzeczpospolita Polska", u"Polish"), - (u"pms", u"Piemontèis", u"", u"Piedmontese"), - (u"pnb", u"شاہ مکھی پنجابی (Shāhmukhī Pañjābī)", u"", u"Western Punjabi"), (u"ps", u"پښتو", u"", u""), (u"pt", u"Português", u"", u"Portuguese"), (u"pt-BR", u"Português", u"Brasil", u"Portuguese"), (u"pt-PT", u"Português", u"Portugal", u"Portuguese"), - (u"qu", u"Runa Simi", u"", u"Quechua"), + (u"qu", u"Runasimi", u"", u""), (u"rm", u"Rumantsch", u"", u""), (u"rn", u"Ikirundi", u"", u""), (u"ro-RO", u"Română", u"România", u"Romanian"), (u"ru-RU", u"Русский", u"Россия", u"Russian"), (u"rw", u"Kinyarwanda", u"", u""), - (u"sa", u"संस्कृतम्", u"", u"Sanskrit"), - (u"sah", u"Саха тыла (Saxa Tyla)", u"", u"Sakha"), - (u"scn", u"Sicilianu", u"", u"Sicilian"), - (u"sco", u"Scots", u"", u"Scots"), (u"sd", u"Sindhi", u"", u""), (u"sh", u"Srpskohrvatski / Српскохрватски", u"", u"Serbo-Croatian"), - (u"si", u"සිංහල", u"", u"Sinhalese"), + (u"si", u"සිංහල", u"", u""), (u"sk-SK", u"Slovenčina", u"Slovenská republika", u"Slovak"), - (u"sl-SI", u"Slovenščina", u"Slovenija", u"Slovenian"), + (u"sl", u"Slovenščina", u"", u"Slovenian"), (u"sn", u"Chishona", u"", u""), (u"so", u"Soomaali", u"", u""), - (u"sq", u"Shqip", u"", u"Albanian"), - (u"sr-ME", u"Српски / Srpski", u"Црна Гора", u"Serbian"), + (u"sq", u"Shqip", u"", u""), + (u"sr", u"Српски / Srpski", u"", u"Serbian"), (u"st", u"Southern Sotho", u"", u""), - (u"su", u"Basa Sunda", u"", u"Sundanese"), + (u"su", u"Sundanese", u"", u""), (u"sv-SE", u"Svenska", u"Sverige", u"Swedish"), - (u"sw", u"Kiswahili", u"", u"Swahili"), - (u"ta", u"தமிழ்", u"", u"Tamil"), - (u"te", u"తెలుగు", u"", u"Telugu"), - (u"tg", u"Тоҷикӣ", u"", u"Tajik"), + (u"sw", u"Kiswahili", u"", u""), + (u"ta", u"தமிழ்", u"", u""), + (u"te", u"తెలుగు", u"", u""), + (u"tg", u"Tajik", u"", u""), (u"th-TH", u"ไทย", u"ไทย", u"Thai"), (u"ti", u"ትግርኛ", u"", u""), (u"tk", u"Turkmen", u"", u""), - (u"tl-PH", u"Tagalog", u"Pilipinas", u"Tagalog"), + (u"tl-PH", u"Filipino", u"Pilipinas", u""), (u"tlh", u"Klingon", u"", u""), (u"tn", u"Tswana", u"", u""), (u"to", u"Lea Fakatonga", u"", u""), (u"tr-TR", u"Türkçe", u"Türkiye", u"Turkish"), - (u"tt", u"Tatarça / Татарча", u"", u"Tatar"), + (u"tt", u"Tatar", u"", u""), (u"tum", u"Tumbuka", u"", u""), (u"tw", u"Twi", u"", u""), (u"ug", u"ئۇيغۇرچە", u"", u""), (u"uk-UA", u"Українська", u"Україна", u"Ukrainian"), (u"ur", u"اردو", u"", u"Urdu"), (u"uz", u"O‘zbek", u"", u"Uzbek"), - (u"vec", u"Vèneto", u"", u"Venetian"), + (u"ve", u"Venda", u"", u"Venda"), (u"vi-VN", u"Tiếng Việt", u"Công Hòa Xã Hội Chủ Nghĩa Việt Nam", u"Vietnamese"), (u"vo", u"Volapük", u"", u"Volapük"), (u"wa", u"Walon", u"", u"Walloon"), (u"war", u"Winaray", u"", u"Waray-Waray"), (u"wo", u"Wolof", u"", u""), (u"xh", u"Xhosa", u"", u""), - (u"yi", u"ייִדיש", u"", u"Yiddish"), - (u"yo", u"Yorùbá", u"", u"Yoruba"), + (u"yi", u"ייִדיש", u"", u""), + (u"yo", u"Èdè Yorùbá", u"", u""), (u"zh", u"中文", u"", u"Chinese"), (u"zh-CN", u"中文", u"中国", u"Chinese"), (u"zh-HK", u"中文", u"香港", u"Chinese"), diff --git a/searx/webapp.py b/searx/webapp.py index c4a35a97..b124aa75 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -514,7 +514,7 @@ def index(): answers=result_container.answers, infoboxes=result_container.infoboxes, paging=result_container.paging, - current_language=search.lang, + current_language=search_query.lang, base_url=get_base_url(), theme=get_current_theme_name(), favicons=global_favicons[themes.index(get_current_theme_name())] diff --git a/tests/unit/engines/test_subtitleseeker.py b/tests/unit/engines/test_subtitleseeker.py index a641601b..e499cd2d 100644 --- a/tests/unit/engines/test_subtitleseeker.py +++ b/tests/unit/engines/test_subtitleseeker.py @@ -17,7 +17,7 @@ class TestSubtitleseekerEngine(SearxTestCase): def test_response(self): dicto = defaultdict(dict) - dicto['language'] = 'fr_FR' + dicto['language'] = 'fr-FR' response = mock.Mock(search_params=dicto) self.assertRaises(AttributeError, subtitleseeker.response, None) diff --git a/tests/unit/engines/test_wikipedia.py b/tests/unit/engines/test_wikipedia.py index 431cf69c..3e2f47ea 100644 --- a/tests/unit/engines/test_wikipedia.py +++ b/tests/unit/engines/test_wikipedia.py @@ -8,6 +8,8 @@ from searx.testing import SearxTestCase class TestWikipediaEngine(SearxTestCase): def test_request(self): + wikipedia.supported_languages = ['fr', 'en'] + query = 'test_query' dicto = defaultdict(dict) dicto['language'] = 'fr-FR' diff --git a/utils/fetch_languages.py b/utils/fetch_languages.py new file mode 100644 index 00000000..ae4a2def --- /dev/null +++ b/utils/fetch_languages.py @@ -0,0 +1,164 @@ +# -*- coding: utf-8 -*- + +# This script generates languages.py from intersecting each engine's supported languages. +# +# The country names are obtained from http://api.geonames.org which requires registering as a user. +# +# Output files (engines_languages.json and languages.py) +# are written in current directory to avoid overwriting in case something goes wrong. + +from requests import get +from urllib import urlencode +from lxml.html import fromstring +from json import loads, dumps +import io +from sys import path +path.append('../searx') # noqa +from searx.engines import engines + +# Geonames API for country names. +geonames_user = '' # ADD USER NAME HERE +country_names_url = 'http://api.geonames.org/countryInfoJSON?{parameters}' + +# Output files. +engines_languages_file = 'engines_languages.json' +languages_file = 'languages.py' + +engines_languages = {} +languages = {} + + +# To filter out invalid codes and dialects. +def valid_code(lang_code): + # filter invalid codes + # sl-SL is technically not invalid, but still a mistake + if lang_code[:2] == 'xx'\ + or lang_code == 'sl-SL'\ + or lang_code == 'wt-WT'\ + or lang_code == 'jw'\ + or lang_code[-2:] == 'UK'\ + or lang_code[-2:] == 'XA'\ + or lang_code[-2:] == 'XL': + return False + + # filter dialects + lang_code = lang_code.split('-') + if len(lang_code) > 2 or len(lang_code[0]) > 3: + return False + if len(lang_code) == 2 and len(lang_code[1]) > 2: + return False + + return True + + +# Get country name in specified language. +def get_country_name(locale): + if geonames_user is '': + return '' + + locale = locale.split('-') + if len(locale) != 2: + return '' + + url = country_names_url.format(parameters=urlencode({'lang': locale[0], + 'country': locale[1], + 'username': geonames_user})) + response = get(url) + json = loads(response.text) + content = json.get('geonames', None) + if content is None or len(content) != 1: + print "No country name found for " + locale[0] + "-" + locale[1] + return '' + + return content[0].get('countryName', '') + + +# Fetchs supported languages for each engine and writes json file with those. +def fetch_supported_languages(): + for engine_name in engines: + if hasattr(engines[engine_name], 'fetch_supported_languages'): + try: + engines_languages[engine_name] = engines[engine_name].fetch_supported_languages() + except Exception as e: + print e + + # write json file + f = io.open(engines_languages_file, "w", encoding="utf-8") + f.write(unicode(dumps(engines_languages, indent=4, ensure_ascii=False, encoding="utf-8"))) + f.close() + + +# Join all language lists. +# Iterate all languages supported by each engine. +def join_language_lists(): + # include wikipedia first for more accurate language names + # exclude languages with too few articles + languages.update({code: lang for code, lang + in engines_languages['wikipedia'].iteritems() + if valid_code(code) and lang['articles'] >= 100000}) + + for engine_name in engines_languages: + for locale in engines_languages[engine_name]: + if not valid_code(locale): + continue + + # if language is not on list or if it has no name yet + if locale not in languages or not languages[locale].get('name'): + if isinstance(engines_languages[engine_name], dict) \ + and engines_languages[engine_name][locale].get('articles', float('inf')) >= 100000: + languages[locale] = engines_languages[engine_name][locale] + else: + languages[locale] = {} + + # get locales that have no name yet + for locale in languages.keys(): + if not languages[locale].get('name'): + # try to get language and country names + name = languages.get(locale.split('-')[0], {}).get('name', None) + if name: + languages[locale]['name'] = name + languages[locale]['country'] = get_country_name(locale) or '' + languages[locale]['english_name'] = languages.get(locale.split('-')[0], {}).get('english_name', '') + else: + # filter out locales with no name + del languages[locale] + + +# Remove countryless language if language is featured in only one country. +def filter_single_country_languages(): + prev_lang = None + for code in sorted(languages): + lang = code.split('-')[0] + if lang == prev_lang: + countries += 1 + else: + if prev_lang is not None and countries == 1: + del languages[prev_lang] + countries = 0 + prev_lang = lang + + +# Write languages.py. +def write_languages_file(): + new_file = open(languages_file, 'w') + file_content = '# -*- coding: utf-8 -*-\n' + file_content += '# list of language codes\n' + file_content += '# this file is generated automatically by utils/update_search_languages.py\n' + file_content += '\nlanguage_codes = (' + for code in sorted(languages): + file_content += '\n (u"' + code + '"'\ + + ', u"' + languages[code]['name'].split(' (')[0] + '"'\ + + ', u"' + languages[code].get('country', '') + '"'\ + + ', u"' + languages[code].get('english_name', '').split(' (')[0] + '"),' + # remove last comma + file_content = file_content[:-1] + file_content += '\n)\n' + new_file.write(file_content.encode('utf8')) + new_file.close() + + +if __name__ == "__main__": + fetch_supported_languages() + join_language_lists() + filter_single_country_languages() + write_languages_file() diff --git a/utils/update_languages.py b/utils/update_languages.py deleted file mode 100644 index cc3fa29c..00000000 --- a/utils/update_languages.py +++ /dev/null @@ -1,169 +0,0 @@ -# -*- coding: utf-8 -*- - -# This script generates languages.py from -# intersecting each engine's supported languages. -# -# The language's native names are obtained from -# Wikipedia and Google's supported languages. -# -# The country names are obtained from http://api.geonames.org -# which requires registering as a user. -# -# Output file (languages.py) is written in current directory -# to avoid overwriting in case something goes wrong. - -from requests import get -from urllib import urlencode -from lxml.html import fromstring -from json import loads -from sys import path -path.append('../searx') -from searx.engines import engines - -# list of names -wiki_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias' -google_languages_url = 'https://www.google.com/preferences?#languages' -country_names_url = 'http://api.geonames.org/countryInfoJSON?{parameters}' - -geonames_user = '' # add user name here - -google_json_name = 'google.preferences.langMap' - -languages = {} - - -# To filter out invalid codes and dialects. -def valid_code(lang_code): - # filter invalid codes - # sl-SL is technically not invalid, but still a mistake - if lang_code[:2] == 'xx'\ - or lang_code == 'sl-SL'\ - or lang_code == 'jw'\ - or lang_code[-2:] == 'UK'\ - or lang_code[-2:] == 'XA'\ - or lang_code[-2:] == 'XL': - return False - - # filter dialects - lang_code = lang_code.split('-') - if len(lang_code) > 2 or len(lang_code[0]) > 3: - return False - if len(lang_code) == 2 and len(lang_code[1]) > 2: - return False - - return True - - -# Get country name in specified language. -def get_country_name(locale): - if geonames_user is '': - return '' - - locale = locale.split('-') - if len(locale) != 2: - return '' - - url = country_names_url.format(parameters=urlencode({'lang': locale[0], - 'country': locale[1], - 'username': geonames_user})) - response = get(url) - json = loads(response.text) - content = json.get('geonames', None) - if content is None or len(content) != 1: - print "No country name found for " + locale[0] + "-" + locale[1] - print json - return '' - - return content[0].get('countryName', '') - - -# Get language names from Wikipedia. -def get_wikipedia_languages(): - response = get(wiki_languages_url) - dom = fromstring(response.text) - tables = dom.xpath('//table[contains(@class,"sortable")]') - for table in tables: - # exclude header row - trs = table.xpath('.//tr')[1:] - for tr in trs: - td = tr.xpath('./td') - code = td[3].xpath('./a')[0].text - name = td[2].xpath('./a')[0].text - english_name = td[1].xpath('./a')[0].text - articles = int(td[4].xpath('./a/b')[0].text.replace(',','')) - - # exclude language variants and languages with few articles - if code not in languages and articles >= 10000 and valid_code(code): - languages[code] = (name, '', english_name) - - -# Get language names from Google. -def get_google_languages(): - response = get(google_languages_url) - dom = fromstring(response.text) - options = dom.xpath('//select[@name="hl"]/option') - for option in options: - code = option.xpath('./@value')[0].split('-')[0] - name = option.text[:-1].title() - - if code not in languages and valid_code(code): - languages[code] = (name, '', '') - - -# Join all language lists. -# iterate all languages supported by each engine -def join_language_lists(): - for engine_name in engines: - for locale in engines[engine_name].supported_languages: - locale = locale.replace('_', '-') - if locale not in languages and valid_code(locale): - # try to get language name - language = languages.get(locale.split('-')[0], None) - if language == None: - print engine_name + ": " + locale - continue - - country = get_country_name(locale) - languages[locale] = (language[0], country, language[2]) - - -# Remove countryless language if language is featured in only one country. -def filter_single_country_languages(): - prev_lang = None - for code in sorted(languages): - lang = code.split('-')[0] - if lang == prev_lang: - countries += 1 - else: - if prev_lang is not None and countries == 1: - del languages[prev_lang] - countries = 0 - prev_lang = lang - - -# Write languages.py. -def write_languages_file(): - new_file = open('languages.py', 'w') - file_content = '# -*- coding: utf-8 -*-\n' - file_content += '# list of language codes\n' - file_content += '# this file is generated automatically by utils/update_search_languages.py\n' - file_content += '\nlanguage_codes = (' - for code in sorted(languages): - (name, country, english) = languages[code] - file_content += '\n (u"' + code + '"'\ - + ', u"' + name + '"'\ - + ', u"' + country + '"'\ - + ', u"' + english + '"),' - # remove last comma - file_content = file_content[:-1] - file_content += '\n)\n' - new_file.write(file_content.encode('utf8')) - new_file.close() - - -if __name__ == "__main__": - get_wikipedia_languages() - get_google_languages() - join_language_lists() - filter_single_country_languages() - write_languages_file()