From 066bd916bf0c0344c978d2ea46cf9e9960841a61 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Sun, 28 May 2017 15:46:45 +0200 Subject: [PATCH 1/2] [mod] fetch firefox versions in a standalone script --- searx/data/useragents.json | 15 ++++++++ searx/utils.py | 28 ++++---------- utils/fetch_firefox_version.py | 69 ++++++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 20 deletions(-) create mode 100644 searx/data/useragents.json create mode 100755 utils/fetch_firefox_version.py diff --git a/searx/data/useragents.json b/searx/data/useragents.json new file mode 100644 index 00000000..ba80ce88 --- /dev/null +++ b/searx/data/useragents.json @@ -0,0 +1,15 @@ +{ + "os": [ + "Windows NT 10; WOW64", + "X11; Linux x86_64" + ], + "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}", + "versions": [ + "59.0.2", + "59.0.1", + "59.0", + "58.0.2", + "58.0.1", + "58.0" + ] +} \ No newline at end of file diff --git a/searx/utils.py b/searx/utils.py index bd6c3fe2..f457284e 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -10,8 +10,10 @@ from codecs import getincrementalencoder from imp import load_source from numbers import Number from os.path import splitext, join +from io import open from random import choice import sys +import json from searx import settings from searx.version import VERSION_STRING @@ -39,29 +41,11 @@ else: logger = logger.getChild('utils') -ua_versions = ('52.8.1', - '53.0', - '54.0', - '55.0', - '56.0', - '57.0', - '58.0', - '59.0', - '60.0.2') - -ua_os = ('Windows NT 6.3; WOW64', - 'X11; Linux x86_64', - 'X11; Linux x86') - -ua = "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}" - blocked_tags = ('script', 'style') - -def gen_useragent(os=None): - # TODO - return ua.format(os=os or choice(ua_os), version=choice(ua_versions)) +useragents = json.loads(open(os.path.dirname(os.path.realpath(__file__)) + + "/data/useragents.json", 'r', encoding='utf-8').read()) def searx_useragent(): @@ -70,6 +54,10 @@ def searx_useragent(): suffix=settings['outgoing'].get('useragent_suffix', '')) +def gen_useragent(): + return str(useragents['ua'].format(os=choice(useragents['os']), version=choice(useragents['versions']))) + + def highlight_content(content, query): if not content: diff --git a/utils/fetch_firefox_version.py b/utils/fetch_firefox_version.py new file mode 100755 index 00000000..21d6e82f --- /dev/null +++ b/utils/fetch_firefox_version.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python + +# set path +from sys import path +from os.path import realpath, dirname +path.append(realpath(dirname(realpath(__file__)) + '/../')) + +# +import json +import requests +import re +from distutils.version import LooseVersion, StrictVersion +from lxml import html +from searx.url_utils import urlparse, urljoin + +URL = 'https://ftp.mozilla.org/pub/firefox/releases/' +RELEASE_PATH = '/pub/firefox/releases/' + +NORMAL_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?(esr)?$') +# BETA_REGEX = re.compile('.*[0-9]b([0-9\-a-z]+)$') +# ESR_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?esr$') + +# +useragent = { + "versions": (), + "os": ('Windows NT 10; WOW64', + 'X11; Linux x86_64'), + "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}" +} + + +def fetch_firefox_versions(): + resp = requests.get(URL, timeout=2.0) + if resp.status_code != 200: + raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code) + else: + dom = html.fromstring(resp.text) + versions = [] + + for link in dom.xpath('//a/@href'): + url = urlparse(urljoin(URL, link)) + path = url.path + if path.startswith(RELEASE_PATH): + version = path[len(RELEASE_PATH):-1] + if NORMAL_REGEX.match(version): + versions.append(LooseVersion(version)) + + list.sort(versions, reverse=True) + return versions + + +def fetch_firefox_last_versions(): + versions = fetch_firefox_versions() + + result = [] + major_last = versions[0].version[0] + major_list = (major_last, major_last - 1) + for version in versions: + major_current = version.version[0] + if major_current in major_list and 'esr' not in version.version: + result.append(version.vstring) + + return result + + +useragent["versions"] = fetch_firefox_last_versions() +f = open("../searx/data/useragents.json", "wb") +json.dump(useragent, f, sort_keys=True, indent=4, ensure_ascii=False, encoding="utf-8") +f.close() From 50c836864a9a7a765561d886b11f44d8cea0bce9 Mon Sep 17 00:00:00 2001 From: Alex Date: Sun, 5 Aug 2018 10:55:42 +0200 Subject: [PATCH 2/2] fetch_firefox_version.py : compatible with Python 3 and minor fixes. --- searx/data/useragents.json | 21 ++++++++++----------- searx/utils.py | 4 ++-- utils/fetch_firefox_version.py | 20 ++++++++++++-------- 3 files changed, 24 insertions(+), 21 deletions(-) diff --git a/searx/data/useragents.json b/searx/data/useragents.json index ba80ce88..850bc418 100644 --- a/searx/data/useragents.json +++ b/searx/data/useragents.json @@ -1,15 +1,14 @@ { - "os": [ - "Windows NT 10; WOW64", - "X11; Linux x86_64" - ], - "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}", + "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}", "versions": [ - "59.0.2", - "59.0.1", - "59.0", - "58.0.2", - "58.0.1", - "58.0" + "61.0.1", + "61.0", + "60.0.2", + "60.0.1", + "60.0" + ], + "os": [ + "Windows NT 10; WOW64", + "X11; Linux x86_64" ] } \ No newline at end of file diff --git a/searx/utils.py b/searx/utils.py index f457284e..dfa22c5f 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -54,8 +54,8 @@ def searx_useragent(): suffix=settings['outgoing'].get('useragent_suffix', '')) -def gen_useragent(): - return str(useragents['ua'].format(os=choice(useragents['os']), version=choice(useragents['versions']))) +def gen_useragent(os=None): + return str(useragents['ua'].format(os=os or choice(useragents['os']), version=choice(useragents['versions']))) def highlight_content(content, query): diff --git a/utils/fetch_firefox_version.py b/utils/fetch_firefox_version.py index 21d6e82f..ed179585 100755 --- a/utils/fetch_firefox_version.py +++ b/utils/fetch_firefox_version.py @@ -2,7 +2,7 @@ # set path from sys import path -from os.path import realpath, dirname +from os.path import realpath, dirname, join path.append(realpath(dirname(realpath(__file__)) + '/../')) # @@ -12,16 +12,17 @@ import re from distutils.version import LooseVersion, StrictVersion from lxml import html from searx.url_utils import urlparse, urljoin +from searx import searx_dir URL = 'https://ftp.mozilla.org/pub/firefox/releases/' RELEASE_PATH = '/pub/firefox/releases/' -NORMAL_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?(esr)?$') +NORMAL_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?$') # BETA_REGEX = re.compile('.*[0-9]b([0-9\-a-z]+)$') # ESR_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?esr$') # -useragent = { +useragents = { "versions": (), "os": ('Windows NT 10; WOW64', 'X11; Linux x86_64'), @@ -57,13 +58,16 @@ def fetch_firefox_last_versions(): major_list = (major_last, major_last - 1) for version in versions: major_current = version.version[0] - if major_current in major_list and 'esr' not in version.version: + if major_current in major_list: result.append(version.vstring) return result -useragent["versions"] = fetch_firefox_last_versions() -f = open("../searx/data/useragents.json", "wb") -json.dump(useragent, f, sort_keys=True, indent=4, ensure_ascii=False, encoding="utf-8") -f.close() +def get_useragents_filename(): + return join(join(searx_dir, "data"), "useragents.json") + + +useragents["versions"] = fetch_firefox_last_versions() +with open(get_useragents_filename(), "w") as f: + json.dump(useragents, f, indent=4, ensure_ascii=False)