1
0
mirror of https://github.com/DNSCrypt/dnscrypt-proxy.git synced 2025-01-14 02:25:52 +01:00
This commit is contained in:
Frank Denis 2019-08-27 18:25:47 +02:00
parent b543ccabdb
commit 5f29677400

View File

@ -8,16 +8,19 @@ import sys
try: try:
import urllib2 as urllib import urllib2 as urllib
URLLIB_NEW = False URLLIB_NEW = False
except (ImportError, ModuleNotFoundError): except (ImportError, ModuleNotFoundError):
import urllib.request as urllib import urllib.request as urllib
from urllib.request import Request from urllib.request import Request
URLLIB_NEW = True URLLIB_NEW = True
def parse_time_restricted_list(content): def parse_time_restricted_list(content):
rx_comment = re.compile(r'^(#|$)') rx_comment = re.compile(r"^(#|$)")
rx_inline_comment = re.compile(r'\s*#\s*[a-z0-9-].*$') rx_inline_comment = re.compile(r"\s*#\s*[a-z0-9-].*$")
rx_trusted = re.compile(r'^([*a-z0-9.-]+)\s*(@\S+)?$') rx_trusted = re.compile(r"^([*a-z0-9.-]+)\s*(@\S+)?$")
names = set() names = set()
time_restrictions = {} time_restrictions = {}
@ -26,7 +29,7 @@ def parse_time_restricted_list(content):
line = str.lower(str.strip(line)) line = str.lower(str.strip(line))
if rx_comment.match(line): if rx_comment.match(line):
continue continue
line = rx_inline_comment.sub('', line) line = rx_inline_comment.sub("", line)
for rx in rx_set: for rx in rx_set:
matches = rx.match(line) matches = rx.match(line)
if not matches: if not matches:
@ -46,16 +49,16 @@ def parse_trusted_list(content):
def parse_list(content, trusted=False): def parse_list(content, trusted=False):
rx_comment = re.compile(r'^(#|$)') rx_comment = re.compile(r"^(#|$)")
rx_inline_comment = re.compile(r'\s*#\s*[a-z0-9-].*$') rx_inline_comment = re.compile(r"\s*#\s*[a-z0-9-].*$")
rx_u = re.compile( rx_u = re.compile(r"^@*\|\|([a-z0-9.-]+[.][a-z]{2,})\^?(\$(popup|third-party))?$")
r'^@*\|\|([a-z0-9.-]+[.][a-z]{2,})\^?(\$(popup|third-party))?$') rx_l = re.compile(r"^([a-z0-9.-]+[.][a-z]{2,})$")
rx_l = re.compile(r'^([a-z0-9.-]+[.][a-z]{2,})$')
rx_h = re.compile( rx_h = re.compile(
r'^[0-9]{1,3}[.][0-9]{1,3}[.][0-9]{1,3}[.][0-9]{1,3}\s+([a-z0-9.-]+[.][a-z]{2,})$') r"^[0-9]{1,3}[.][0-9]{1,3}[.][0-9]{1,3}[.][0-9]{1,3}\s+([a-z0-9.-]+[.][a-z]{2,})$"
)
rx_mdl = re.compile(r'^"[^"]+","([a-z0-9.-]+[.][a-z]{2,})",') rx_mdl = re.compile(r'^"[^"]+","([a-z0-9.-]+[.][a-z]{2,})",')
rx_b = re.compile(r'^([a-z0-9.-]+[.][a-z]{2,}),.+,[0-9: /-]+,') rx_b = re.compile(r"^([a-z0-9.-]+[.][a-z]{2,}),.+,[0-9: /-]+,")
rx_dq = re.compile(r'^address=/([a-z0-9.-]+[.][a-z]{2,})/.') rx_dq = re.compile(r"^address=/([a-z0-9.-]+[.][a-z]{2,})/.")
if trusted: if trusted:
return parse_trusted_list(content) return parse_trusted_list(content)
@ -67,7 +70,7 @@ def parse_list(content, trusted=False):
line = str.lower(str.strip(line)) line = str.lower(str.strip(line))
if rx_comment.match(line): if rx_comment.match(line):
continue continue
line = rx_inline_comment.sub('', line) line = rx_inline_comment.sub("", line)
for rx in rx_set: for rx in rx_set:
matches = rx.match(line) matches = rx.match(line)
if not matches: if not matches:
@ -81,8 +84,10 @@ def print_restricted_name(name, time_restrictions):
if name in time_restrictions: if name in time_restrictions:
print("{}\t{}".format(name, time_restrictions[name])) print("{}\t{}".format(name, time_restrictions[name]))
else: else:
print("# ignored: [{}] was in the time-restricted list, " print(
"but without a time restriction label".format(name)) "# ignored: [{}] was in the time-restricted list, "
"but without a time restriction label".format(name)
)
def load_from_url(url): def load_from_url(url):
@ -103,11 +108,10 @@ def load_from_url(url):
except urllib.URLError as err: except urllib.URLError as err:
raise Exception("[{}] could not be loaded: {}\n".format(url, err)) raise Exception("[{}] could not be loaded: {}\n".format(url, err))
if trusted is False and response.getcode() != 200: if trusted is False and response.getcode() != 200:
raise Exception("[{}] returned HTTP code {}\n".format( raise Exception("[{}] returned HTTP code {}\n".format(url, response.getcode()))
url, response.getcode()))
content = response.read() content = response.read()
if URLLIB_NEW: if URLLIB_NEW:
content = content.decode('utf-8', errors='replace') content = content.decode("utf-8", errors="replace")
return (content, trusted) return (content, trusted)
@ -137,7 +141,9 @@ def whitelist_from_url(url):
return names return names
def blacklists_from_config_file(file, whitelist, time_restricted_url, ignore_retrieval_failure): def blacklists_from_config_file(
file, whitelist, time_restricted_url, ignore_retrieval_failure
):
blacklists = {} blacklists = {}
whitelisted_names = set() whitelisted_names = set()
all_names = set() all_names = set()
@ -161,13 +167,14 @@ def blacklists_from_config_file(file, whitelist, time_restricted_url, ignore_ret
exit(1) exit(1)
# Time-based blacklist # Time-based blacklist
if time_restricted_url and not re.match(r'^[a-z0-9]+:', time_restricted_url): if time_restricted_url and not re.match(r"^[a-z0-9]+:", time_restricted_url):
time_restricted_url = "file:" + time_restricted_url time_restricted_url = "file:" + time_restricted_url
if time_restricted_url: if time_restricted_url:
time_restricted_content, _trusted = load_from_url(time_restricted_url) time_restricted_content, _trusted = load_from_url(time_restricted_url)
time_restricted_names, time_restrictions = parse_time_restricted_list( time_restricted_names, time_restrictions = parse_time_restricted_list(
time_restricted_content) time_restricted_content
)
if time_restricted_names: if time_restricted_names:
print("########## Time-based blacklist ##########\n") print("########## Time-based blacklist ##########\n")
@ -178,7 +185,7 @@ def blacklists_from_config_file(file, whitelist, time_restricted_url, ignore_ret
whitelisted_names |= time_restricted_names whitelisted_names |= time_restricted_names
# Whitelist # Whitelist
if whitelist and not re.match(r'^[a-z0-9]+:', whitelist): if whitelist and not re.match(r"^[a-z0-9]+:", whitelist):
whitelist = "file:" + whitelist whitelist = "file:" + whitelist
whitelisted_names |= whitelist_from_url(whitelist) whitelisted_names |= whitelist_from_url(whitelist)
@ -207,17 +214,33 @@ def blacklists_from_config_file(file, whitelist, time_restricted_url, ignore_ret
argp = argparse.ArgumentParser( argp = argparse.ArgumentParser(
description="Create a unified blacklist from a set of local and remote files") description="Create a unified blacklist from a set of local and remote files"
argp.add_argument("-c", "--config", default="domains-blacklist.conf", )
help="file containing blacklist sources") argp.add_argument(
argp.add_argument("-w", "--whitelist", default="domains-whitelist.txt", "-c",
help="file containing a set of names to exclude from the blacklist") "--config",
argp.add_argument("-r", "--time-restricted", default="domains-time-restricted.txt", default="domains-blacklist.conf",
help="file containing a set of names to be time restricted") help="file containing blacklist sources",
argp.add_argument("-i", "--ignore-retrieval-failure", action='store_true', )
help="generate list even if some urls couldn't be retrieved") argp.add_argument(
argp.add_argument("-t", "--timeout", default=30, "-w",
help="URL open timeout") "--whitelist",
default="domains-whitelist.txt",
help="file containing a set of names to exclude from the blacklist",
)
argp.add_argument(
"-r",
"--time-restricted",
default="domains-time-restricted.txt",
help="file containing a set of names to be time restricted",
)
argp.add_argument(
"-i",
"--ignore-retrieval-failure",
action="store_true",
help="generate list even if some urls couldn't be retrieved",
)
argp.add_argument("-t", "--timeout", default=30, help="URL open timeout")
args = argp.parse_args() args = argp.parse_args()
conf = args.config conf = args.config
@ -225,5 +248,5 @@ whitelist = args.whitelist
time_restricted = args.time_restricted time_restricted = args.time_restricted
ignore_retrieval_failure = args.ignore_retrieval_failure ignore_retrieval_failure = args.ignore_retrieval_failure
blacklists_from_config_file( blacklists_from_config_file(conf, whitelist, time_restricted, ignore_retrieval_failure)
conf, whitelist, time_restricted, ignore_retrieval_failure)