mirror of
https://github.com/DNSCrypt/dnscrypt-proxy.git
synced 2025-01-14 02:25:52 +01:00
Import the generate-domains-blacklists tool
This commit is contained in:
parent
6ad53c75e6
commit
35e32b823f
93
utils/generate-domains-blacklists/domains-blacklist-all.conf
Normal file
93
utils/generate-domains-blacklists/domains-blacklist-all.conf
Normal file
@@ -0,0 +1,93 @@
|
||||
|
||||
##################################################################################
|
||||
# #
|
||||
# Generate a black list of domains using public data sources, and the local #
|
||||
# domains-blacklist-local-additions.txt file. #
|
||||
# #
|
||||
# Comment the URLs of the sources you want to disable, and run the script to #
|
||||
# build the dnscrypt-blacklist-domains.txt file: #
|
||||
# #
|
||||
# $ generate-domains-blacklist.py > dnscrypt-blacklist-domains.txt #
|
||||
# #
|
||||
# That blacklist file can then be used in the dnscrypt-proxy configuration: #
|
||||
# #
|
||||
# BlackList domains:/etc/dnscrypt-blacklist-domains.txt #
|
||||
# #
|
||||
##################################################################################
|
||||
|
||||
# Local additions
|
||||
file:domains-blacklist-local-additions.txt
|
||||
|
||||
# Bambenek malware C2s
|
||||
http://osint.bambenekconsulting.com/feeds/c2-dommasterlist.txt
|
||||
|
||||
# hpHosts’ Ad and tracking servers
|
||||
http://hosts-file.net/.%5Cad_servers.txt
|
||||
|
||||
# Malware domains
|
||||
http://mirror1.malwaredomains.com/files/justdomains
|
||||
|
||||
# Abuse.ch Ransomware Tracker
|
||||
http://ransomwaretracker.abuse.ch/downloads/RW_DOMBL.txt
|
||||
|
||||
# Malware Domain List
|
||||
http://www.malwaredomainlist.com/mdlcsv.php?inactive=off
|
||||
|
||||
# Adblock Warning Removal List
|
||||
https://easylist-downloads.adblockplus.org/antiadblockfilters.txt
|
||||
|
||||
# EasyList
|
||||
https://easylist-downloads.adblockplus.org/easylist_noelemhide.txt
|
||||
|
||||
# EasyList China
|
||||
https://easylist-downloads.adblockplus.org/easylistchina.txt
|
||||
|
||||
# Fanboy’s Social Blocking List
|
||||
https://easylist-downloads.adblockplus.org/fanboy-social.txt
|
||||
|
||||
# Peter Lowe’s Ad and tracking server list
|
||||
https://pgl.yoyo.org/adservers/serverlist.php
|
||||
|
||||
# Spam404
|
||||
https://raw.githubusercontent.com/Dawsey21/Lists/master/adblock-list.txt
|
||||
|
||||
# CJX Annoyance List
|
||||
https://raw.githubusercontent.com/cjx82630/cjxlist/master/cjxlist.txt
|
||||
|
||||
# EU: Prebake - Filter Obtrusive Cookie Notices
|
||||
https://raw.githubusercontent.com/liamja/Prebake/master/obtrusive.txt
|
||||
|
||||
# Malvertising filter list by Disconnect
|
||||
https://s3.amazonaws.com/lists.disconnect.me/simple_malvertising.txt
|
||||
|
||||
# Malware filter list by Disconnect
|
||||
https://s3.amazonaws.com/lists.disconnect.me/simple_malware.txt
|
||||
|
||||
# Basic tracking list by Disconnect
|
||||
https://s3.amazonaws.com/lists.disconnect.me/simple_tracking.txt
|
||||
|
||||
# Quidsup NoTrack
|
||||
https://raw.githubusercontent.com/quidsup/notrack/master/trackers.txt
|
||||
|
||||
# Sysctl list (ads)
|
||||
http://sysctl.org/cameleon/hosts
|
||||
|
||||
# KAD host file (fraud/adware) - https://github.com/azet12/KADhosts
|
||||
https://raw.githubusercontent.com/azet12/KADhosts/master/KADhosts.txt
|
||||
|
||||
# Fake news sites
|
||||
https://raw.githubusercontent.com/marktron/fakenews/master/fakenews
|
||||
|
||||
# Dynamic DNS services, sadly often used by malware
|
||||
http://mirror2.malwaredomains.com/files/dynamic_dns.txt
|
||||
|
||||
# Block pornography
|
||||
https://raw.githubusercontent.com/Clefspeare13/pornhosts/master/0.0.0.0/hosts
|
||||
https://raw.githubusercontent.com/Sinfonietta/hostfiles/master/pornography-hosts
|
||||
http://securemecca.com/Downloads/hosts.txt
|
||||
|
||||
# Block gambling sites
|
||||
https://raw.githubusercontent.com/Sinfonietta/hostfiles/master/gambling-hosts
|
||||
|
||||
# Block social media sites
|
||||
# https://raw.githubusercontent.com/Sinfonietta/hostfiles/master/social-hosts
|
@@ -0,0 +1,29 @@
|
||||
|
||||
# Local set of patterns to block
|
||||
|
||||
ad.*
|
||||
ads.*
|
||||
banner.*
|
||||
banners.*
|
||||
creatives.*
|
||||
oas.*
|
||||
oascentral.*
|
||||
stats.*
|
||||
tag.*
|
||||
telemetry.*
|
||||
tracker.*
|
||||
|
||||
# My Macbook constantly sends a lot of useless queries to *.local,
|
||||
# so I block them. *.lan is apparently another common one, and
|
||||
# *.localdomain and *.workgroup are common on Windows.
|
||||
|
||||
*.lan
|
||||
*.local
|
||||
*.localdomain
|
||||
*.workgroup
|
||||
|
||||
# eth0.me is hardcoded in tools such as Archey, but is not available any
|
||||
# more, causing issues such as terminal sessions taking a long time to
|
||||
# start.
|
||||
|
||||
eth0.me
|
106
utils/generate-domains-blacklists/domains-blacklist.conf
Normal file
106
utils/generate-domains-blacklists/domains-blacklist.conf
Normal file
@@ -0,0 +1,106 @@
|
||||
|
||||
##################################################################################
|
||||
# #
|
||||
# Generate a black list of domains using public data sources, and the local #
|
||||
# domains-blacklist-local-additions.txt file. #
|
||||
# #
|
||||
# The default configuration is just indicative, and corresponds to the one #
|
||||
# used to produce the public "mybase" set. #
|
||||
# #
|
||||
# Comment out the URLs of the sources you wish to disable, leave the ones #
|
||||
# you would like enabled uncommented. Then run the script to build the #
|
||||
# dnscrypt-blacklist-domains.txt file: #
|
||||
# #
|
||||
# $ generate-domains-blacklist.py > dnscrypt-blacklist-domains.txt #
|
||||
# #
|
||||
# Domains that should never be blocked can be put into a file named #
|
||||
# domains-whitelist.txt. #
|
||||
# #
|
||||
# That blacklist file can then be used in the dnscrypt-proxy configuration: #
|
||||
# #
|
||||
# BlackList domains:/etc/dnscrypt-blacklist-domains.txt #
|
||||
# #
|
||||
##################################################################################
|
||||
|
||||
# Local additions
|
||||
file:domains-blacklist-local-additions.txt
|
||||
|
||||
# Bambenek malware C2s
|
||||
http://osint.bambenekconsulting.com/feeds/c2-dommasterlist.txt
|
||||
|
||||
# hpHosts’ Ad and tracking servers
|
||||
http://hosts-file.net/.%5Cad_servers.txt
|
||||
|
||||
# Malware domains
|
||||
http://mirror1.malwaredomains.com/files/justdomains
|
||||
|
||||
# Abuse.ch Ransomware Tracker
|
||||
http://ransomwaretracker.abuse.ch/downloads/RW_DOMBL.txt
|
||||
|
||||
# Malware Domain List
|
||||
http://www.malwaredomainlist.com/mdlcsv.php?inactive=off
|
||||
|
||||
# Adblock Warning Removal List
|
||||
https://easylist-downloads.adblockplus.org/antiadblockfilters.txt
|
||||
|
||||
# EasyList
|
||||
https://easylist-downloads.adblockplus.org/easylist_noelemhide.txt
|
||||
|
||||
# EasyList China
|
||||
https://easylist-downloads.adblockplus.org/easylistchina.txt
|
||||
|
||||
# Fanboy’s Social Blocking List
|
||||
https://easylist-downloads.adblockplus.org/fanboy-social.txt
|
||||
|
||||
# Peter Lowe’s Ad and tracking server list
|
||||
https://pgl.yoyo.org/adservers/serverlist.php
|
||||
|
||||
# Spam404
|
||||
https://raw.githubusercontent.com/Dawsey21/Lists/master/adblock-list.txt
|
||||
|
||||
# CJX Annoyance List
|
||||
https://raw.githubusercontent.com/cjx82630/cjxlist/master/cjxlist.txt
|
||||
|
||||
# EU: Prebake - Filter Obtrusive Cookie Notices
|
||||
https://raw.githubusercontent.com/liamja/Prebake/master/obtrusive.txt
|
||||
|
||||
# Malvertising filter list by Disconnect
|
||||
https://s3.amazonaws.com/lists.disconnect.me/simple_malvertising.txt
|
||||
|
||||
# Malware filter list by Disconnect
|
||||
https://s3.amazonaws.com/lists.disconnect.me/simple_malware.txt
|
||||
|
||||
# Basic tracking list by Disconnect
|
||||
https://s3.amazonaws.com/lists.disconnect.me/simple_tracking.txt
|
||||
|
||||
# Sysctl list (ads)
|
||||
http://sysctl.org/cameleon/hosts
|
||||
|
||||
# KAD host file (fraud/adware) - https://github.com/azet12/KADhosts
|
||||
https://raw.githubusercontent.com/azet12/KADhosts/master/KADhosts.txt
|
||||
|
||||
# BarbBlock list (spurious and invalid DMCA takedowns)
|
||||
https://ssl.bblck.me/blacklists/domain-list.txt
|
||||
|
||||
# Dan Pollock's hosts list
|
||||
http://someonewhocares.org/hosts/hosts
|
||||
|
||||
# Websites potentially publishing fake news
|
||||
# https://raw.githubusercontent.com/marktron/fakenews/master/fakenews
|
||||
|
||||
# Quidsup NoTrack - Contains too many false positives to be enabled by default
|
||||
# https://raw.githubusercontent.com/quidsup/notrack/master/trackers.txt
|
||||
|
||||
# Dynamic DNS services, sadly often used by malware
|
||||
# http://mirror2.malwaredomains.com/files/dynamic_dns.txt
|
||||
|
||||
# Block pornography
|
||||
# https://raw.githubusercontent.com/Clefspeare13/pornhosts/master/0.0.0.0/hosts
|
||||
# https://raw.githubusercontent.com/Sinfonietta/hostfiles/master/pornography-hosts
|
||||
# http://securemecca.com/Downloads/hosts.txt
|
||||
|
||||
# Block gambling sites
|
||||
# https://raw.githubusercontent.com/Sinfonietta/hostfiles/master/gambling-hosts
|
||||
|
||||
# Block social media sites
|
||||
# https://raw.githubusercontent.com/Sinfonietta/hostfiles/master/social-hosts
|
23
utils/generate-domains-blacklists/domains-whitelist.txt
Normal file
23
utils/generate-domains-blacklists/domains-whitelist.txt
Normal file
@@ -0,0 +1,23 @@
|
||||
a-msedge.net
|
||||
amazon.com
|
||||
appsflyer.com
|
||||
azurewebsites.net
|
||||
cdnetworks.com
|
||||
cloudapp.net
|
||||
edgekey.net
|
||||
elasticbeanstalk.com
|
||||
invalid
|
||||
j.mp
|
||||
l-msedge.net
|
||||
lan
|
||||
localdomain
|
||||
microsoft.com
|
||||
msedge.net
|
||||
nsatc.net
|
||||
ovh.net
|
||||
pusher.com
|
||||
pusherapp.com
|
||||
spotify.com
|
||||
tagcommander.com
|
||||
tracker.debian.org
|
||||
windows.net
|
140
utils/generate-domains-blacklists/generate-domains-blacklist.py
Executable file
140
utils/generate-domains-blacklists/generate-domains-blacklist.py
Executable file
@@ -0,0 +1,140 @@
|
||||
#! /usr/bin/env python
|
||||
|
||||
# run with python generate-domains-blacklist.py > list.txt.tmp && mv -f list.txt.tmp list
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
import urllib2
|
||||
|
||||
|
||||
def parse_blacklist(content, trusted=False):
    """Extract domain names from a blacklist in any supported format.

    Recognized remote formats: Adblock Plus filters (||example.com^),
    plain one-domain-per-line lists, hosts files ("0.0.0.0 example.com"),
    Malware Domain List CSV exports, and Bambenek C2 CSV feeds.

    content -- raw text of the blacklist.
    trusted -- when True (local "file:" sources), a single permissive
               pattern is used instead, which also allows "*" wildcards.

    Returns a set of lowercased names/patterns.
    """
    rx_comment = re.compile(r'^(#|$)')
    rx_inline_comment = re.compile(r'\s*#\s*[a-z0-9-].*$')
    # Adblock Plus filter: ||example.com^ with optional $popup/$third-party
    rx_u = re.compile(r'^@*\|\|([a-z0-9.-]+[.][a-z]{2,})\^?(\$(popup|third-party))?$')
    # Bare domain name, one per line
    rx_l = re.compile(r'^([a-z0-9.-]+[.][a-z]{2,})$')
    # hosts-file entry: an IPv4 address followed by a name
    rx_h = re.compile(r'^[0-9]{1,3}[.][0-9]{1,3}[.][0-9]{1,3}[.][0-9]{1,3}\s+([a-z0-9.-]+[.][a-z]{2,})$')
    # Malware Domain List CSV export: "date","domain",...
    rx_mdl = re.compile(r'^"[^"]+","([a-z0-9.-]+[.][a-z]{2,})",')
    # Bambenek C2 feed: domain,description,date,...
    rx_b = re.compile(r'^([a-z0-9.-]+[.][a-z]{2,}),.+,[0-9: /-]+,')
    # Trusted local files: any name, wildcards ("ads.*") permitted
    rx_trusted = re.compile(r'^([*a-z0-9.-]+)$')

    names = set()
    rx_set = [rx_trusted] if trusted else [rx_u, rx_l, rx_h, rx_mdl, rx_b]
    for line in content.splitlines():
        line = line.strip().lower()
        if rx_comment.match(line):
            continue
        line = rx_inline_comment.sub('', line)
        for rx in rx_set:
            matches = rx.match(line)
            if not matches:
                continue
            names.add(matches.group(1))
    return names
|
||||
|
||||
|
||||
def list_from_url(url):
    """Fetch a blacklist source and parse it into a set of names.

    Local "file:" URLs are treated as trusted, which enables wildcard
    patterns in parse_blacklist and skips the HTTP status check.

    Raises Exception when the URL cannot be loaded, or when a remote
    source returns a non-200 HTTP status.
    """
    sys.stderr.write("Loading data from [{}]\n".format(url))
    req = urllib2.Request(url)
    trusted = False
    # NOTE: py2 urllib2 API; get_type() returns the URL scheme.
    if req.get_type() == "file":
        trusted = True
    response = None
    try:
        response = urllib2.urlopen(req, timeout=10)
    except urllib2.URLError as err:
        raise Exception("[{}] could not be loaded: {}\n".format(url, err))
    # "file:" responses have no HTTP status, so only check remote sources.
    if trusted is False and response.getcode() != 200:
        raise Exception("[{}] returned HTTP code {}\n".format(url, response.getcode()))
    content = response.read()

    return parse_blacklist(content, trusted)
|
||||
|
||||
|
||||
def name_cmp(name):
    """Sort key that reverses the dot-separated labels of a name, so
    related subdomains sort together ("www.example.com" -> "com.example.www").
    """
    return ".".join(reversed(name.split(".")))
|
||||
|
||||
|
||||
def has_suffix(names, name):
    """Return True when any proper parent domain of `name` is in `names`.

    The name itself does not count as its own suffix; only strictly
    shorter tails (including the empty string) are checked.
    """
    labels = name.split(".")
    for start in range(1, len(labels) + 1):
        if ".".join(labels[start:]) in names:
            return True
    return False
|
||||
|
||||
|
||||
def whitelist_from_url(url):
    """Load the whitelist from `url`; an empty/missing URL yields an
    empty set instead of an error.
    """
    return list_from_url(url) if url else set()
|
||||
|
||||
|
||||
def blacklists_from_config_file(file, whitelist, ignore_retrieval_failure):
    """Load every source listed in a config file and print a unified,
    deduplicated blacklist to stdout, grouped by source URL.

    file -- path to the config file, one URL (or "file:" path) per line;
            "#" lines and blank lines are skipped.
    whitelist -- path or URL of names to always exclude; a bare path is
                 automatically given a "file:" scheme.
    ignore_retrieval_failure -- when True, keep going if a source cannot
                               be retrieved instead of exiting with 1.
    """
    blacklists = {}
    all_names = set()
    unique_names = set()

    # Accept a bare file path for the whitelist by defaulting to "file:".
    if whitelist and not re.match(r'^[a-z0-9]+:', whitelist):
        whitelist = "file:" + whitelist

    whitelisted_names = whitelist_from_url(whitelist)

    with open(file) as fd:
        for line in fd:
            line = str.strip(line)
            if str.startswith(line, "#") or line == "":
                continue
            url = line
            try:
                names = list_from_url(url)
                blacklists[url] = names
                all_names |= names
            except Exception as e:
                # str(e) works for every exception type; e.message is
                # deprecated and missing on many exceptions.
                sys.stderr.write(str(e))
                if not ignore_retrieval_failure:
                    exit(1)

    for url, names in blacklists.items():
        print("\n\n########## Blacklist from {} ##########\n".format(url))
        ignored, whitelisted = 0, 0
        list_names = list()
        for name in names:
            # Skip entries covered by a parent domain from any source,
            # and exact duplicates already emitted for an earlier source.
            if has_suffix(all_names, name) or name in unique_names:
                ignored = ignored + 1
            elif has_suffix(whitelisted_names, name) or name in whitelisted_names:
                whitelisted = whitelisted + 1
            else:
                list_names.append(name)
                unique_names.add(name)

        # Reversed-label sort keeps related subdomains adjacent.
        list_names.sort(key=name_cmp)
        if ignored:
            print("# Ignored duplicates: {}\n".format(ignored))
        if whitelisted:
            print("# Ignored entries due to the whitelist: {}\n".format(whitelisted))
        for name in list_names:
            print(name)
|
||||
|
||||
|
||||
# Command-line entry point: build the unified blacklist from the
# configured sources and print it to stdout.
argp = argparse.ArgumentParser(
    description="Create a unified blacklist from a set of local and remote files")
argp.add_argument("-c", "--config", default="domains-blacklist.conf",
                  help="file containing blacklist sources")
argp.add_argument("-w", "--whitelist", default="domains-whitelist.txt",
                  help="file containing a set of names to exclude from the blacklist")
argp.add_argument("-i", "--ignore-retrieval-failure", action='store_true',
                  help="generate list even if some urls couldn't be retrieved")
args = argp.parse_args()

blacklists_from_config_file(
    args.config, args.whitelist, args.ignore_retrieval_failure)
|
Loading…
Reference in New Issue
Block a user