Re-add -o / --output-file

Frank Denis 2020-04-21 23:40:58 +02:00
parent dcd6f8448d
commit a71b531d2e
1 changed file with 33 additions and 12 deletions

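This commit restores the -o/--output-file option, so the generated list can be written directly to a file instead of relying on shell redirection. Assuming the defaults added in the diff below, typical usage might look like:

    # previous usage, per the script's own header comment:
    #   python generate-domains-blacklist.py > list.txt.tmp && mv -f list.txt.tmp list
    # with this commit, the script can write the file itself:
    #   python generate-domains-blacklist.py -o list.txt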

@@ -2,6 +2,8 @@
 # run with python generate-domains-blacklist.py > list.txt.tmp && mv -f list.txt.tmp list
 
+from __future__ import print_function
+
 import argparse
 import re
 import sys
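A note on the added import: under Python 2, print is a statement and rejects the file= keyword that the rest of this diff depends on; from __future__ import print_function swaps in the Python 3 built-in (and is a no-op on Python 3). A minimal illustration:

    from __future__ import print_function  # harmless no-op on Python 3
    import sys

    # Without the __future__ import, this line is a SyntaxError on Python 2.
    print("example.com", file=sys.stderr)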
@@ -81,13 +83,14 @@ def parse_list(content, trusted=False):
     return names, time_restrictions
 
 
-def print_restricted_name(name, time_restrictions):
+def print_restricted_name(output_fd, name, time_restrictions):
     if name in time_restrictions:
-        print("{}\t{}".format(name, time_restrictions[name]))
+        print("{}\t{}".format(
+            name, time_restrictions[name]), file=output_fd, end='\n')
     else:
         print(
             "# ignored: [{}] was in the time-restricted list, "
-            "but without a time restriction label".format(name)
+            "but without a time restriction label".format(name), file=output_fd, end='\n'
         )
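For context, print_restricted_name now takes the destination stream as its first parameter. The underlying mechanism is simply that print() writes to any file-like object passed via file=, defaulting to sys.stdout. A self-contained sketch of the pattern (emit is a hypothetical helper, not part of the script):

    import sys

    def emit(line, output_fd=sys.stdout):
        # print() accepts any writable file-like object via file=;
        # with the default, output goes to stdout exactly as before.
        print(line, file=output_fd)

    emit("example.com")                    # stdout
    with open("list.txt", "w") as fd:
        emit("example.com", output_fd=fd)  # written to list.txt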
@@ -115,7 +118,7 @@ def load_from_url(url):
     if URLLIB_NEW:
         content = content.decode("utf-8", errors="replace")
-    return (content, trusted)
+    return content, trusted
 
 
 def name_cmp(name):
@@ -144,7 +147,7 @@ def whitelist_from_url(url):
 def blacklists_from_config_file(
-    file, whitelist, time_restricted_url, ignore_retrieval_failure
+    file, whitelist, time_restricted_url, ignore_retrieval_failure, output_file
 ):
     blacklists = {}
     whitelisted_names = set()
@@ -172,6 +175,10 @@ def blacklists_from_config_file(
     if time_restricted_url and not re.match(r"^[a-z0-9]+:", time_restricted_url):
         time_restricted_url = "file:" + time_restricted_url
 
+    output_fd = sys.stdout
+    if output_file:
+        output_fd = open(output_file, "w")
+
     if time_restricted_url:
         time_restricted_content, _trusted = load_from_url(time_restricted_url)
         time_restricted_names, time_restrictions = parse_time_restricted_list(
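The hunk above selects the destination once: output_fd starts as sys.stdout and is only replaced by a real file handle when an output file was requested. One caveat is that the unconditional output_fd.close() later in this diff also closes sys.stdout when no -o was given. A common alternative, sketched here under the assumption of Python 3.7+ (not what this commit does, since the script still supports Python 2 via the __future__ import), is contextlib.nullcontext:

    import contextlib
    import sys

    def open_output(output_file):
        # Yields a real file that is closed on exit, or wraps
        # sys.stdout so leaving the block does not close it.
        if output_file:
            return open(output_file, "w")
        return contextlib.nullcontext(sys.stdout)

    with open_output(None) as output_fd:  # None -> fall back to stdout
        print("# header", file=output_fd)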
@@ -179,9 +186,10 @@ def blacklists_from_config_file(
         )
 
         if time_restricted_names:
-            print("########## Time-based blacklist ##########\n")
+            print("########## Time-based blacklist ##########\n",
+                  file=output_fd, end='\n')
             for name in time_restricted_names:
-                print_restricted_name(name, time_restrictions)
+                print_restricted_name(output_fd, name, time_restrictions)
 
         # Time restricted names should be whitelisted, or they could be always blocked
         whitelisted_names |= time_restricted_names
@@ -194,7 +202,8 @@ def blacklists_from_config_file(
     # Process blacklists
     for url, names in blacklists.items():
-        print("\n\n########## Blacklist from {} ##########\n".format(url))
+        print("\n\n########## Blacklist from {} ##########\n".format(
+            url), file=output_fd, end='\n')
         ignored, whitelisted = 0, 0
         list_names = list()
         for name in names:
@@ -208,11 +217,15 @@ def blacklists_from_config_file(
         list_names.sort(key=name_cmp)
         if ignored:
-            print("# Ignored duplicates: {}\n".format(ignored))
+            print("# Ignored duplicates: {}\n".format(
+                ignored), file=output_fd, end='\n')
         if whitelisted:
-            print("# Ignored entries due to the whitelist: {}\n".format(whitelisted))
+            print(
+                "# Ignored entries due to the whitelist: {}\n".format(whitelisted), file=output_fd, end='\n')
         for name in list_names:
-            print(name)
+            print(name, file=output_fd, end='\n')
+
+    output_fd.close()
 
 
 argp = argparse.ArgumentParser(
@@ -242,13 +255,21 @@ argp.add_argument(
     action="store_true",
     help="generate list even if some urls couldn't be retrieved",
 )
+argp.add_argument(
+    "-o",
+    "--output-file",
+    default=None,
+    help="save generated blacklist to a text file with the provided file name",
+)
 argp.add_argument("-t", "--timeout", default=30, help="URL open timeout")
 
 args = argp.parse_args()
 
 conf = args.config
 whitelist = args.whitelist
 time_restricted = args.time_restricted
 ignore_retrieval_failure = args.ignore_retrieval_failure
+output_file = args.output_file
 
 blacklists_from_config_file(
-    conf, whitelist, time_restricted, ignore_retrieval_failure)
+    conf, whitelist, time_restricted, ignore_retrieval_failure, output_file)
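End to end, the new option flows from the parser into blacklists_from_config_file. A condensed, self-contained sketch of that wiring (the hard-coded argument list is illustrative; argparse maps --output-file to the attribute args.output_file automatically):

    import argparse
    import sys

    argp = argparse.ArgumentParser()
    argp.add_argument("-o", "--output-file", default=None,
                      help="save generated blacklist to a text file")
    args = argp.parse_args(["-o", "list.txt"])  # use [] to fall back to stdout

    output_fd = open(args.output_file, "w") if args.output_file else sys.stdout
    print("example.com", file=output_fd)
    # Defensive variant: unlike the commit, only close real files.
    if output_fd is not sys.stdout:
        output_fd.close()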