Re-add -o / --output-file

2020-04-21 23:40:58 +02:00 · 2020-04-21 23:40:58 +02:00 · a71b531d2e
parent dcd6f8448d
commit a71b531d2e
1 changed files with 33 additions and 12 deletions
--- a/utils/generate-domains-blacklists/generate-domains-blacklist.py
+++ b/utils/generate-domains-blacklists/generate-domains-blacklist.py
@ -2,6 +2,8 @@

 # run with python generate-domains-blacklist.py > list.txt.tmp && mv -f list.txt.tmp list

+from __future__ import print_function
+
 import argparse
 import re
 import sys
@ -81,13 +83,14 @@ def parse_list(content, trusted=False):
    return names, time_restrictions


-def print_restricted_name(name, time_restrictions):
+def print_restricted_name(output_fd, name, time_restrictions):
    if name in time_restrictions:
-        print("{}\t{}".format(name, time_restrictions[name]))
+        print("{}\t{}".format(
+            name, time_restrictions[name]), file=output_fd, end='\n')
    else:
        print(
            "# ignored: [{}] was in the time-restricted list, "
-            "but without a time restriction label".format(name)
+            "but without a time restriction label".format(name), file=output_fd, end='\n'
        )


@ -115,7 +118,7 @@ def load_from_url(url):
    if URLLIB_NEW:
        content = content.decode("utf-8", errors="replace")

-    return (content, trusted)
+    return content, trusted


 def name_cmp(name):
@ -144,7 +147,7 @@ def whitelist_from_url(url):


 def blacklists_from_config_file(
-    file, whitelist, time_restricted_url, ignore_retrieval_failure
+    file, whitelist, time_restricted_url, ignore_retrieval_failure, output_file
 ):
    blacklists = {}
    whitelisted_names = set()
@ -172,6 +175,10 @@ def blacklists_from_config_file(
    if time_restricted_url and not re.match(r"^[a-z0-9]+:", time_restricted_url):
        time_restricted_url = "file:" + time_restricted_url

+    output_fd = sys.stdout
+    if output_file:
+        output_fd = open(output_file, "w")
+
    if time_restricted_url:
        time_restricted_content, _trusted = load_from_url(time_restricted_url)
        time_restricted_names, time_restrictions = parse_time_restricted_list(
@ -179,9 +186,10 @@ def blacklists_from_config_file(
        )

        if time_restricted_names:
-            print("########## Time-based blacklist ##########\n")
+            print("########## Time-based blacklist ##########\n",
+                  file=output_fd, end='\n')
            for name in time_restricted_names:
-                print_restricted_name(name, time_restrictions)
+                print_restricted_name(output_fd, name, time_restrictions)

        # Time restricted names should be whitelisted, or they could be always blocked
        whitelisted_names |= time_restricted_names
@ -194,7 +202,8 @@ def blacklists_from_config_file(

    # Process blacklists
    for url, names in blacklists.items():
-        print("\n\n########## Blacklist from {} ##########\n".format(url))
+        print("\n\n########## Blacklist from {} ##########\n".format(
+            url), file=output_fd, end='\n')
        ignored, whitelisted = 0, 0
        list_names = list()
        for name in names:
@ -208,11 +217,15 @@ def blacklists_from_config_file(

        list_names.sort(key=name_cmp)
        if ignored:
-            print("# Ignored duplicates: {}\n".format(ignored))
+            print("# Ignored duplicates: {}\n".format(
+                ignored), file=output_fd, end='\n')
        if whitelisted:
-            print("# Ignored entries due to the whitelist: {}\n".format(whitelisted))
+            print(
+                "# Ignored entries due to the whitelist: {}\n".format(whitelisted), file=output_fd, end='\n')
        for name in list_names:
-            print(name)
+            print(name, file=output_fd, end='\n')
+
+    output_fd.close()


 argp = argparse.ArgumentParser(
@ -242,13 +255,21 @@ argp.add_argument(
    action="store_true",
    help="generate list even if some urls couldn't be retrieved",
 )
+argp.add_argument(
+    "-o",
+    "--output-file",
+    default=None,
+    help="save generated blacklist to a text file with the provided file name",
+)
 argp.add_argument("-t", "--timeout", default=30, help="URL open timeout")
+
 args = argp.parse_args()

 conf = args.config
 whitelist = args.whitelist
 time_restricted = args.time_restricted
 ignore_retrieval_failure = args.ignore_retrieval_failure
+output_file = args.output_file

 blacklists_from_config_file(
-    conf, whitelist, time_restricted, ignore_retrieval_failure)
+    conf, whitelist, time_restricted, ignore_retrieval_failure, output_file)