From 05593a8bbd7dfc23f9bf8632bdac9c5c3efcc114 Mon Sep 17 00:00:00 2001 From: Cristian-J <59663672+Cristian-J@users.noreply.github.com> Date: Wed, 8 Jan 2020 19:05:27 +0000 Subject: [PATCH] Ignore links that start with a hyphen or a dot If you use filter blacklists you'll end up with many invalid links that start with a hyphen or a dot in the final blacklist. --- .../generate-domains-blacklist.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/utils/generate-domains-blacklists/generate-domains-blacklist.py b/utils/generate-domains-blacklists/generate-domains-blacklist.py index 466c5372..43eba48b 100755 --- a/utils/generate-domains-blacklists/generate-domains-blacklist.py +++ b/utils/generate-domains-blacklists/generate-domains-blacklist.py @@ -52,14 +52,14 @@ def parse_list(content, trusted=False): rx_comment = re.compile(r"^(#|$)") rx_inline_comment = re.compile(r"\s*#\s*[a-z0-9-].*$") rx_u = re.compile( - r"^@*\|\|([a-z0-9.-]+[.][a-z]{2,})\^?(\$(popup|third-party))?$") - rx_l = re.compile(r"^([a-z0-9.-]+[.][a-z]{2,})$") + r"^@*\|\|([a-z0-9][a-z0-9.-]*[.][a-z]{2,})\^?(\$(popup|third-party))?$") + rx_l = re.compile(r"^([a-z0-9][a-z0-9.-]*[.][a-z]{2,})$") rx_h = re.compile( - r"^[0-9]{1,3}[.][0-9]{1,3}[.][0-9]{1,3}[.][0-9]{1,3}\s+([a-z0-9.-]+[.][a-z]{2,})$" + r"^[0-9]{1,3}[.][0-9]{1,3}[.][0-9]{1,3}[.][0-9]{1,3}\s+([a-z0-9][a-z0-9.-]*[.][a-z]{2,})$" ) - rx_mdl = re.compile(r'^"[^"]+","([a-z0-9.-]+[.][a-z]{2,})",') - rx_b = re.compile(r"^([a-z0-9.-]+[.][a-z]{2,}),.+,[0-9: /-]+,") - rx_dq = re.compile(r"^address=/([a-z0-9.-]+[.][a-z]{2,})/.") + rx_mdl = re.compile(r'^"[^"]+","([a-z0-9][a-z0-9.-]*[.][a-z]{2,})",') + rx_b = re.compile(r"^([a-z0-9][a-z0-9.-]*[.][a-z]{2,}),.+,[0-9: /-]+,") + rx_dq = re.compile(r"^address=/([a-z0-9][a-z0-9.-]*[.][a-z]{2,})/.") if trusted: return parse_trusted_list(content)