Added simple url validation for the python fetching script #171
This commit is contained in:
parent
4faf8f0d67
commit
ec3089c8bc
|
@ -17,9 +17,9 @@
|
||||||
"https://invidious.lunar.icu",
|
"https://invidious.lunar.icu",
|
||||||
"https://invidious.mutahar.rocks",
|
"https://invidious.mutahar.rocks",
|
||||||
"https://invidious.weblibre.org",
|
"https://invidious.weblibre.org",
|
||||||
|
"https://invidious.esmailelbob.xyz",
|
||||||
"https://invidious.privacy.gd",
|
"https://invidious.privacy.gd",
|
||||||
"https://youtube.076.ne.jp",
|
"https://youtube.076.ne.jp",
|
||||||
"https://invidious.esmailelbob.xyz",
|
|
||||||
"https://invidious.namazso.eu"
|
"https://invidious.namazso.eu"
|
||||||
],
|
],
|
||||||
"tor": [
|
"tor": [
|
||||||
|
@ -175,7 +175,6 @@
|
||||||
"https://libreddit.winscloud.net",
|
"https://libreddit.winscloud.net",
|
||||||
"https://libreddit.tiekoetter.com",
|
"https://libreddit.tiekoetter.com",
|
||||||
"https://reddit.rtrace.io",
|
"https://reddit.rtrace.io",
|
||||||
"http://libreddit.lqs5fjmajyp7rvp4qvyubwofzi6d4imua7vs237rkc4m5qogitqwrgyd.onion/",
|
|
||||||
"https://libreddit.lunar.icu"
|
"https://libreddit.lunar.icu"
|
||||||
],
|
],
|
||||||
"tor": [
|
"tor": [
|
||||||
|
@ -190,7 +189,8 @@
|
||||||
"http://libredoxhxwnmsb6dvzzd35hmgzmawsq5i764es7witwhddvpc2razid.onion",
|
"http://libredoxhxwnmsb6dvzzd35hmgzmawsq5i764es7witwhddvpc2razid.onion",
|
||||||
"http://libreddit.2syis2nnyytz6jnusnjurva4swlaizlnleiks5mjp46phuwjbdjqwgqd.onion",
|
"http://libreddit.2syis2nnyytz6jnusnjurva4swlaizlnleiks5mjp46phuwjbdjqwgqd.onion",
|
||||||
"http://ol5begilptoou34emq2sshf3may3hlblvipdjtybbovpb7c7zodxmtqd.onion",
|
"http://ol5begilptoou34emq2sshf3may3hlblvipdjtybbovpb7c7zodxmtqd.onion",
|
||||||
"http://lbrdtjaj7567ptdd4rv74lv27qhxfkraabnyphgcvptl64ijx2tijwid.onion"
|
"http://lbrdtjaj7567ptdd4rv74lv27qhxfkraabnyphgcvptl64ijx2tijwid.onion",
|
||||||
|
"http://libreddit.lqs5fjmajyp7rvp4qvyubwofzi6d4imua7vs237rkc4m5qogitqwrgyd.onion"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"teddit": {
|
"teddit": {
|
||||||
|
@ -230,12 +230,14 @@
|
||||||
"https://wikiless.sethforprivacy.com",
|
"https://wikiless.sethforprivacy.com",
|
||||||
"https://wiki.604kph.xyz",
|
"https://wiki.604kph.xyz",
|
||||||
"https://wikiless.lunar.icu",
|
"https://wikiless.lunar.icu",
|
||||||
"https://https://wiki.froth.zone",
|
"https://https://wiki.froth.zone"
|
||||||
"https://hflqp2ejxygpj6cdwo3ogfieqmxw3b56w7dblt7bor2ltwk6kcfa.b32.i2p"
|
|
||||||
],
|
],
|
||||||
"tor": [
|
"tor": [
|
||||||
"http://dj2tbh2nqfxyfmvq33cjmhuw7nb6am7thzd3zsjvizeqf374fixbrxyd.onion",
|
"http://dj2tbh2nqfxyfmvq33cjmhuw7nb6am7thzd3zsjvizeqf374fixbrxyd.onion",
|
||||||
"http://c2pesewpalbi6lbfc5hf53q4g3ovnxe4s7tfa6k2aqkf7jd7a7dlz5ad.onion"
|
"http://c2pesewpalbi6lbfc5hf53q4g3ovnxe4s7tfa6k2aqkf7jd7a7dlz5ad.onion"
|
||||||
|
],
|
||||||
|
"i2p": [
|
||||||
|
"http://hflqp2ejxygpj6cdwo3ogfieqmxw3b56w7dblt7bor2ltwk6kcfa.b32.i2p"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"scribe": {
|
"scribe": {
|
||||||
|
|
|
@ -5,13 +5,19 @@ import json
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import re
|
import re
|
||||||
|
from colorama import Fore, Back, Style
|
||||||
|
|
||||||
mightyList = {}
|
mightyList = {}
|
||||||
|
|
||||||
|
def filterLastSlash(urlList):
|
||||||
def get_host_name(link):
|
tmp = []
|
||||||
url = urlparse(link)
|
for i in urlList:
|
||||||
return url.netloc
|
if i.endswith('/'):
|
||||||
|
tmp.append(i[:-1])
|
||||||
|
print(Fore.YELLOW + "filtered " + Style.RESET_ALL + i)
|
||||||
|
else:
|
||||||
|
tmp.append(i)
|
||||||
|
return tmp
|
||||||
|
|
||||||
|
|
||||||
# Invidious
|
# Invidious
|
||||||
|
@ -26,7 +32,7 @@ for instance in rJson:
|
||||||
elif instance[1]['type'] == 'onion':
|
elif instance[1]['type'] == 'onion':
|
||||||
invidiousList['tor'].append(instance[1]['uri'])
|
invidiousList['tor'].append(instance[1]['uri'])
|
||||||
mightyList['invidious'] = invidiousList
|
mightyList['invidious'] = invidiousList
|
||||||
print('fetched Invidious')
|
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'Invidious')
|
||||||
|
|
||||||
|
|
||||||
# Nitter
|
# Nitter
|
||||||
|
@ -53,10 +59,10 @@ for table in tables:
|
||||||
url = 'https://' + url
|
url = 'https://' + url
|
||||||
nitterList['normal'].append(url)
|
nitterList['normal'].append(url)
|
||||||
mightyList['nitter'] = nitterList
|
mightyList['nitter'] = nitterList
|
||||||
print('fetched Nitter')
|
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'Nitter')
|
||||||
|
|
||||||
# Bibliogram
|
# Bibliogram
|
||||||
r = requests.get('https://bibliogram.1d4.us/api/instances')
|
r = requests.get('https://bibliogram.pussthecat.org/api/instances')
|
||||||
rJson = json.loads(r.text)
|
rJson = json.loads(r.text)
|
||||||
bibliogramList = {}
|
bibliogramList = {}
|
||||||
bibliogramList['normal'] = []
|
bibliogramList['normal'] = []
|
||||||
|
@ -64,7 +70,7 @@ bibliogramList['tor'] = []
|
||||||
for item in rJson['data']:
|
for item in rJson['data']:
|
||||||
bibliogramList['normal'].append(item['address'])
|
bibliogramList['normal'].append(item['address'])
|
||||||
mightyList['bibliogram'] = bibliogramList
|
mightyList['bibliogram'] = bibliogramList
|
||||||
print('fetched Bibliogram')
|
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'Bibliogram')
|
||||||
|
|
||||||
# LibReddit
|
# LibReddit
|
||||||
r = requests.get(
|
r = requests.get(
|
||||||
|
@ -72,15 +78,19 @@ r = requests.get(
|
||||||
libredditList = {}
|
libredditList = {}
|
||||||
libredditList['normal'] = []
|
libredditList['normal'] = []
|
||||||
libredditList['tor'] = []
|
libredditList['tor'] = []
|
||||||
|
|
||||||
tmp = re.findall(
|
tmp = re.findall(
|
||||||
r"\| \[.*\]\(([-a-zA-Z0-9@:%_\+.~#?&//=]{2,}\.[a-z]{2,}\b(?:\/[-a-zA-Z0-9@:%_\+.~#?&//=]*)?)\)*\|*[A-Z]{0,}.*\|.*\|", r.text)
|
r"\| \[.*\]\(([-a-zA-Z0-9@:%_\+.~#?&//=]{2,}\.[a-z]{2,}\b(?:\/[-a-zA-Z0-9@:%_\+.~#?&//=]*)?)\)*\|*[A-Z]{0,}.*\|.*\|", r.text)
|
||||||
|
|
||||||
|
tmp = filterLastSlash(tmp)
|
||||||
|
|
||||||
for item in tmp:
|
for item in tmp:
|
||||||
if item.endswith('.onion'):
|
if item.endswith('.onion'):
|
||||||
libredditList['tor'].append(item)
|
libredditList['tor'].append(item)
|
||||||
else:
|
else:
|
||||||
libredditList['normal'].append(item)
|
libredditList['normal'].append(item)
|
||||||
mightyList['libreddit'] = libredditList
|
mightyList['libreddit'] = libredditList
|
||||||
print('fetched LibReddit')
|
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'LibReddit')
|
||||||
|
|
||||||
# Teddit
|
# Teddit
|
||||||
r = requests.get(
|
r = requests.get(
|
||||||
|
@ -99,7 +109,7 @@ for item in rJson:
|
||||||
tedditList['tor'].append(onion)
|
tedditList['tor'].append(onion)
|
||||||
|
|
||||||
mightyList['teddit'] = tedditList
|
mightyList['teddit'] = tedditList
|
||||||
print('fetched Teddit')
|
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'Teddit')
|
||||||
|
|
||||||
|
|
||||||
# Wikiless
|
# Wikiless
|
||||||
|
@ -117,7 +127,7 @@ for item in rJson:
|
||||||
else:
|
else:
|
||||||
wikilessList['normal'].append('https://' + item)
|
wikilessList['normal'].append('https://' + item)
|
||||||
mightyList['wikiless'] = wikilessList
|
mightyList['wikiless'] = wikilessList
|
||||||
print('fetched Wikiless')
|
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'Wikiless')
|
||||||
|
|
||||||
# Scribe
|
# Scribe
|
||||||
r = requests.get(
|
r = requests.get(
|
||||||
|
@ -129,8 +139,7 @@ scribeList['tor'] = []
|
||||||
for item in rJson:
|
for item in rJson:
|
||||||
scribeList['normal'].append(item)
|
scribeList['normal'].append(item)
|
||||||
mightyList['scribe'] = scribeList
|
mightyList['scribe'] = scribeList
|
||||||
print('fetched Scribe')
|
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'Scribe')
|
||||||
|
|
||||||
|
|
||||||
# SimplyTranslate
|
# SimplyTranslate
|
||||||
r = requests.get('https://simple-web.org/instances/simplytranslate')
|
r = requests.get('https://simple-web.org/instances/simplytranslate')
|
||||||
|
@ -145,7 +154,7 @@ for item in r.text.strip().split('\n'):
|
||||||
simplyTranslateList['tor'].append('http://' + item)
|
simplyTranslateList['tor'].append('http://' + item)
|
||||||
|
|
||||||
mightyList['simplyTranslate'] = simplyTranslateList
|
mightyList['simplyTranslate'] = simplyTranslateList
|
||||||
print('fetched SimplyTranslate')
|
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'SimplyTranslate')
|
||||||
|
|
||||||
# LinvgaTranslate
|
# LinvgaTranslate
|
||||||
r = requests.get(
|
r = requests.get(
|
||||||
|
@ -157,7 +166,7 @@ lingvaList['tor'] = []
|
||||||
for item in rJson:
|
for item in rJson:
|
||||||
lingvaList['normal'].append(item)
|
lingvaList['normal'].append(item)
|
||||||
mightyList['lingva'] = lingvaList
|
mightyList['lingva'] = lingvaList
|
||||||
print('fetched LinvgaTranslate')
|
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'LinvgaTranslate')
|
||||||
|
|
||||||
|
|
||||||
# SearX, SearXNG
|
# SearX, SearXNG
|
||||||
|
@ -190,7 +199,7 @@ for item in rJson['instances']:
|
||||||
|
|
||||||
mightyList['searx'] = searxList
|
mightyList['searx'] = searxList
|
||||||
mightyList['searxng'] = searxngList
|
mightyList['searxng'] = searxngList
|
||||||
print('fetched SearX, SearXNG')
|
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'SearX, SearXNG')
|
||||||
|
|
||||||
# Whoogle
|
# Whoogle
|
||||||
r = requests.get(
|
r = requests.get(
|
||||||
|
@ -202,8 +211,7 @@ whoogleList['tor'] = []
|
||||||
for item in tmpList:
|
for item in tmpList:
|
||||||
whoogleList['normal'].append(item)
|
whoogleList['normal'].append(item)
|
||||||
mightyList['whoogle'] = whoogleList
|
mightyList['whoogle'] = whoogleList
|
||||||
print('fetched Whoogle')
|
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'Whoogle')
|
||||||
|
|
||||||
|
|
||||||
# Rimgo
|
# Rimgo
|
||||||
r = requests.get(
|
r = requests.get(
|
||||||
|
@ -219,7 +227,7 @@ for item in rJson:
|
||||||
else:
|
else:
|
||||||
rimgoList['normal'].append('https://' + item)
|
rimgoList['normal'].append('https://' + item)
|
||||||
mightyList['rimgo'] = rimgoList
|
mightyList['rimgo'] = rimgoList
|
||||||
print('fetched Rimgo')
|
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'Rimgo')
|
||||||
|
|
||||||
# Peertube
|
# Peertube
|
||||||
r = requests.get(
|
r = requests.get(
|
||||||
|
@ -231,12 +239,31 @@ for k in rJson['data']:
|
||||||
myList.append('https://'+k['host'])
|
myList.append('https://'+k['host'])
|
||||||
|
|
||||||
mightyList['peertube'] = myList
|
mightyList['peertube'] = myList
|
||||||
print('fetched Peertube')
|
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'Peertube')
|
||||||
|
|
||||||
|
|
||||||
|
def isValid(url):
    """Check whether *url* looks like a well-formed instance address.

    The whole string must match: an optional single ``http://`` or
    ``https://`` scheme, a dotted host name whose last label is at least two
    lowercase letters/digits, an optional ``:port`` and an optional path.

    The previous implementation used an unanchored ``re.search``, so any
    string that merely *contained* something host-like was accepted (for
    example ``https://https://wiki.froth.zone``).

    Returns the ``re.Match`` object when the URL is accepted, otherwise
    ``None`` -- callers only rely on its truthiness.
    """
    return re.fullmatch(
        r"(?:https?://)?"                       # at most one scheme
        r"[-a-zA-Z0-9@%_+.~#?&=]+"              # host labels (no ':' or '/')
        r"\.[a-z0-9]{2,}"                       # final label, e.g. .org / .onion / .i2p
        r"(?::[0-9]+)?"                         # optional port
        r"(?:/[-a-zA-Z0-9@:%_+.~#?&/=]*)?",     # optional path
        url,
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Drop every instance URL that fails isValid() before the data is written out.
for instances in mightyList.values():
    # A service entry is either a dict of URL lists (e.g. 'normal' / 'tor')
    # or a single flat list of URLs.
    if isinstance(instances, dict):
        url_lists = list(instances.values())
    elif isinstance(instances, list):
        url_lists = [instances]
    else:
        continue

    for urls in url_lists:
        # Build the surviving entries separately: calling .remove() on a list
        # while iterating over it skips the element that follows each removal,
        # so consecutive invalid URLs would slip through unchecked.
        kept = []
        for instance in urls:
            if isValid(instance):
                kept.append(instance)
            else:
                print("removed " + instance)
        # Slice-assign so the existing list object is updated in place.
        urls[:] = kept
|
||||||
|
|
||||||
# Writing to file
|
# Writing to file
|
||||||
json_object = json.dumps(mightyList, ensure_ascii=False, indent=2)
|
json_object = json.dumps(mightyList, ensure_ascii=False, indent=2)
|
||||||
with open('./src/instances/data.json', 'w') as outfile:
|
with open('./src/instances/data.json', 'w') as outfile:
|
||||||
outfile.write(json_object)
|
outfile.write(json_object)
|
||||||
# print(json_object)
|
# print(json_object)
|
||||||
print('wrote instances/data.json')
|
|
||||||
|
print(Fore.BLUE + 'wrote ' + Style.RESET_ALL + 'instances/data.json')
|
||||||
|
|
Loading…
Reference in New Issue