libredirect/src/instances/get_instances.py

293 lines
9.0 KiB
Python

# Note: Run this script from the root of the repo
import requests
import json
from urllib.parse import urlparse
from bs4 import BeautifulSoup
import re
from colorama import Fore, Back, Style
mightyList = {}
def filterLastSlash(urlList):
tmp = []
for i in urlList:
if i.endswith('/'):
tmp.append(i[:-1])
print(Fore.YELLOW + "filtered " + Style.RESET_ALL + i)
else:
tmp.append(i)
return tmp
# Invidious
r = requests.get('https://api.invidious.io/instances.json')
rJson = json.loads(r.text)
invidiousList = {}
invidiousList['normal'] = []
invidiousList['tor'] = []
for instance in rJson:
if instance[1]['type'] == 'https':
invidiousList['normal'].append(instance[1]['uri'])
elif instance[1]['type'] == 'onion':
invidiousList['tor'].append(instance[1]['uri'])
mightyList['invidious'] = invidiousList
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'Invidious')
# ProxiTok
r = requests.get('https://raw.githubusercontent.com/wiki/pablouser1/ProxiTok/Public-instances.md')
tmp = re.findall(
r"\| \[.*\]\(([-a-zA-Z0-9@:%_\+.~#?&//=]{2,}\.[a-z]{2,}\b(?:\/[-a-zA-Z0-9@:%_\+.~#?&//=]*)?)\)*\|*[A-Z]{0,}.*\|.*\|", r.text)
proxiTokList = {}
proxiTokList['normal'] = []
proxiTokList['tor'] = []
for item in tmp:
proxiTokList['normal'].append(item)
mightyList['proxiTok'] = proxiTokList
print('fetched ProxiTok')
# Send
r = requests.get('https://gitlab.com/timvisee/send-instances/-/raw/master/README.md')
tmp = re.findall(r"- ([-a-zA-Z0-9@:%_\+.~#?&//=]{2,}\.[a-z0-9]{2,}\b(?:\/[-a-zA-Z0-9@:%_\+.~#?&//=]*)?)\)*\|*[A-Z]{0,}", r.text)
sendList = {}
sendList['normal'] = []
sendList['tor'] = []
for item in tmp:
sendList['normal'].append(item)
mightyList['send'] = sendList
print(Fore.GREEN +'fetched ' + Style.RESET_ALL + 'Send')
# Nitter
r = requests.get('https://github.com/zedeus/nitter/wiki/Instances')
soup = BeautifulSoup(r.text, 'html.parser')
markdownBody = soup.find(class_='markdown-body')
tables = markdownBody.find_all('table')
tables.pop(3)
tables.pop(3)
nitterList = {}
nitterList['normal'] = []
nitterList['tor'] = []
for table in tables:
tbody = table.find('tbody')
trs = tbody.find_all('tr')
for tr in trs:
td = tr.find('td')
a = td.find('a')
url = a.contents[0]
if url.endswith('.onion'):
url = 'http://' + url
nitterList['tor'].append(url)
else:
url = 'https://' + url
nitterList['normal'].append(url)
mightyList['nitter'] = nitterList
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'Nitter')
# Bibliogram
r = requests.get('https://bibliogram.pussthecat.org/api/instances')
rJson = json.loads(r.text)
bibliogramList = {}
bibliogramList['normal'] = []
bibliogramList['tor'] = []
for item in rJson['data']:
bibliogramList['normal'].append(item['address'])
mightyList['bibliogram'] = bibliogramList
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'Bibliogram')
# LibReddit
r = requests.get(
'https://raw.githubusercontent.com/spikecodes/libreddit/master/README.md')
libredditList = {}
libredditList['normal'] = []
libredditList['tor'] = []
tmp = re.findall(
r"\| \[.*\]\(([-a-zA-Z0-9@:%_\+.~#?&//=]{2,}\.[a-z]{2,}\b(?:\/[-a-zA-Z0-9@:%_\+.~#?&//=]*)?)\)*\|*[A-Z]{0,}.*\|.*\|", r.text)
tmp = filterLastSlash(tmp)
for item in tmp:
if item.endswith('.onion'):
libredditList['tor'].append(item)
else:
libredditList['normal'].append(item)
mightyList['libreddit'] = libredditList
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'LibReddit')
# Teddit
r = requests.get(
'https://codeberg.org/teddit/teddit/raw/branch/main/instances.json')
rJson = json.loads(r.text)
tedditList = {}
tedditList['normal'] = []
tedditList['tor'] = []
for item in rJson:
url = item['url']
if url != '':
tedditList['normal'].append(url)
if 'onion' in item:
onion = item['onion']
if onion != '':
tedditList['tor'].append(onion)
mightyList['teddit'] = tedditList
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'Teddit')
# Wikiless
r = requests.get('https://wikiless.org/instances.json')
rJson = json.loads(r.text)
wikilessList = {}
wikilessList['normal'] = []
wikilessList['tor'] = []
wikilessList['i2p'] = []
for item in rJson:
if item.endswith('.onion'):
wikilessList['tor'].append('http://' + item)
elif item.endswith('.i2p'):
wikilessList['i2p'].append('http://' + item)
else:
wikilessList['normal'].append('https://' + item)
mightyList['wikiless'] = wikilessList
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'Wikiless')
# Scribe
r = requests.get(
'https://git.sr.ht/~edwardloveall/scribe/blob/main/docs/instances.json')
rJson = json.loads(r.text)
scribeList = {}
scribeList['normal'] = []
scribeList['tor'] = []
for item in rJson:
scribeList['normal'].append(item)
mightyList['scribe'] = scribeList
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'Scribe')
# SimplyTranslate
r = requests.get('https://simple-web.org/instances/simplytranslate')
simplyTranslateList = {}
simplyTranslateList['normal'] = []
for item in r.text.strip().split('\n'):
simplyTranslateList['normal'].append('https://' + item)
r = requests.get('https://simple-web.org/instances/simplytranslate_onion')
simplyTranslateList['tor'] = []
for item in r.text.strip().split('\n'):
simplyTranslateList['tor'].append('http://' + item)
mightyList['simplyTranslate'] = simplyTranslateList
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'SimplyTranslate')
# LinvgaTranslate
r = requests.get(
'https://raw.githubusercontent.com/TheDavidDelta/lingva-translate/main/instances.json')
rJson = json.loads(r.text)
lingvaList = {}
lingvaList['normal'] = []
lingvaList['tor'] = []
for item in rJson:
lingvaList['normal'].append(item)
mightyList['lingva'] = lingvaList
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'LinvgaTranslate')
# SearX, SearXNG
r = requests.get('https://searx.space/data/instances.json')
rJson = json.loads(r.text)
searxList = {}
searxList['tor'] = []
searxList['i2p'] = []
searxList['normal'] = []
searxngList = {}
searxngList['tor'] = []
searxngList['i2p'] = []
searxngList['normal'] = []
for item in rJson['instances']:
if item[:-1].endswith('.onion'):
if (rJson['instances'][item].get('generator') == 'searxng'):
searxngList['tor'].append(item[:-1])
else:
searxList['tor'].append(item[:-1])
elif item[:-1].endswith('.i2p'):
if (rJson['instances'][item].get('generator') == 'searxng'):
searxngList['i2p'].append(item[:-1])
else:
searxList['i2p'].append(item[:-1])
else:
if (rJson['instances'][item].get('generator') == 'searxng'):
searxngList['normal'].append(item[:-1])
else:
searxList['normal'].append(item[:-1])
mightyList['searx'] = searxList
mightyList['searxng'] = searxngList
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'SearX, SearXNG')
# Whoogle
r = requests.get(
'https://raw.githubusercontent.com/benbusby/whoogle-search/main/misc/instances.txt')
tmpList = r.text.strip().split('\n')
whoogleList = {}
whoogleList['normal'] = []
whoogleList['tor'] = []
for item in tmpList:
whoogleList['normal'].append(item)
mightyList['whoogle'] = whoogleList
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'Whoogle')
# Rimgo
r = requests.get(
'https://codeberg.org/video-prize-ranch/rimgo/raw/branch/main/instances.json')
rJson = json.loads(r.text)
rimgoList = {}
rimgoList['normal'] = []
rimgoList['tor'] = []
for item in rJson:
if item.endswith('.onion'):
rimgoList['tor'].append('http://' + item)
else:
rimgoList['normal'].append('https://' + item)
mightyList['rimgo'] = rimgoList
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'Rimgo')
# Peertube
r = requests.get(
'https://instances.joinpeertube.org/api/v1/instances?start=0&count=1045&sort=-createdAt')
rJson = json.loads(r.text)
myList = []
for k in rJson['data']:
myList.append('https://'+k['host'])
mightyList['peertube'] = myList
print(Fore.GREEN + 'fetched ' + Style.RESET_ALL + 'Peertube')
def isValid(url): # This code is contributed by avanitrachhadiya2155
return re.search(r"([-a-zA-Z0-9@:%_\+.~#?&//=]{2,}\.[a-z0-9]{2,}\b(?:\/[-a-zA-Z0-9@:%_\+.~#?&//=]*)?)\)*\|*[A-Z]{0,}", url)
for k1, v1 in mightyList.items():
if type(mightyList[k1]) is dict:
for k2, v2 in mightyList[k1].items():
for instance in mightyList[k1][k2]:
if (not isValid(instance)):
mightyList[k1][k2].remove(instance)
print("removed " + instance)
elif type(mightyList[k1]) is list:
for instance in mightyList[k1]:
if (not isValid(instance)):
mightyList[k1].remove(instance)
print("removed " + instance)
# Writing to file
json_object = json.dumps(mightyList, ensure_ascii=False, indent=2)
with open('./src/instances/data.json', 'w') as outfile:
outfile.write(json_object)
# print(json_object)
print(Fore.BLUE + 'wrote ' + Style.RESET_ALL + 'instances/data.json')