Add Copilot image scraping, update and fix telegram async, improve shared API

This commit is contained in:
2024-06-19 01:40:33 +02:00
parent 09cf925850
commit 4c403e516b
21 changed files with 386 additions and 98 deletions

0
ModWinDog/Codings.py Normal file → Executable file
View File

16
ModWinDog/Hashing.py Normal file → Executable file
View File

@ -6,17 +6,19 @@
import hashlib
def cHash(context, data) -> None:
if len(data.Tokens) >= 3 and data.Tokens[1] in hashlib.algorithms_available:
Alg = data.Tokens[1]
Hash = hashlib.new(Alg, Alg.join(data.Body.split(Alg)[1:]).strip().encode()).hexdigest()
algorithm = data.command.arguments["algorithm"]
if data.command.body and algorithm in hashlib.algorithms_available:
hashed = hashlib.new(algorithm, algorithm.join(data.Body.split(algorithm)[1:]).strip().encode()).hexdigest()
SendMsg(context, {
"TextPlain": Hash,
"TextMarkdown": MarkdownCode(Hash, True),
"TextPlain": hashed,
"TextMarkdown": MarkdownCode(hashed, True),
})
else:
SendMsg(context, {"Text": choice(Locale.__('hash.usage')).format(data.Tokens[0], hashlib.algorithms_available)})
SendMsg(context, {"Text": choice(Locale.__('hash.usage')).format(data.command.tokens[0], hashlib.algorithms_available)})
RegisterModule(name="Hashing", group="Geek", summary="Functions for hashing of textual content.", endpoints={
"Hash": CreateEndpoint(["hash"], summary="Responds with the hash-sum of a message received.", handler=cHash),
"Hash": CreateEndpoint(names=["hash"], summary="Responds with the hash-sum of a message received.", handler=cHash, arguments={
"algorithm": True,
}),
})

0
ModWinDog/Help.py Normal file → Executable file
View File

60
ModWinDog/Internet/Internet.py Normal file → Executable file
View File

@ -3,12 +3,18 @@
# Licensed under AGPLv3 by OctoSpacc #
# ================================== #
""" # windog config start # """
MicrosoftBingSettings = {}
""" # end windog config # """
from urlextract import URLExtract
from urllib import parse as UrlParse
from urllib.request import urlopen, Request
def HttpGet(url:str):
return urlopen(Request(url, headers={"User-Agent": WebUserAgent}))
def HttpReq(url:str, method:str|None=None, *, body:bytes|None=None, headers:dict[str, str]|None=None):
    """Open `url` through urllib and return the response object produced by `urlopen`.

    Args:
        url: Address to request.
        method: HTTP verb handed to `Request`; when None, urllib chooses it
            (GET without a body, POST with one).
        body: Raw request payload, handed to `Request` as `data`.
        headers: Request headers. When None, a single User-Agent header set to
            `WebUserAgent` is sent.

    Returns:
        The open response object; the caller reads (and may close) it.

    Raises:
        Nothing is caught here: any error from `Request` or `urlopen` propagates.
    """
    # Build the default for every call instead of sharing one dict object that
    # was created once, when the function was defined.
    if headers is None:
        headers = {"User-Agent": WebUserAgent}
    return urlopen(Request(url, method=method, data=body, headers=headers))
def cEmbedded(context, data) -> None:
if len(data.Tokens) >= 2:
@ -49,7 +55,7 @@ def cWeb(context, data) -> None:
if data.Body:
try:
QueryUrl = UrlParse.quote(data.Body)
Req = HttpGet(f'https://html.duckduckgo.com/html?q={QueryUrl}')
Req = HttpReq(f'https://html.duckduckgo.com/html?q={QueryUrl}')
Caption = f'🦆🔎 "{data.Body}": https://duckduckgo.com/?q={QueryUrl}\n\n'
Index = 0
for Line in Req.read().decode().replace('\t', ' ').splitlines():
@ -80,14 +86,14 @@ def cTranslate(context, data) -> None:
try:
toLang = data.Tokens[1]
# TODO: Use many different public Lingva instances in rotation to avoid overloading a specific one
result = json.loads(HttpGet(f'https://lingva.ml/api/v1/auto/{toLang}/{UrlParse.quote(toLang.join(data.Body.split(toLang)[1:]))}').read())
result = json.loads(HttpReq(f'https://lingva.ml/api/v1/auto/{toLang}/{UrlParse.quote(toLang.join(data.Body.split(toLang)[1:]))}').read())
SendMsg(context, {"TextPlain": f"[{result['info']['detectedSource']} (auto) -> {toLang}]\n\n{result['translation']}"})
except Exception:
raise
def cUnsplash(context, data) -> None:
try:
Req = HttpGet(f'https://source.unsplash.com/random/?{UrlParse.quote(data.Body)}')
Req = HttpReq(f'https://source.unsplash.com/random/?{UrlParse.quote(data.Body)}')
ImgUrl = Req.geturl().split('?')[0]
SendMsg(context, {
"TextPlain": f'{{{ImgUrl}}}',
@ -102,18 +108,18 @@ def cSafebooru(context, data) -> None:
try:
if data.Body:
for i in range(7): # retry a bunch of times if we can't find a really random result
ImgUrls = HttpGet(f'{ApiUrl}md5:{RandHexStr(3)}%20{UrlParse.quote(data.Body)}').read().decode().split(' file_url="')[1:]
ImgUrls = HttpReq(f'{ApiUrl}md5:{RandHexStr(3)}%20{UrlParse.quote(data.Body)}').read().decode().split(' file_url="')[1:]
if ImgUrls:
break
if not ImgUrls: # literal search
ImgUrls = HttpGet(f'{ApiUrl}{UrlParse.quote(data.Body)}').read().decode().split(' file_url="')[1:]
ImgUrls = HttpReq(f'{ApiUrl}{UrlParse.quote(data.Body)}').read().decode().split(' file_url="')[1:]
if not ImgUrls:
return SendMsg(context, {"Text": "Error: Could not get any result from Safebooru."})
ImgXml = choice(ImgUrls)
ImgUrl = ImgXml.split('"')[0]
ImgId = ImgXml.split(' id="')[1].split('"')[0]
else:
HtmlReq = HttpGet(HttpGet('https://safebooru.org/index.php?page=post&s=random').geturl())
HtmlReq = HttpReq(HttpReq('https://safebooru.org/index.php?page=post&s=random').geturl())
for Line in HtmlReq.read().decode().replace('\t', ' ').splitlines():
if '<img ' in Line and ' id="image" ' in Line and ' src="':
ImgUrl = Line.split(' src="')[1].split('"')[0]
@ -123,18 +129,52 @@ def cSafebooru(context, data) -> None:
SendMsg(context, {
"TextPlain": f'[{ImgId}]\n{{{ImgUrl}}}',
"TextMarkdown": (f'\\[`{ImgId}`\\]\n' + MarkdownCode(ImgUrl, True)),
"Media": HttpGet(ImgUrl).read(),
"media": {"url": ImgUrl}, #, "bytes": HttpReq(ImgUrl).read()},
})
else:
pass
except Exception:
except Exception as error:
raise
def cDalle(context, data) -> None:
    """Ask Bing Image Creator for a picture over plain HTTP and send the result.

    The prompt is read from `data.Body`. A creation request is posted with the
    headers configured in `MicrosoftBingSettings`, then the results page is
    polled until an image link shows up or the attempts run out.

    Args:
        context: Passed through untouched to `SendMsg`.
        data: Incoming message; only its `Body` attribute is read here.

    Any failure is logged with `Log` and its text is sent back to the chat.
    """
    if not data.Body:
        return SendMsg(context, {"Text": "Please tell me what to generate."})
    image_prefix = "https://th.bing.com/th/id/"
    # Image links appear HTML-escaped inside the results page.
    image_filter = "&quot;" + image_prefix
    try:
        # Follow-up requests only carry the User-Agent; resolving it first means a
        # missing setting fails before anything is submitted to Bing.
        agent_headers = {"User-Agent": MicrosoftBingSettings["User-Agent"]}
        creation_page = HttpReq(
            # the author also left "rt=4" as a commented-out alternative to "rt=3"
            f"https://www.bing.com/images/create?q={UrlParse.quote(data.Body)}&rt=3&FORM=GENCRE",
            # urlencode() already emits "q=..."; prepending another "q=" sent "q=q=...".
            body=UrlParse.urlencode({"q": data.Body, "qs": "ds"}).encode(),
            headers=MicrosoftBingSettings).read().decode()
        id_parts = creation_page.split('&amp;id=')
        if len(id_parts) < 2:
            raise Exception("Could not find a request id in the response from Bing.")
        result_id = id_parts[1].split('&amp;')[0]
        results_url = f"https://www.bing.com/images/create/-/{result_id}?FORM=GENCRE"
        SendMsg(context, {"Text": "Request sent, please wait..."})
        for retry_index in range(3, 12):
            result_list = HttpReq(results_url, headers=agent_headers).read().decode()
            if image_filter in result_list:
                break
            # Wait a little longer after every unsuccessful poll.
            time.sleep(1.25 * retry_index)
        else:
            raise Exception("Something went wrong.")
        # The text after the marker is only the tail of the link, so the prefix
        # has to be put back to obtain a URL that can actually be requested.
        image_url = image_prefix + result_list.split(image_filter)[1].split('\\&quot;')[0]
        SendMsg(context, {
            "TextPlain": f"{{{results_url}}}",
            "TextMarkdown": MarkdownCode(results_url, True),
            # NOTE(review): other handlers in this file pass pictures under a
            # lowercase "media" key; confirm "Media" with raw bytes is still accepted.
            "Media": HttpReq(image_url, headers=agent_headers).read(),
        })
    except Exception as error:
        Log(error)
        # Send the message text, not the exception object itself.
        SendMsg(context, {"TextPlain": str(error)})
# Register every Internet handler defined in this file with the bot.
RegisterModule(
    name="Internet",
    summary="Tools and toys related to the Internet.",
    endpoints={
        "Embedded": CreateEndpoint(
            ["embedded"],
            summary="Rewrites a link, trying to bypass embed view protection.",
            handler=cEmbedded,
        ),
        "Web": CreateEndpoint(
            ["web"],
            summary="Provides results of a DuckDuckGo search.",
            handler=cWeb,
        ),
        "Translate": CreateEndpoint(
            ["translate"],
            summary="Returns the received message after translating it in another language.",
            handler=cTranslate,
        ),
        "Unsplash": CreateEndpoint(
            ["unsplash"],
            summary="Sends a picture sourced from Unsplash.",
            handler=cUnsplash,
        ),
        "Safebooru": CreateEndpoint(
            ["safebooru"],
            summary="Sends a picture sourced from Safebooru.",
            handler=cSafebooru,
        ),
        # The plain-HTTP DALL-E handler is left unregistered:
        #"DALL-E": CreateEndpoint(["dalle"], summary="Sends an AI-generated picture from DALL-E 3 via Microsoft Bing.", handler=cDalle),
    },
)

0
ModWinDog/Internet/requirements.txt Normal file → Executable file
View File

102
ModWinDog/Scrapers/Scrapers.py Executable file
View File

@ -0,0 +1,102 @@
# ================================== #
# WinDog multi-purpose chatbot #
# Licensed under AGPLv3 by OctoSpacc #
# ================================== #
""" # windog config start # """
SeleniumDriversLimit = 2
""" # end windog config # """
currentSeleniumDrivers = 0
#from selenium import webdriver
#from selenium.webdriver import Chrome
#from selenium.webdriver.common.by import By
from seleniumbase import Driver
def getSelenium() -> Driver:
    """Start a new headless SeleniumBase driver, if the configured limit allows it.

    Returns:
        The new driver, or False when `SeleniumDriversLimit` drivers are already
        accounted for. Callers are expected to test the result for truthiness.

    Raises:
        Whatever `Driver(...)` raises while starting; the slot reserved for the
        driver is released before the error propagates.
    """
    global currentSeleniumDrivers
    if currentSeleniumDrivers >= SeleniumDriversLimit:
        return False
    currentSeleniumDrivers += 1
    try:
        # NOTE(review): the profile directory is numbered with the running count, so
        # after an earlier driver is closed a new one can be given the directory of a
        # driver that is still running - confirm whether that collision matters.
        return Driver(uc=True, headless2=True, user_data_dir=f"./Selenium-WinDog/{currentSeleniumDrivers}")
    except Exception:
        # No driver object exists that could later be handed to closeSelenium,
        # so the slot must be given back here or it would stay taken forever.
        currentSeleniumDrivers -= 1
        raise
def closeSelenium(driver:Driver) -> None:
    """Shut down a driver and release the slot counted for it.

    Args:
        driver: Object whose `close()` and `quit()` methods are called.

    Errors raised while shutting down are logged with `Log` and not propagated.
    The driver counter is decremented either way, and never goes below zero.
    """
    global currentSeleniumDrivers
    # The two steps are attempted independently: a failing close() must not
    # prevent quit() from running, or the browser would be left behind.
    for shutdown_step in ("close", "quit"):
        try:
            getattr(driver, shutdown_step)()
        except Exception:
            Log(format_exc())
    if currentSeleniumDrivers > 0:
        currentSeleniumDrivers -= 1
def cDalleSelenium(context, data) -> None:
    """Generate pictures with Bing Image Creator through a browser and send them.

    The prompt is read from `data.Body` and typed into the creation form; the
    page is then refreshed with growing pauses (4 to 12 seconds, nine attempts)
    until result images appear.

    Args:
        context: Passed through untouched to `SendMsg`.
        data: Incoming message; only its `Body` attribute is read here.

    Every outcome is reported to the chat with `SendMsg`. Unexpected errors are
    logged with `Log`. A driver that was obtained is always closed again.
    """
    if not data.Body:
        return SendMsg(context, {"Text": "Please tell me what to generate."})
    # Bound before the try block so that the cleanup below can always test it,
    # even when getSelenium() itself raises.
    driver = None
    try:
        driver = getSelenium()
        if not driver:
            return SendMsg(context, {"Text": "Couldn't access a web scraping VM as they are all busy. Please try again later."})
        driver.get("https://www.bing.com/images/create/")
        driver.refresh()
        driver.find_element('form input[name="q"]').send_keys(data.Body)
        driver.find_element('form a[role="button"]').submit()
        # Only the lookup is guarded, so that a failure while notifying the user
        # is not mistaken for "no warning on the page".
        try:
            driver.find_element('img[alt="Content warning"]')
            prompt_blocked = True
        except Exception: # warning element was not found, we should be good
            prompt_blocked = False
        if prompt_blocked:
            SendMsg(context, {"Text": "This prompt has been blocked by Microsoft because it violates their content policy. Further attempts might lead to a ban on your profile."})
            return
        SendMsg(context, {"Text": "Request sent successfully, please wait..."})
        for delay in range(4, 13):
            # note that sometimes generation fails and we will never get any image!
            time.sleep(delay)
            driver.refresh()
            img_list = driver.find_elements('div.imgpt a img.mimg')
            # Drop the query string from every link; skip elements without a src.
            img_array = [
                {"url": img_src.split('?')[0]}
                for img_src in (img.get_attribute("src") for img in img_list)
                if img_src
            ]
            if not img_array:
                continue
            page_url = driver.current_url.split('?')[0]
            SendMsg(context, {
                "TextPlain": f'"{data.Body}"\n{{{page_url}}}',
                "TextMarkdown": (f'"_{CharEscape(data.Body, "MARKDOWN")}_"\n' + MarkdownCode(page_url, True)),
                "media": img_array,
            })
            return
        # Every attempt came back empty: tell the user instead of going silent.
        SendMsg(context, {"Text": "The image generation took too long or failed. Please try again later."})
    except Exception:
        Log(format_exc())
        SendMsg(context, {"TextPlain": "An unexpected error occurred."})
    finally:
        # Single cleanup point for every path above. The check keeps the "all busy"
        # and start-up failure cases from releasing a slot they never held.
        if driver:
            closeSelenium(driver)
# Register the browser-driven DALL-E handler with the bot under the name "dalle".
RegisterModule(
    name="Scrapers",
    endpoints={
        "DALL-E": CreateEndpoint(
            ["dalle"],
            summary="Sends an AI-generated picture from DALL-E 3 via Microsoft Bing.",
            handler=cDalleSelenium,
        ),
    },
)

View File

@ -0,0 +1 @@
seleniumbase

30
ModWinDog/Scripting/Scripting.py Normal file → Executable file
View File

@ -3,13 +3,22 @@
# Licensed under AGPLv3 by OctoSpacc #
# ================================== #
luaCycleLimit = 10000
luaMemoryLimit = (512 * 1024) # 512 KB
luaCrashMessage = f"Script has been forcefully terminated due to having exceeded the max cycle count limit ({luaCycleLimit})."
""" # windog config start # """
# Use specific Lua version; always using the latest is risky due to possible new APIs and using JIT is vulnerable
LuaCycleLimit = 10000
LuaMemoryLimit = (512 * 1024) # 512 KB
LuaCrashMessage = f"Script has been forcefully terminated due to having exceeded the max cycle count limit ({LuaCycleLimit})."
# see <http://lua-users.org/wiki/SandBoxes> for a summary of certainly safe objects (outdated though)
LuaGlobalsWhitelist = ["_windog", "_VERSION", "print", "error", "assert", "tonumber", "tostring", "math", "string", "table"]
LuaTablesWhitelist = {"os": ["clock", "date", "difftime", "time"]}
""" # end windog config # """
# always specify a Lua version; using the default latest is risky due to possible new APIs and using JIT is vulnerable
from lupa.lua54 import LuaRuntime as NewLuaRuntime, LuaError, LuaSyntaxError
# I'm not sure this is actually needed, but better safe than sorry
def luaAttributeFilter(obj, attr_name, is_setting):
    """Refuse every attribute access: always raises AttributeError.

    Handed to the Lua runtime as its `attribute_filter`; none of the three
    arguments is inspected.
    """
    denial = AttributeError("Access Denied.")
    raise denial
@ -18,15 +27,20 @@ def cLua(context, data=None) -> None:
scriptText = (data.Body or (data.Quoted and data.Quoted.Body))
if not scriptText:
return SendMsg(context, {"Text": "You must provide some Lua code to execute."})
luaRuntime = NewLuaRuntime(max_memory=luaMemoryLimit, register_eval=False, register_builtins=False, attribute_filter=luaAttributeFilter)
luaRuntime = NewLuaRuntime(max_memory=LuaMemoryLimit, register_eval=False, register_builtins=False, attribute_filter=luaAttributeFilter)
luaRuntime.eval(f"""(function()
_windog = {{ stdout = "" }}
function print (text, endl) _windog.stdout = _windog.stdout .. tostring(text) .. (endl ~= false and "\\n" or "") end
function luaCrashHandler () return error("{luaCrashMessage}") end
debug.sethook(luaCrashHandler, "", {luaCycleLimit})
function luaCrashHandler () return error("{LuaCrashMessage}") end
debug.sethook(luaCrashHandler, "", {LuaCycleLimit})
end)()""")
# delete unsafe objects
for key in luaRuntime.globals():
if key not in ["error", "assert", "math", "string", "tostring", "print", "_windog"]:
if key in LuaTablesWhitelist:
for tabKey in luaRuntime.globals()[key]:
if tabKey not in LuaTablesWhitelist[key]:
del luaRuntime.globals()[key][tabKey]
elif key not in LuaGlobalsWhitelist:
del luaRuntime.globals()[key]
try:
textOutput = ("[ʟᴜᴀ ꜱᴛᴅᴏᴜᴛ]\n\n" + luaRuntime.eval(f"""(function()

0
ModWinDog/Scripting/requirements.txt Normal file → Executable file
View File