WinDog/ModWinDog/Scrapers/Scrapers.py

126 lines
5.1 KiB
Python
Executable File

# ================================== #
# WinDog multi-purpose chatbot #
# Licensed under AGPLv3 by OctoSpacc #
# ================================== #
""" # windog config start # """
SeleniumDriversLimit = 2
""" # end windog config # """
# Slot numbers of the Selenium drivers currently handed out by getSelenium();
# getSelenium() refuses new requests once this list holds SeleniumDriversLimit
# entries, and closeSelenium() removes a slot number again.
currentSeleniumDrivers = []
from seleniumbase import Driver
# TODO: implement some kind of timeout after closing a browser, since otherwise we sometimes end up in a buggy state.
def getSelenium() -> tuple[int, Driver]|bool:
    """Reserve a free driver slot and start a Selenium browser bound to it.

    Slots are numbered from 1 to ``SeleniumDriversLimit``; the numbers that
    are currently reserved are tracked in ``currentSeleniumDrivers``. Each
    slot uses its own ``./Selenium-WinDog/<index>`` user data directory.

    Returns:
        ``(index, driver)`` on success, where ``index`` is the reserved slot
        number, or ``False`` when no slot is available.

    Raises:
        Exception: whatever ``Driver(...)`` raises while starting the browser;
            the slot reserved for the failed launch is released first.
    """
    # ">=" rather than "==": an over-full list must never pass the capacity check.
    if len(currentSeleniumDrivers) >= SeleniumDriversLimit:
        return False
    for index in range(1, (SeleniumDriversLimit + 1)):
        if index not in currentSeleniumDrivers:
            currentSeleniumDrivers.append(index)
            break
    else:
        # No unused slot number was found, so do not hand out a duplicate one.
        return False
    try:
        driver = Driver(uc=True, headless2=True, user_data_dir=f"./Selenium-WinDog/{index}")
    except Exception:
        # Release the slot again; callers only learn the index on success,
        # so a failed launch would otherwise keep it reserved forever.
        currentSeleniumDrivers.remove(index)
        raise
    return (index, driver)
def closeSelenium(index:int, driver:Driver) -> None:
    """Shut down a Selenium driver and release its slot.

    Both arguments may be falsy (e.g. ``None``) when the caller never obtained
    a driver; the corresponding step is then skipped.

    Args:
        index: slot number previously returned by ``getSelenium()``.
        driver: the driver object previously returned by ``getSelenium()``.
    """
    if driver:
        # close() and quit() are attempted independently, so that a failing
        # close() cannot prevent quit() from ending the browser session.
        for shutdown_step in (driver.close, driver.quit):
            try:
                shutdown_step()
            except Exception:
                # Best effort: log the failure and still release the slot below.
                Log(format_exc())
    # Guard the removal so closing the same slot twice does not raise ValueError.
    if index and index in currentSeleniumDrivers:
        currentSeleniumDrivers.remove(index)
def _sendDalleConductWarning(context:EventContext, text:str) -> None:
    """Send ``text`` together with the Image Creator Code of Conduct picture."""
    # Context manager so the asset's file handle is closed right after reading.
    with open("./Assets/ImageCreator-CodeOfConduct.png", 'rb') as conduct_file:
        conduct_image = conduct_file.read()
    SendMessage(context, {"Text": text, "media": {"bytes": conduct_image}})

def cDalleSelenium(context:EventContext, data:InputMessageData) -> None:
    """Handle the "dalle" command by driving Bing Image Creator in a browser.

    The prompt is taken from ``data.command.body``. The handler submits it on
    the Image Creator page, then reloads the page repeatedly (sleeping 4, 5,
    ... 12 seconds between attempts) until result images show up, and replies
    with their URLs. Content-policy warnings shown by the page are relayed to
    the user together with the Code of Conduct picture.

    Args:
        context: event context passed through to ``SendMessage``.
        data: incoming message; only ``data.command.body`` is read here.

    Returns:
        The result of ``SendMessage`` for the two early exits (missing prompt,
        no free driver); ``None`` otherwise.
    """
    warning_text = "has been blocked by Microsoft because it violates their content policy. Further attempts might lead to a ban on your profile. Please review the Code of Conduct for Image Creator in this picture or at https://www.bing.com/new/termsofuseimagecreator#content-policy."
    if not (prompt := data.command.body):
        return SendMessage(context, {"Text": "Please tell me what to generate."})
    driver_index, driver = None, None
    try:
        selenium = getSelenium()
        if not selenium:
            return SendMessage(context, {"Text": "Couldn't access a web scraping VM as they are all busy. Please try again later."})
        driver_index, driver = selenium
        driver.get("https://www.bing.com/images/create/")
        driver.refresh()
        driver.find_element('form input[name="q"]').send_keys(prompt)
        driver.find_element('form a[role="button"]').submit()
        # Only the element lookup is guarded: a failure while sending the
        # warning must not be mistaken for "no warning on the page".
        try:
            driver.find_element('img.gil_err_img[alt="Content warning"]')
        except Exception: # warning element was not found, we should be good
            pass
        else:
            _sendDalleConductWarning(context, f"Content warning: This prompt {warning_text}")
            return
        SendMessage(context, {"Text": "Request sent successfully, please wait..."})
        retry_index = 3
        while retry_index < 12:
            # note that sometimes generation can still fail and we will never get any image!
            time.sleep(retry_index := retry_index + 1)
            driver.refresh()
            img_list = driver.find_elements('div.imgpt a img.mimg')
            if not img_list:
                try:
                    driver.find_element('img.gil_err_img[alt="Unsafe image content detected"]')
                except Exception: # no error is present, so we just have to wait more for the images
                    continue
                _sendDalleConductWarning(context, f"Unsafe image content detected: This result {warning_text}")
                return
            # Strip the query string from each image URL; only URLs are sent,
            # the image bytes themselves are not downloaded here.
            img_array = [{"url": img_elem.get_attribute("src").split('?')[0]} for img_elem in img_list]
            page_url = driver.current_url.split('?')[0]
            SendMessage(context, OutputMessageData(
                text_plain=f'"{prompt}"\n{{{page_url}}}',
                text_html=f'"<i>{html_escape(prompt)}</i>"\n<pre>{page_url}</pre>',
                media=img_array))
            return
        raise Exception("VM timed out.")
    except Exception:
        Log(format_exc())
        SendMessage(context, {"TextPlain": "An unexpected error occurred."})
    finally:
        # Single cleanup point: runs on every exit path, including when the
        # error report above fails. It is a no-op if no driver was obtained.
        closeSelenium(driver_index, driver)
def cCraiyonSelenium(context:EventContext, data:InputMessageData) -> None:
    """Handle the "craiyon" command by driving craiyon.com in a browser.

    The prompt is taken from ``data.command.body``. The handler types it into
    the page, triggers generation, then polls the page for result images
    (sleeping 4, 5, ... 16 seconds between attempts) and replies with their
    URLs.

    Args:
        context: event context passed through to ``SendMessage``.
        data: incoming message; only ``data.command.body`` is read here.

    Returns:
        The result of ``SendMessage`` for the two early exits (missing prompt,
        no free driver); ``None`` otherwise.
    """
    if not (prompt := data.command.body):
        return SendMessage(context, {"Text": "Please tell me what to generate."})
    driver_index, driver = None, None
    try:
        selenium = getSelenium()
        if not selenium:
            return SendMessage(context, {"Text": "Couldn't access a web scraping VM as they are all busy. Please try again later."})
        driver_index, driver = selenium
        driver.get("https://www.craiyon.com/")
        driver.find_element('textarea#prompt').send_keys(prompt)
        # The button is clicked through JavaScript rather than a WebDriver click.
        driver.execute_script("arguments[0].click();", driver.find_element('button#generateButton'))
        # NOTE(review): the polling loop below can wait longer in total than
        # the 60 seconds announced here — confirm which one is intended.
        SendMessage(context, {"Text": "Request sent successfully, please wait up to 60 seconds..."})
        retry_index = 3
        while retry_index < 16:
            time.sleep(retry_index := retry_index + 1)
            img_list = driver.find_elements('div.image-container > img')
            if not img_list:
                continue
            # Only the image URLs are sent; the bytes are not downloaded here.
            img_array = [{"url": img_elem.get_attribute("src")} for img_elem in img_list]
            SendMessage(context, {
                "text_plain": f'"{prompt}"',
                "text_html": f'"<i>{html_escape(prompt)}</i>"',
                "media": img_array,
            })
            return
        raise Exception("VM timed out.")
    except Exception:
        Log(format_exc())
        SendMessage(context, {"TextPlain": "An unexpected error occurred."})
    finally:
        # Single cleanup point: runs on every exit path, including when the
        # error report above fails. It is a no-op if no driver was obtained.
        closeSelenium(driver_index, driver)
# Register this module with its two command endpoints and their handlers.
RegisterModule(
    name="Scrapers",
    endpoints=[
        SafeNamespace(
            names=["dalle"],
            handler=cDalleSelenium,
        ),
        # Two names are registered for the same Craiyon handler.
        SafeNamespace(
            names=["craiyon", "crayion"],
            handler=cCraiyonSelenium,
        ),
    ],
)