From af806c4f16fa72237a2215ecab7b739a6de824c4 Mon Sep 17 00:00:00 2001
From: Stefano Assenzo
Date: Thu, 5 Jan 2023 11:40:54 +0000
Subject: [PATCH] Now it works correctly, I just have to rename the file it downloads and I'm done

---
 .env.example                                 |  3 +-
 src/events/school_time/email_read.py         | 48 ++++++++++++++++++++
 src/events/school_time/scraping_excelfile.py | 45 -------------------
 3 files changed, 49 insertions(+), 47 deletions(-)
 create mode 100644 src/events/school_time/email_read.py
 delete mode 100644 src/events/school_time/scraping_excelfile.py

diff --git a/.env.example b/.env.example
index da77629..854727a 100644
--- a/.env.example
+++ b/.env.example
@@ -5,5 +5,4 @@ EMAIL = "
 SMTP_SERVER = ""
 SMTP_PORT = ""
 EMAIL_SCHOOL = ""
-LINK_SCHOOL_TIME = ""
-FILE_DIRECTORY_SCHOOL = ""
\ No newline at end of file
+DOWNLOAD_FOLDER = ""
\ No newline at end of file
diff --git a/src/events/school_time/email_read.py b/src/events/school_time/email_read.py
new file mode 100644
index 0000000..4a30788
--- /dev/null
+++ b/src/events/school_time/email_read.py
@@ -0,0 +1,48 @@
+"""Download every attachment from e-mails sent by the school address.
+
+Connection settings are read from the environment: SMTP_SERVER (passed to
+Imbox as the host), EMAIL, PWD_EMAIL, EMAIL_SCHOOL and DOWNLOAD_FOLDER.
+"""
+import os
+import traceback
+
+from imbox import Imbox
+
+
+host = os.getenv('SMTP_SERVER')
+username = os.getenv('EMAIL')
+password = os.getenv('PWD_EMAIL')
+download_folder = os.getenv('DOWNLOAD_FOLDER')
+EMAIL_SCHOOL = os.getenv('EMAIL_SCHOOL')
+
+# Fail with a clear message when the folder is missing or empty, rather
+# than with the TypeError/FileNotFoundError os.makedirs would raise.
+if not download_folder:
+    raise RuntimeError("DOWNLOAD_FOLDER environment variable is not set")
+
+os.makedirs(download_folder, exist_ok=True)
+
+# The context manager logs out even when reading the mailbox raises.
+with Imbox(host, username=username, password=password, ssl=True,
+           ssl_context=None, starttls=False) as mail:
+    messages = mail.messages(sent_from=EMAIL_SCHOOL)
+
+    for uid, message in messages:
+        mail.mark_seen(uid)  # optional, mark message as read
+
+        for idx, attachment in enumerate(message.attachments):
+            try:
+                # The filename comes from the e-mail itself, so keep only
+                # its last component to stay inside download_folder.
+                raw_name = attachment.get('filename') or ''
+                att_fn = os.path.basename(raw_name.replace('\\', '/'))
+                if att_fn in ('', '.', '..'):
+                    att_fn = f"attachment_{idx}"
+                download_path = os.path.join(download_folder, att_fn)
+                print(download_path)
+                with open(download_path, "wb") as fp:
+                    fp.write(attachment.get('content').read())
+            except Exception:
+                # Best effort: report the failure, then carry on with the
+                # next attachment.
+                traceback.print_exc()
diff --git a/src/events/school_time/scraping_excelfile.py b/src/events/school_time/scraping_excelfile.py
deleted file mode 100644
index 6ec2680..0000000
--- a/src/events/school_time/scraping_excelfile.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import subprocess
-import os
-import tabula
-from dotenv import load_dotenv
-from selenium import webdriver
-from selenium.webdriver.firefox.options import Options
-from selenium.webdriver.common.by import By
-from selenium.webdriver.support.ui import WebDriverWait
-from selenium.webdriver.support import expected_conditions as EC
-
-load_dotenv()
-LINK_SCHOOL_TIME = os.getenv('LINK_SCHOOL_TIME')
-FILE_DIRECTORY_SCHOOL = os.getenv('FILE_DIRECTORY_SCHOOL')
-options = Options()
-options.add_argument("--headless")
-options.add_argument('--disable-gpu')
-options.add_argument('--disable-software-rasterizer')
-
-driver = webdriver.Firefox(options=options)
-#url launch
-driver.get(LINK_SCHOOL_TIME)
-#identify link with partial link text
-
-elems = driver.find_elements(By.XPATH, "/html/body/section[2]/div/div/main/div/div/div/div/div[2]/p[2]/a")
-
-for elem in elems:
-    link = elem.get_attribute("href")
-
-remove_things_in_front = link.split(FILE_DIRECTORY_SCHOOL, 1)[1]
-print(remove_things_in_front)
-subprocess.run(["wget", link])
-
-driver.close()
-
-namefile = remove_things_in_front
-df = tabula.read_pdf(namefile, pages = 'all')[0]
-tabula.convert_into(namefile, "test.csv", output_format="csv", pages='all')
-print(df)
-
-from pyexcel.cookbook import merge_all_to_a_book
-# import pyexcel.ext.xlsx # no longer required if you use pyexcel >= 0.2.2
-import glob
-
-
-merge_all_to_a_book(glob.glob("*.csv"), "school_time.xlsx")
\ No newline at end of file