Now it works correctly; I just have to rename the file it downloads and I'm done.
This commit is contained in:
parent
bced0ee355
commit
af806c4f16
|
@ -5,5 +5,4 @@ EMAIL = "
|
||||||
SMTP_SERVER = ""
|
SMTP_SERVER = ""
|
||||||
SMTP_PORT = ""
|
SMTP_PORT = ""
|
||||||
EMAIL_SCHOOL = ""
|
EMAIL_SCHOOL = ""
|
||||||
LINK_SCHOOL_TIME = ""
|
DOWNLOAD_FOLDER = ""
|
||||||
FILE_DIRECTORY_SCHOOL = ""
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
import os
|
||||||
|
from imbox import Imbox
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
|
||||||
|
# Connection settings and target folder, all read from the process
# environment. This script does not load a .env file itself, so the
# variables must already be exported when it starts.
# NOTE(review): Imbox talks IMAP, so SMTP_SERVER presumably has to hold the
# IMAP host name despite its name -- confirm against the deployment .env.
host = os.getenv('SMTP_SERVER')
username = os.getenv('EMAIL')
password = os.getenv('PWD_EMAIL')
download_folder = os.getenv('DOWNLOAD_FOLDER')
EMAIL_SCHOOL = os.getenv('EMAIL_SCHOOL')


def attachment_path(folder, filename, index=0):
    """Return the path inside *folder* where an attachment should be saved.

    The filename comes from the e-mail and is therefore untrusted: any
    directory components (POSIX or Windows style) are dropped so the result
    always stays inside *folder*.

    Args:
        folder: Destination directory.
        filename: Attachment filename as reported by the mail, may be None.
        index: Position of the attachment in the message; used to build a
            fallback name when *filename* is missing or unusable.

    Returns:
        ``folder`` joined with the sanitised file name.
    """
    # Normalise backslashes first so "..\\..\\x" is treated like "../../x".
    name = os.path.basename((filename or "").replace("\\", "/"))
    if name in ("", ".", ".."):
        name = f"attachment_{index}"
    return os.path.join(folder, name)


def main():
    """Download every attachment of every mail sent from EMAIL_SCHOOL.

    Each matching message is marked as seen, then its attachments are
    written into ``download_folder``. A failure on one attachment is
    reported and does not stop the remaining downloads.

    Raises:
        SystemExit: If DOWNLOAD_FOLDER is not set.
    """
    if not download_folder:
        raise SystemExit("DOWNLOAD_FOLDER is not set")
    os.makedirs(download_folder, exist_ok=True)

    mail = Imbox(host, username=username, password=password, ssl=True, ssl_context=None, starttls=False)
    try:
        messages = mail.messages(sent_from=EMAIL_SCHOOL)

        for uid, message in messages:
            mail.mark_seen(uid)  # optional, mark message as read

            for idx, attachment in enumerate(message.attachments):
                try:
                    download_path = attachment_path(
                        download_folder, attachment.get('filename'), idx
                    )
                    print(download_path)
                    with open(download_path, "wb") as fp:
                        fp.write(attachment.get('content').read())
                except Exception:
                    # Best effort: report the failure and carry on with the
                    # next attachment. Ctrl-C is deliberately not caught.
                    traceback.print_exc()
    finally:
        # Always close the mailbox connection, even after an error.
        mail.logout()


if __name__ == "__main__":
    main()
|
|
@ -1,45 +0,0 @@
|
||||||
import glob
import os
import subprocess

import tabula
from dotenv import load_dotenv
from pyexcel.cookbook import merge_all_to_a_book
# import pyexcel.ext.xlsx # no longer required if you use pyexcel >= 0.2.2
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Script outline:
#   1. open the school page in headless Firefox and collect the timetable
#      link(s) through a fixed XPath,
#   2. download each linked file with wget,
#   3. convert the last downloaded PDF to test.csv with tabula,
#   4. merge every *.csv in the working directory into school_time.xlsx.

load_dotenv()
LINK_SCHOOL_TIME = os.getenv('LINK_SCHOOL_TIME')
FILE_DIRECTORY_SCHOOL = os.getenv('FILE_DIRECTORY_SCHOOL')
if not LINK_SCHOOL_TIME or not FILE_DIRECTORY_SCHOOL:
    # Fail early with a readable message instead of a TypeError deep inside
    # selenium or str.split.
    raise SystemExit("LINK_SCHOOL_TIME and FILE_DIRECTORY_SCHOOL must be set")

options = Options()
options.add_argument("--headless")
options.add_argument('--disable-gpu')
options.add_argument('--disable-software-rasterizer')

driver = webdriver.Firefox(options=options)
namefile = None
try:
    # Load the page that links to the timetable.
    driver.get(LINK_SCHOOL_TIME)
    # Locate the download anchor(s) by absolute XPath.
    elems = driver.find_elements(By.XPATH, "/html/body/section[2]/div/div/main/div/div/div/div/div[2]/p[2]/a")

    for elem in elems:
        link = elem.get_attribute("href")
        # Skip anchors without a usable href rather than crashing on them.
        if not link or FILE_DIRECTORY_SCHOOL not in link:
            continue

        # Everything after FILE_DIRECTORY_SCHOOL is used as the local file name.
        remove_things_in_front = link.split(FILE_DIRECTORY_SCHOOL, 1)[1]
        print(remove_things_in_front)
        # check=True: stop here if the download fails instead of failing
        # later on a missing PDF.
        subprocess.run(["wget", link], check=True)
        # As before, the last link processed is the one that gets converted.
        namefile = remove_things_in_front
finally:
    # quit() ends the whole browser session; runs even if scraping raised.
    driver.quit()

if namefile is None:
    raise SystemExit("No timetable link found on the page")

tables = tabula.read_pdf(namefile, pages='all')
tabula.convert_into(namefile, "test.csv", output_format="csv", pages='all')
if tables:
    # Show the first extracted table as a quick visual check.
    print(tables[0])

merge_all_to_a_book(glob.glob("*.csv"), "school_time.xlsx")
|
|
Loading…
Reference in New Issue