Now I can download and convert the file automatically, but the automatic generation of the school timetable doesn't work correctly at the moment
This commit is contained in:
parent
03ce9b49e8
commit
bced0ee355
|
@ -0,0 +1,9 @@
|
||||||
|
PASSWORD_MONGODB = ""
|
||||||
|
URL_MONGODB = ""
|
||||||
|
PWD_EMAIL = ""
|
||||||
|
EMAIL = ""
|
||||||
|
SMTP_SERVER = ""
|
||||||
|
SMTP_PORT = ""
|
||||||
|
EMAIL_SCHOOL = ""
|
||||||
|
LINK_SCHOOL_TIME = ""
|
||||||
|
FILE_DIRECTORY_SCHOOL = ""
|
|
@ -1,2 +1,3 @@
|
||||||
test.xlsx
|
test.xlsx
|
||||||
.env
|
.env
|
||||||
|
geckodriver.log
|
|
@ -1,40 +0,0 @@
|
||||||
import imaplib
|
|
||||||
import email
|
|
||||||
import traceback
|
|
||||||
import os
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
|
|
||||||
load_dotenv()
|
|
||||||
FROM_PWD = os.getenv('PWD_EMAIL')
|
|
||||||
FROM_EMAIL = os.getenv('EMAIL')
|
|
||||||
SMTP_SERVER = os.getenv('SMTP_SERVER')
|
|
||||||
SMTP_PORT = os.getenv('SMTP_PORT')
|
|
||||||
|
|
||||||
def read_email_from_gmail():
    """Print the sender and subject of every inbox message, newest first.

    Opens an IMAP-over-SSL connection to ``SMTP_SERVER`` and logs in with
    ``FROM_EMAIL`` / ``FROM_PWD`` (module-level values read from the
    environment).  Errors are reported with a traceback and the exception
    text instead of being raised, so the caller never sees an exception.
    """
    try:
        # The context manager logs out when the block is left, including
        # when an error occurs, so the connection is not leaked.
        with imaplib.IMAP4_SSL(SMTP_SERVER) as mail:
            mail.login(FROM_EMAIL, FROM_PWD)
            mail.select('inbox')

            # search() returns (status, [b'1 2 3 ...']).
            _status, search_data = mail.search(None, 'ALL')
            id_list = search_data[0].split() if search_data else []

            # Iterate over the ids the server actually returned.  Counting
            # down with range(latest, first, -1) skipped the oldest message
            # and processed nothing at all for a single-message inbox.
            for msg_id in reversed(id_list):
                _status, fetched = mail.fetch(msg_id, '(RFC822)')
                for response_part in fetched:
                    # Only (envelope, payload) tuples carry a message; the
                    # remaining items are protocol trailers.
                    if not isinstance(response_part, tuple):
                        continue
                    # Parse the raw bytes so that mails which are not valid
                    # UTF-8 do not raise UnicodeDecodeError.
                    msg = email.message_from_bytes(response_part[1])
                    email_subject = msg.get('subject', '')
                    email_from = msg.get('from', '')
                    print('From : ' + str(email_from) + '\n')
                    print('Subject : ' + str(email_subject) + '\n')
    except Exception as e:
        # Top-level boundary of the script: report and carry on.
        traceback.print_exc()
        print(str(e))
|
|
||||||
|
|
||||||
read_email_from_gmail()
|
|
|
@ -0,0 +1,45 @@
|
||||||
|
import subprocess
|
||||||
|
import os
|
||||||
|
import tabula
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.firefox.options import Options
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
|
|
||||||
|
import glob

from pyexcel.cookbook import merge_all_to_a_book
# NOTE: "import pyexcel.ext.xlsx" is no longer required with pyexcel >= 0.2.2.

# Script: open the school page in headless Firefox, download the linked
# timetable PDF with wget, convert it to CSV with tabula and merge the CSV
# files of the working directory into "school_time.xlsx".

load_dotenv()
LINK_SCHOOL_TIME = os.getenv('LINK_SCHOOL_TIME')
FILE_DIRECTORY_SCHOOL = os.getenv('FILE_DIRECTORY_SCHOOL')

# Fail early with a clear message: an unset or empty value would otherwise
# surface later as an obscure error from driver.get() or str.split().
if not LINK_SCHOOL_TIME or not FILE_DIRECTORY_SCHOOL:
    raise SystemExit(
        "LINK_SCHOOL_TIME and FILE_DIRECTORY_SCHOOL must be set in the environment"
    )

options = Options()
options.add_argument("--headless")
options.add_argument('--disable-gpu')
options.add_argument('--disable-software-rasterizer')

# File name of the downloaded PDF; stays None when no usable link is found.
remove_things_in_front = None

driver = webdriver.Firefox(options=options)
try:
    # Launch the URL.
    driver.get(LINK_SCHOOL_TIME)

    # Locate the download anchor(s) through their absolute XPath.
    elems = driver.find_elements(By.XPATH, "/html/body/section[2]/div/div/main/div/div/div/div/div[2]/p[2]/a")

    for elem in elems:
        link = elem.get_attribute("href")
        # Skip anchors without a href or outside the expected directory;
        # split(...)[1] would raise IndexError for them.
        if not link or FILE_DIRECTORY_SCHOOL not in link:
            continue

        # Everything after the directory prefix is the file name.
        remove_things_in_front = link.split(FILE_DIRECTORY_SCHOOL, 1)[1]
        print(remove_things_in_front)
        # check=True turns a failed download into an error here instead of
        # a confusing "file not found" during the PDF conversion below.
        subprocess.run(["wget", link], check=True)
finally:
    # quit() ends the whole browser session even when a step above failed.
    driver.quit()

if remove_things_in_front is None:
    raise SystemExit("No timetable PDF link found on the page")

namefile = remove_things_in_front
tables = tabula.read_pdf(namefile, pages='all')
# read_pdf returns a list of tables; it is empty when none were detected.
df = tables[0] if tables else None
tabula.convert_into(namefile, "test.csv", output_format="csv", pages='all')
print(df)

merge_all_to_a_book(glob.glob("*.csv"), "school_time.xlsx")
|
|
@ -19,7 +19,7 @@ collection_archive = database["archive-school-time-table"]
|
||||||
x = collection.delete_many({})
|
x = collection.delete_many({})
|
||||||
|
|
||||||
# Load the Excel file with openpyxl's load_workbook() and keep it in the variable named "workbook".
|
# Load the Excel file with openpyxl's load_workbook() and keep it in the variable named "workbook".
|
||||||
workbook = xl.load_workbook(filename="test.xlsx")
|
workbook = xl.load_workbook(filename="school_time.xlsx")
|
||||||
|
|
||||||
ws = workbook.active
|
ws = workbook.active
|
||||||
|
|
||||||
|
@ -63,19 +63,19 @@ for row in range (1, 100):
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
remove_things_in_front = school_subject.split(' ', 1)[1]
|
#remove_things_in_front = school_subject.split(' ', 1)[1]
|
||||||
find_document_username = list(collection.find({}, {"Date": long_date}))
|
find_document_username = list(collection.find({}, {"Date": long_date}))
|
||||||
array_username = find_document_username[0]["_id"]
|
array_username = find_document_username[0]["_id"]
|
||||||
collection.update_one(
|
collection.update_one(
|
||||||
{ "_id": ObjectId(array_username)},
|
{ "_id": ObjectId(array_username)},
|
||||||
{
|
{
|
||||||
"$push": { "School Subject": str(remove_things_in_front) }
|
"$push": { "School Subject": school_subject }
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
collection_archive.update_one(
|
collection_archive.update_one(
|
||||||
{ "_id": ObjectId(array_username)},
|
{ "_id": ObjectId(array_username)},
|
||||||
{
|
{
|
||||||
"$push": { "School Subject": str(remove_things_in_front) }
|
"$push": { "School Subject": school_subject }
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -112,4 +112,6 @@ for row in range (1, 100):
|
||||||
{
|
{
|
||||||
"$push": { "Teacher": teacher }
|
"$push": { "Teacher": teacher }
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
os.remove("school_time.xlsx")
|
Loading…
Reference in New Issue