The timetable file can now be downloaded and converted automatically, but the automatic generation of the school timetable does not work correctly yet.

2025-06-06 00:39:12 +02:00 · 2023-01-04 16:33:28 +00:00
parent 03ce9b49e8
commit bced0ee355
5 changed files with 63 additions and 46 deletions
--- a/.env.example
+++ b/.env.example
@@ -0,0 +1,9 @@
 PASSWORD_MONGODB = ""
 URL_MONGODB = ""
 PWD_EMAIL = ""
 EMAIL = ""
 SMTP_SERVER = ""
 SMTP_PORT = ""
 EMAIL_SCHOOL = ""
 LINK_SCHOOL_TIME = ""
 FILE_DIRECTORY_SCHOOL = ""
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 test.xlsx
-.env
+.env
 geckodriver.log
--- a/src/events/school_time/check_email.py
+++ b/src/events/school_time/check_email.py
@@ -1,40 +0,0 @@
 import imaplib
 import email
 import traceback 
 import os
 from dotenv import load_dotenv
 # Load variables from the dotenv file into the process environment before reading them.
 load_dotenv()
 # Mailbox password and address passed to mail.login() below.
 FROM_PWD = os.getenv('PWD_EMAIL')
 FROM_EMAIL = os.getenv('EMAIL')
 # NOTE: despite the SMTP_ prefix, this host is the one the IMAP connection is opened to.
 SMTP_SERVER = os.getenv('SMTP_SERVER')
 # Read from the environment but not referenced by the code visible in this file.
 SMTP_PORT = os.getenv('SMTP_PORT')
 def read_email_from_gmail():
    """Print the sender and subject of every inbox message, newest first.

    Opens an IMAP-over-SSL connection to the host in ``SMTP_SERVER`` and logs
    in with ``FROM_EMAIL`` / ``FROM_PWD``. Nothing is returned. Any failure
    is reported (traceback plus the error text) instead of being raised, so
    callers never see an exception from this function.
    """
    try:
        # The context manager issues LOGOUT on exit, so the connection is
        # released even when a later step raises.
        with imaplib.IMAP4_SSL(SMTP_SERVER) as mail:
            mail.login(FROM_EMAIL, FROM_PWD)
            mail.select('inbox')
            _status, found = mail.search(None, 'ALL')
            # An empty mailbox yields no ids; the loop below then does nothing
            # instead of failing on a missing first element.
            message_ids = found[0].split() if found and found[0] else []
            # Walk the returned ids themselves so the oldest message is
            # included as well (a numeric range to the first id excluded it).
            for message_id in reversed(message_ids):
                _status, parts = mail.fetch(message_id, '(RFC822)')
                for part in parts:
                    # Only (envelope, payload) tuples carry the message bytes.
                    if not isinstance(part, tuple):
                        continue
                    # Parse the raw bytes directly so messages that are not
                    # valid UTF-8 do not abort the whole run.
                    msg = email.message_from_bytes(part[1])
                    # f-strings tolerate a missing header (None) where string
                    # concatenation would raise TypeError.
                    print(f"From : {msg['from']}\n")
                    print(f"Subject : {msg['subject']}\n")
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and carry on.
        traceback.print_exc()
        print(str(e))
 # Executed unconditionally: runs on import as well as when launched as a script.
 read_email_from_gmail()
--- a/src/events/school_time/scraping_excelfile.py
+++ b/src/events/school_time/scraping_excelfile.py
@@ -0,0 +1,45 @@
 import subprocess
 import os
 import tabula
 from dotenv import load_dotenv
 from selenium import webdriver
 from selenium.webdriver.firefox.options import Options
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 # Settings come from the environment: the page that links to the timetable
 # and the URL fragment that precedes the file name in the download link.
 load_dotenv()
 LINK_SCHOOL_TIME = os.getenv('LINK_SCHOOL_TIME')
 FILE_DIRECTORY_SCHOOL = os.getenv('FILE_DIRECTORY_SCHOOL')
 options = Options()
 options.add_argument("--headless")
 options.add_argument('--disable-gpu')
 options.add_argument('--disable-software-rasterizer')
 driver = webdriver.Firefox(options=options)
 try:
    #url launch
    driver.get(LINK_SCHOOL_TIME)
    #identify the download anchor by its absolute XPath
    elems = driver.find_elements(By.XPATH, "/html/body/section[2]/div/div/main/div/div/div/div/div[2]/p[2]/a")
    # Read the href while the session is still alive; the last match wins,
    # and no match leaves link as None instead of unbound.
    link = elems[-1].get_attribute("href") if elems else None
 finally:
    # quit() ends the whole browser session, also when the lookup above raises.
    driver.quit()
 if not link:
    raise SystemExit("No timetable download link found on " + str(LINK_SCHOOL_TIME))
 # An empty or missing separator cannot be split on; fail with a clear message.
 if not FILE_DIRECTORY_SCHOOL or FILE_DIRECTORY_SCHOOL not in link:
    raise SystemExit("FILE_DIRECTORY_SCHOOL is not set or does not occur in " + link)
 # Whatever follows the directory fragment is used as the local file name.
 remove_things_in_front = link.split(FILE_DIRECTORY_SCHOOL, 1)[1]
 print(remove_things_in_front)
 # check=True stops here when the download fails instead of converting a missing file.
 subprocess.run(["wget", link], check=True)
 namefile = remove_things_in_front
 # Only the first table returned by tabula is kept in df (and printed below).
 df = tabula.read_pdf(namefile, pages = 'all')[0]
 tabula.convert_into(namefile, "test.csv", output_format="csv", pages='all')
 print(df)
 from pyexcel.cookbook import merge_all_to_a_book
 # import pyexcel.ext.xlsx # no longer required if you use pyexcel >= 0.2.2 
 import glob
 # NOTE(review): the glob picks up every CSV in the working directory, not
 # only test.csv - confirm that is intended.
 merge_all_to_a_book(glob.glob("*.csv"), "school_time.xlsx")
--- a/src/events/school_time/update_time_school.py
+++ b/src/events/school_time/update_time_school.py
@@ -19,7 +19,7 @@ collection_archive = database["archive-school-time-table"]
 x = collection.delete_many({})
 #using read_excel() method to read our excel file and storing the same in the variable named "df "
-workbook = xl.load_workbook(filename="test.xlsx")
+workbook = xl.load_workbook(filename="school_time.xlsx")
 ws = workbook.active
@@ -63,19 +63,19 @@ for row in range (1, 100):
                            }
                        )
                else:
-                    remove_things_in_front = school_subject.split(' ', 1)[1]
+                    #remove_things_in_front = school_subject.split(' ', 1)[1]
                    find_document_username = list(collection.find({}, {"Date": long_date}))
                    array_username = find_document_username[0]["_id"]
                    collection.update_one(
                        { "_id": ObjectId(array_username)},
                            {
-                                "$push": { "School Subject": str(remove_things_in_front) }
+                                "$push": { "School Subject": school_subject }
                            }
                        )
                    collection_archive.update_one(
                        { "_id": ObjectId(array_username)},
                            {
-                                "$push": { "School Subject": str(remove_things_in_front) }
+                                "$push": { "School Subject": school_subject }
                            }
                        )
@@ -112,4 +112,6 @@ for row in range (1, 100):
                            {
                                "$push": { "Teacher": teacher }
                            }
-                        )
+                        )
 os.remove("school_time.xlsx")