monitor-planning/scrape-new-applications.py

56 lines
1.8 KiB
Python

import os
import re
import sqlite3
import sys
from contextlib import closing

from selenium import webdriver

from application import Application
from weeklyList import WeeklyList
# Run configuration read by the script entry point below.
# Index of the first weekly list to scrape (start of the week-index range).
# NOTE(review): presumably 0 is the most recent week - confirm in WeeklyList.scrape.
search_past_week = 0
# Number of consecutive weekly lists to scrape, starting at search_past_week;
# the entry point caps the range so the week index always stays below 9.
search_num_weeks = 1
# Passed to Application.CreateTableIfNotExists as its second argument.
# NOTE(review): presumably True drops and recreates the table - confirm there.
reset_table = False
# Matches anything shaped like a markup tag ("<...>").
# Not referenced by the code visible in this file.
TAG_RE = re.compile(r'<[^>]+>')
# The council's site only exposes the latest 9 weekly lists, so week indices
# at or above this value are never requested.
MAX_WEEKLY_LISTS = 9


def _scrape_weekly_lists(browser, cursor):
    """Scrape the configured range of weekly lists and print a summary.

    Week indices run from ``search_past_week`` for ``search_num_weeks``
    entries, clipped so they never reach ``MAX_WEEKLY_LISTS``.
    """
    print("Scrape Weekly List(s)")
    weekly_list = WeeklyList(cursor)
    stop_week_idx = min(search_past_week + search_num_weeks, MAX_WEEKLY_LISTS)
    for search_week_idx in range(search_past_week, stop_week_idx):
        weekly_list.scrape(browser, search_week_idx)
    print(f"Number of new decided applications: {len(weekly_list.new_applications)}")
    print(f"Number of existing applications: {len(weekly_list.existing_applications)}")
    print("")


def _scrape_newly_decided_applications(browser, cursor):
    """Scrape the portal for every stored application with no case officer.

    Rows of ``applications`` whose ``caseOfficer`` is NULL are treated as
    not yet scraped; nothing is printed when there are none.
    """
    cursor.execute("SELECT reference FROM applications WHERE caseOfficer IS NULL")
    # Materialise the rows first: the same cursor is handed to Application
    # below, so it must not still be mid-iteration over this result set.
    newly_decided_applications = cursor.fetchall()
    if not newly_decided_applications:
        return
    print(f"Scrape Newly Decided Applications: {len(newly_decided_applications)}")
    for (application_ref,) in newly_decided_applications:
        Application(cursor, application_ref).scrape_portal(browser)
    print("")


def main():
    """Scrape the weekly lists, then the portal details of new applications.

    Opens ``database.db`` and a headless Chrome session, and releases both
    before reporting completion.
    """
    # A sqlite3 connection used as a context manager only commits or rolls
    # back; it does not close. closing() guarantees the handle is released.
    with closing(sqlite3.connect("database.db")) as connection:
        with connection:
            cursor = connection.cursor()
            Application.CreateTableIfNotExists(cursor, reset_table)

            options = webdriver.ChromeOptions()
            options.add_argument('--headless')
            with webdriver.Chrome(options=options) as browser:
                _scrape_weekly_lists(browser, cursor)
                _scrape_newly_decided_applications(browser, cursor)
    print("Done")


if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # The process ends through os._exit() below, which skips stdio
        # flushing; flush here so this message and any buffered output
        # are not lost when stdout is redirected.
        print('Interrupted', flush=True)
        try:
            sys.exit(130)
        except SystemExit:
            # sys.exit() always raises SystemExit, so this hard exit is what
            # actually terminates the process (status 130, as after Ctrl-C).
            os._exit(130)