diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1b334b0..09edde9 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,4 @@
-
+
\ No newline at end of file
diff --git a/jobhunter.py b/jobhunter.py
index ea9fb15..ebfb0ee 100644
--- a/jobhunter.py
+++ b/jobhunter.py
@@ -1,8 +1,17 @@
+# Tyler Sabin
+# CNE340 Winter Quarter 2024
+# 2/12/2024
+# follow instructions here and on Canvas to complete program
+# https://rtc.instructure.com/courses/2439016/assignments/31830474?module_item_id=79735018
+# code below modified by Tyler Sabin and Brian Huang
+# https://github.com/profproix/cne340_jobhunter
+# 2/7/2024 Ixius Procopios said to revert comments back to original
+
 import mysql.connector
 import time
 import json
 import requests
-from datetime import date
+import datetime
 import html2text
 
 
@@ -21,6 +30,7 @@ def create_tables(cursor):
     # Python is in latin-1 and error (Incorrect string value: '\xE2\x80\xAFAbi...') will occur if Description is not in unicode format due to the json data
     cursor.execute('''CREATE TABLE IF NOT EXISTS jobs (id INT PRIMARY KEY auto_increment, Job_id varchar(50) , company varchar (300), Created_at DATE, url varchar(30000), Title LONGBLOB, Description LONGBLOB ); ''')
+    cursor.execute("ALTER TABLE jobs CHANGE company company VARCHAR(300) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;")
 
 
 # Query the database.
@@ -33,32 +43,41 @@ def query_sql(cursor, query):
 
 
 # Add a new job
 def add_new_job(cursor, jobdetails):
     # extract all required columns
-    description = html2text.html2text(jobdetails['description'])
+    job_posting_id = jobdetails['id']
+    company_called = jobdetails['company_name']
     date = jobdetails['publication_date'][0:10]
-    query = cursor.execute("INSERT INTO jobs( Description, Created_at " ") " "VALUES(%s,%s)", ( description, date))
-    # %s is what is needed for Mysqlconnector as SQLite3 uses ? the Mysqlconnector uses %s
+    web_address = jobdetails['url']
+    job_title = jobdetails['title']
+    description = html2text.html2text(jobdetails['description'])
+    query = cursor.execute("INSERT INTO jobs(Job_id, company, Created_at, url, Title, Description) " "VALUES(%s,%s,%s,%s,%s,%s)", (job_posting_id, company_called, date, web_address, job_title, description))
+    # %s is the placeholder MySQL Connector expects, whereas SQLite3 uses ?
    return query_sql(cursor, query)
 
 
 # Check if new job
 def check_if_job_exists(cursor, jobdetails): ##Add your code here
-    query = "UPDATE"
+    job_posting_id = jobdetails['id']
+    query = "SELECT * FROM jobs WHERE Job_id = \"%s\"" % job_posting_id
     return query_sql(cursor, query)
 
 
 # Deletes job
 def delete_job(cursor, jobdetails): ##Add your code here
-    query = "UPDATE"
-    return query_sql(cursor, query)
+    # deletes job if over 14 days old
+    import datetime
+    job_age = get_date_of_job_posting_vs_current_date(cursor)
+    if job_age > 14:
+        job_posting_id = jobdetails['id']
+        query = "DELETE FROM jobs WHERE Job_id = \"%s\"" % job_posting_id
+        return query_sql(cursor, query)
 
 
 # Grab new jobs from a website, Parses JSON code and inserts the data into a list of dictionaries do not need to edit
 def fetch_new_jobs():
     query = requests.get("https://remotive.io/api/remote-jobs")
     datas = json.loads(query.text)
-
     return datas
 
 
@@ -71,19 +90,33 @@ def jobhunt(cursor):
     add_or_delete_job(jobpage, cursor)
 
 
+def get_date_of_job_posting_vs_current_date(cursor):
+    # getting the difference between two date objects
+    import datetime
+    cursor.execute("SELECT * FROM jobs")
+    row = cursor.fetchall()
+    time1 = row[0][3]
+    time2 = datetime.date.today()
+    diff = time2 - time1
+    job_age = diff.days
+    return job_age
+
+
 def add_or_delete_job(jobpage, cursor):
     # Add your code here to parse the job page
     for jobdetails in jobpage['jobs']:  # EXTRACTS EACH JOB FROM THE JOB LIST. It errored out until I specified jobs. This is because it needs to look at the jobs dictionary from the API. https://careerkarma.com/blog/python-typeerror-int-object-is-not-iterable/
         # Add in your code here to check if the job already exists in the DB
         check_if_job_exists(cursor, jobdetails)
-        is_job_found = len(
-            cursor.fetchall()) > 0  # https://stackoverflow.com/questions/2511679/python-number-of-rows-affected-by-cursor-executeselect
+        is_job_found = len(cursor.fetchall()) > 0  # https://stackoverflow.com/questions/2511679/python-number-of-rows-affected-by-cursor-executeselect
         if is_job_found:
+            print("job already exists")
+            delete_job(cursor, jobdetails)
         else:
             # INSERT JOB
             # Add in your code here to notify the user of a new posting. This code will notify the new user
-
+            print("new job found")
+            add_new_job(cursor, jobdetails)
 
 
 # Setup portion of the program. Take arguments and set up the script
@@ -95,9 +128,9 @@ def main():
     cursor = conn.cursor()
 
     create_tables(cursor)
-    while (1):  # Infinite Loops. Only way to kill it is to crash or manually crash it. We did this as a background process/passive scraper
+    while True:  # Infinite Loops. Only way to kill it is to crash or manually kill it. We did this as a background process/passive scraper
         jobhunt(cursor)
-        time.sleep(21600)  # Sleep for 1h, this is ran every hour because API or web interfaces have request limits. Your reqest will get blocked.
+        time.sleep(14400)  # Sleep for 4 hours; this is run periodically because API or web interfaces have request limits, and your request would otherwise get blocked.
         # Sleep does a rough cycle count, system is not entirely accurate