From a5043332055b7853f95d5779a4e7e572f679d80b Mon Sep 17 00:00:00 2001 From: tsabin2023 <148308137+tsabin2023@users.noreply.github.com> Date: Sun, 4 Feb 2024 15:45:58 -0800 Subject: [PATCH 1/3] Set it up in 3.11 --- .idea/misc.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.idea/misc.xml b/.idea/misc.xml index 1b334b0..09edde9 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,4 +1,4 @@ - + \ No newline at end of file From fc41cf8127dd757369df29139a9eea2c5098f48b Mon Sep 17 00:00:00 2001 From: tsabin2023 <148308137+tsabin2023@users.noreply.github.com> Date: Sat, 10 Feb 2024 15:21:16 -0800 Subject: [PATCH 2/3] Nearly ready to merge with main --- jobhunter.py | 61 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 15 deletions(-) diff --git a/jobhunter.py b/jobhunter.py index ea9fb15..cb044c3 100644 --- a/jobhunter.py +++ b/jobhunter.py @@ -1,8 +1,17 @@ +# Tyler Sabin +# CNE340 Winter Quarter 2024 +# 2/10/2024 +# follow instructions here and on Canvas to complete program +# https://rtc.instructure.com/courses/2439016/assignments/31830474?module_item_id=79735018 +# code below modified by Tyler Sabin and Brian Huang +# https://github.com/profproix/cne340_jobhunter +# 2/7/2024 Ixius Procopios said to revert comments back to original + import mysql.connector import time import json import requests -from datetime import date +import datetime import html2text @@ -10,7 +19,7 @@ # You may need to edit the connect function based on your local settings.#I made a password for my database because it is important to do so. Also make sure MySQL server is running or it will not connect def connect_to_sql(): conn = mysql.connector.connect(user='root', password='', - host='127.0.0.1', database='cne340') + host='127.0.0.1', database='cne340test') return conn @@ -21,6 +30,7 @@ def create_tables(cursor): # Python is in latin-1 and error (Incorrect string value: '\xE2\x80\xAFAbi...') will occur if Description is not in unicode format due to the json data cursor.execute('''CREATE TABLE IF NOT EXISTS jobs (id INT PRIMARY KEY auto_increment, Job_id varchar(50) , company varchar (300), Created_at DATE, url varchar(30000), Title LONGBLOB, Description LONGBLOB ); ''') + cursor.execute("ALTER TABLE jobs CHANGE company company VARCHAR(300) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;") # Query the database. @@ -33,10 +43,14 @@ def query_sql(cursor, query): # Add a new job def add_new_job(cursor, jobdetails): # extract all required columns - description = html2text.html2text(jobdetails['description']) + job_posting_id = jobdetails['id'] + company_called = jobdetails['company_name'] date = jobdetails['publication_date'][0:10] - query = cursor.execute("INSERT INTO jobs( Description, Created_at " ") " - "VALUES(%s,%s)", ( description, date)) + web_address = jobdetails['url'] + job_title = jobdetails['title'] + description = html2text.html2text(jobdetails['description']) + query = cursor.execute("INSERT INTO jobs(Job_id, company, Created_at, url, Title, Description) " + "VALUES(%s,%s,%s,%s,%s,%s)", (job_posting_id, company_called, date, web_address, job_title, description)) # %s is what is needed for Mysqlconnector as SQLite3 uses ? the Mysqlconnector uses %s return query_sql(cursor, query) @@ -44,21 +58,24 @@ def add_new_job(cursor, jobdetails): # Check if new job def check_if_job_exists(cursor, jobdetails): ##Add your code here - query = "UPDATE" + job_posting_id = jobdetails['id'] + query = "SELECT * FROM jobs WHERE Job_id = \"%s\"" % job_posting_id return query_sql(cursor, query) # Deletes job def delete_job(cursor, jobdetails): ##Add your code here - query = "UPDATE" - return query_sql(cursor, query) - + import datetime + job_age = get_date_of_job_posting_vs_current_date(cursor) + if job_age > 14: + job_posting_id = jobdetails['id'] + query = "DELETE FROM jobs WHERE Job_id = \"%s\"" % job_posting_id + return query_sql(cursor, query) # Grab new jobs from a website, Parses JSON code and inserts the data into a list of dictionaries do not need to edit def fetch_new_jobs(): query = requests.get("https://remotive.io/api/remote-jobs") datas = json.loads(query.text) - return datas @@ -71,19 +88,33 @@ def jobhunt(cursor): add_or_delete_job(jobpage, cursor) +def get_date_of_job_posting_vs_current_date(cursor): + # getting the difference between two date objects + import datetime + cursor.execute("SELECT * FROM jobs") + row = cursor.fetchall() # [ (1,2,3,4) ] + time1 = row[0][3] + time2 = datetime.date.today() + diff = time2 - time1 + job_age = diff.days + return job_age + + def add_or_delete_job(jobpage, cursor): # Add your code here to parse the job page for jobdetails in jobpage['jobs']: # EXTRACTS EACH JOB FROM THE JOB LIST. It errored out until I specified jobs. This is because it needs to look at the jobs dictionary from the API. https://careerkarma.com/blog/python-typeerror-int-object-is-not-iterable/ # Add in your code here to check if the job already exists in the DB check_if_job_exists(cursor, jobdetails) - is_job_found = len( - cursor.fetchall()) > 0 # https://stackoverflow.com/questions/2511679/python-number-of-rows-affected-by-cursor-executeselect + is_job_found = len(cursor.fetchall()) > 0 # https://stackoverflow.com/questions/2511679/python-number-of-rows-affected-by-cursor-executeselect if is_job_found: + print("job already exists") # does this meet requirements? + delete_job(cursor, jobdetails) else: # INSERT JOB # Add in your code here to notify the user of a new posting. This code will notify the new user - + print("new job found") + add_new_job(cursor, jobdetails) # Setup portion of the program. Take arguments and set up the script @@ -95,9 +126,9 @@ def main(): cursor = conn.cursor() create_tables(cursor) - while (1): # Infinite Loops. Only way to kill it is to crash or manually crash it. We did this as a background process/passive scraper + while True: # Infinite Loops. Only way to kill it is to crash or manually crash it. We did this as a background process/passive scraper jobhunt(cursor) - time.sleep(21600) # Sleep for 1h, this is ran every hour because API or web interfaces have request limits. Your reqest will get blocked. + time.sleep(14400) # Sleep for 1h, this is ran every hour because API or web interfaces have request limits. Your reqest will get blocked. # Sleep does a rough cycle count, system is not entirely accurate From ac04fcae7a73af2d8962b41cf845208ef21be9b0 Mon Sep 17 00:00:00 2001 From: tsabin2023 <148308137+tsabin2023@users.noreply.github.com> Date: Mon, 12 Feb 2024 14:30:09 -0800 Subject: [PATCH 3/3] Ready for merge --- jobhunter.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/jobhunter.py b/jobhunter.py index cb044c3..ebfb0ee 100644 --- a/jobhunter.py +++ b/jobhunter.py @@ -1,6 +1,6 @@ # Tyler Sabin # CNE340 Winter Quarter 2024 -# 2/10/2024 +# 2/12/2024 # follow instructions here and on Canvas to complete program # https://rtc.instructure.com/courses/2439016/assignments/31830474?module_item_id=79735018 # code below modified by Tyler Sabin and Brian Huang @@ -19,7 +19,7 @@ # You may need to edit the connect function based on your local settings.#I made a password for my database because it is important to do so. Also make sure MySQL server is running or it will not connect def connect_to_sql(): conn = mysql.connector.connect(user='root', password='', - host='127.0.0.1', database='cne340test') + host='127.0.0.1', database='cne340') return conn @@ -51,7 +51,7 @@ def add_new_job(cursor, jobdetails): description = html2text.html2text(jobdetails['description']) query = cursor.execute("INSERT INTO jobs(Job_id, company, Created_at, url, Title, Description) " "VALUES(%s,%s,%s,%s,%s,%s)", (job_posting_id, company_called, date, web_address, job_title, description)) - # %s is what is needed for Mysqlconnector as SQLite3 uses ? the Mysqlconnector uses %s + # %s is what is needed for Mysqlconnector as SQLite3 uses ? the Mysqlconnector uses %s return query_sql(cursor, query) @@ -65,6 +65,7 @@ def check_if_job_exists(cursor, jobdetails): # Deletes job def delete_job(cursor, jobdetails): ##Add your code here + # deletes job if over 14 days old import datetime job_age = get_date_of_job_posting_vs_current_date(cursor) if job_age > 14: @@ -72,6 +73,7 @@ def delete_job(cursor, jobdetails): query = "DELETE FROM jobs WHERE Job_id = \"%s\"" % job_posting_id return query_sql(cursor, query) + # Grab new jobs from a website, Parses JSON code and inserts the data into a list of dictionaries do not need to edit def fetch_new_jobs(): query = requests.get("https://remotive.io/api/remote-jobs") @@ -89,10 +91,10 @@ def jobhunt(cursor): def get_date_of_job_posting_vs_current_date(cursor): - # getting the difference between two date objects + # getting the difference between two date objects import datetime cursor.execute("SELECT * FROM jobs") - row = cursor.fetchall() # [ (1,2,3,4) ] + row = cursor.fetchall() time1 = row[0][3] time2 = datetime.date.today() diff = time2 - time1 @@ -107,7 +109,7 @@ def add_or_delete_job(jobpage, cursor): check_if_job_exists(cursor, jobdetails) is_job_found = len(cursor.fetchall()) > 0 # https://stackoverflow.com/questions/2511679/python-number-of-rows-affected-by-cursor-executeselect if is_job_found: - print("job already exists") # does this meet requirements? + print("job already exists") delete_job(cursor, jobdetails) else: