Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

61 changes: 47 additions & 14 deletions jobhunter.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
# Tyler Sabin
# CNE340 Winter Quarter 2024
# 2/12/2024
# follow instructions here and on Canvas to complete program
# https://rtc.instructure.com/courses/2439016/assignments/31830474?module_item_id=79735018
# code below modified by Tyler Sabin and Brian Huang
# https://github.com/profproix/cne340_jobhunter
# 2/7/2024 Ixius Procopios said to revert comments back to original

import mysql.connector
import time
import json
import requests
from datetime import date
import datetime
import html2text


Expand All @@ -21,6 +30,7 @@ def create_tables(cursor):
# Python is in latin-1 and error (Incorrect string value: '\xE2\x80\xAFAbi...') will occur if Description is not in unicode format due to the json data
cursor.execute('''CREATE TABLE IF NOT EXISTS jobs (id INT PRIMARY KEY auto_increment, Job_id varchar(50) ,
company varchar (300), Created_at DATE, url varchar(30000), Title LONGBLOB, Description LONGBLOB ); ''')
cursor.execute("ALTER TABLE jobs CHANGE company company VARCHAR(300) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;")


# Query the database.
Expand All @@ -33,32 +43,41 @@ def query_sql(cursor, query):
# Add a new job
def add_new_job(cursor, jobdetails):
    """Insert one job posting from the API payload into the jobs table.

    jobdetails is one dict from the Remotive API 'jobs' list; the fields
    read here are 'id', 'company_name', 'publication_date', 'url',
    'title', and 'description' (HTML, converted to plain text).
    """
    # extract all required columns
    job_posting_id = jobdetails['id']
    company_called = jobdetails['company_name']
    # first 10 chars of the ISO timestamp -> YYYY-MM-DD for the DATE column.
    # Named posted_date so it does not shadow datetime.date imported above.
    posted_date = jobdetails['publication_date'][0:10]
    web_address = jobdetails['url']
    job_title = jobdetails['title']
    # API descriptions are HTML; store them as readable plain text
    description = html2text.html2text(jobdetails['description'])
    # %s is what is needed for Mysqlconnector as SQLite3 uses ? the Mysqlconnector uses %s
    # NOTE(review): cursor.execute() returns None in mysql.connector, so
    # query_sql receives None here — confirm query_sql tolerates that.
    query = cursor.execute("INSERT INTO jobs(Job_id, company, Created_at, url, Title, Description) "
                           "VALUES(%s,%s,%s,%s,%s,%s)", (job_posting_id, company_called, posted_date, web_address, job_title, description))
    return query_sql(cursor, query)


# Check if new job
def check_if_job_exists(cursor, jobdetails):
    """Query the jobs table for a posting with this job's remote id.

    The caller inspects cursor.fetchall() afterwards to see whether any
    row matched.
    """
    job_posting_id = jobdetails['id']
    # NOTE(review): building SQL with % string formatting from API data is
    # injection-prone — switch to a parameterized query (%s placeholder with
    # a params tuple) if query_sql can pass parameters through.
    query = "SELECT * FROM jobs WHERE Job_id = \"%s\"" % job_posting_id
    return query_sql(cursor, query)

# Deletes job
def delete_job(cursor, jobdetails):
    """Delete this job posting from the jobs table if it is over 14 days old.

    Uses get_date_of_job_posting_vs_current_date to compute the stored
    posting's age in days; does nothing (returns None) when the job is
    14 days old or newer.
    """
    job_age = get_date_of_job_posting_vs_current_date(cursor)
    if job_age > 14:
        job_posting_id = jobdetails['id']
        # NOTE(review): same string-formatted SQL as check_if_job_exists —
        # parameterize if query_sql supports it.
        query = "DELETE FROM jobs WHERE Job_id = \"%s\"" % job_posting_id
        return query_sql(cursor, query)


# Grab new jobs from a website, Parses JSON code and inserts the data into a list of dictionaries do not need to edit
def fetch_new_jobs():
    """Download the current remote-job listings from the Remotive API.

    Returns the parsed JSON payload as a dict; the postings themselves
    live under its 'jobs' key.
    """
    response = requests.get("https://remotive.io/api/remote-jobs")
    listings = json.loads(response.text)
    return listings


Expand All @@ -71,19 +90,33 @@ def jobhunt(cursor):
add_or_delete_job(jobpage, cursor)


def get_date_of_job_posting_vs_current_date(cursor):
    """Return the age in days of the first stored job posting.

    Reads every row of the jobs table, takes the Created_at DATE value
    (column index 3) of the first row, and diffs it against today's date.
    Returns 0 when the table is empty (previously this raised IndexError).
    """
    cursor.execute("SELECT * FROM jobs")
    rows = cursor.fetchall()
    if not rows:
        # no stored jobs yet -> nothing is old enough to delete
        return 0
    posted_on = rows[0][3]  # Created_at column of the first row
    age = datetime.date.today() - posted_on
    return age.days


def add_or_delete_job(jobpage, cursor):
    """Walk the API payload and sync each posting into the database.

    Known postings are handed to delete_job (which only removes them once
    they are over 14 days old); unknown postings are inserted.
    """
    # EXTRACTS EACH JOB FROM THE JOB LIST. It needs to look at the 'jobs'
    # dictionary from the API payload, not iterate the payload itself.
    # https://careerkarma.com/blog/python-typeerror-int-object-is-not-iterable/
    for jobdetails in jobpage['jobs']:
        # check if the job already exists in the DB
        check_if_job_exists(cursor, jobdetails)
        # fetchall() may only be consumed ONCE per query; bind the result here.
        # https://stackoverflow.com/questions/2511679/python-number-of-rows-affected-by-cursor-executeselect
        is_job_found = len(cursor.fetchall()) > 0
        if is_job_found:
            print("job already exists")
            # prune it if it has aged past the 14-day window
            delete_job(cursor, jobdetails)
        else:
            # INSERT JOB and notify the user of a new posting
            print("new job found")
            add_new_job(cursor, jobdetails)


# Setup portion of the program. Take arguments and set up the script
Expand All @@ -95,9 +128,9 @@ def main():
cursor = conn.cursor()
create_tables(cursor)

while (1): # Infinite Loops. Only way to kill it is to crash or manually crash it. We did this as a background process/passive scraper
while True: # Infinite Loops. Only way to kill it is to crash or manually crash it. We did this as a background process/passive scraper
jobhunt(cursor)
time.sleep(21600) # Sleep for 1h, this is ran every hour because API or web interfaces have request limits. Your reqest will get blocked.
time.sleep(14400) # Sleep for 1h, this is ran every hour because API or web interfaces have request limits. Your reqest will get blocked.


# Sleep does a rough cycle count, system is not entirely accurate
Expand Down