Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 68 additions & 35 deletions JobHunter/JobHunter.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
# This script pulls from a job website and stores positions into a database. If there is a new posting it notifies the user.
# CNA 330
# Zachary Rubin, zrubin@rtc.edu
# Fall 2019 CNA 330
# Emily Billings, ejbillings@student.rtc.edu
# Sources: Robin Cunanan Kevin Huynh (AT&T Dev), rest of sources can be found in README and throughout the code
# I also worked with Sam, Brandon, and Simien

import mysql.connector
import sys
import json
import urllib.request
import os
import time
from datetime import datetime
# import os, not needed
# import sys, not needed

# Connect to database
# You may need to edit the connect function based on your local settings.
Expand All @@ -18,8 +22,11 @@ def connect_to_sql():

# Create the table structure
def create_tables(cursor, table):
    """Create the ``jobs`` table if it does not already exist.

    Args:
        cursor: Database cursor on which the DDL statement is executed.
        table: Kept so existing callers keep working; it is not used,
            because the table name is fixed to ``jobs``.

    Returns:
        None.
    """
    # Description is declared with an explicit utf8 charset because the JSON
    # feed contains non-latin-1 characters; without it MySQL rejects rows with
    # "Incorrect string value: '\xE2\x80\xAF...'".
    # Source: http://mysql.rjweb.org/doc.php/charcoll
    # NOTE(review): MySQL's "utf8" stores at most 3 bytes per character;
    # 4-byte characters (e.g. emoji) would need utf8mb4 -- confirm whether the
    # feed can contain them.
    cursor.execute(
        '''CREATE TABLE IF NOT EXISTS jobs (
               id INT PRIMARY KEY auto_increment,
               Type varchar(10),
               Title varchar(100),
               Description text CHARSET utf8,
               Job_id varchar(50),
               Created_at DATE,
               Company varchar(100),
               Location varchar(50),
               How_to_apply varchar(300)); '''
    )
    return

# Query the database.
Expand All @@ -30,36 +37,46 @@ def query_sql(cursor, query):

# Add a new job
def add_new_job(cursor, jobdetails):
    """Insert one job posting into the ``jobs`` table.

    The INSERT is parameterized and executed exactly once on ``cursor``.

    Args:
        cursor: Database cursor on which the INSERT is executed.
        jobdetails: Mapping describing a single posting. It must provide the
            keys ``type``, ``created_at``, ``company``, ``location``,
            ``title``, ``description``, ``how_to_apply`` and ``id``.

    Returns:
        The cursor that was passed in.

    Raises:
        KeyError: If a required key is missing from ``jobdetails``.
        ValueError: If ``created_at`` does not match the expected
            ``"%a %b %d %H:%M:%S %Z %Y"`` format.
    """
    # https://www.programiz.com/python-programming/datetime/strftime
    # https://docs.python.org/3/library/datetime.html
    created_at = time.strptime(jobdetails['created_at'], "%a %b %d %H:%M:%S %Z %Y")

    insert_sql = (
        "INSERT INTO jobs(Type, Title, Description, Job_id, Created_at, "
        "Company, Location, How_to_apply) "
        "VALUES(%s,%s,%s,%s,%s,%s,%s,%s)"
    )
    # Parameter order must match the column list above.
    # https://stackoverflow.com/questions/20818155/not-all-parameters-were-used-in-the-sql-statement-python-mysql/20818201#20818201
    params = (
        jobdetails['type'],
        jobdetails['title'],
        jobdetails['description'],
        jobdetails['id'],
        created_at,
        jobdetails['company'],
        jobdetails['location'],
        jobdetails['how_to_apply'],
    )
    # Execute directly: the previous code fed execute()'s return value (None)
    # into a second query call instead of a SQL string.
    cursor.execute(insert_sql, params)
    return cursor

# Check if new job
def check_if_job_exists(cursor, jobdetails):
    """Run a SELECT for stored rows whose ``Job_id`` equals ``jobdetails['id']``.

    The matching rows are left on ``cursor``; the caller reads them (for
    example with ``cursor.fetchall()``) to decide whether the job is known.

    Args:
        cursor: Database cursor on which the SELECT is executed.
        jobdetails: Mapping for one posting; only the ``id`` key is read.

    Returns:
        The cursor that was passed in.

    Raises:
        KeyError: If ``jobdetails`` has no ``id`` key.
    """
    # The id comes from an external feed, so it is passed as a bound
    # parameter rather than formatted into the SQL text.
    cursor.execute("SELECT * FROM jobs WHERE Job_id = %s", (jobdetails['id'],))
    return cursor

# Deletes job
def delete_job(cursor, jobdetails):
    """Delete stored rows whose ``Job_id`` equals ``jobdetails['id']``.

    Args:
        cursor: Database cursor on which the DELETE is executed.
        jobdetails: Mapping for one posting; only the ``id`` key is read.

    Returns:
        The cursor that was passed in.

    Raises:
        KeyError: If ``jobdetails`` has no ``id`` key.
    """
    # https://www.tutorialspoint.com/mysql/mysql-delete-query.htm
    # The id comes from an external feed, so it is passed as a bound
    # parameter rather than formatted into the SQL text.
    cursor.execute("DELETE FROM jobs WHERE Job_id = %s", (jobdetails['id'],))
    return cursor

# Grab new jobs from a website
# Grab new jobs from a website, Parses JSON code and inserts the data into a list of dictionaries
def fetch_new_jobs(arg_dict):
    """Download the job feed and return the decoded JSON document.

    Args:
        arg_dict: Accepted for compatibility with callers; it is not read,
            because the query URL is fixed.

    Returns:
        The decoded JSON payload (the caller iterates it as a list of
        per-job dictionaries), or an empty list when the download or the
        JSON decoding fails.
    """
    # Code from https://github.com/RTCedu/CNA336/blob/master/Spring2018/Sql.py
    # Further filters can be chained with "&" after the location, e.g.
    # "&description=python&full_time=no".
    query = "https://jobs.github.com/positions.json?location=seattle"
    try:
        # The context manager closes the HTTP response even if decoding fails.
        with urllib.request.urlopen(query) as contents:
            return json.loads(contents.read())
    except Exception as err:
        # Deliberate best-effort: a failed fetch must not stop the hourly
        # polling loop. Unlike a bare "except:", this still lets
        # KeyboardInterrupt and SystemExit through.
        print("Could not fetch jobs: " + str(err))
        # An empty list keeps the result iterable for the caller.
        return []

# Load a text-based configuration file
def load_config_file(filename):
# Load a text-based configuration file, not function needed per Zak
"""def load_config_file(filename):
argument_dictionary = 0
# Code from https://github.com/RTCedu/CNA336/blob/master/Spring2018/FileIO.py
rel_path = os.path.abspath(os.path.dirname(__file__))
Expand All @@ -73,35 +90,51 @@ def load_config_file(filename):
file = open(filename, "w")
file.write("")
file.close()

## Add in information for argument dictionary
return argument_dictionary
return argument_dictionary"""

# Main area of the code.
def jobhunt(arg_dict, cursor):
    """Run one polling pass: fetch the current postings and sync the database.

    Args:
        arg_dict: Argument dictionary forwarded unchanged to ``fetch_new_jobs``.
        cursor: Database cursor forwarded to ``add_or_delete_job``.
    """
    # fetch_new_jobs returns the decoded JSON feed; add a print(...) of it
    # here when the raw data needs to be inspected.
    add_or_delete_job(fetch_new_jobs(arg_dict), cursor)

## Add in your code here to notify the user of a new posting

## EXTRA CREDIT: Add your code to delete old entries
def add_or_delete_job(jobpage, cursor):
    """Sync the ``jobs`` table with the postings in ``jobpage``.

    For every posting:
      * already stored and older than 30 days -> it is deleted;
      * not stored and at most 30 days old -> it is inserted and the user is
        notified on stdout;
      * otherwise nothing happens.

    Args:
        jobpage: Iterable of per-job dictionaries. Each needs the keys read
            here (``created_at``, ``title``, ``company``, ``id``) plus those
            required by ``add_new_job``. A falsy value (empty list, or the
            ``0`` used as a "fetch failed" marker) is treated as "no jobs".
        cursor: Database cursor used for all lookups, inserts and deletes.

    Raises:
        KeyError: If a posting lacks one of the required keys.
        ValueError: If ``created_at`` is not in the
            ``"%a %b %d %H:%M:%S %Z %Y"`` format.
    """
    if not jobpage:
        # Nothing fetched; also guards against iterating a non-iterable marker.
        return

    # NOTE(review): datetime.now() is local time while the feed timestamps
    # carry a zone name that strptime discards; the skew is at most hours
    # against a 30-day cutoff -- confirm this is acceptable.
    now = datetime.now()

    for jobdetails in jobpage:
        job_date = datetime.strptime(jobdetails['created_at'], "%a %b %d %H:%M:%S %Z %Y")
        # https://stackoverflow.com/questions/46563442/check-if-dates-on-a-list-are-older-than-2-days
        is_expired = (now - job_date).days > 30

        check_if_job_exists(cursor, jobdetails)
        # https://stackoverflow.com/questions/2511679/python-number-of-rows-affected-by-cursor-executeselect
        is_job_found = len(cursor.fetchall()) > 0

        summary = (jobdetails["title"] + " from " + jobdetails["company"]
                   + ", Created at: " + jobdetails["created_at"]
                   + ", JobID: " + jobdetails['id'])

        if is_job_found:
            if is_expired:
                print("Delete job: " + summary)
                delete_job(cursor, jobdetails)
        elif not is_expired:
            # Expired postings are never inserted; otherwise they would be
            # added, deleted on the next pass, and re-announced forever.
            print("New job is found: " + summary)
            add_new_job(cursor, jobdetails)

# Setup portion of the program. Take arguments and set up the script
# You should not need to edit anything here.
def main():
    """Connect to the database, ensure the table exists, then poll forever.

    Each cycle runs one ``jobhunt`` pass, commits the resulting changes and
    sleeps for an hour. The loop only ends when the process is interrupted
    or an exception escapes; the cursor and connection are closed on the
    way out.
    """
    # Connect to SQL and get cursor
    conn = connect_to_sql()
    cursor = conn.cursor()
    try:
        create_tables(cursor, "table")
        # Config-file loading is disabled, so no arguments are passed on.
        arg_dict = 0
        # Runs as a background/passive scraper: stop it by killing the process.
        while True:
            jobhunt(arg_dict, cursor)
            # MySQL Connector/Python does not autocommit by default, so the
            # inserts/deletes from this pass must be committed explicitly.
            # NOTE(review): assumes connect_to_sql() returns a DB-API
            # connection -- confirm.
            conn.commit()
            # Poll hourly because web APIs rate-limit requests. The interval
            # is approximate. To test quickly, lower it to ~10 seconds and
            # drop the table in MySQL.
            time.sleep(3600)
    finally:
        cursor.close()
        conn.close()
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
39 changes: 37 additions & 2 deletions JobHunter/README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,37 @@
# Job Hunter
This is a template for the Job Hunter SQL project
Sources Used for this project, sourced with help from Robin Cunanan

# CNA330
Code and Resources for CNA330 Network Databases & SQL

https://docs.python.org/3/library/json.html
https://realpython.com/python-json/
https://www.w3schools.com/python/python_json.asp
https://stackoverflow.com/questions/40247392/inserting-json-object-into-mysql-using-python
https://stackoverflow.com/questions/35407560/attributeerror-dict-object-has-no-attribute-predictors

useful githubs
https://github.com/ZennyBaff/CNA330/blob/master/JobHunter/JobHunter.py
https://github.com/KaiserTom/CNA330/blob/master/JobHunter/JobHunter.py
https://github.com/billerhard/CNA330/blob/master/JobHunter/JobHunter.py
https://github.com/pdswift/CNA330/blob/master/JobHunter/JobHunter.py
https://github.com/Rushin85/CNA330/blob/master/JobHunter/JobHunter.py

random stackoverflows
https://stackoverflow.max-everyday.com/2017/09/python-mysql-connector-internalerror-unread-result-found/
https://stackoverflow.com/questions/44962932/how-to-use-rowcount-in-mysql-using-python
https://linuxconfig.org/how-to-parse-data-from-json-into-python
https://pynative.com/python-mysql-select-query-to-fetch-data/

Sources as of 11/10:
https://stackoverflow.com/questions/21639275/python-syntaxerror-non-ascii-character-xe2-in-file
https://stackoverflow.com/questions/46046136/find-out-if-a-date-is-more-than-30-days-old
https://python-forum.io/Thread-MySQL-Insert-Not-all-parameters-were-used-in-the-SQL-statement
https://stackoverflow.com/questions/45650853/how-to-convert-string-with-timezone-to-datetime-in-utc-with-python
https://www.babelstone.co.uk/Unicode/whatisit.html
https://www.tutorialspoint.com/mysql/mysql-delete-query.htm
https://stackoverflow.com/questions/46563442/check-if-dates-on-a-list-are-older-than-2-days
http://mysql.rjweb.org/doc.php/charcoll
https://docs.python.org/3/library/datetime.html
https://www.programiz.com/python-programming/datetime/strftime
https://stackoverflow.com/questions/2511679/python-number-of-rows-affected-by-cursor-executeselect
https://stackoverflow.com/questions/28973453/mysql2error-incorrect-string-value-xe2-x80-xa8-x09