# Create the table structure
def create_tables(cursor, table):
    """Create the ``jobs`` table if it does not already exist.

    Args:
        cursor: Open database cursor; only its ``execute`` method is used.
        table: Accepted for backward compatibility but ignored. Every query in
            this script hard-codes the table name ``jobs``, and main() passes
            the placeholder string "table", which is a reserved word in MySQL.

    Returns:
        None.
    """
    # The job data comes from JSON and can contain non-Latin-1 characters in any
    # text field, not just Description; storing them in a latin1 column fails
    # with "Incorrect string value: '\xE2\x80\xAF...'".
    # Source: http://mysql.rjweb.org/doc.php/charcoll
    # utf8mb4 is set as the table default so that every text column accepts
    # them, including 4-byte characters that MySQL's 3-byte "utf8" rejects.
    # Because of IF NOT EXISTS, an already-created table keeps its old charset.
    cursor.execute('''CREATE TABLE IF NOT EXISTS jobs (id INT PRIMARY KEY auto_increment, Type varchar(10), Title varchar(100), Description text,
    Job_id varchar(50), Created_at DATE, Company varchar(100), Location varchar(50), How_to_apply varchar(300)) DEFAULT CHARSET=utf8mb4; ''')
    return
# Add a new job
def add_new_job(cursor, jobdetails):
    """Insert one job posting into the ``jobs`` table.

    Args:
        cursor: Open database cursor; only its ``execute`` method is used.
        jobdetails: Dict for one posting. Reads the keys 'type', 'title',
            'description', 'id', 'created_at', 'company', 'location' and
            'how_to_apply'.

    Returns:
        The cursor that ran the statement.
        NOTE(review): the original returned the result of query_sql(), whose
        body is outside this view; the caller visible in this file ignores
        the return value.

    Raises:
        KeyError: If a required key is missing from ``jobdetails``.
        ValueError: If 'created_at' does not match the expected format.
    """
    # created_at is text in the form "%a %b %d %H:%M:%S %Z %Y"; the column is a
    # DATE, so only the calendar date is kept.
    # https://docs.python.org/3/library/datetime.html
    created_at = datetime.strptime(jobdetails['created_at'], "%a %b %d %H:%M:%S %Z %Y").date()
    row = (
        jobdetails['type'],
        jobdetails['title'],
        jobdetails['description'],
        jobdetails['id'],
        created_at,
        jobdetails['company'],
        jobdetails['location'],
        jobdetails['how_to_apply'],
    )
    # The statement is executed exactly once, with the values passed as
    # parameters so the driver escapes them.
    # https://stackoverflow.com/questions/20818155/not-all-parameters-were-used-in-the-sql-statement-python-mysql/20818201#20818201
    cursor.execute("INSERT INTO jobs(Type, Title, Description, Job_id, Created_at, Company, Location, How_to_apply) "
                   "VALUES(%s,%s,%s,%s,%s,%s,%s,%s)", row)
    return cursor


# Check if new job
def check_if_job_exists(cursor, jobdetails):
    """Run a SELECT for the posting whose Job_id equals ``jobdetails['id']``.

    The rows are left on the cursor; the caller must fetch them to find out
    whether the job is already stored.

    Args:
        cursor: Open database cursor; only its ``execute`` method is used.
        jobdetails: Dict for one posting; only the key 'id' is read.

    Returns:
        The cursor that ran the statement.
    """
    # The id comes from downloaded JSON, so it is bound as a parameter instead
    # of being formatted into the SQL text. query_sql(cursor, query) only
    # accepts a finished SQL string, so execute() is called directly.
    cursor.execute("SELECT * FROM jobs WHERE Job_id = %s", (jobdetails['id'],))
    return cursor


# Deletes job
def delete_job(cursor, jobdetails):
    """Delete the stored posting whose Job_id equals ``jobdetails['id']``.

    Args:
        cursor: Open database cursor; only its ``execute`` method is used.
        jobdetails: Dict for one posting; only the key 'id' is read.

    Returns:
        The cursor that ran the statement.
    """
    # Parameterized for the same reason as check_if_job_exists.
    # https://www.tutorialspoint.com/mysql/mysql-delete-query.htm
    cursor.execute("DELETE FROM jobs WHERE Job_id = %s", (jobdetails['id'],))
    return cursor


# Grab new jobs from a website, parses the JSON and returns it
def fetch_new_jobs(arg_dict):
    """Download the current job postings and return the decoded JSON.

    Args:
        arg_dict: Currently unused; the search URL is fixed.

    Returns:
        The decoded JSON document (iterated by the caller as a list of job
        dicts), or an empty list when the download or the JSON decoding fails.
        The failure value used to be 0, which the caller cannot iterate.
    """
    # Code from https://github.com/RTCedu/CNA336/blob/master/Spring2018/Sql.py
    # More filters can be chained with "&", e.g. &description=python&full_time=no
    # NOTE(review): confirm this endpoint is still being served.
    query = "https://jobs.github.com/positions.json?location=seattle"
    try:
        # The context manager closes the HTTP response; the timeout keeps a
        # stalled connection from blocking the polling loop forever.
        with urllib.request.urlopen(query, timeout=30) as contents:
            return json.loads(contents.read())
    except (OSError, ValueError) as err:
        # OSError covers URLError/HTTPError/timeouts, ValueError covers bad JSON.
        # Best effort: report the problem and let the next cycle retry.
        print("Could not fetch jobs from {}: {}".format(query, err))
        return []
# Main area of the code.
def jobhunt(arg_dict, cursor):
    """Run one polling cycle: fetch the postings and sync them into the database.

    Args:
        arg_dict: Passed through unchanged to fetch_new_jobs().
        cursor: Open database cursor used for every query of this cycle.
    """
    # Holds the decoded JSON from the job site; print(jobpage) to inspect it.
    jobpage = fetch_new_jobs(arg_dict)
    add_or_delete_job(jobpage, cursor)


def _is_stale(jobdetails, now, max_age_days=30):
    """Return True when the posting's 'created_at' is more than max_age_days before now."""
    # NOTE(review): the parsed timestamp is naive and is compared against local
    # time; confirm whether the feed's time zone matters for a 30-day cutoff.
    posted = datetime.strptime(jobdetails['created_at'], "%a %b %d %H:%M:%S %Z %Y")
    # https://stackoverflow.com/questions/46563442/check-if-dates-on-a-list-are-older-than-2-days
    return (now - posted).days > max_age_days


def add_or_delete_job(jobpage, cursor):
    """Insert postings that are not stored yet and delete stored ones that are too old.

    For every posting in ``jobpage``:
      * stored and older than 30 days -> announce it and delete it;
      * not stored and at most 30 days old -> announce it and insert it;
      * otherwise nothing is changed. In particular an old posting that is not
        stored is NOT inserted, so it is no longer re-added and re-deleted on
        alternating cycles.

    Args:
        jobpage: Iterable of job dicts. A falsy value (0, None, empty list),
            as produced by a failed fetch, is treated as "nothing to do".
        cursor: Open database cursor.

    Returns:
        None.
    """
    if not jobpage:
        return

    now = datetime.now()  # one reference time for the whole batch
    for jobdetails in jobpage:
        check_if_job_exists(cursor, jobdetails)
        # The result set is read in full so the cursor is free for the next statement.
        # https://stackoverflow.com/questions/2511679/python-number-of-rows-affected-by-cursor-executeselect
        is_job_found = len(cursor.fetchall()) > 0
        stale = _is_stale(jobdetails, now)
        summary = "{} from {}, Created at: {}, JobID: {}".format(
            jobdetails["title"], jobdetails["company"], jobdetails["created_at"], jobdetails['id'])

        if is_job_found:
            if stale:
                print("Delete job: " + summary)
                delete_job(cursor, jobdetails)
        elif not stale:
            # Notify the user of the new posting, then store it.
            print("New job is found: " + summary)
            add_new_job(cursor, jobdetails)
# Setup portion of the program. Connects to the database and starts the polling loop.
def main():
    """Connect to MySQL, make sure the table exists, then poll for jobs once an hour.

    The loop never ends on its own; it runs as a passive background scraper
    and stops only when the process is interrupted or an error propagates.
    """
    # Connect to SQL and get cursor
    conn = connect_to_sql()
    cursor = conn.cursor()
    try:
        create_tables(cursor, "table")
        # No configuration file is loaded any more; 0 is the "no arguments" placeholder.
        arg_dict = 0
        while True:
            jobhunt(arg_dict, cursor)
            # Persist this cycle's inserts and deletes. Without a commit they
            # are discarded when the connection closes unless autocommit is on.
            # NOTE(review): connect_to_sql() is outside this view; if it
            # already enables autocommit this call is harmless.
            conn.commit()
            # Sleep for 1h: job sites limit request rates and block clients that
            # poll too often. For a quick test, lower this to 10 seconds and
            # drop the jobs table in MySQL first.
            time.sleep(3600)
    finally:
        # Release the database resources when the loop is interrupted.
        cursor.close()
        conn.close()


if __name__ == '__main__':
    main()
+https://github.com/Rushin85/CNA330/blob/master/JobHunter/JobHunter.py + +random stackoverflows +https://stackoverflow.max-everyday.com/2017/09/python-mysql-connector-internalerror-unread-result-found/ +https://stackoverflow.com/questions/44962932/how-to-use-rowcount-in-mysql-using-python +https://linuxconfig.org/how-to-parse-data-from-json-into-python +https://pynative.com/python-mysql-select-query-to-fetch-data/ + +Sources as of 11/10: +https://stackoverflow.com/questions/21639275/python-syntaxerror-non-ascii-character-xe2-in-file +https://stackoverflow.com/questions/46046136/find-out-if-a-date-is-more-than-30-days-old +https://python-forum.io/Thread-MySQL-Insert-Not-all-parameters-were-used-in-the-SQL-statement +https://stackoverflow.com/questions/45650853/how-to-convert-string-with-timezone-to-datetime-in-utc-with-python +https://www.babelstone.co.uk/Unicode/whatisit.html +https://www.tutorialspoint.com/mysql/mysql-delete-query.htm +https://stackoverflow.com/questions/46563442/check-if-dates-on-a-list-are-older-than-2-days +http://mysql.rjweb.org/doc.php/charcoll +https://docs.python.org/3/library/datetime.html +https://www.programiz.com/python-programming/datetime/strftime +https://stackoverflow.com/questions/2511679/python-number-of-rows-affected-by-cursor-executeselect +https://stackoverflow.com/questions/28973453/mysql2error-incorrect-string-value-xe2-x80-xa8-x09 \ No newline at end of file