# Extracted from: diff --git a/HW.py b/HW.py (new file mode 100644, index 0000000..65e2136)
"""Rebuild ``works.sqlite`` from ``works.csv`` and add lookup tables.

Steps performed by :func:`rebuild_database`:

1. (Re)create the ``works`` table and fill it from the CSV file, removing
   ``<...>`` tags from the ``skills`` and ``otherInfo`` columns.
2. Build the ``genders`` and ``educations`` tables from the distinct non-NULL
   values found in ``works``.
3. Replace ``works`` with a copy whose ``gender`` and ``educationType``
   columns are foreign keys into those two tables.
"""
import re
import sqlite3

import pandas as pd

_TAG_PATTERN = re.compile(r'<[^>]*>')


def strip_tags(value):
    """Return ``value`` as text with every ``<...>`` tag removed.

    Missing values (``None``, NaN, ``pd.NA``) yield ``None`` so that they are
    written to the database as NULL rather than as the literal text ``'nan'``.
    """
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return None
    return _TAG_PATTERN.sub('', str(value))


def rebuild_database(db_path='works.sqlite', csv_path='works.csv'):
    """Load ``csv_path`` into ``db_path`` and normalise gender/education.

    Args:
        db_path: SQLite database file to create or update.
        csv_path: CSV file whose columns match the ``works`` table
            (everything except the auto-generated ``ID``).
    """
    con = sqlite3.connect(db_path)
    try:
        cursor = con.cursor()
        # Issued first, before any statement opens a transaction.
        cursor.execute('PRAGMA foreign_keys = true')
        con.commit()

        cursor.execute('DROP TABLE IF EXISTS works')
        cursor.execute('CREATE TABLE works ('
                       'ID INTEGER PRIMARY KEY AUTOINCREMENT,'
                       'salary INTEGER,'
                       'educationType TEXT,'
                       'jobTitle TEXT,'
                       'qualification TEXT,'
                       'gender TEXT,'
                       'dateModify TEXT,'
                       'skills TEXT,'
                       'otherInfo TEXT)')

        df = pd.read_csv(csv_path)
        for column in ('skills', 'otherInfo'):
            df[column] = df[column].apply(strip_tags)
        df.to_sql("works", con, if_exists='append', index=False)
        con.commit()

        cursor.execute('DROP TABLE IF EXISTS genders')
        cursor.execute('CREATE TABLE genders(genderName TEXT PRIMARY KEY )')
        cursor.execute('INSERT INTO genders SELECT DISTINCT gender FROM works '
                       'WHERE gender IS NOT NULL')
        cursor.execute('DROP TABLE IF EXISTS educations')
        cursor.execute('CREATE TABLE educations(educationType TEXT PRIMARY KEY )')
        cursor.execute('INSERT INTO educations SELECT DISTINCT educationType FROM works '
                       'WHERE works.educationType IS NOT NULL')
        con.commit()

        # A leftover new_works from an interrupted earlier run would make the
        # CREATE below fail, so clear it first.
        cursor.execute('DROP TABLE IF EXISTS new_works')
        cursor.execute('CREATE TABLE new_works ('
                       'ID INTEGER PRIMARY KEY AUTOINCREMENT,'
                       'salary INTEGER,'
                       'educationType TEXT REFERENCES educations(educationType) '
                       'ON DELETE CASCADE ON UPDATE CASCADE,'
                       'jobTitle TEXT,'
                       'qualification TEXT,'
                       'gender TEXT REFERENCES genders(genderName) '
                       'ON DELETE CASCADE ON UPDATE CASCADE,'
                       'dateModify TEXT,'
                       'skills TEXT,'
                       'otherInfo TEXT)')
        # Column order of new_works matches works, so SELECT * lines up.
        cursor.execute('INSERT INTO new_works SELECT * FROM works')
        cursor.execute('DROP TABLE works')
        cursor.execute('ALTER TABLE new_works RENAME TO works')
        con.commit()
    finally:
        con.close()


if __name__ == '__main__':
    rebuild_database()

# Next file in the original patch:
# diff --git a/class.py b/class.py new file mode 100644 index 0000000..baff6d6
# Extracted from: --- /dev/null +++ b/class.py @@ -0,0 +1,62 @@
"""Classwork: load ``works.csv`` into SQLite and run a few queries on it.

The numbered comments inside :func:`run_classwork` mirror the numbered tasks
of the original script.
"""
import sqlite3

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


def _salaries_for_gender(cursor, gender):
    """Return the non-NULL salaries of rows whose ``gender`` equals ``gender``."""
    # NULL salaries come back as None, which np.quantile cannot handle.
    cursor.execute(
        'SELECT salary FROM works WHERE gender = ? AND salary IS NOT NULL',
        (gender,),
    )
    return [row[0] for row in cursor.fetchall()]


def _plot_decile_histogram(salaries, color):
    """Show a 100-bin histogram of the 10%..100% quantiles of ``salaries``."""
    if not salaries:
        # Nothing to summarise; skip instead of passing an empty list on.
        return
    deciles = np.quantile(salaries, np.linspace(0.1, 1, 10))
    plt.hist(deciles, 100, color=color)
    plt.show()


def run_classwork(db_path='works.sqlite', csv_path='works.csv'):
    """Rebuild the ``works`` table from ``csv_path`` and print query results.

    Args:
        db_path: SQLite database file to create or update.
        csv_path: CSV file whose columns match the ``works`` table
            (everything except the auto-generated ``ID``).
    """
    con = sqlite3.connect(db_path)
    try:
        cursor = con.cursor()

        # 1: recreate the table and load the CSV into it.
        cursor.execute('drop table if exists works')
        cursor.execute('create table works ('
                       'ID INTEGER PRIMARY KEY AUTOINCREMENT,'
                       'salary INTEGER,'
                       'educationType TEXT,'
                       'jobTitle TEXT,'
                       'qualification TEXT,'
                       'gender TEXT,'
                       'dateModify TEXT,'
                       'skills TEXT,'
                       'otherInfo TEXT)')
        con.commit()

        df = pd.read_csv(csv_path)
        df.to_sql("works", con, if_exists='append', index=False)
        con.commit()

        # 2: index on salary.
        cursor.execute('create index salary_index on works (salary)')
        con.commit()

        # 3: total number of rows.
        cursor.execute('SELECT count(*) FROM works')
        print(cursor.fetchone()[0])

        # 4: number of rows per gender.
        cursor.execute("SELECT gender, count(*) FROM works GROUP BY works.gender")
        print(cursor.fetchall())

        # 5: number of rows with skills filled in.
        cursor.execute("SELECT count(*) FROM works WHERE works.skills IS NOT NULL")
        print(cursor.fetchone()[0])

        # 6: all rows with skills filled in.
        cursor.execute("SELECT * FROM works WHERE works.skills IS NOT NULL")
        print(cursor.fetchall())

        # 7: salaries of rows whose skills mention Python.
        cursor.execute("SELECT salary FROM works WHERE skills LIKE '%Python%'")
        print(cursor.fetchall())

        # 8: salary quantile histograms, one per gender value.
        m_salary = _salaries_for_gender(cursor, 'Мужской')
        w_salary = _salaries_for_gender(cursor, 'Женский')
    finally:
        con.close()

    _plot_decile_histogram(m_salary, 'blue')
    _plot_decile_histogram(w_salary, 'red')


if __name__ == '__main__':
    run_classwork()