# diff --git a/homework.py b/homework.py
# new file mode 100644
# index 0000000..2955c98
# --- /dev/null
# +++ b/homework.py
# @@ -0,0 +1,70 @@
import sqlite3
import pandas as pd
import re


def get_out_tags(field):
    """Return ``field`` as text with every ``<...>`` tag removed.

    Args:
        field: A single cell value. Non-string values are converted with
            ``str()`` before the tags are stripped.

    Returns:
        The cleaned string, or ``field`` itself when it is missing
        (``None``/NaN). Missing values are passed through untouched so the
        column keeps a real missing value instead of the literal text
        ``'nan'`` that ``str(float('nan'))`` would produce.
    """
    if pd.isna(field):
        return field
    return re.sub(r'\<[^>]*\>', '', str(field))


# Rebuild the ``works`` table from scratch on every run.
con = sqlite3.connect('works.sqlite')
cursor = con.cursor()
cursor.execute('drop table if exists works')
cursor.execute(
    'create table if not exists works (ID INTEGER PRIMARY KEY AUTOINCREMENT,salary INTEGER,educationType TEXT,'
    'jobTitle TEXT,qualification TEXT,gender TEXT,dateModify TEXT,skills TEXT,otherInfo TEXT)')
con.commit()

# Skills and otherInfo: strip markup tags from both columns, then append the
# CSV rows to the freshly created table.
df = pd.read_csv('works.csv')
df['skills'] = df['skills'].apply(get_out_tags)
df['otherInfo'] = df['otherInfo'].apply(get_out_tags)
df.to_sql('works', con, if_exists='append', index=False)
con.commit()

# Homework: try to normalize the database and clean up the fields a bit.
# Step 1 - move the distinct gender values into their own lookup table.
cursor.execute('drop table if exists genders')
cursor.execute('create table genders(id INTEGER PRIMARY KEY AUTOINCREMENT, gender_val TEXT)')
con.commit()

cursor.execute('INSERT INTO genders(gender_val) SELECT DISTINCT gender FROM works WHERE gender IS NOT NULL')
con.commit()

# Add the foreign-key column and fill it by matching on the text value.
cursor.execute('ALTER TABLE works ADD COLUMN gender_id INTEGER REFERENCES genders(id)')
con.commit()

cursor.execute('UPDATE works SET gender_id = (SELECT id FROM genders WHERE gender_val = works.gender)')
con.commit()

# The text column is now redundant.
# NOTE: ALTER TABLE ... DROP COLUMN needs SQLite 3.35.0 or newer.
cursor.execute('ALTER TABLE works DROP COLUMN gender')
con.commit()

# Output check
# cursor.execute('SELECT * FROM genders')
# print(cursor.fetchall())
# cursor.execute('SELECT gender_val FROM genders,works WHERE genders.id = works.gender_id')
# print(cursor.fetchall())


# Step 2 - the same lookup-table treatment for educationType.
cursor.execute('drop table if exists education')
cursor.execute('create table education(id INTEGER PRIMARY KEY AUTOINCREMENT, edu_val TEXT)')
con.commit()

cursor.execute('INSERT INTO education(edu_val) SELECT DISTINCT educationType FROM works WHERE educationType IS NOT NULL')
con.commit()

# Add the foreign-key column and fill it by matching on the text value.
cursor.execute('ALTER TABLE works ADD COLUMN educationType_id INTEGER REFERENCES education(id)')
con.commit()

cursor.execute('UPDATE works SET educationType_id = (SELECT id FROM education WHERE edu_val = works.educationType)')
con.commit()

# The text column is now redundant.
# NOTE: ALTER TABLE ... DROP COLUMN needs SQLite 3.35.0 or newer.
cursor.execute('ALTER TABLE works DROP COLUMN educationType')
con.commit()

# Output check
# cursor.execute('SELECT * FROM education')
# print(cursor.fetchall())
# cursor.execute('SELECT edu_val FROM education,works WHERE education.id = works.educationType_id')
# print(cursor.fetchall())

# End of homework.py: every change above is already committed, so release
# the database connection explicitly instead of leaving it open.
con.close()

# diff --git a/tasks_on_lesson.py b/tasks_on_lesson.py
# new file mode 100644
# index 0000000..32c97f3
# --- /dev/null
# +++ b/tasks_on_lesson.py
# @@ -0,0 +1,63 @@
import sqlite3
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

con = sqlite3.connect('works.sqlite')
cursor = con.cursor()
cursor.execute('drop table if exists works')
# 1 - recreate the works table and load the CSV rows into it unchanged.
cursor.execute(
    'create table if not exists works (ID INTEGER PRIMARY KEY AUTOINCREMENT,salary INTEGER,educationType TEXT,'
    'jobTitle TEXT,qualification TEXT,gender TEXT,dateModify TEXT,skills TEXT,otherInfo TEXT)')
con.commit()

df = pd.read_csv('works.csv')
df.to_sql('works', con, if_exists='append', index=False)
con.commit()

# 2 6.3 kb
# (presumably the database file size before the index is created - the
# author's own measurement, not verified here)
cursor.execute('create index salary_index on works (salary)')
con.commit()
# 6.6 kb
# (presumably the size after the index is created)

# The queries below are executed but their results are only read when the
# matching print line is uncommented.

# 3 - total number of rows.
cursor.execute('SELECT COUNT(*) FROM works')
# print(cursor.fetchall()[0][0])

# 4 - number of rows for each of the two gender values.
cursor.execute("SELECT COUNT(*) FROM works WHERE gender = 'Мужской'")
# print(cursor.fetchall()[0][0])

cursor.execute("SELECT COUNT(*) FROM works WHERE gender = 'Женский'")
# print(cursor.fetchall()[0][0])

# 5 - number of rows whose skills value is not NULL.
cursor.execute("SELECT COUNT(*) FROM works WHERE skills NOT NULL")
# print(cursor.fetchall()[0][0])

# 6 - the non-NULL skills values themselves.
cursor.execute("SELECT skills FROM works WHERE skills NOT NULL")
# print(cursor.fetchall())

# 7 - salaries of the rows whose skills text contains "Python".
cursor.execute("SELECT salary FROM works WHERE skills LIKE '%Python%'")
# print(cursor.fetchall())

# 8
# Salaries for each of the two gender values, as plain Python lists.
cursor.execute("SELECT salary FROM works WHERE gender = 'Мужской'")
m_salary = [t[0] for t in cursor.fetchall()]
# print(m_salary)

cursor.execute("SELECT salary FROM works WHERE gender = 'Женский'")
w_salary = [t[0] for t in cursor.fetchall()]
# print(w_salary)

# Nothing below touches the database, so close the connection now rather
# than leaving it open while the plot windows are shown. The connection is
# reached through the cursor that ran the queries.
cursor.connection.close()

# 9 - replace each salary list with its ten deciles (10%, 20%, ..., 100%).
m_salary = np.quantile(m_salary, np.linspace(0.1, 1, 10))
w_salary = np.quantile(w_salary, np.linspace(0.1, 1, 10))

# NOTE(review): each histogram is drawn from the ten decile values computed
# above, not from the raw salaries - confirm that this is what the task
# asks for.
plt.hist(m_salary, bins=100, color='blue')
plt.show()
plt.hist(w_salary, bins=100, color='red')
plt.show()

# diff --git a/works.sqlite b/works.sqlite
# new file mode 100644
# index 0000000..21f2b36
# Binary files /dev/null and b/works.sqlite differ