# hw.py (new file in this diff)
#
# Builds works.sqlite from works.csv, strips markup tags from the free-text
# columns, and moves the gender / educationType values into lookup tables
# that the works table references through foreign keys.
import re
import sqlite3

import pandas as pd

# Compiled once: matches anything that looks like a markup tag, e.g. "<p>".
_TAG_RE = re.compile(r'<[^>]*>')


def strip_tags(value):
    """Return *value* as text with every ``<...>`` tag removed.

    Missing scalars (``None``, ``NaN``, ``pd.NA``) are returned unchanged.
    Passing them through ``str()`` would turn them into the literal text
    ``'nan'`` / ``'None'``, which would then be written to the database
    instead of NULL.

    Any other value is converted with ``str()`` before the tags are removed.
    """
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return value
    return _TAG_RE.sub('', str(value))


# Create the database.
con = sqlite3.connect('works.sqlite')
cursor = con.cursor()
# SQLite only enforces foreign keys when this is switched on for the
# connection.
cursor.execute('PRAGMA foreign_keys = true')
con.commit()

# Create and fill the works table.
# new_works can be left behind by an earlier run that stopped before the
# final rename; drop it so the CREATE TABLE further down cannot fail.
cursor.execute('DROP TABLE IF EXISTS new_works')
cursor.execute('DROP TABLE IF EXISTS works')
cursor.execute('CREATE TABLE works ('
               'ID INTEGER PRIMARY KEY AUTOINCREMENT,'
               'salary INTEGER,'
               'educationType TEXT,'
               'jobTitle TEXT,'
               'qualification TEXT,'
               'gender TEXT,'
               'dateModify TEXT,'
               'skills TEXT,'
               'otherInfo TEXT)')

df = pd.read_csv("works.csv")

# Remove markup tags from the free-text columns.
for column in ('skills', 'otherInfo'):
    df[column] = df[column].apply(strip_tags)
df.to_sql("works", con, if_exists='append', index=False)
con.commit()

# Create and fill the genders and educations lookup tables.
cursor.execute('DROP TABLE IF EXISTS genders')
cursor.execute('CREATE TABLE genders(genderName TEXT PRIMARY KEY )')
cursor.execute('INSERT INTO genders SELECT DISTINCT gender FROM works WHERE gender IS NOT NULL')
cursor.execute('DROP TABLE IF EXISTS educations')
cursor.execute('CREATE TABLE educations(educationType TEXT PRIMARY KEY )')
cursor.execute('INSERT INTO educations SELECT DISTINCT educationType FROM works WHERE works.educationType IS NOT NULL')
con.commit()

# "Update" the works table: build a copy whose gender and educationType
# columns reference the lookup tables, then copy every row into it.
cursor.execute('CREATE TABLE new_works ('
               'ID INTEGER PRIMARY KEY AUTOINCREMENT,'
               'salary INTEGER,'
               'educationType TEXT REFERENCES educations(educationType) ON DELETE CASCADE ON UPDATE CASCADE,'
               'jobTitle TEXT,'
               'qualification TEXT,'
               'gender TEXT REFERENCES genders(genderName) ON DELETE CASCADE ON UPDATE CASCADE,'
               'dateModify TEXT,'
               'skills TEXT,'
               'otherInfo TEXT)')
# SELECT * relies on both tables declaring their columns in the same order.
cursor.execute('INSERT INTO new_works SELECT * FROM works')
# hw.py (continued): replace the old works table with the rebuilt one.
cursor.execute('DROP TABLE works')
cursor.execute('ALTER TABLE new_works RENAME TO works')
con.commit()
# Release the database file now that hw.py has finished with it.
con.close()


# tasks.py (new file in this diff)
#
# Loads works.csv into works.sqlite and runs the numbered exercises below,
# printing each query result and showing two histograms at the end.
import sqlite3

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


# Task 1: create the works table and fill it from works.csv.
con = sqlite3.connect('works.sqlite')
cursor = con.cursor()
cursor.execute('drop table if exists works')
cursor.execute('create table works ('
               'ID INTEGER PRIMARY KEY AUTOINCREMENT,'
               'salary INTEGER,'
               'educationType TEXT,'
               'jobTitle TEXT,'
               'qualification TEXT,'
               'gender TEXT,'
               'dateModify TEXT,'
               'skills TEXT,'
               'otherInfo TEXT)')
con.commit()

df = pd.read_csv("works.csv")
df.to_sql("works", con, if_exists='append', index=False)
con.commit()

# Task 2: index the salary column.
cursor.execute('create index salary_index on works (salary)')
con.commit()

# Task 3: total number of rows.
cursor.execute('SELECT count(*) FROM works')
print(cursor.fetchone()[0])

# Task 4: number of rows per gender.
cursor.execute("SELECT gender, count(*) FROM works GROUP BY works.gender")
print(cursor.fetchall())

# Task 5: number of rows with the skills column filled in.
cursor.execute("SELECT count(*) FROM works WHERE works.skills IS NOT NULL")
print(cursor.fetchone()[0])

# Task 6: every row with the skills column filled in.
cursor.execute("SELECT * FROM works WHERE works.skills IS NOT NULL")
print(cursor.fetchall())

# Task 7: salaries of rows whose skills mention Python.
cursor.execute("SELECT salary FROM works WHERE skills LIKE '%Python%'")
print(cursor.fetchall())

# Task 8: salary deciles per gender, shown as histograms.
# NULL salaries come back as None, which np.quantile cannot order against
# numbers, so they are filtered out in the query itself.
cursor.execute("SELECT salary FROM works WHERE gender = 'Мужской' AND salary IS NOT NULL")
m_salary = [row[0] for row in cursor.fetchall()]
cursor.execute("SELECT salary FROM works WHERE gender = 'Женский' AND salary IS NOT NULL")
w_salary = [row[0] for row in cursor.fetchall()]

# Ten probabilities from 0.1 to 1.0 inclusive, shared by both genders.
deciles = np.linspace(0.1, 1, 10)
m_quantile = np.quantile(m_salary, deciles)
w_quantile = np.quantile(w_salary, deciles)
plt.hist(m_quantile, 100, color='blue')
plt.show()
plt.hist(w_quantile, 100, color='red')
plt.show()

# Nothing was written after the last commit; just release the database file.
con.close()