"""Load ``works.csv`` into SQLite, report on it and normalise two columns.

This module holds the code of two scripts that were added together:

* ``classwork.py`` -- load the CSV into ``works.sqlite``, print a few
  counts and plot salary quantiles for the two gender values.
* ``homework.py`` -- reload the CSV with ``<...>`` tags stripped from the
  ``skills`` and ``otherInfo`` columns, then move ``gender`` and
  ``educationType`` into lookup tables referenced by id.

The ``works.sqlite`` database written by both scripts was added alongside
them as a binary file.
"""

import re
import sqlite3
from contextlib import closing

import numpy as np
import pandas as pd

DB_PATH = 'works.sqlite'
CSV_PATH = 'works.csv'

# Values of the ``gender`` column that the reports and the plot look at.
MALE = 'Мужской'
FEMALE = 'Женский'

_TAG_RE = re.compile(r'<[^>]*>')

_CREATE_WORKS_SQL = (
    'create table works ('
    'ID INTEGER PRIMARY KEY AUTOINCREMENT,'
    'salary INTEGER,'
    'educationType TEXT,'
    'jobTitle TEXT,'
    'qualification TEXT,'
    'gender TEXT,'
    'dateModify TEXT,'
    'skills TEXT,'
    'otherInfo TEXT)'
)


def remove_tags(field):
    """Return *field* as text with every ``<...>`` tag removed.

    Missing values (``None``, ``NaN``, ``pd.NA``) are returned unchanged.
    Converting them with ``str()`` would store the literal text ``'nan'``
    in the database instead of NULL.
    """
    if pd.api.types.is_scalar(field) and pd.isna(field):
        return field
    return _TAG_RE.sub('', str(field))


def _load_works(con, csv_path, clean_html=False):
    """Recreate the ``works`` table on *con* and fill it from *csv_path*.

    When *clean_html* is true, ``remove_tags`` is applied to the ``skills``
    and ``otherInfo`` columns before the rows are inserted.
    """
    cursor = con.cursor()
    cursor.execute('drop table if exists works')
    cursor.execute(_CREATE_WORKS_SQL)
    con.commit()

    df = pd.read_csv(csv_path)
    if clean_html:
        for column in ('skills', 'otherInfo'):
            df[column] = df[column].apply(remove_tags)
    df.to_sql('works', con, if_exists='append', index=False)
    con.commit()


def _scalar(cursor, sql, params=()):
    """Run *sql* and return the first column of its first row."""
    cursor.execute(sql, params)
    return cursor.fetchone()[0]


def _salary_quantiles(cursor, gender, quantiles):
    """Return the salary quantiles of all rows whose gender is *gender*.

    NULL salaries are excluded in SQL because ``np.quantile`` cannot handle
    ``None``. If no salary is left, an all-NaN array of the same length as
    *quantiles* is returned, which matplotlib draws as nothing.
    """
    cursor.execute(
        'SELECT salary FROM works WHERE gender = ? AND salary IS NOT NULL',
        (gender,),
    )
    salaries = [row[0] for row in cursor.fetchall()]
    if not salaries:
        return np.full(len(quantiles), np.nan)
    return np.quantile(salaries, quantiles)


def run_classwork(db_path=DB_PATH, csv_path=CSV_PATH):
    """Load the CSV, print summary counts and plot salary quantiles by gender.

    Printed, in order: total row count, row count for each of the two
    gender values, the per-gender counts, the number of rows with skills,
    all skills values, and the salaries of rows whose skills mention
    ``Python``.
    """
    # Imported here so that the rest of the module can be imported and used
    # where no plotting backend is available.
    import matplotlib.pyplot as plt

    quantiles = np.linspace(0.1, 1, 10)
    curves = []

    # ``closing`` guarantees the connection is released even on errors; the
    # connection's own context manager only commits or rolls back.
    with closing(sqlite3.connect(db_path)) as con:
        _load_works(con, csv_path)
        cursor = con.cursor()
        cursor.execute('create index salary_index on works (salary)')
        con.commit()

        print(_scalar(cursor, 'SELECT COUNT(*) FROM works'))
        for gender in (MALE, FEMALE):
            print(_scalar(
                cursor, 'SELECT COUNT(*) FROM works WHERE gender = ?', (gender,)
            ))

        cursor.execute('SELECT gender, COUNT(*) FROM works group by gender')
        print(cursor.fetchall())

        print(_scalar(cursor, 'SELECT COUNT(*) FROM works WHERE skills NOT NULL'))

        cursor.execute('SELECT skills FROM works WHERE skills NOT NULL')
        print(cursor.fetchall())

        cursor.execute("SELECT salary FROM works WHERE skills LIKE '%Python%'")
        print(cursor.fetchall())

        for gender in (MALE, FEMALE):
            curves.append((gender, _salary_quantiles(cursor, gender, quantiles)))

    # Plot after the connection is closed so the database is not held open
    # while the window is displayed.
    for gender, values in curves:
        plt.plot(quantiles, values, label=gender)
    plt.xlabel("Перцентили")
    plt.ylabel("Зарплата")
    plt.legend()
    plt.show()


def _normalize_column(con, column, lookup_table, value_column, fk_column):
    """Replace ``works.<column>`` by a foreign key into *lookup_table*.

    Creates *lookup_table* with the distinct non-NULL values of *column*,
    adds ``works.<fk_column>`` pointing at the matching lookup row, drops the
    original column, then prints the lookup table and the looked-up value of
    every row in ``works`` that has one.

    The identifiers are interpolated into the SQL text because SQLite cannot
    bind table or column names as parameters; pass trusted constants only.
    """
    cursor = con.cursor()
    cursor.execute(f'drop table if exists {lookup_table}')
    cursor.execute(
        f'create table {lookup_table}('
        f'id INTEGER PRIMARY KEY AUTOINCREMENT, {value_column} TEXT)'
    )
    cursor.execute(
        f'INSERT INTO {lookup_table}({value_column}) '
        f'SELECT DISTINCT {column} FROM works WHERE {column} IS NOT NULL'
    )
    cursor.execute(
        f'ALTER TABLE works ADD COLUMN {fk_column} '
        f'INTEGER REFERENCES {lookup_table}(id)'
    )
    cursor.execute(
        f'UPDATE works SET {fk_column} = '
        f'(SELECT id FROM {lookup_table} WHERE {value_column} = works.{column})'
    )
    # NOTE: ALTER TABLE ... DROP COLUMN needs SQLite 3.35.0 or newer.
    cursor.execute(f'ALTER TABLE works DROP COLUMN {column}')
    con.commit()

    cursor.execute(f'SELECT * FROM {lookup_table}')
    print(cursor.fetchall())
    cursor.execute(
        f'SELECT {value_column} FROM {lookup_table} '
        f'JOIN works ON {lookup_table}.id = works.{fk_column}'
    )
    print(cursor.fetchall())


def run_homework(db_path=DB_PATH, csv_path=CSV_PATH):
    """Reload the CSV with tags stripped and normalise gender and education."""
    with closing(sqlite3.connect(db_path)) as con:
        _load_works(con, csv_path, clean_html=True)
        _normalize_column(con, 'gender', 'genders', 'gender_val', 'gender_id')
        _normalize_column(
            con, 'educationType', 'education', 'edu_val', 'educationType_id'
        )


if __name__ == '__main__':
    run_classwork()
    run_homework()