diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..26d3352
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..cf9f7e9
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,12 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..d56657a
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..b02178e
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/sql.iml b/.idea/sql.iml
new file mode 100644
index 0000000..d0876a7
--- /dev/null
+++ b/.idea/sql.iml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/code.py b/code.py
new file mode 100644
index 0000000..0212f28
--- /dev/null
+++ b/code.py
@@ -0,0 +1,96 @@
+"""Load works.csv into SQLite and explore salaries by gender and education."""
+import math
+import sqlite3
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import os.path
+
+connection = sqlite3.connect("task.sqlite")
+cursor = connection.cursor()
+
+# Rebuild the table from scratch so reruns do not append duplicate rows.
+cursor.execute("drop table if exists works")
+
+cursor.execute("create table works("
+               "ID INTEGER PRIMARY KEY AUTOINCREMENT,"
+               "salary INTEGER,"
+               "educationType TEXT,"
+               "jobTitle TEXT,"
+               "qualification TEXT,"
+               "gender TEXT,"
+               "dateModify TEXT,"
+               "skills TEXT,"
+               "otherInfo TEXT"
+               ");")
+
+connection.commit()
+
+data = pd.read_csv('works.csv')
+data.to_sql('works', connection, if_exists="append", index=False)
+connection.commit()
+cursor.execute("select * from works limit 5")
+print(cursor.fetchall())
+print(os.path.getsize("task.sqlite") / 1024 / 1024, "Mб")
+cursor.execute("create index salary_index on works(salary);")
+connection.commit()
+
+# Print each count; the first three results used to be discarded unread.
+cursor.execute("select count(*) from works")
+print(cursor.fetchall())
+cursor.execute("select count(*) from works where gender='Мужской'")
+print(cursor.fetchall())
+cursor.execute("select count(*) from works where gender='Женский'")
+print(cursor.fetchall())
+cursor.execute("select gender,count(*) from works group by gender")
+print(cursor.fetchall())
+cursor.execute('select count(*) from works where skills not null')
+print(cursor.fetchall())
+cursor.execute('select skills from works where skills not null')
+print(cursor.fetchall())
+cursor.execute("select salary from works where skills like '%Python%'")
+print(cursor.fetchall())
+print(os.path.getsize("task.sqlite") / 1024 / 1024, "Mб")
+cursor.execute("select salary from works where gender = 'Мужской'")
+mens_salary = [row[0] for row in cursor.fetchall()]
+cursor.execute("select salary from works where gender = 'Женский'")
+women_salary = [row[0] for row in cursor.fetchall()]
+
+percentile = np.linspace(0.1, 1, 10)
+men_quantiles = np.quantile(mens_salary, percentile)
+women_quantiles = np.quantile(women_salary, percentile)
+print(list(zip(percentile, men_quantiles)))
+print(list(zip(percentile, women_quantiles)))
+
+plt.plot(percentile, men_quantiles, color="b", label="Мужчины")
+plt.plot(percentile, women_quantiles, color="r", label="Женщины")
+plt.xlabel("Перцентили")
+# Both genders share this axis, so the label must not name only one of them.
+plt.ylabel("Зарплата")
+plt.legend()
+plt.show()
+
+params = [("Мужской", "Высшее"), ("Мужской", "Незаконченное высшее"), ("Мужской", "Среднее"),
+          ("Мужской", "Среднее профессиональное"), ("Женский", "Высшее"), ("Женский", "Незаконченное высшее"),
+          ("Женский", "Среднее"), ("Женский", "Среднее профессиональное")]
+
+# Values are bound as parameters rather than formatted into the SQL text.
+salary_query = "SELECT salary FROM works WHERE gender = ? and educationType = ?"
+for gender, education in params:
+    salary = [row[0] for row in cursor.execute(salary_query, (gender, education)).fetchall()]
+    plt.hist(salary, bins=100)
+    plt.title(f"Зарплата {gender} с образованием {education}")
+    plt.show()
+
+# All queries are done; release the database handle before the final plot.
+connection.close()
+
+width = pow(10, -10)
+
+x = np.linspace(0.0 * width, 1 * width, 100)
+fig, ax = plt.subplots()
+# Vectorised over x; math.pi replaces the truncated constant 3.1415.
+y = 2.0 / width * np.sin(3.0 * math.pi * x / width) ** 2
+
+plt.plot(x, y, scalex=True, scaley=True)
+plt.show()
diff --git a/homework.py b/homework.py
new file mode 100644
index 0000000..96c4151
--- /dev/null
+++ b/homework.py
@@ -0,0 +1,64 @@
+"""Strip HTML tags from works.csv and move gender/education into lookup tables."""
+import sqlite3
+import pandas as pd
+import re
+
+con = sqlite3.connect('works.sqlite')
+df = pd.read_csv("works.csv")
+cursor = con.cursor()
+
+
+def removeTags(field):
+    """Return field with HTML-like tags removed; missing values pass through unchanged."""
+    # str(NaN) is the text 'nan', which would replace NULLs with a bogus string.
+    if pd.isna(field):
+        return field
+    return re.sub(r'\<[^>]*\>', '', str(field))
+
+
+df['skills'] = df['skills'].apply(removeTags)
+df['otherInfo'] = df['otherInfo'].apply(removeTags)
+
+# Replace instead of append: on a rerun the stored table no longer has the
+# columns dropped below, so appending the CSV columns to it would fail.
+df.to_sql("works", con, if_exists='replace', index=False)
+con.commit()
+
+# Adjacent string literals are concatenated verbatim, so every continuation
+# line starts with a space to keep SQL keywords apart.
+cursor.execute('drop table if exists genders')
+cursor.execute('CREATE TABLE genders('
+               'id INTEGER PRIMARY KEY AUTOINCREMENT,'
+               'gender TEXT)')
+cursor.execute('INSERT INTO genders(gender)'
+               ' SELECT DISTINCT gender'
+               ' FROM works WHERE gender IS NOT NULL')
+cursor.execute('ALTER TABLE works'
+               ' ADD COLUMN gender_id INTEGER REFERENCES genders(id)')
+cursor.execute('UPDATE works SET gender_id ='
+               ' (SELECT id FROM genders'
+               ' WHERE gender = works.gender)')
+# ALTER TABLE ... DROP COLUMN needs SQLite 3.35.0 or newer.
+cursor.execute('ALTER TABLE works'
+               ' DROP COLUMN gender')
+con.commit()
+
+cursor.execute('drop table if exists education')
+cursor.execute('CREATE TABLE education'
+               '(id INTEGER PRIMARY KEY AUTOINCREMENT, '
+               'level_of_edu TEXT)')
+cursor.execute('INSERT INTO education(level_of_edu)'
+               ' SELECT DISTINCT educationType '
+               'FROM works'
+               ' WHERE educationType IS NOT NULL')
+cursor.execute('ALTER TABLE works'
+               ' ADD COLUMN educationType_id INTEGER REFERENCES education(id)')
+cursor.execute('UPDATE works'
+               ' SET educationType_id ='
+               ' (SELECT id'
+               ' FROM education'
+               ' WHERE level_of_edu = works.educationType)')
+cursor.execute('ALTER TABLE works'
+               ' DROP COLUMN educationType')
+con.commit()
+con.close()
diff --git a/task.sqlite b/task.sqlite
new file mode 100644
index 0000000..7f23e36
Binary files /dev/null and b/task.sqlite differ