From ae8f988719252a2e936b82812bf9d10c767ab581 Mon Sep 17 00:00:00 2001
From: Polina Kipriyanova <93437990+2Pirozhochek7@users.noreply.github.com>
Date: Sun, 26 Dec 2021 15:13:02 +0500
Subject: [PATCH 1/2] =?UTF-8?q?=D0=9A=D0=B8=D0=BF=D1=80=D0=B8=D1=8F=D0=BD?=
 =?UTF-8?q?=D0=BE=D0=B2=D0=B0=20=D0=9F=D0=BE=D0=BB=D0=B8=D0=BD=D0=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (this section is not part of the commit message):
- task.py: the per-gender queries selected COUNT(*) into w_salary/m_salary;
  they now select salary, and the deciles no longer overwrite the raw values
  that the histograms use.
- task2.py: the SQL fragments for the genders table were concatenated without
  separating spaces; clean() turned missing values into the string 'nan'.
- The blob ids on the "index" lines are those of the original submission.

 task.py  | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++
 task2.py | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 132 insertions(+)
 create mode 100644 task.py
 create mode 100644 task2.py

diff --git a/task.py b/task.py
new file mode 100644
index 0000000..1f1ed3a
--- /dev/null
+++ b/task.py
@@ -0,0 +1,63 @@
+import sqlite3
+import pandas as pd
+import numpy
+import matplotlib.pyplot as plt
+# Tasks 1-2: recreate the `works` table and load works.csv into it.
+con = sqlite3.connect('works.sqlite')
+cursor = con.cursor()
+cursor.execute('drop table if exists works')
+cursor.execute('create table works ('
+               'ID INTEGER PRIMARY KEY AUTOINCREMENT,'
+               'salary INTEGER,'
+               'educationType TEXT,'
+               'jobTitle TEXT,'
+               'qualification TEXT,'
+               'gender TEXT,'
+               'dateModify TEXT,'
+               'skills TEXT,'
+               'otherInfo TEXT)')
+con.commit()
+
+df = pd.read_csv("works.csv")
+df.to_sql("works", con, if_exists='append', index=False)
+con.commit()
+
+cursor.execute('create index salary_index on works (salary)')
+con.commit()
+
+# Tasks 3-7: row counts and simple selections.
+cursor.execute('SELECT COUNT(*) FROM works')
+print(cursor.fetchall()[0][0])
+
+# The percentile/histogram code below needs the salary values themselves,
+# so select `salary` per gender rather than COUNT(*).
+cursor.execute('SELECT salary FROM works WHERE gender = "Женский"')
+w_salary = [t[0] for t in cursor.fetchall()]
+cursor.execute('SELECT salary FROM works WHERE gender = "Мужской"')
+m_salary = [t[0] for t in cursor.fetchall()]
+cursor.execute('SELECT gender, COUNT(*) FROM works GROUP BY gender')
+cursor.execute('SELECT skills FROM works WHERE skills NOT NULL')
+cursor.execute('SELECT salary FROM works WHERE skills LIKE "%Python%"')
+
+# Tasks 8-9: salary deciles and distributions per gender.
+percentiles = numpy.linspace(.1, 1, 10)
+
+# Keep the deciles in their own variables so the histograms below still
+# show the raw salary distribution.
+w_deciles = numpy.quantile(w_salary, percentiles)
+m_deciles = numpy.quantile(m_salary, percentiles)
+
+plt.hist(m_salary, bins=100)
+plt.show()
+plt.hist(w_salary, bins=100)
+plt.show()
+
+plt.plot(percentiles, m_deciles)
+plt.xlabel("Перцентили")
+plt.ylabel("Зарплата у мужчин")
+plt.show()
+
+plt.plot(percentiles, w_deciles)
+plt.xlabel("Перцентили")
+plt.ylabel("Зарплата у женщин")
+plt.show()
\ No newline at end of file
diff --git a/task2.py b/task2.py
new file mode 100644
index 0000000..b8a2b3e
--- /dev/null
+++ b/task2.py
@@ -0,0 +1,69 @@
+import sqlite3
+import pandas as pd
+import numpy
+import matplotlib.pyplot as plt
+import re
+
+con = sqlite3.connect('works.sqlite')
+df = pd.read_csv("works.csv")
+cursor = con.cursor()
+
+
+def clean(field):
+    """Return *field* with HTML-like ``<...>`` tags removed.
+
+    Non-string values (e.g. the NaN pandas uses for missing cells) are
+    returned unchanged so that they are stored as NULL, not as 'nan'.
+    """
+    if not isinstance(field, str):
+        return field
+    return re.sub(r'\<[^>]*\>', '', field)
+
+
+df['skills'] = df['skills'].apply(clean)
+df['otherInfo'] = df['otherInfo'].apply(clean)
+
+# NOTE(review): 'append' adds these rows on top of whatever `works` already
+# holds (task.py loads the same CSV) - confirm whether 'replace' was intended.
+df.to_sql("works", con, if_exists='append', index=False)
+con.commit()
+
+# Move `gender` into a lookup table referenced by works.gender_id.
+# NOTE: ALTER TABLE ... DROP COLUMN needs SQLite 3.35.0 or newer.
+cursor.execute('drop table if exists genders')
+cursor.execute('CREATE TABLE genders('
+               'id INTEGER PRIMARY KEY AUTOINCREMENT,'
+               'gender TEXT)')
+# Adjacent string literals are joined with no separator, so each continuation
+# fragment starts with a space to keep the SQL keywords apart.
+cursor.execute('INSERT INTO genders(gender)'
+               ' SELECT DISTINCT gender'
+               ' FROM works WHERE gender IS NOT NULL')
+cursor.execute('ALTER TABLE works'
+               ' ADD COLUMN gender_id INTEGER REFERENCES genders(id)')
+cursor.execute('UPDATE works SET gender_id ='
+               ' (SELECT id FROM genders'
+               ' WHERE gender = works.gender)')
+cursor.execute('ALTER TABLE works'
+               ' DROP COLUMN gender')
+con.commit()
+
+# Same normalisation for `educationType`.
+cursor.execute('drop table if exists education')
+cursor.execute('CREATE TABLE education'
+               '(id INTEGER PRIMARY KEY AUTOINCREMENT, '
+               'level_of_edu TEXT)')
+cursor.execute('INSERT INTO education(level_of_edu)'
+               ' SELECT DISTINCT educationType '
+               'FROM works'
+               ' WHERE educationType IS NOT NULL')
+cursor.execute('ALTER TABLE works'
+               ' ADD COLUMN educationType_id INTEGER REFERENCES education(id)')
+cursor.execute('UPDATE works'
+               ' SET educationType_id ='
+               ' (SELECT id'
+               ' FROM education'
+               ' WHERE level_of_edu = works.educationType)')
+cursor.execute('ALTER TABLE works'
+               ' DROP COLUMN educationType')
+con.commit()
\ No newline at end of file

From 524f4a16dc47f92561b807657cf5f249801a8341 Mon Sep 17 00:00:00 2001
From: Polina Kipriyanova <93437990+2Pirozhochek7@users.noreply.github.com>
Date: Sun, 26 Dec 2021 15:29:42 +0500
Subject: [PATCH 2/2] =?UTF-8?q?Revert=20"=D0=9A=D0=B8=D0=BF=D1=80=D0=B8?=
 =?UTF-8?q?=D1=8F=D0=BD=D0=BE=D0=B2=D0=B0=20=D0=9F=D0=BE=D0=BB=D0=B8=D0=BD?=
 =?UTF-8?q?=D0=B0"?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit ae8f988719252a2e936b82812bf9d10c767ab581.
---
Review notes (this section is not part of the commit message):
- The removed lines mirror the amended 1/2 so the series still applies in
  order; the blob ids on the "index" lines are those of the original
  submission.

 task.py  | 63 ---------------------------------------------------
 task2.py | 69 --------------------------------------------------------
 2 files changed, 132 deletions(-)
 delete mode 100644 task.py
 delete mode 100644 task2.py

diff --git a/task.py b/task.py
deleted file mode 100644
index 1f1ed3a..0000000
--- a/task.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import sqlite3
-import pandas as pd
-import numpy
-import matplotlib.pyplot as plt
-# Tasks 1-2: recreate the `works` table and load works.csv into it.
-con = sqlite3.connect('works.sqlite')
-cursor = con.cursor()
-cursor.execute('drop table if exists works')
-cursor.execute('create table works ('
-               'ID INTEGER PRIMARY KEY AUTOINCREMENT,'
-               'salary INTEGER,'
-               'educationType TEXT,'
-               'jobTitle TEXT,'
-               'qualification TEXT,'
-               'gender TEXT,'
-               'dateModify TEXT,'
-               'skills TEXT,'
-               'otherInfo TEXT)')
-con.commit()
-
-df = pd.read_csv("works.csv")
-df.to_sql("works", con, if_exists='append', index=False)
-con.commit()
-
-cursor.execute('create index salary_index on works (salary)')
-con.commit()
-
-# Tasks 3-7: row counts and simple selections.
-cursor.execute('SELECT COUNT(*) FROM works')
-print(cursor.fetchall()[0][0])
-
-# The percentile/histogram code below needs the salary values themselves,
-# so select `salary` per gender rather than COUNT(*).
-cursor.execute('SELECT salary FROM works WHERE gender = "Женский"')
-w_salary = [t[0] for t in cursor.fetchall()]
-cursor.execute('SELECT salary FROM works WHERE gender = "Мужской"')
-m_salary = [t[0] for t in cursor.fetchall()]
-cursor.execute('SELECT gender, COUNT(*) FROM works GROUP BY gender')
-cursor.execute('SELECT skills FROM works WHERE skills NOT NULL')
-cursor.execute('SELECT salary FROM works WHERE skills LIKE "%Python%"')
-
-# Tasks 8-9: salary deciles and distributions per gender.
-percentiles = numpy.linspace(.1, 1, 10)
-
-# Keep the deciles in their own variables so the histograms below still
-# show the raw salary distribution.
-w_deciles = numpy.quantile(w_salary, percentiles)
-m_deciles = numpy.quantile(m_salary, percentiles)
-
-plt.hist(m_salary, bins=100)
-plt.show()
-plt.hist(w_salary, bins=100)
-plt.show()
-
-plt.plot(percentiles, m_deciles)
-plt.xlabel("Перцентили")
-plt.ylabel("Зарплата у мужчин")
-plt.show()
-
-plt.plot(percentiles, w_deciles)
-plt.xlabel("Перцентили")
-plt.ylabel("Зарплата у женщин")
-plt.show()
\ No newline at end of file
diff --git a/task2.py b/task2.py
deleted file mode 100644
index b8a2b3e..0000000
--- a/task2.py
+++ /dev/null
@@ -1,69 +0,0 @@
-import sqlite3
-import pandas as pd
-import numpy
-import matplotlib.pyplot as plt
-import re
-
-con = sqlite3.connect('works.sqlite')
-df = pd.read_csv("works.csv")
-cursor = con.cursor()
-
-
-def clean(field):
-    """Return *field* with HTML-like ``<...>`` tags removed.
-
-    Non-string values (e.g. the NaN pandas uses for missing cells) are
-    returned unchanged so that they are stored as NULL, not as 'nan'.
-    """
-    if not isinstance(field, str):
-        return field
-    return re.sub(r'\<[^>]*\>', '', field)
-
-
-df['skills'] = df['skills'].apply(clean)
-df['otherInfo'] = df['otherInfo'].apply(clean)
-
-# NOTE(review): 'append' adds these rows on top of whatever `works` already
-# holds (task.py loads the same CSV) - confirm whether 'replace' was intended.
-df.to_sql("works", con, if_exists='append', index=False)
-con.commit()
-
-# Move `gender` into a lookup table referenced by works.gender_id.
-# NOTE: ALTER TABLE ... DROP COLUMN needs SQLite 3.35.0 or newer.
-cursor.execute('drop table if exists genders')
-cursor.execute('CREATE TABLE genders('
-               'id INTEGER PRIMARY KEY AUTOINCREMENT,'
-               'gender TEXT)')
-# Adjacent string literals are joined with no separator, so each continuation
-# fragment starts with a space to keep the SQL keywords apart.
-cursor.execute('INSERT INTO genders(gender)'
-               ' SELECT DISTINCT gender'
-               ' FROM works WHERE gender IS NOT NULL')
-cursor.execute('ALTER TABLE works'
-               ' ADD COLUMN gender_id INTEGER REFERENCES genders(id)')
-cursor.execute('UPDATE works SET gender_id ='
-               ' (SELECT id FROM genders'
-               ' WHERE gender = works.gender)')
-cursor.execute('ALTER TABLE works'
-               ' DROP COLUMN gender')
-con.commit()
-
-# Same normalisation for `educationType`.
-cursor.execute('drop table if exists education')
-cursor.execute('CREATE TABLE education'
-               '(id INTEGER PRIMARY KEY AUTOINCREMENT, '
-               'level_of_edu TEXT)')
-cursor.execute('INSERT INTO education(level_of_edu)'
-               ' SELECT DISTINCT educationType '
-               'FROM works'
-               ' WHERE educationType IS NOT NULL')
-cursor.execute('ALTER TABLE works'
-               ' ADD COLUMN educationType_id INTEGER REFERENCES education(id)')
-cursor.execute('UPDATE works'
-               ' SET educationType_id ='
-               ' (SELECT id'
-               ' FROM education'
-               ' WHERE level_of_edu = works.educationType)')
-cursor.execute('ALTER TABLE works'
-               ' DROP COLUMN educationType')
-con.commit()
\ No newline at end of file