railbotan · shiriaeva · Dec 25, 2021 · Dec 26, 2021
diff --git a/.idea/.gitignore b/.idea/.gitignore
diff --git a/.idea/dataSources.xml b/.idea/dataSources.xml
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
diff --git a/.idea/misc.xml b/.idea/misc.xml
diff --git a/.idea/modules.xml b/.idea/modules.xml
diff --git a/.idea/sql.iml b/.idea/sql.iml
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
diff --git a/dz.py b/dz.py
@@ -0,0 +1,54 @@
+import sqlite3
+import pandas as pd
+import re
+
+# Open the SQLite database file and load the source CSV into memory.
+con = sqlite3.connect("works.sqlite")
+df = pd.read_csv('works.csv')
+# Cursor used for all raw SQL statements in this script.
+cursor = con.cursor()
+
+
+def remove_tags(field):
+    """Strip HTML/XML-style tags from a single cell value.
+
+    Args:
+        field: Cell value; a string, or a missing value (None / NaN).
+
+    Returns:
+        The text with every ``<...>`` span removed. Missing values are
+        returned unchanged so they are not turned into the literal
+        strings "nan" / "None". Any other non-string value is converted
+        with ``str`` before the tags are removed.
+    """
+    # Strings can never be "missing", so skip the pandas check for them.
+    if not isinstance(field, str) and pd.isna(field):
+        return field
+    return re.sub(r'<[^>]*>', '', str(field))
+
+
+# Strip markup from the free-text columns before they reach the database.
+for text_column in ('skills', 'otherInfo'):
+    df[text_column] = df[text_column].apply(remove_tags)
+
+# Append the cleaned rows to the works table and persist them.
+df.to_sql(name="works", con=con, if_exists='append', index=False)
+con.commit()
+
+# Normalize gender: move the distinct values into a lookup table and
+# replace the text column in works with a foreign key to that table.
+cursor.execute('drop table if exists genders')
+cursor.execute('CREATE TABLE genders('
+               'id INTEGER PRIMARY KEY AUTOINCREMENT, '
+               'gender TEXT)')
+# The lookup table has to be filled before works is pointed at it;
+# otherwise every gender_id stays NULL and the values are lost as soon
+# as the original column is dropped.
+cursor.execute('INSERT INTO genders(gender)'
+               ' SELECT DISTINCT gender'
+               ' FROM works'
+               ' WHERE gender IS NOT NULL')
+# Every continuation fragment starts with a space: adjacent string
+# literals are glued together verbatim, without any separator.
+cursor.execute('ALTER TABLE works'
+               ' ADD COLUMN gender_id INTEGER REFERENCES genders(id)')
+cursor.execute('UPDATE works'
+               ' SET gender_id ='
+               ' (SELECT id'
+               ' FROM genders'
+               ' WHERE genders.gender = works.gender)')
+# NOTE: ALTER TABLE ... DROP COLUMN requires SQLite 3.35.0 or newer.
+cursor.execute('ALTER TABLE works'
+               ' DROP COLUMN gender')
+con.commit()
+
+# Normalize educationType: build the education lookup table, link works
+# to it through educationType_id, then drop the original text column.
+# The statements depend on each other and must run in this order.
+education_statements = (
+    'drop table if exists education',
+    'CREATE TABLE education(id INTEGER PRIMARY KEY AUTOINCREMENT, '
+    'level_of_edu TEXT)',
+    'INSERT INTO education(level_of_edu) SELECT DISTINCT educationType '
+    'FROM works WHERE educationType IS NOT NULL',
+    'ALTER TABLE works ADD COLUMN educationType_id INTEGER '
+    'REFERENCES education(id)',
+    'UPDATE works SET educationType_id = (SELECT id FROM education '
+    'WHERE level_of_edu = works.educationType)',
+    'ALTER TABLE works DROP COLUMN educationType',
+)
+for statement in education_statements:
+    cursor.execute(statement)
+con.commit()
+
diff --git a/tasks.py b/tasks.py
@@ -0,0 +1,61 @@
+import sqlite3
+import pandas as pd
+import numpy
+import matplotlib.pyplot as plt
+
+# tasks 1 and 2
+con = sqlite3.connect("works.sqlite")
+cursor = con.cursor()
+
+# Recreate the works table from scratch on every run.
+works_columns = (
+    'ID INTEGER PRIMARY KEY AUTOINCREMENT',
+    'salary INTEGER',
+    'educationType TEXT',
+    'jobTitle TEXT',
+    'qualification TEXT',
+    'gender TEXT',
+    'dateModify TEXT',
+    'skills TEXT',
+    'otherInfo TEXT',
+)
+create_works_sql = 'create table works (' + ','.join(works_columns) + ')'
+cursor.execute('drop table if exists works')
+cursor.execute(create_works_sql)
+con.commit()
+
+# Load the CSV rows into the freshly created table.
+df = pd.read_csv('works.csv')
+df.to_sql(name="works", con=con, if_exists='append', index=False)
+con.commit()
+
+# Index the salary column.
+cursor.execute('create index salary_index on works (salary)')
+con.commit()
+
+# tasks 3 - 7
+# Total number of rows in works.
+cursor.execute('SELECT COUNT(*) FROM works')
+print(cursor.fetchone()[0])
+
+# w_salary / m_salary are used further down as salary samples, so the
+# salary column itself is selected here rather than COUNT(*). NULL
+# salaries are skipped because they cannot take part in numeric
+# statistics. The gender value is bound as a parameter: a double-quoted
+# token in SQLite is an identifier first and only falls back to a string.
+salary_by_gender_sql = ('SELECT salary FROM works'
+                        ' WHERE gender = ? AND salary IS NOT NULL')
+cursor.execute(salary_by_gender_sql, ('Женский',))
+w_salary = [row[0] for row in cursor.fetchall()]
+cursor.execute(salary_by_gender_sql, ('Мужской',))
+m_salary = [row[0] for row in cursor.fetchall()]
+
+# NOTE(review): the results of the three queries below are never
+# fetched; each execute() discards the previous result set. Confirm
+# whether their output was meant to be printed or stored.
+cursor.execute('SELECT gender, COUNT(*) FROM works GROUP BY gender')
+cursor.execute('SELECT skills FROM works WHERE skills IS NOT NULL')
+cursor.execute("SELECT salary FROM works WHERE skills LIKE '%Python%'")
+
+# tasks 8 - 9
+# Ten evenly spaced quantile levels: 0.1, 0.2, ..., 1.0.
+percentiles = numpy.linspace(.1, 1, 10)
+
+# Store the quantiles under their own names so the raw samples stay
+# available for the histograms; overwriting them would make each
+# histogram show only the ten quantile values.
+w_quantiles = numpy.quantile(w_salary, percentiles)
+m_quantiles = numpy.quantile(m_salary, percentiles)
+
+# Distribution of the raw samples, men first, then women.
+plt.hist(m_salary, bins=100)
+plt.show()
+plt.hist(w_salary, bins=100)
+plt.show()
+
+# Quantile curves; the axis labels are user-facing text and stay as is.
+plt.plot(percentiles, m_quantiles)
+plt.xlabel("Перцентили")
+plt.ylabel("Зарплата у мужчин")
+plt.show()
+
+plt.plot(percentiles, w_quantiles)
+plt.xlabel("Перцентили")
+plt.ylabel("Зарплата у женщин")
+plt.show()
+
diff --git a/works.sqlite b/works.sqlite