Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions .idea/dataSources.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/Project_Default.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/sql.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

54 changes: 54 additions & 0 deletions dz.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import sqlite3
import pandas as pd
import re

# Connection to the SQLite database file, plus the one cursor that every
# statement further down in this script is executed through.
con = sqlite3.connect('works.sqlite')
cursor = con.cursor()

# Raw rows to be imported into the database.
df = pd.read_csv(filepath_or_buffer="works.csv")


def remove_tags(field):
    """Return *field* as text with every ``<...>`` tag removed.

    Missing values (``None`` or a float NaN) are returned unchanged so they
    are not converted into the literal strings ``"None"`` / ``"nan"`` and
    later stored as if they were real text. Any other value is converted
    with ``str()`` before the tags are stripped.
    """
    # NaN is the only float that does not compare equal to itself.
    if field is None or (isinstance(field, float) and field != field):
        return field
    # "<" and ">" have no special meaning in a regex, so no escaping needed.
    return re.sub(r'<[^>]*>', '', str(field))


# Strip <...> tags from the free-text columns before the rows are stored.
for text_column in ('skills', 'otherInfo'):
    df[text_column] = df[text_column].apply(remove_tags)

# Append the cleaned rows to the "works" table; the DataFrame index is not
# written as a column.
df.to_sql(name="works", con=con, if_exists='append', index=False)
con.commit()

# Move the textual "gender" column of "works" into its own lookup table
# ("genders") and replace it with an integer reference, "gender_id".
#
# NOTE: adjacent string literals are concatenated with nothing in between,
# so every continuation line starts with a space; without it the SQL
# collapses into e.g. "ALTER TABLE worksADD COLUMN ..." and fails to parse.
cursor.execute('drop table if exists genders')
cursor.execute('CREATE TABLE genders('
               'id INTEGER PRIMARY KEY AUTOINCREMENT, '
               'gender TEXT)')
# The lookup table has to be populated BEFORE gender_id is computed and the
# original column is dropped; otherwise every gender_id ends up NULL and
# the gender information is lost.
cursor.execute('INSERT INTO genders(gender)'
               ' SELECT DISTINCT gender'
               ' FROM works'
               ' WHERE gender IS NOT NULL')
cursor.execute('ALTER TABLE works'
               ' ADD COLUMN gender_id INTEGER REFERENCES genders(id)')
cursor.execute('UPDATE works'
               ' SET gender_id ='
               ' (SELECT id'
               ' FROM genders'
               ' WHERE genders.gender = works.gender)')
cursor.execute('ALTER TABLE works'
               ' DROP COLUMN gender')
con.commit()

# Move the textual "educationType" column of "works" into its own lookup
# table ("education") and replace it with an integer reference,
# "educationType_id". The statements run in order and are committed once.
education_statements = (
    # Start from a clean lookup table.
    'drop table if exists education',
    'CREATE TABLE education(id INTEGER PRIMARY KEY AUTOINCREMENT, level_of_edu TEXT)',
    # One lookup row per distinct non-NULL education type.
    'INSERT INTO education(level_of_edu) '
    'SELECT DISTINCT educationType FROM works '
    'WHERE educationType IS NOT NULL',
    'ALTER TABLE works ADD COLUMN educationType_id INTEGER REFERENCES education(id)',
    # Resolve each row's text value to the id of its lookup row.
    'UPDATE works SET educationType_id = '
    '(SELECT id FROM education WHERE level_of_edu = works.educationType)',
    # The text column is redundant once the reference is filled in.
    'ALTER TABLE works DROP COLUMN educationType',
)
for statement in education_statements:
    cursor.execute(statement)
con.commit()

61 changes: 61 additions & 0 deletions tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import sqlite3
import pandas as pd
import numpy
import matplotlib.pyplot as plt

# Tasks 1 and 2
con = sqlite3.connect('works.sqlite')
cursor = con.cursor()

# Rebuild the "works" table from scratch on every run.
cursor.execute('drop table if exists works')
works_columns = (
    'ID INTEGER PRIMARY KEY AUTOINCREMENT',
    'salary INTEGER',
    'educationType TEXT',
    'jobTitle TEXT',
    'qualification TEXT',
    'gender TEXT',
    'dateModify TEXT',
    'skills TEXT',
    'otherInfo TEXT',
)
cursor.execute('create table works (' + ','.join(works_columns) + ')')
con.commit()

# Append the CSV rows to the freshly created table; the DataFrame index is
# not written as a column.
df = pd.read_csv(filepath_or_buffer="works.csv")
df.to_sql(name="works", con=con, if_exists='append', index=False)
con.commit()

# Index on the salary column.
cursor.execute('create index salary_index on works (salary)')
con.commit()

# Tasks 3 - 7
# Values are bound through "?" placeholders rather than written as
# double-quoted SQL literals: SQLite interprets "..." as an identifier first
# and only falls back to a string literal as a compatibility quirk.

# Total number of rows.
cursor.execute('SELECT COUNT(*) FROM works')
print(cursor.fetchone()[0])

# Number of rows per gender value used below.
cursor.execute('SELECT COUNT(*) FROM works WHERE gender = ?', ('Женский',))
women_count = cursor.fetchone()[0]
cursor.execute('SELECT COUNT(*) FROM works WHERE gender = ?', ('Мужской',))
men_count = cursor.fetchone()[0]

# Salary samples per gender. These are the values the quantile/histogram
# code further down reads; previously the lists held a single COUNT(*)
# result instead of salaries. NULL salaries are skipped because numpy
# cannot compute quantiles over None.
cursor.execute('SELECT salary FROM works'
               ' WHERE gender = ? AND salary IS NOT NULL', ('Женский',))
w_salary = [row[0] for row in cursor.fetchall()]
cursor.execute('SELECT salary FROM works'
               ' WHERE gender = ? AND salary IS NOT NULL', ('Мужской',))
m_salary = [row[0] for row in cursor.fetchall()]

# Fetch the remaining result sets instead of discarding them.
cursor.execute('SELECT gender, COUNT(*) FROM works GROUP BY gender')
gender_counts = cursor.fetchall()
cursor.execute('SELECT skills FROM works WHERE skills IS NOT NULL')
skills = [row[0] for row in cursor.fetchall()]
cursor.execute('SELECT salary FROM works WHERE skills LIKE ?', ('%Python%',))
python_salaries = [row[0] for row in cursor.fetchall()]

# Tasks 8 - 9
# Quantile levels 0.1, 0.2, ..., 1.0.
percentiles = numpy.linspace(.1, 1, 10)

# Keep the quantiles in their own variables: the histograms below must be
# drawn from the raw samples in w_salary / m_salary, not from the ten
# quantile values (which is what happened when the quantiles overwrote the
# samples).
w_quantiles = numpy.quantile(w_salary, percentiles)
m_quantiles = numpy.quantile(m_salary, percentiles)

# Distribution of the raw values, men first, then women.
plt.hist(m_salary, bins=100)
plt.show()
plt.hist(w_salary, bins=100)
plt.show()

# Quantile curves, one figure per gender.
plt.plot(percentiles, m_quantiles)
plt.xlabel("Перцентили")
plt.ylabel("Зарплата у мужчин")
plt.show()

plt.plot(percentiles, w_quantiles)
plt.xlabel("Перцентили")
plt.ylabel("Зарплата у женщин")
plt.show()

Binary file added works.sqlite
Binary file not shown.