Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions HW.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import sqlite3
import pandas as pd
import re

def strip_tags(value):
    """Return *value* as text with HTML/XML-style tags removed.

    Every span from a ``<`` up to and including the next ``>`` is deleted.
    Values that are not strings are converted with ``str`` first.

    Missing values -- ``None`` or a float NaN (how pandas represents an
    empty CSV cell) -- are returned unchanged, so they are not turned into
    the literal text ``'None'`` / ``'nan'`` and can still be stored as NULL.
    """
    # NaN is the only float that compares unequal to itself.
    if value is None or (isinstance(value, float) and value != value):
        return value
    return re.sub(r'<[^>]*>', '', str(value))

# Build works.sqlite from works.csv, then move the distinct gender and
# educationType values into lookup tables that the final `works` table
# references through foreign keys.
con = sqlite3.connect('works.sqlite')
cursor = con.cursor()
# SQLite only enforces foreign keys when this is enabled on the connection.
cursor.execute('PRAGMA foreign_keys = true')
con.commit()

# Staging table without constraints; it receives the raw CSV rows.
cursor.execute('DROP TABLE IF EXISTS works')
cursor.execute('CREATE TABLE works ('
               'ID INTEGER PRIMARY KEY AUTOINCREMENT,'
               'salary INTEGER,'
               'educationType TEXT,'
               'jobTitle TEXT,'
               'qualification TEXT,'
               'gender TEXT,'
               'dateModify TEXT,'
               'skills TEXT,'
               'otherInfo TEXT)')

df = pd.read_csv("works.csv")

# Remove markup from the two free-text columns before loading them.
df['skills'] = df['skills'].apply(strip_tags)
df['otherInfo'] = df['otherInfo'].apply(strip_tags)
df.to_sql("works", con, if_exists='append', index=False)
con.commit()

# Lookup tables holding the distinct non-NULL values of each column.
cursor.execute('DROP TABLE IF EXISTS genders')
cursor.execute('CREATE TABLE genders(genderName TEXT PRIMARY KEY )')
cursor.execute('INSERT INTO genders SELECT DISTINCT gender FROM works WHERE gender IS NOT NULL')
cursor.execute('DROP TABLE IF EXISTS educations')
cursor.execute('CREATE TABLE educations(educationType TEXT PRIMARY KEY )')
cursor.execute('INSERT INTO educations SELECT DISTINCT educationType FROM works WHERE works.educationType IS NOT NULL')
con.commit()

# The constrained table is created under a temporary name, filled from the
# staging table, and then renamed over it.
# A `new_works` left behind by an interrupted earlier run would make the
# CREATE below fail, so remove it first.
cursor.execute('DROP TABLE IF EXISTS new_works')
cursor.execute('CREATE TABLE new_works ('
               'ID INTEGER PRIMARY KEY AUTOINCREMENT,'
               'salary INTEGER,'
               'educationType TEXT REFERENCES educations(educationType) ON DELETE CASCADE ON UPDATE CASCADE,'
               'jobTitle TEXT,'
               'qualification TEXT,'
               'gender TEXT REFERENCES genders(genderName) ON DELETE CASCADE ON UPDATE CASCADE,'
               'dateModify TEXT,'
               'skills TEXT,'
               'otherInfo TEXT)')
# Relies on both tables declaring their columns in the same order.
cursor.execute('INSERT INTO new_works SELECT * FROM works')
cursor.execute('DROP TABLE works')
cursor.execute('ALTER TABLE new_works RENAME TO works')
con.commit()
# Release the database file handle once all changes are committed.
con.close()
62 changes: 62 additions & 0 deletions class.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# 1. Recreate the `works` table and load works.csv into it.
con = sqlite3.connect('works.sqlite')
cursor = con.cursor()
cursor.execute('drop table if exists works')
cursor.execute('create table works ('
               'ID INTEGER PRIMARY KEY AUTOINCREMENT,'
               'salary INTEGER,'
               'educationType TEXT,'
               'jobTitle TEXT,'
               'qualification TEXT,'
               'gender TEXT,'
               'dateModify TEXT,'
               'skills TEXT,'
               'otherInfo TEXT)')
con.commit()

df = pd.read_csv("works.csv")
df.to_sql("works", con, if_exists='append', index=False)
con.commit()

# 2. Index the salary column. The index is dropped together with the table
#    in step 1, so creating it again on every run does not conflict.
cursor.execute('create index salary_index on works (salary)')
con.commit()

# 3. Total number of rows.
cursor.execute('SELECT count(*) FROM works')
print(cursor.fetchone()[0])

# 4. Number of rows per gender value.
cursor.execute("SELECT gender, count(*) FROM works GROUP BY works.gender")
print(cursor.fetchall())

# 5. Number of rows whose skills column is filled in.
cursor.execute("SELECT count(*) FROM works WHERE works.skills IS NOT NULL")
print(cursor.fetchone()[0])

# 6. All rows whose skills column is filled in.
cursor.execute("SELECT * FROM works WHERE works.skills IS NOT NULL")
print(cursor.fetchall())

# 7. Salaries of rows whose skills mention Python.
cursor.execute("SELECT salary FROM works WHERE skills LIKE '%Python%'")
print(cursor.fetchall())

# 8. Salary deciles per gender, each shown as a histogram.
#    NULL salaries are excluded: they would arrive as None and make
#    np.quantile raise.
cursor.execute("SELECT salary FROM works WHERE gender = 'Мужской' AND salary IS NOT NULL")
m_salary = [row[0] for row in cursor.fetchall()]
cursor.execute("SELECT salary FROM works WHERE gender = 'Женский' AND salary IS NOT NULL")
w_salary = [row[0] for row in cursor.fetchall()]

# Ten quantile levels: 0.1, 0.2, ..., 1.0.
levels = np.linspace(0.1, 1, 10)
m_quantile = np.quantile(m_salary, levels)
w_quantile = np.quantile(w_salary, levels)
plt.hist(m_quantile, 100, color='blue')
plt.show()
plt.hist(w_quantile, 100, color='red')
plt.show()
# Only reads happened since the last commit, so there is nothing left to
# commit; just release the database file handle.
con.close()