Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions lesson.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import sqlite3 as sql
import pandas as pd

# Task 1: rebuild the `works` table from scratch and load the CSV into it.
con = sql.connect('works.sqlite')
cursor = con.cursor()
cursor.execute('drop table if exists works')
cursor.execute(
    'create table if not exists works ('
    'ID INTEGER PRIMARY KEY AUTOINCREMENT, salary INTEGER, '
    'educationType TEXT, jobTitle TEXT, qualification TEXT, gender TEXT, dateModify TEXT, '
    'skills TEXT, otherInfo TEXT)'
)

# The CSV has no ID column; SQLite fills the AUTOINCREMENT key on insert.
df = pd.read_csv('works.csv')
df.to_sql('works', con, if_exists="append", index=False)

# Task 2: index the salary column.
# Author's measurement: the database file was 6.3MB before the index ...
cursor.execute('create index salary_index on works (salary)')
# ... and grew to 6.6MB after it.

# Task 3: total number of rows.
# Author's recorded result: 32683
print(cursor.execute("select count(*) from works").fetchone()[0])

# Task 4: row counts per gender. The gender values are stored in Russian,
# so the literals below must stay exactly as they are.
# Number of men (author's recorded result: 13386)
print(cursor.execute("select count(*) from works where gender = 'Мужской'").fetchone()[0])
# Number of women (author's recorded result: 17910)
print(cursor.execute("select count(*) from works where gender = 'Женский'").fetchone()[0])

# Task 5: how many rows have the skills field filled in.
# Author's recorded result: 8972
print(cursor.execute('select count(*) from works where skills IS NOT NULL').fetchone()[0])

# Task 6: fetch every non-empty skills value.
print(cursor.execute('SELECT skills FROM works where skills IS NOT NULL').fetchall())

# Task 7: salaries of the rows whose skills mention Python.
# The pattern is single-quoted on purpose: in SQL double quotes delimit
# identifiers, and SQLite only accepts a double-quoted string literal through
# a legacy fallback that can be disabled at build time.
print(cursor.execute("SELECT salary FROM works where skills LIKE '%Python%'").fetchall())

# Flush anything still pending and release the database file.
con.commit()
con.close()
53 changes: 53 additions & 0 deletions sql_task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import sqlite3 as sql
import pandas as pd
import re

# Open the SQLite database file and obtain a cursor for raw SQL statements.
con = sql.connect('works.sqlite')
cursor = con.cursor()

# Schema of the `works` table; the ID key is generated by SQLite itself.
_CREATE_WORKS_SQL = (
    'create table if not exists works ('
    'ID INTEGER PRIMARY KEY AUTOINCREMENT, salary INTEGER, '
    'educationType TEXT, jobTitle TEXT, qualification TEXT, gender TEXT, dateModify TEXT, '
    'skills TEXT, otherInfo TEXT)'
)

# Every run starts from an empty table: drop the old one, then recreate it.
for _statement in ('drop table if exists works', _CREATE_WORKS_SQL):
    cursor.execute(_statement)
con.commit()

# Source data that the rest of the script loads into `works`.
df = pd.read_csv('works.csv')

# Matches one HTML-like tag: '<', any run of non-'>' characters, then '>'.
_HTML_TAG_RE = re.compile(r'<[^>]*>')


def clean(file):
    """Strip HTML-like tags from a single cell value.

    Args:
        file: The value to clean (a cell value, despite the parameter name).
            Non-string values are converted with ``str`` before stripping.

    Returns:
        The text with every ``<...>`` span removed. A missing value
        (``None`` or NaN) is returned unchanged, so that empty cells are not
        turned into the literal strings ``'None'`` / ``'nan'``.
    """
    if pd.isna(file):
        return file
    return _HTML_TAG_RE.sub('', str(file))

def _split_into_lookup_table(cur, column, lookup_table, value_column, fk_column):
    """Replace a text column of `works` with a foreign key into a lookup table.

    Args:
        cur: Cursor used to run the statements.
        column: Name of the text column in `works` to move out.
        lookup_table: Name of the lookup table to (re)create.
        value_column: Name of the text column inside the lookup table.
        fk_column: Name of the new integer column added to `works`.

    The names are interpolated into the SQL text, so they must be trusted
    identifiers written in this script, never user input.
    """
    cur.execute(f'drop table if exists {lookup_table}')
    cur.execute(f'create table {lookup_table}(id INTEGER PRIMARY KEY AUTOINCREMENT, {value_column} TEXT)')
    # One lookup row per distinct non-NULL value.
    cur.execute(f'INSERT INTO {lookup_table}({value_column}) SELECT DISTINCT {column} FROM works WHERE {column} IS NOT NULL')
    cur.execute(f'ALTER TABLE works ADD COLUMN {fk_column} INTEGER REFERENCES {lookup_table}(id)')
    # Rows whose original value was NULL match no lookup row and stay NULL.
    cur.execute(f'UPDATE works SET {fk_column} = (SELECT id FROM {lookup_table} WHERE {value_column} = works.{column})')
    # NOTE: ALTER TABLE ... DROP COLUMN requires SQLite 3.35.0 or newer.
    cur.execute(f'ALTER TABLE works DROP COLUMN {column}')


# Strip markup from the free-text columns BEFORE loading, so the cleaned text
# is what ends up in the database. na_action='ignore' leaves missing cells
# untouched so they are stored as NULL rather than as text.
for _text_column in ('skills', 'otherInfo'):
    df[_text_column] = df[_text_column].map(clean, na_action='ignore')

df.to_sql("works", con, if_exists='append', index=False)
con.commit()

# Move the repeated gender / education strings into their own tables.
_split_into_lookup_table(cursor, 'gender', 'genders', 'gender_val', 'gender_id')
con.commit()

_split_into_lookup_table(cursor, 'educationType', 'education', 'edu_val', 'educationType_id')
con.commit()

# Release the database file now that all work is committed.
con.close()
Loading