railbotan · Wanderer76 · Dec 24, 2021
diff --git a/homework.py b/homework.py
@@ -0,0 +1,80 @@
+import re
+import sqlite3
+import pandas as pd
+
+
+def createTables():
+    """Drop and recreate the ``gender``, ``education`` and ``works`` tables.
+
+    Uses the module-level ``cursor`` and ``connection``. Any existing data in
+    the three tables is discarded. The change is committed before returning.
+
+    ``works.educationType`` and ``works.gender`` hold ids of rows in the
+    ``education`` and ``gender`` lookup tables. They are declared ``INTEGER``
+    on purpose: a column declared without a type gets no numeric affinity in
+    SQLite, so an id inserted as a quoted literal would be stored as text.
+    """
+    # "works" references the two lookup tables, so it is dropped first.
+    cursor.execute("drop table if exists works")
+    cursor.execute("drop table if exists gender")
+    cursor.execute("drop table if exists education")
+    cursor.execute("create table gender("
+                   "id INTEGER PRIMARY KEY AUTOINCREMENT,"
+                   "gender TEXT"
+                   ");")
+    cursor.execute("create table education("
+                   "id INTEGER primary key autoincrement,"
+                   "educationType TEXT"
+                   ");")
+    # Unique indexes keep each lookup value to a single row.
+    cursor.execute("create unique index education_educationType_uindex on education (educationType);")
+    cursor.execute("create unique index gender_gender_uindex on gender (gender);")
+    cursor.execute("create table works("
+                   "ID INTEGER PRIMARY KEY AUTOINCREMENT,"
+                   "salary INTEGER,"
+                   "educationType INTEGER references education(id),"
+                   "jobTitle TEXT,"
+                   "qualification TEXT,"
+                   "gender INTEGER references gender(id),"
+                   "dateModify TEXT,"
+                   "skills TEXT,"
+                   "otherInfo TEXT"
+                   ");")
+
+    connection.commit()
+
+
+# HTML tags plus named, decimal and hexadecimal character references.
+# Compiled once at import time; IGNORECASE so that references such as
+# "&Auml;" or "&#xAB;" are recognised as well as their lowercase forms.
+_HTML_MARKUP = re.compile(
+    r'<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});',
+    re.IGNORECASE,
+)
+
+
+def clearFromHtml(line):
+    """Return *line* with HTML tags and character references removed.
+
+    Matches are deleted, not decoded: ``"a&nbsp;b"`` becomes ``"ab"``.
+    Values that are not ``str`` (for example the float NaN pandas uses for
+    empty cells, or ``None``) are returned unchanged.
+    """
+    if not isinstance(line, str):
+        return line
+    return _HTML_MARKUP.sub('', line)
+
+
+# Import works.csv into homework.sqlite using a normalised schema: gender and
+# education type live in lookup tables and "works" stores their ids.
+connection = sqlite3.connect("homework.sqlite")
+cursor = connection.cursor()
+createTables()
+
+data = pd.read_csv('works.csv')
+
+
+def _sql_value(value):
+    """Return *value* in a form sqlite3 can bind; missing cells become NULL."""
+    if pd.isna(value):
+        return None
+    # sqlite3 does not accept numpy scalars (e.g. numpy.int64); unwrap them.
+    return value.item() if hasattr(value, "item") else value
+
+
+# Fill the lookup tables with the distinct non-missing values. Bound
+# parameters are used everywhere so that text containing quotes cannot break
+# (or inject into) the SQL.
+cursor.executemany(
+    "insert into gender(gender) values (?);",
+    [(_sql_value(name),) for name in data['gender'].dropna().drop_duplicates()],
+)
+cursor.executemany(
+    "insert into education (educationType) values (?);",
+    [(_sql_value(name),) for name in data['educationType'].dropna().drop_duplicates()],
+)
+connection.commit()
+
+# Read the generated ids back once instead of querying them for every row.
+# NOTE(review): assumes both columns hold text, as in works.csv - confirm.
+gender_ids = {
+    name: row_id
+    for row_id, name in cursor.execute("select id, gender from gender;")
+}
+education_ids = {
+    name: row_id
+    for row_id, name in cursor.execute("select id, educationType from education;")
+}
+
+for index, (_, values) in enumerate(data.iterrows(), start=1):
+    cursor.execute(
+        "insert into works "
+        "(salary, educationType, jobTitle, qualification, gender, dateModify, skills, otherInfo) "
+        "values (?, ?, ?, ?, ?, ?, ?, ?);",
+        (
+            _sql_value(values['salary']),
+            # A missing gender / education type has no lookup row -> NULL.
+            education_ids.get(_sql_value(values['educationType'])),
+            _sql_value(values['jobTitle']),
+            _sql_value(values['qualification']),
+            gender_ids.get(_sql_value(values['gender'])),
+            _sql_value(values['dateModify']),
+            _sql_value(clearFromHtml(values['skills'])),
+            _sql_value(clearFromHtml(values['otherInfo'])),
+        ),
+    )
+    print(f"Добавленна {index} строка")
+
+# One commit for the whole import: committing per row forces a disk sync
+# for every insert and makes the load far slower.
+connection.commit()
+connection.close()
diff --git a/main.py b/main.py
@@ -0,0 +1,87 @@
+import math
+import sqlite3
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import os.path
+
+# Rebuild the flat "works" table in task.sqlite from works.csv, show a sample
+# and the database file size, then index the salary column.
+connection = sqlite3.connect("task.sqlite")
+cursor = connection.cursor()
+
+cursor.execute("drop table if exists works")
+
+cursor.execute("create table works("
+               "ID INTEGER PRIMARY KEY AUTOINCREMENT,"
+               "salary INTEGER,"
+               "educationType TEXT,"
+               "jobTitle TEXT,"
+               "qualification TEXT,"
+               "gender TEXT,"
+               "dateModify TEXT,"
+               "skills TEXT,"
+               "otherInfo TEXT"
+               ");")
+
+connection.commit()
+
+data = pd.read_csv('works.csv')
+# index=False: the DataFrame index must not be written as a column, because
+# the table generates its own autoincrement ID.
+data.to_sql('works', connection, if_exists="append", index=False)
+connection.commit()
+cursor.execute("select * from works limit 5")
+print(cursor.fetchall())
+# Database file size in MiB before the index is added.
+print(os.path.getsize("task.sqlite") / 1024 / 1024, "Mб")
+cursor.execute("create index salary_index on works(salary);")
+connection.commit()
+
+# Row counts: total, men, women, then grouped by gender. Every result is
+# fetched and printed; a query whose result is never read does nothing useful.
+cursor.execute("select count(*) from works")
+print(cursor.fetchall())
+cursor.execute("select count(*) from works where gender='Мужской'")
+print(cursor.fetchall())
+cursor.execute("select count(*) from works where gender='Женский'")
+print(cursor.fetchall())
+cursor.execute("select gender,count(*) from works group by gender")
+print(cursor.fetchall())
+# Rows that have a non-empty skills field, and the skills themselves.
+cursor.execute('select count(*) from works where skills is not null')
+print(cursor.fetchall())
+cursor.execute('select skills from works where skills is not null')
+print(cursor.fetchall())
+cursor.execute("select salary from works where skills like '%Python%'")
+print(cursor.fetchall())
+# Database file size in MiB after the salary index has been created.
+print(os.path.getsize("task.sqlite") / 1024 / 1024, "Mб")
+# Salaries per gender. NULL salaries are excluded because they would arrive
+# as None and cannot be used in numeric computations.
+cursor.execute("select salary from works where gender = ? and salary is not null", ("Мужской",))
+mens_salary = [row[0] for row in cursor.fetchall()]
+cursor.execute("select salary from works where gender = ? and salary is not null", ("Женский",))
+women_salary = [row[0] for row in cursor.fetchall()]
+
+# Salary deciles (quantile levels 0.1, 0.2, ..., 1.0) for men and for women,
+# printed as (level, salary) pairs and drawn on one chart.
+quantile_levels = np.linspace(0.1, 1, 10)
+men_quantiles = np.quantile(mens_salary, quantile_levels)
+women_quantiles = np.quantile(women_salary, quantile_levels)
+print(list(zip(quantile_levels, men_quantiles)))
+print(list(zip(quantile_levels, women_quantiles)))
+
+plt.plot(quantile_levels, men_quantiles, color="b", label="Мужчины")
+plt.plot(quantile_levels, women_quantiles, color="r", label="Женщины")
+plt.xlabel("Перцентили")
+# The chart shows both genders, so the axis is labelled generically and the
+# legend tells the two curves apart.
+plt.ylabel("Зарплата")
+plt.legend()
+plt.show()
+
+# One salary histogram per (gender, education type) combination, in the order
+# men first, then women, each across the four education types.
+genders = ("Мужской", "Женский")
+education_types = ("Высшее", "Незаконченное высшее", "Среднее", "Среднее профессиональное")
+params = [(gender, education) for gender in genders for education in education_types]
+
+for gender, education in params:
+    # Bound parameters instead of string formatting; NULL salaries are
+    # skipped because they cannot be binned.
+    rows = cursor.execute(
+        "SELECT salary FROM works "
+        "WHERE gender = ? and educationType = ? and salary is not null",
+        (gender, education),
+    ).fetchall()
+    salary = [row[0] for row in rows]
+    plt.hist(salary, bins=100)
+    plt.title(f"Зарплата {gender} с образованием {education}")
+    plt.show()
+
+# Last use of the database in this script.
+connection.close()
+
+# Plot y = (2 / width) * sin^2(3 * pi * x / width) for x in [0, width].
+# NOTE(review): this looks like the probability density of the n = 3 state of
+# a particle in a box of width 1e-10 - confirm against the assignment.
+width = 1e-10
+
+x = np.linspace(0.0, width, 100)
+# Vectorised over the whole array; np.pi replaces the truncated 3.1415.
+y = 2.0 / width * np.sin(3.0 * np.pi * x / width) ** 2
+
+fig, ax = plt.subplots()
+ax.plot(x, y)
+plt.show()