-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrequestDistribution.py
More file actions
95 lines (82 loc) · 2.78 KB
/
requestDistribution.py
File metadata and controls
95 lines (82 loc) · 2.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import sys
from urllib.parse import urlparse
import os
def checkResExist(df, request_id):
    """Return True when *request_id* occurs in the ``request_id`` column of *df*.

    Args:
        df: Table-like object indexed by column name (a pandas DataFrame in
            this file) that may contain a ``request_id`` column.
        request_id: Identifier to look for.

    Returns:
        bool: True if any entry of ``df["request_id"]`` equals *request_id*;
        False otherwise, including when *df* has no ``request_id`` column
        (for example an empty frame).
    """
    # A frame without the column cannot contain the id; answer False instead
    # of letting the column lookup raise KeyError.
    if "request_id" not in df:
        return False
    return request_id in df["request_id"].values
def getInitiatorURL(stack):
    """Return the URL of the first call frame found along a stack's parent chain.

    Starting at *stack*, each trace's ``callFrames`` list is inspected; the
    ``url`` of the first frame of the first non-empty list is returned. Traces
    with no frames are skipped by following their ``parent`` entry.

    Args:
        stack: Mapping with a ``callFrames`` list and an optional ``parent``
            mapping of the same shape (presumably a Chrome DevTools Protocol
            stack trace -- confirm against the producer of this data).

    Returns:
        The ``url`` value of the first available call frame, or None when no
        trace in the chain has any call frame.
    """
    current = stack
    # Iterate rather than recurse so a long parent chain cannot hit the
    # recursion limit, and so a chain that ends without frames yields None
    # instead of a KeyError on the missing "parent" key.
    while current:
        frames = current.get("callFrames")
        if frames:
            return frames[0]["url"]
        current = current.get("parent")
    return None
def extractDigits(lst):
    """Wrap each element of *lst* in its own one-item list.

    Despite the name, no digits are extracted: ``["a", "b"]`` becomes
    ``[["a"], ["b"]]``, i.e. one single-column row per input element.
    """
    return [[entry] for entry in lst]
def _with_response(labels, responses):
    """Keep only the labelled requests whose request_id also appears in *responses*."""
    if labels.empty:
        return labels
    if "request_id" not in responses:
        # No recorded responses at all: nothing can match.
        return labels.iloc[0:0]
    ids = labels["request_id"]
    # notna() keeps the element-wise-equality semantics of a plain membership
    # test, under which a missing id never matches anything.
    return labels[ids.notna() & ids.isin(responses["request_id"])]


def _count_flagged(labels):
    """Return ``(tracking, functional)`` request counts for a label frame."""
    if labels.empty:
        return 0, 0
    flagged = (
        (labels["easylistflag"] == 1)
        | (labels["easyprivacylistflag"] == 1)
        | (labels["ancestorflag"] == 1)
    )
    tracking = int(flagged.sum())
    return tracking, len(labels) - tracking


def _percent_drop(control_count, experiment_count):
    """Return the percentage by which the experiment count is below the control count."""
    if control_count == 0:
        # Undefined without a baseline; NaN lets the caller drop the value.
        return float("nan")
    return (control_count - experiment_count) * 100 / control_count


def countDistribution(
    experiment, control="Control", output_path="Figures/DistributionPlot.pdf"
):
    """Plot the per-website reduction in tracking and functional requests.

    For every entry listed under
    ``<experiment>/webpage-crawler-extension/server/output`` the function reads
    ``label_request.json`` and ``responses.json`` (JSON lines) from the
    experiment crawl and ``label_request.json`` from the control crawl.

    A request counts as "tracking" when any of ``easylistflag``,
    ``easyprivacylistflag`` or ``ancestorflag`` equals 1, otherwise as
    "functional". Experiment requests are counted only if their
    ``request_id`` appears in ``responses.json``; control requests are all
    counted. The plotted value per website and category is
    ``(control - experiment) * 100 / control``.

    Args:
        experiment: Root directory of the experiment crawl.
        control: Root directory of the control crawl (default ``"Control"``).
        output_path: File the figure is saved to
            (default ``"Figures/DistributionPlot.pdf"``).

    Returns:
        None. The figure is written to *output_path* and then shown.

    Notes:
        Websites whose input files are missing or unreadable are skipped with
        a message on stderr. A category with a control count of 0 has no
        defined percentage and is left out of the plot.
    """
    crawl_output = ("webpage-crawler-extension", "server", "output")
    experiment_root = os.path.join(experiment, *crawl_output)
    control_root = os.path.join(control, *crawl_output)

    rows = []
    for site in sorted(os.listdir(experiment_root)):
        try:
            labels = pd.read_json(
                os.path.join(experiment_root, site, "label_request.json")
            )
            responses = pd.read_json(
                os.path.join(experiment_root, site, "responses.json"), lines=True
            )
            control_labels = pd.read_json(
                os.path.join(control_root, site, "label_request.json")
            )
        except (OSError, ValueError) as err:
            # One broken or incomplete site must not abort the whole analysis.
            print(f"Skipping {site}: {err}", file=sys.stderr)
            continue

        exp_tracking, exp_functional = _count_flagged(
            _with_response(labels, responses)
        )
        ctl_tracking, ctl_functional = _count_flagged(control_labels)
        rows.append(
            {
                "Website": site,
                "Tracking": _percent_drop(ctl_tracking, exp_tracking),
                "Functional": _percent_drop(ctl_functional, exp_functional),
            }
        )

    df = pd.DataFrame(rows, columns=["Website", "Tracking", "Functional"])
    # Unpivot to one row per (website, category) so the category can be the hue.
    df = pd.melt(df, id_vars=["Website"], var_name="Function", value_name="Value")
    df = df.dropna(subset=["Value"])
    if df.empty:
        print("No websites with usable data; nothing to plot.", file=sys.stderr)
        return

    colors = ["#E11916", "#3FD72D"]
    sns.displot(
        df,
        x="Value",
        hue="Function",
        # Pin the colour assignment even if one category has no values left.
        hue_order=["Tracking", "Functional"],
        multiple="dodge",
        palette=colors,
        bins=10,
    )

    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # Save before show(): after a blocking show() the figure is closed, so a
    # later savefig() would write an empty figure.
    plt.savefig(output_path)
    plt.show()
def main(argv=None):
    """Run the distribution analysis for the experiment named on the command line.

    Args:
        argv: Optional list of arguments (without the program name). When
            None, ``sys.argv[1:]`` is used.

    Raises:
        SystemExit: With a usage message when no experiment directory is given.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        # Fail with a readable message instead of an IndexError traceback.
        sys.exit("usage: requestDistribution.py <experiment-directory>")
    countDistribution(args[0])
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()