-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate.cpp
More file actions
124 lines (122 loc) · 2.54 KB
/
generate.cpp
File metadata and controls
124 lines (122 loc) · 2.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#include <algorithm>
#include <cerrno>
#include <cstdlib>
#include <iostream>
#include <random>
#include <string>
#include <vector>
// Pool of website host names (no scheme, no path) from which main() draws
// its output. Deliberately non-const: main() shuffles this vector in place.
std::vector<std::string> data{
    "youtube.com",
    "en.wikipedia.org",
    "twitter.com",
    "facebook.com",
    "amazon.com",
    "yelp.com",
    "reddit.com",
    "imdb.com",
    "fandom.com",
    "pinterest.com",
    "tripadvisor.com",
    "instagram.com",
    "walmart.com",
    "craigslist.org",
    "ebay.com",
    "linkedin.com",
    "play.google.com",
    "healthline.com",
    "etsy.com",
    "indeed.com",
    "apple.com",
    "espn.com",
    "webmd.com",
    "fb.com",
    "nytimes.com",
    "google.com",
    "cnn.com",
    "merriam-webster.com",
    "gamepedia.com",
    "microsoft.com",
    "target.com",
    "homedepot.com",
    "quora.com",
    "nih.gov",
    "rottentomatoes.com",
    "netflix.com",
    "quizlet.com",
    "weather.com",
    "mapquest.com",
    "britannica.com",
    "businessinsider.com",
    "dictionary.com",
    "zillow.com",
    "mayoclinic.org",
    "bestbuy.com",
    "theguardian.com",
    "yahoo.com",
    "msn.com",
    "usatoday.com",
    "medicalnewstoday.com",
    "urbandictionary.com",
    "usnews.com",
    "foxnews.com",
    "genius.com",
    "allrecipes.com",
    "spotify.com",
    "glassdoor.com",
    "forbes.com",
    "cnet.com",
    "finance.yahoo.com",
    "irs.gov",
    "lowes.com",
    "mail.yahoo.com",
    "aol.com",
    "steampowered.com",
    "washingtonpost.com",
    "usps.com",
    "office.com",
    "retailmenot.com",
    "wiktionary.org",
    "paypal.com",
    "foodnetwork.com",
    "hulu.com",
    "live.com",
    "cbssports.com",
    "wayfair.com",
    "ca.gov",
    "bleacherreport.com",
    "macys.com",
    "accuweather.com",
    "xfinity.com",
    "go.com",
    "techradar.com",
    "groupon.com",
    "investopedia.com",
    "yellowpages.com",
    "steamcommunity.com",
    "chase.com",
    "wellsfargo.com",
    "npr.org",
    "apartments.com",
    "roblox.com",
    "huffpost.com",
    "books.google.com",
    "bankofamerica.com",
    "bbb.org",
    "expedia.com",
    "wikihow.com",
    "ign.com",
    "wowhead.com",
};
// Prints a random sample of website URLs drawn from `data`.
//
// Usage: <program> <count>
//   count - how many URLs to print; must be an integer in [0, data.size()].
//
// Output: `count` lines on stdout, each of the form "https://<host>\t1".
// The hosts are distinct because they are the first `count` entries of a
// shuffled `data`.
//
// Returns 0 on success; 1 (with a message on stderr) if the argument count is
// wrong, the argument is not a valid non-negative integer, or it exceeds the
// number of available hosts.
int main(int argc, char** argv) {
    if (argc != 2) {
        std::cerr << "There should be 1 argument - number of websites" << '\n';
        return 1;
    }

    // strtol instead of atoi: atoi cannot signal failure, so non-numeric input
    // silently became 0 and out-of-range input was undefined behaviour.
    errno = 0;
    char* parse_end = nullptr;
    const long requested = std::strtol(argv[1], &parse_end, 10);
    const bool no_digits = (parse_end == argv[1]);
    const bool trailing_garbage = (*parse_end != '\0');
    if (no_digits || trailing_garbage || errno == ERANGE || requested < 0) {
        std::cerr << "The argument should be a non-negative integer - number of websites" << '\n';
        return 1;
    }

    // Safe to convert: `requested` is known to be non-negative here, so the
    // comparison below is unsigned-vs-unsigned with no wrap-around surprises.
    const auto count = static_cast<std::size_t>(requested);
    if (count > data.size()) {
        std::cerr << "There is no possibility to generate such a huge dataset" << '\n';
        return 1;
    }

    // Seed a Mersenne Twister from the system entropy source and shuffle the
    // whole pool; the sample is then simply its first `count` entries.
    std::mt19937 rng(std::random_device{}());
    std::shuffle(data.begin(), data.end(), rng);

    // '\n' rather than std::endl: avoids flushing stdout after every line.
    for (std::size_t i = 0; i < count; ++i) {
        std::cout << "https://" << data[i] << "\t1\n";
    }
    return 0;
}