Skip to content

Commit f3a0178

Browse files
author
bitoollearner
committed
LeetCode PySpark Solution
1 parent ac6a957 commit f3a0178

10 files changed

+2538
-71
lines changed

Solved/3103. Find Trending Hashtags II (Hard)-(Solved).ipynb

Lines changed: 182 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
"cell_type": "markdown",
55
"metadata": {
66
"application/vnd.databricks.v1+cell": {
7-
"cellMetadata": {},
7+
"cellMetadata": {
8+
"byteLimit": 2048000,
9+
"rowLimit": 10000
10+
},
811
"inputWidgets": {},
912
"nuid": "7d1791bd-9552-4116-90b2-daa678bbffb8",
1013
"showTitle": false,
@@ -21,7 +24,10 @@
2124
"execution_count": 0,
2225
"metadata": {
2326
"application/vnd.databricks.v1+cell": {
24-
"cellMetadata": {},
27+
"cellMetadata": {
28+
"byteLimit": 2048000,
29+
"rowLimit": 10000
30+
},
2531
"inputWidgets": {},
2632
"nuid": "2901fbc9-49db-4285-8b94-c460f34d4c4f",
2733
"showTitle": false,
@@ -40,7 +46,10 @@
4046
"cell_type": "markdown",
4147
"metadata": {
4248
"application/vnd.databricks.v1+cell": {
43-
"cellMetadata": {},
49+
"cellMetadata": {
50+
"byteLimit": 2048000,
51+
"rowLimit": 10000
52+
},
4453
"inputWidgets": {},
4554
"nuid": "655120e7-6b25-417c-a08a-c6925feaa425",
4655
"showTitle": false,
@@ -107,15 +116,27 @@
107116
"execution_count": 0,
108117
"metadata": {
109118
"application/vnd.databricks.v1+cell": {
110-
"cellMetadata": {},
119+
"cellMetadata": {
120+
"byteLimit": 2048000,
121+
"rowLimit": 10000
122+
},
111123
"inputWidgets": {},
112124
"nuid": "a2368434-0191-416c-aa1d-12cd44cf48e6",
113125
"showTitle": false,
114126
"tableResultSettingsMap": {},
115127
"title": ""
116128
}
117129
},
118-
"outputs": [],
130+
"outputs": [
131+
{
132+
"output_type": "stream",
133+
"name": "stdout",
134+
"output_type": "stream",
135+
"text": [
136+
"+-------+--------+--------------------+----------+\n|user_id|tweet_id| tweet|tweet_date|\n+-------+--------+--------------------+----------+\n| 135| 13|Enjoying a great ...|2024-02-01|\n| 136| 14|Another #HappyDay...|2024-02-03|\n| 137| 15|Productivity peak...|2024-02-04|\n| 138| 16|Exploring new tec...|2024-02-04|\n| 139| 17|Gratitude for tod...|2024-02-05|\n| 140| 18|Innovation drives...|2024-02-07|\n| 141| 19|Connecting with n...|2024-02-09|\n+-------+--------+--------------------+----------+\n\n"
137+
]
138+
}
139+
],
119140
"source": [
120141
"tweets_data_3103 = [\n",
121142
" (135, 13, \"Enjoying a great start to the day. #HappyDay #MorningVibes\", \"2024-02-01\"),\n",
@@ -131,15 +152,169 @@
131152
"tweets_df_3103 = spark.createDataFrame(tweets_data_3103, tweets_columns_3103)\n",
132153
"tweets_df_3103.show()"
133154
]
155+
},
156+
{
157+
"cell_type": "code",
158+
"execution_count": 0,
159+
"metadata": {
160+
"application/vnd.databricks.v1+cell": {
161+
"cellMetadata": {
162+
"byteLimit": 2048000,
163+
"rowLimit": 10000
164+
},
165+
"inputWidgets": {},
166+
"nuid": "f0551d7b-8f01-4cab-86ca-a6eea077c1c9",
167+
"showTitle": false,
168+
"tableResultSettingsMap": {},
169+
"title": ""
170+
}
171+
},
172+
"outputs": [],
173+
"source": [
174+
"df_hashtags_3103 = tweets_df_3103\\\n",
175+
" .withColumn( \"hashtags\",\n",
176+
" expr(\"regexp_extract_all(tweet, '(#[A-Za-z0-9_]+)', 0)\")\n",
177+
" )"
178+
]
179+
},
180+
{
181+
"cell_type": "code",
182+
"execution_count": 0,
183+
"metadata": {
184+
"application/vnd.databricks.v1+cell": {
185+
"cellMetadata": {
186+
"byteLimit": 2048000,
187+
"rowLimit": 10000
188+
},
189+
"inputWidgets": {},
190+
"nuid": "eac4937b-1147-49ea-b0db-bd25680c661b",
191+
"showTitle": false,
192+
"tableResultSettingsMap": {},
193+
"title": ""
194+
}
195+
},
196+
"outputs": [],
197+
"source": [
198+
"df_exploded_3103 = df_hashtags_3103\\\n",
199+
" .withColumn(\"hashtag\", explode(\"hashtags\"))"
200+
]
201+
},
202+
{
203+
"cell_type": "code",
204+
"execution_count": 0,
205+
"metadata": {
206+
"application/vnd.databricks.v1+cell": {
207+
"cellMetadata": {
208+
"byteLimit": 2048000,
209+
"rowLimit": 10000
210+
},
211+
"inputWidgets": {},
212+
"nuid": "930e4528-ab1e-4ce5-af1b-ff829423a1d3",
213+
"showTitle": false,
214+
"tableResultSettingsMap": {},
215+
"title": ""
216+
}
217+
},
218+
"outputs": [
219+
{
220+
"output_type": "display_data",
221+
"data": {
222+
"text/html": [
223+
"<style scoped>\n",
224+
" .table-result-container {\n",
225+
" max-height: 300px;\n",
226+
" overflow: auto;\n",
227+
" }\n",
228+
" table, th, td {\n",
229+
" border: 1px solid black;\n",
230+
" border-collapse: collapse;\n",
231+
" }\n",
232+
" th, td {\n",
233+
" padding: 5px;\n",
234+
" }\n",
235+
" th {\n",
236+
" text-align: left;\n",
237+
" }\n",
238+
"</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>hashtag</th><th>count</th></tr></thead><tbody><tr><td>#HappyDay</td><td>3</td></tr><tr><td>#TechLife</td><td>2</td></tr><tr><td>#WorkLife</td><td>1</td></tr></tbody></table></div>"
239+
]
240+
},
241+
"metadata": {
242+
"application/vnd.databricks.v1+output": {
243+
"addedWidgets": {},
244+
"aggData": [],
245+
"aggError": "",
246+
"aggOverflow": false,
247+
"aggSchema": [],
248+
"aggSeriesLimitReached": false,
249+
"aggType": "",
250+
"arguments": {},
251+
"columnCustomDisplayInfos": {},
252+
"data": [
253+
[
254+
"#HappyDay",
255+
3
256+
],
257+
[
258+
"#TechLife",
259+
2
260+
],
261+
[
262+
"#WorkLife",
263+
1
264+
]
265+
],
266+
"datasetInfos": [],
267+
"dbfsResultPath": null,
268+
"isJsonSchema": true,
269+
"metadata": {},
270+
"overflow": false,
271+
"plotOptions": {
272+
"customPlotOptions": {},
273+
"displayType": "table",
274+
"pivotAggregation": null,
275+
"pivotColumns": null,
276+
"xColumns": null,
277+
"yColumns": null
278+
},
279+
"removedWidgets": [],
280+
"schema": [
281+
{
282+
"metadata": "{}",
283+
"name": "hashtag",
284+
"type": "\"string\""
285+
},
286+
{
287+
"metadata": "{}",
288+
"name": "count",
289+
"type": "\"long\""
290+
}
291+
],
292+
"type": "table"
293+
}
294+
},
295+
"output_type": "display_data"
296+
}
297+
],
298+
"source": [
299+
"df_exploded_3103\\\n",
300+
" .groupBy(\"hashtag\").agg(count(\"*\").alias(\"count\"))\\\n",
301+
" .orderBy(desc(\"count\"), desc(\"hashtag\")).limit(3).display()"
302+
]
134303
}
135304
],
136305
"metadata": {
137306
"application/vnd.databricks.v1+notebook": {
138-
"computePreferences": null,
307+
"computePreferences": {
308+
"hardware": {
309+
"accelerator": null,
310+
"gpuPoolId": null,
311+
"memory": null
312+
}
313+
},
139314
"dashboards": [],
140315
"environmentMetadata": {
141316
"base_environment": "",
142-
"environment_version": "1"
317+
"environment_version": "2"
143318
},
144319
"inputWidgetPreferences": null,
145320
"language": "python",

0 commit comments

Comments
 (0)