Skip to content

Commit 77e9e9a

Browse files
author
bitoollearner
committed
LeetCode PySpark Solution
1 parent f3a0178 commit 77e9e9a

11 files changed

+2430
-74
lines changed

Solved/3188. Find Top Scoring Students II (Hard)-(Solved).ipynb

Lines changed: 297 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
"cell_type": "markdown",
55
"metadata": {
66
"application/vnd.databricks.v1+cell": {
7-
"cellMetadata": {},
7+
"cellMetadata": {
8+
"byteLimit": 2048000,
9+
"rowLimit": 10000
10+
},
811
"inputWidgets": {},
912
"nuid": "7d1791bd-9552-4116-90b2-daa678bbffb8",
1013
"showTitle": false,
@@ -21,7 +24,10 @@
2124
"execution_count": 0,
2225
"metadata": {
2326
"application/vnd.databricks.v1+cell": {
24-
"cellMetadata": {},
27+
"cellMetadata": {
28+
"byteLimit": 2048000,
29+
"rowLimit": 10000
30+
},
2531
"inputWidgets": {},
2632
"nuid": "2901fbc9-49db-4285-8b94-c460f34d4c4f",
2733
"showTitle": false,
@@ -40,7 +46,10 @@
4046
"cell_type": "markdown",
4147
"metadata": {
4248
"application/vnd.databricks.v1+cell": {
43-
"cellMetadata": {},
49+
"cellMetadata": {
50+
"byteLimit": 2048000,
51+
"rowLimit": 10000
52+
},
4453
"inputWidgets": {},
4554
"nuid": "655120e7-6b25-417c-a08a-c6925feaa425",
4655
"showTitle": false,
@@ -161,15 +170,27 @@
161170
"execution_count": 0,
162171
"metadata": {
163172
"application/vnd.databricks.v1+cell": {
164-
"cellMetadata": {},
173+
"cellMetadata": {
174+
"byteLimit": 2048000,
175+
"rowLimit": 10000
176+
},
165177
"inputWidgets": {},
166178
"nuid": "a2368434-0191-416c-aa1d-12cd44cf48e6",
167179
"showTitle": false,
168180
"tableResultSettingsMap": {},
169181
"title": ""
170182
}
171183
},
172-
"outputs": [],
184+
"outputs": [
185+
{
186+
"output_type": "stream",
187+
"name": "stdout",
188+
"output_type": "stream",
189+
"text": [
190+
"+----------+-------+----------------+\n|student_id| name| student_major|\n+----------+-------+----------------+\n| 1| Alice|Computer Science|\n| 2| Bob|Computer Science|\n| 3|Charlie| Mathematics|\n| 4| David| Mathematics|\n+----------+-------+----------------+\n\n+---------+-----------------+-------+----------------+---------+\n|course_id| name|credits| course_major|mandatory|\n+---------+-----------------+-------+----------------+---------+\n| 101| Algorithms| 3|Computer Science| yes|\n| 102| Data Structures| 3|Computer Science| yes|\n| 103| Calculus| 4| Mathematics| yes|\n| 104| Linear Algebra| 4| Mathematics| yes|\n| 105| Machine Learning| 3|Computer Science| no|\n| 106| Probability| 3| Mathematics| no|\n| 107|Operating Systems| 3|Computer Science| no|\n| 108| Statistics| 3| Mathematics| no|\n+---------+-----------------+-------+----------------+---------+\n\n+----------+---------+-----------+-----+---+\n|student_id|course_id| semester|grade|GPA|\n+----------+---------+-----------+-----+---+\n| 1| 101| Fall 2023| A|4.0|\n| 1| 102|Spring 2023| A|4.0|\n| 1| 105|Spring 2023| A|4.0|\n| 1| 107| Fall 2023| B|3.5|\n| 2| 101| Fall 2023| A|4.0|\n| 2| 102|Spring 2023| B|3.0|\n| 3| 103| Fall 2023| A|4.0|\n| 3| 104|Spring 2023| A|4.0|\n| 3| 106|Spring 2023| A|4.0|\n| 3| 108| Fall 2023| B|3.5|\n| 4| 103| Fall 2023| B|3.0|\n| 4| 104|Spring 2023| B|3.0|\n+----------+---------+-----------+-----+---+\n\n"
191+
]
192+
}
193+
],
173194
"source": [
174195
"students_data_3188 = [\n",
175196
" (1, \"Alice\", \"Computer Science\"),\n",
@@ -178,7 +199,7 @@
178199
" (4, \"David\", \"Mathematics\")\n",
179200
"]\n",
180201
"\n",
181-
"students_columns_3188 = [\"student_id\", \"name\", \"major\"]\n",
202+
"students_columns_3188 = [\"student_id\", \"name\", \"student_major\"]\n",
182203
"students_df_3188 = spark.createDataFrame(students_data_3188, students_columns_3188)\n",
183204
"students_df_3188.show()\n",
184205
"\n",
@@ -193,7 +214,7 @@
193214
" (108, \"Statistics\", 3, \"Mathematics\", \"no\")\n",
194215
"]\n",
195216
"\n",
196-
"courses_columns_3188 = [\"course_id\", \"name\", \"credits\", \"major\", \"mandatory\"]\n",
217+
"courses_columns_3188 = [\"course_id\", \"name\", \"credits\", \"course_major\", \"mandatory\"]\n",
197218
"courses_df_3188 = spark.createDataFrame(courses_data_3188, courses_columns_3188)\n",
198219
"courses_df_3188.show()\n",
199220
"\n",
@@ -216,15 +237,282 @@
216237
"enrollments_df_3188 = spark.createDataFrame(enrollments_data_3188, enrollments_columns_3188)\n",
217238
"enrollments_df_3188.show()\n"
218239
]
240+
},
241+
{
242+
"cell_type": "code",
243+
"execution_count": 0,
244+
"metadata": {
245+
"application/vnd.databricks.v1+cell": {
246+
"cellMetadata": {
247+
"byteLimit": 2048000,
248+
"rowLimit": 10000
249+
},
250+
"inputWidgets": {},
251+
"nuid": "b9a666b9-1bcd-4c57-afa7-862cd9123f23",
252+
"showTitle": false,
253+
"tableResultSettingsMap": {},
254+
"title": ""
255+
}
256+
},
257+
"outputs": [],
258+
"source": [
259+
"enroll_courses_3188 = (enrollments_df_3188  # join with no explicit type (Spark defaults to inner) on course_id\n",
260+
"    .join(courses_df_3188, on=\"course_id\"))"
261+
]
262+
},
263+
{
264+
"cell_type": "code",
265+
"execution_count": 0,
266+
"metadata": {
267+
"application/vnd.databricks.v1+cell": {
268+
"cellMetadata": {
269+
"byteLimit": 2048000,
270+
"rowLimit": 10000
271+
},
272+
"inputWidgets": {},
273+
"nuid": "5cde8a6b-de4b-4ec2-a236-ea6864ccd7b5",
274+
"showTitle": false,
275+
"tableResultSettingsMap": {},
276+
"title": ""
277+
}
278+
},
279+
"outputs": [],
280+
"source": [
281+
"student_courses_3188 = (enroll_courses_3188  # adds the student's name and student_major to each enrollment row\n",
282+
"    .join(students_df_3188, on=\"student_id\"))"
283+
]
284+
},
285+
{
286+
"cell_type": "code",
287+
"execution_count": 0,
288+
"metadata": {
289+
"application/vnd.databricks.v1+cell": {
290+
"cellMetadata": {
291+
"byteLimit": 2048000,
292+
"rowLimit": 10000
293+
},
294+
"inputWidgets": {},
295+
"nuid": "59aefcf7-173d-47f5-ad05-8dcc17c54d3e",
296+
"showTitle": false,
297+
"tableResultSettingsMap": {},
298+
"title": ""
299+
}
300+
},
301+
"outputs": [],
302+
"source": [
303+
"student_gpa_3188 = (student_courses_3188\n",
304+
"    .groupBy(\"student_id\")\n",
305+
"    .agg(avg(\"GPA\").alias(\"avg_gpa\"))  # mean GPA over all of the student's joined enrollment rows\n",
306+
"    .where(col(\"avg_gpa\") >= 2.5))  # keep students whose average is at least 2.5"
307+
]
308+
},
309+
{
310+
"cell_type": "code",
311+
"execution_count": 0,
312+
"metadata": {
313+
"application/vnd.databricks.v1+cell": {
314+
"cellMetadata": {
315+
"byteLimit": 2048000,
316+
"rowLimit": 10000
317+
},
318+
"inputWidgets": {},
319+
"nuid": "a80ffbcd-9fd5-4555-886d-9e052f374879",
320+
"showTitle": false,
321+
"tableResultSettingsMap": {},
322+
"title": ""
323+
}
324+
},
325+
"outputs": [],
326+
"source": [
327+
"mandatory_courses_3188 = (courses_df_3188\n",
328+
"    .where(col(\"mandatory\") == \"yes\")  # mandatory courses only\n",
329+
"    .groupBy(\"course_major\")\n",
330+
"    .agg(countDistinct(\"course_id\").alias(\"total_mandatory\")))  # number of mandatory courses per major"
331+
]
332+
},
333+
{
334+
"cell_type": "code",
335+
"execution_count": 0,
336+
"metadata": {
337+
"application/vnd.databricks.v1+cell": {
338+
"cellMetadata": {
339+
"byteLimit": 2048000,
340+
"rowLimit": 10000
341+
},
342+
"inputWidgets": {},
343+
"nuid": "5eb887d5-3405-4b02-8bea-93ee8e98a7e2",
344+
"showTitle": false,
345+
"tableResultSettingsMap": {},
346+
"title": ""
347+
}
348+
},
349+
"outputs": [],
350+
"source": [
351+
"mandatory_check_3188 = (student_courses_3188\n",
352+
"    .filter((col(\"mandatory\") == \"yes\") & (col(\"grade\") == \"A\") & (col(\"course_major\") == col(\"student_major\")))  # own-major courses only, so the count is comparable with the per-major total_mandatory\n",
353+
"    .groupBy(\"student_id\", \"student_major\")\n",
354+
"    .agg(countDistinct(\"course_id\").alias(\"mandatory_A_count\")))  # distinct A-graded mandatory courses per student"
355+
]
356+
},
357+
{
358+
"cell_type": "code",
359+
"execution_count": 0,
360+
"metadata": {
361+
"application/vnd.databricks.v1+cell": {
362+
"cellMetadata": {
363+
"byteLimit": 2048000,
364+
"rowLimit": 10000
365+
},
366+
"inputWidgets": {},
367+
"nuid": "7e247520-5899-4912-9a36-818d215e19be",
368+
"showTitle": false,
369+
"tableResultSettingsMap": {},
370+
"title": ""
371+
}
372+
},
373+
"outputs": [],
374+
"source": [
375+
"mandatory_valid_3188 = (mandatory_check_3188\n",
376+
"    .join(mandatory_courses_3188, on=mandatory_check_3188[\"student_major\"] == mandatory_courses_3188[\"course_major\"], how=\"inner\")\n",
377+
"    .where(col(\"mandatory_A_count\") == col(\"total_mandatory\"))  # A-graded mandatory count must equal the major's mandatory total\n",
378+
"    .select(\"student_id\", \"student_major\"))"
379+
]
380+
},
381+
{
382+
"cell_type": "code",
383+
"execution_count": 0,
384+
"metadata": {
385+
"application/vnd.databricks.v1+cell": {
386+
"cellMetadata": {
387+
"byteLimit": 2048000,
388+
"rowLimit": 10000
389+
},
390+
"inputWidgets": {},
391+
"nuid": "038b95d1-a917-4464-abf8-dc47c91b1f41",
392+
"showTitle": false,
393+
"tableResultSettingsMap": {},
394+
"title": ""
395+
}
396+
},
397+
"outputs": [],
398+
"source": [
399+
"elective_check_3188 = (student_courses_3188\n",
400+
"    .filter((col(\"mandatory\") == \"no\") & (col(\"grade\").isin([\"A\", \"B\"])) & (col(\"course_major\") == col(\"student_major\")))  # electives from the student's own major with grade A or B\n",
401+
"    .groupBy(\"student_id\", \"student_major\")\n",
402+
"    .agg(countDistinct(\"course_id\").alias(\"elective_B_count\"))\n",
403+
"    .filter(col(\"elective_B_count\") >= 2))  # require at least two such electives"
404+
]
405+
},
406+
{
407+
"cell_type": "code",
408+
"execution_count": 0,
409+
"metadata": {
410+
"application/vnd.databricks.v1+cell": {
411+
"cellMetadata": {
412+
"byteLimit": 2048000,
413+
"rowLimit": 10000
414+
},
415+
"inputWidgets": {},
416+
"nuid": "fe074ff3-37a5-4892-adef-fd244152d274",
417+
"showTitle": false,
418+
"tableResultSettingsMap": {},
419+
"title": ""
420+
}
421+
},
422+
"outputs": [
423+
{
424+
"output_type": "display_data",
425+
"data": {
426+
"text/html": [
427+
"<style scoped>\n",
428+
" .table-result-container {\n",
429+
" max-height: 300px;\n",
430+
" overflow: auto;\n",
431+
" }\n",
432+
" table, th, td {\n",
433+
" border: 1px solid black;\n",
434+
" border-collapse: collapse;\n",
435+
" }\n",
436+
" th, td {\n",
437+
" padding: 5px;\n",
438+
" }\n",
439+
" th {\n",
440+
" text-align: left;\n",
441+
" }\n",
442+
"</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>student_id</th></tr></thead><tbody><tr><td>1</td></tr><tr><td>3</td></tr></tbody></table></div>"
443+
]
444+
},
445+
"metadata": {
446+
"application/vnd.databricks.v1+output": {
447+
"addedWidgets": {},
448+
"aggData": [],
449+
"aggError": "",
450+
"aggOverflow": false,
451+
"aggSchema": [],
452+
"aggSeriesLimitReached": false,
453+
"aggType": "",
454+
"arguments": {},
455+
"columnCustomDisplayInfos": {},
456+
"data": [
457+
[
458+
1
459+
],
460+
[
461+
3
462+
]
463+
],
464+
"datasetInfos": [],
465+
"dbfsResultPath": null,
466+
"isJsonSchema": true,
467+
"metadata": {},
468+
"overflow": false,
469+
"plotOptions": {
470+
"customPlotOptions": {},
471+
"displayType": "table",
472+
"pivotAggregation": null,
473+
"pivotColumns": null,
474+
"xColumns": null,
475+
"yColumns": null
476+
},
477+
"removedWidgets": [],
478+
"schema": [
479+
{
480+
"metadata": "{}",
481+
"name": "student_id",
482+
"type": "\"long\""
483+
}
484+
],
485+
"type": "table"
486+
}
487+
},
488+
"output_type": "display_data"
489+
}
490+
],
491+
"source": [
492+
"(mandatory_valid_3188.alias(\"mv\")  # students whose mandatory-course check passed\n",
493+
"    .join(\n",
494+
"        elective_check_3188.alias(\"ec\"),  # ...who also passed the elective check\n",
495+
"        on=(col(\"mv.student_id\") == col(\"ec.student_id\")) &\n",
496+
"           (col(\"mv.student_major\") == col(\"ec.student_major\")),\n",
497+
"        how=\"inner\")\n",
498+
"    .join(student_gpa_3188.alias(\"sg\"), on=col(\"mv.student_id\") == col(\"sg.student_id\"), how=\"inner\")\n",
499+
"    .select(col(\"mv.student_id\").alias(\"student_id\")).distinct().sort(\"student_id\").display())"
500+
]
219501
}
220502
],
221503
"metadata": {
222504
"application/vnd.databricks.v1+notebook": {
223-
"computePreferences": null,
505+
"computePreferences": {
506+
"hardware": {
507+
"accelerator": null,
508+
"gpuPoolId": null,
509+
"memory": null
510+
}
511+
},
224512
"dashboards": [],
225513
"environmentMetadata": {
226514
"base_environment": "",
227-
"environment_version": "1"
515+
"environment_version": "2"
228516
},
229517
"inputWidgetPreferences": null,
230518
"language": "python",

0 commit comments

Comments
 (0)