|
4 | 4 | "cell_type": "markdown", |
5 | 5 | "metadata": { |
6 | 6 | "application/vnd.databricks.v1+cell": { |
7 | | - "cellMetadata": {}, |
| 7 | + "cellMetadata": { |
| 8 | + "byteLimit": 2048000, |
| 9 | + "rowLimit": 10000 |
| 10 | + }, |
8 | 11 | "inputWidgets": {}, |
9 | 12 | "nuid": "7d1791bd-9552-4116-90b2-daa678bbffb8", |
10 | 13 | "showTitle": false, |
|
21 | 24 | "execution_count": 0, |
22 | 25 | "metadata": { |
23 | 26 | "application/vnd.databricks.v1+cell": { |
24 | | - "cellMetadata": {}, |
| 27 | + "cellMetadata": { |
| 28 | + "byteLimit": 2048000, |
| 29 | + "rowLimit": 10000 |
| 30 | + }, |
25 | 31 | "inputWidgets": {}, |
26 | 32 | "nuid": "2901fbc9-49db-4285-8b94-c460f34d4c4f", |
27 | 33 | "showTitle": false, |
|
40 | 46 | "cell_type": "markdown", |
41 | 47 | "metadata": { |
42 | 48 | "application/vnd.databricks.v1+cell": { |
43 | | - "cellMetadata": {}, |
| 49 | + "cellMetadata": { |
| 50 | + "byteLimit": 2048000, |
| 51 | + "rowLimit": 10000 |
| 52 | + }, |
44 | 53 | "inputWidgets": {}, |
45 | 54 | "nuid": "655120e7-6b25-417c-a08a-c6925feaa425", |
46 | 55 | "showTitle": false, |
|
161 | 170 | "execution_count": 0, |
162 | 171 | "metadata": { |
163 | 172 | "application/vnd.databricks.v1+cell": { |
164 | | - "cellMetadata": {}, |
| 173 | + "cellMetadata": { |
| 174 | + "byteLimit": 2048000, |
| 175 | + "rowLimit": 10000 |
| 176 | + }, |
165 | 177 | "inputWidgets": {}, |
166 | 178 | "nuid": "a2368434-0191-416c-aa1d-12cd44cf48e6", |
167 | 179 | "showTitle": false, |
168 | 180 | "tableResultSettingsMap": {}, |
169 | 181 | "title": "" |
170 | 182 | } |
171 | 183 | }, |
172 | | - "outputs": [], |
| 184 | + "outputs": [ |
| 185 | + { |
| 187 | + "name": "stdout", |
| 188 | + "output_type": "stream", |
| 189 | + "text": [ |
| 190 | + "+----------+-------+----------------+\n|student_id| name| student_major|\n+----------+-------+----------------+\n| 1| Alice|Computer Science|\n| 2| Bob|Computer Science|\n| 3|Charlie| Mathematics|\n| 4| David| Mathematics|\n+----------+-------+----------------+\n\n+---------+-----------------+-------+----------------+---------+\n|course_id| name|credits| course_major|mandatory|\n+---------+-----------------+-------+----------------+---------+\n| 101| Algorithms| 3|Computer Science| yes|\n| 102| Data Structures| 3|Computer Science| yes|\n| 103| Calculus| 4| Mathematics| yes|\n| 104| Linear Algebra| 4| Mathematics| yes|\n| 105| Machine Learning| 3|Computer Science| no|\n| 106| Probability| 3| Mathematics| no|\n| 107|Operating Systems| 3|Computer Science| no|\n| 108| Statistics| 3| Mathematics| no|\n+---------+-----------------+-------+----------------+---------+\n\n+----------+---------+-----------+-----+---+\n|student_id|course_id| semester|grade|GPA|\n+----------+---------+-----------+-----+---+\n| 1| 101| Fall 2023| A|4.0|\n| 1| 102|Spring 2023| A|4.0|\n| 1| 105|Spring 2023| A|4.0|\n| 1| 107| Fall 2023| B|3.5|\n| 2| 101| Fall 2023| A|4.0|\n| 2| 102|Spring 2023| B|3.0|\n| 3| 103| Fall 2023| A|4.0|\n| 3| 104|Spring 2023| A|4.0|\n| 3| 106|Spring 2023| A|4.0|\n| 3| 108| Fall 2023| B|3.5|\n| 4| 103| Fall 2023| B|3.0|\n| 4| 104|Spring 2023| B|3.0|\n+----------+---------+-----------+-----+---+\n\n" |
| 191 | + ] |
| 192 | + } |
| 193 | + ], |
173 | 194 | "source": [ |
174 | 195 | "students_data_3188 = [\n", |
175 | 196 | " (1, \"Alice\", \"Computer Science\"),\n", |
|
178 | 199 | " (4, \"David\", \"Mathematics\")\n", |
179 | 200 | "]\n", |
180 | 201 | "\n", |
181 | | - "students_columns_3188 = [\"student_id\", \"name\", \"major\"]\n", |
| 202 | + "students_columns_3188 = [\"student_id\", \"name\", \"student_major\"]\n", |
182 | 203 | "students_df_3188 = spark.createDataFrame(students_data_3188, students_columns_3188)\n", |
183 | 204 | "students_df_3188.show()\n", |
184 | 205 | "\n", |
|
193 | 214 | " (108, \"Statistics\", 3, \"Mathematics\", \"no\")\n", |
194 | 215 | "]\n", |
195 | 216 | "\n", |
196 | | - "courses_columns_3188 = [\"course_id\", \"name\", \"credits\", \"major\", \"mandatory\"]\n", |
| 217 | + "courses_columns_3188 = [\"course_id\", \"name\", \"credits\", \"course_major\", \"mandatory\"]\n", |
197 | 218 | "courses_df_3188 = spark.createDataFrame(courses_data_3188, courses_columns_3188)\n", |
198 | 219 | "courses_df_3188.show()\n", |
199 | 220 | "\n", |
|
216 | 237 | "enrollments_df_3188 = spark.createDataFrame(enrollments_data_3188, enrollments_columns_3188)\n", |
217 | 238 | "enrollments_df_3188.show()\n" |
218 | 239 | ] |
| 240 | + }, |
| 241 | + { |
| 242 | + "cell_type": "code", |
| 243 | + "execution_count": 0, |
| 244 | + "metadata": { |
| 245 | + "application/vnd.databricks.v1+cell": { |
| 246 | + "cellMetadata": { |
| 247 | + "byteLimit": 2048000, |
| 248 | + "rowLimit": 10000 |
| 249 | + }, |
| 250 | + "inputWidgets": {}, |
| 251 | + "nuid": "b9a666b9-1bcd-4c57-afa7-862cd9123f23", |
| 252 | + "showTitle": false, |
| 253 | + "tableResultSettingsMap": {}, |
| 254 | + "title": "" |
| 255 | + } |
| 256 | + }, |
| 257 | + "outputs": [], |
| 258 | + "source": [ |
| 259 | + "enroll_courses_3188 = enrollments_df_3188\\\n", |
| 260 | + " .join(courses_df_3188, \"course_id\")" |
| 261 | + ] |
| 262 | + }, |
| 263 | + { |
| 264 | + "cell_type": "code", |
| 265 | + "execution_count": 0, |
| 266 | + "metadata": { |
| 267 | + "application/vnd.databricks.v1+cell": { |
| 268 | + "cellMetadata": { |
| 269 | + "byteLimit": 2048000, |
| 270 | + "rowLimit": 10000 |
| 271 | + }, |
| 272 | + "inputWidgets": {}, |
| 273 | + "nuid": "5cde8a6b-de4b-4ec2-a236-ea6864ccd7b5", |
| 274 | + "showTitle": false, |
| 275 | + "tableResultSettingsMap": {}, |
| 276 | + "title": "" |
| 277 | + } |
| 278 | + }, |
| 279 | + "outputs": [], |
| 280 | + "source": [ |
| 281 | + "student_courses_3188 = enroll_courses_3188\\\n", |
| 282 | + " .join(students_df_3188, \"student_id\")" |
| 283 | + ] |
| 284 | + }, |
| 285 | + { |
| 286 | + "cell_type": "code", |
| 287 | + "execution_count": 0, |
| 288 | + "metadata": { |
| 289 | + "application/vnd.databricks.v1+cell": { |
| 290 | + "cellMetadata": { |
| 291 | + "byteLimit": 2048000, |
| 292 | + "rowLimit": 10000 |
| 293 | + }, |
| 294 | + "inputWidgets": {}, |
| 295 | + "nuid": "59aefcf7-173d-47f5-ad05-8dcc17c54d3e", |
| 296 | + "showTitle": false, |
| 297 | + "tableResultSettingsMap": {}, |
| 298 | + "title": "" |
| 299 | + } |
| 300 | + }, |
| 301 | + "outputs": [], |
| 302 | + "source": [ |
| 303 | + "student_gpa_3188 = student_courses_3188\\\n", |
| 304 | + " .groupBy(\"student_id\")\\\n", |
| 305 | + " .agg(avg(\"GPA\").alias(\"avg_gpa\"))\\\n", |
| 306 | + " .filter(col(\"avg_gpa\") >= 2.5)" |
| 307 | + ] |
| 308 | + }, |
| 309 | + { |
| 310 | + "cell_type": "code", |
| 311 | + "execution_count": 0, |
| 312 | + "metadata": { |
| 313 | + "application/vnd.databricks.v1+cell": { |
| 314 | + "cellMetadata": { |
| 315 | + "byteLimit": 2048000, |
| 316 | + "rowLimit": 10000 |
| 317 | + }, |
| 318 | + "inputWidgets": {}, |
| 319 | + "nuid": "a80ffbcd-9fd5-4555-886d-9e052f374879", |
| 320 | + "showTitle": false, |
| 321 | + "tableResultSettingsMap": {}, |
| 322 | + "title": "" |
| 323 | + } |
| 324 | + }, |
| 325 | + "outputs": [], |
| 326 | + "source": [ |
| 327 | + "mandatory_courses_3188 = courses_df_3188\\\n", |
| 328 | + " .filter(col(\"mandatory\") == \"yes\")\\\n", |
| 329 | + " .groupBy(\"course_major\")\\\n", |
| 330 | + " .agg(countDistinct(\"course_id\").alias(\"total_mandatory\"))" |
| 331 | + ] |
| 332 | + }, |
| 333 | + { |
| 334 | + "cell_type": "code", |
| 335 | + "execution_count": 0, |
| 336 | + "metadata": { |
| 337 | + "application/vnd.databricks.v1+cell": { |
| 338 | + "cellMetadata": { |
| 339 | + "byteLimit": 2048000, |
| 340 | + "rowLimit": 10000 |
| 341 | + }, |
| 342 | + "inputWidgets": {}, |
| 343 | + "nuid": "5eb887d5-3405-4b02-8bea-93ee8e98a7e2", |
| 344 | + "showTitle": false, |
| 345 | + "tableResultSettingsMap": {}, |
| 346 | + "title": "" |
| 347 | + } |
| 348 | + }, |
| 349 | + "outputs": [], |
| 350 | + "source": [ |
| 351 | + "mandatory_check_3188 = student_courses_3188\\\n", |
| 352 | + " .filter((col(\"mandatory\") == \"yes\") & (col(\"grade\") == \"A\"))\\\n", |
| 353 | + " .groupBy(\"student_id\", \"student_major\")\\\n", |
| 354 | + " .agg(countDistinct(\"course_id\").alias(\"mandatory_A_count\"))" |
| 355 | + ] |
| 356 | + }, |
| 357 | + { |
| 358 | + "cell_type": "code", |
| 359 | + "execution_count": 0, |
| 360 | + "metadata": { |
| 361 | + "application/vnd.databricks.v1+cell": { |
| 362 | + "cellMetadata": { |
| 363 | + "byteLimit": 2048000, |
| 364 | + "rowLimit": 10000 |
| 365 | + }, |
| 366 | + "inputWidgets": {}, |
| 367 | + "nuid": "7e247520-5899-4912-9a36-818d215e19be", |
| 368 | + "showTitle": false, |
| 369 | + "tableResultSettingsMap": {}, |
| 370 | + "title": "" |
| 371 | + } |
| 372 | + }, |
| 373 | + "outputs": [], |
| 374 | + "source": [ |
| 375 | + "mandatory_valid_3188 = mandatory_check_3188\\\n", |
| 376 | + " .join( mandatory_courses_3188, mandatory_check_3188[\"student_major\"] == mandatory_courses_3188[\"course_major\"],\"inner\")\\\n", |
| 377 | + " .filter(col(\"mandatory_A_count\") == col(\"total_mandatory\"))\\\n", |
| 378 | + " .select(col(\"student_id\"), col(\"student_major\"))" |
| 379 | + ] |
| 380 | + }, |
| 381 | + { |
| 382 | + "cell_type": "code", |
| 383 | + "execution_count": 0, |
| 384 | + "metadata": { |
| 385 | + "application/vnd.databricks.v1+cell": { |
| 386 | + "cellMetadata": { |
| 387 | + "byteLimit": 2048000, |
| 388 | + "rowLimit": 10000 |
| 389 | + }, |
| 390 | + "inputWidgets": {}, |
| 391 | + "nuid": "038b95d1-a917-4464-abf8-dc47c91b1f41", |
| 392 | + "showTitle": false, |
| 393 | + "tableResultSettingsMap": {}, |
| 394 | + "title": "" |
| 395 | + } |
| 396 | + }, |
| 397 | + "outputs": [], |
| 398 | + "source": [ |
| 399 | + "elective_check_3188 = student_courses_3188\\\n", |
| 400 | + " .filter((col(\"mandatory\") == \"no\") & (col(\"grade\").isin([\"A\", \"B\"])))\\\n", |
| 401 | + " .groupBy(\"student_id\", \"student_major\")\\\n", |
| 402 | + " .agg(countDistinct(\"course_id\").alias(\"elective_B_count\"))\\\n", |
| 403 | + " .filter(col(\"elective_B_count\") >= 2)" |
| 404 | + ] |
| 405 | + }, |
| 406 | + { |
| 407 | + "cell_type": "code", |
| 408 | + "execution_count": 0, |
| 409 | + "metadata": { |
| 410 | + "application/vnd.databricks.v1+cell": { |
| 411 | + "cellMetadata": { |
| 412 | + "byteLimit": 2048000, |
| 413 | + "rowLimit": 10000 |
| 414 | + }, |
| 415 | + "inputWidgets": {}, |
| 416 | + "nuid": "fe074ff3-37a5-4892-adef-fd244152d274", |
| 417 | + "showTitle": false, |
| 418 | + "tableResultSettingsMap": {}, |
| 419 | + "title": "" |
| 420 | + } |
| 421 | + }, |
| 422 | + "outputs": [ |
| 423 | + { |
| 425 | + "data": { |
| 426 | + "text/html": [ |
| 427 | + "<style scoped>\n", |
| 428 | + " .table-result-container {\n", |
| 429 | + " max-height: 300px;\n", |
| 430 | + " overflow: auto;\n", |
| 431 | + " }\n", |
| 432 | + " table, th, td {\n", |
| 433 | + " border: 1px solid black;\n", |
| 434 | + " border-collapse: collapse;\n", |
| 435 | + " }\n", |
| 436 | + " th, td {\n", |
| 437 | + " padding: 5px;\n", |
| 438 | + " }\n", |
| 439 | + " th {\n", |
| 440 | + " text-align: left;\n", |
| 441 | + " }\n", |
| 442 | + "</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>student_id</th></tr></thead><tbody><tr><td>1</td></tr><tr><td>3</td></tr></tbody></table></div>" |
| 443 | + ] |
| 444 | + }, |
| 445 | + "metadata": { |
| 446 | + "application/vnd.databricks.v1+output": { |
| 447 | + "addedWidgets": {}, |
| 448 | + "aggData": [], |
| 449 | + "aggError": "", |
| 450 | + "aggOverflow": false, |
| 451 | + "aggSchema": [], |
| 452 | + "aggSeriesLimitReached": false, |
| 453 | + "aggType": "", |
| 454 | + "arguments": {}, |
| 455 | + "columnCustomDisplayInfos": {}, |
| 456 | + "data": [ |
| 457 | + [ |
| 458 | + 1 |
| 459 | + ], |
| 460 | + [ |
| 461 | + 3 |
| 462 | + ] |
| 463 | + ], |
| 464 | + "datasetInfos": [], |
| 465 | + "dbfsResultPath": null, |
| 466 | + "isJsonSchema": true, |
| 467 | + "metadata": {}, |
| 468 | + "overflow": false, |
| 469 | + "plotOptions": { |
| 470 | + "customPlotOptions": {}, |
| 471 | + "displayType": "table", |
| 472 | + "pivotAggregation": null, |
| 473 | + "pivotColumns": null, |
| 474 | + "xColumns": null, |
| 475 | + "yColumns": null |
| 476 | + }, |
| 477 | + "removedWidgets": [], |
| 478 | + "schema": [ |
| 479 | + { |
| 480 | + "metadata": "{}", |
| 481 | + "name": "student_id", |
| 482 | + "type": "\"long\"" |
| 483 | + } |
| 484 | + ], |
| 485 | + "type": "table" |
| 486 | + } |
| 487 | + }, |
| 488 | + "output_type": "display_data" |
| 489 | + } |
| 490 | + ], |
| 491 | + "source": [ |
| 492 | + "mandatory_valid_3188.alias(\"mv\")\\\n", |
| 493 | + " .join(\n", |
| 494 | + " elective_check_3188.alias(\"ec\"), \n", |
| 495 | + " (col(\"mv.student_id\") == col(\"ec.student_id\")) & \n", |
| 496 | + " (col(\"mv.student_major\") == col(\"ec.student_major\")), \n", |
| 497 | + " \"inner\")\\\n", |
| 498 | + " .join(student_gpa_3188.alias(\"sg\"), col(\"mv.student_id\") == col(\"sg.student_id\"), \"inner\")\\\n", |
| 499 | + " .select(col(\"mv.student_id\").alias(\"student_id\")).distinct().orderBy(\"student_id\").display()" |
| 500 | + ] |
219 | 501 | } |
220 | 502 | ], |
221 | 503 | "metadata": { |
222 | 504 | "application/vnd.databricks.v1+notebook": { |
223 | | - "computePreferences": null, |
| 505 | + "computePreferences": { |
| 506 | + "hardware": { |
| 507 | + "accelerator": null, |
| 508 | + "gpuPoolId": null, |
| 509 | + "memory": null |
| 510 | + } |
| 511 | + }, |
224 | 512 | "dashboards": [], |
225 | 513 | "environmentMetadata": { |
226 | 514 | "base_environment": "", |
227 | | - "environment_version": "1" |
| 515 | + "environment_version": "2" |
228 | 516 | }, |
229 | 517 | "inputWidgetPreferences": null, |
230 | 518 | "language": "python", |
|
0 commit comments