diff --git a/.gitignore b/.gitignore deleted file mode 100644 index b6e4761..0000000 --- a/.gitignore +++ /dev/null @@ -1,129 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ diff --git a/06 - Modelo_ALS.ipynb b/06 - Modelo_ALS.ipynb deleted file mode 100644 index cc9e2f0..0000000 --- a/06 - Modelo_ALS.ipynb +++ /dev/null @@ -1,464 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "Modelo ALS", - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - "cell_type": "code", - "metadata": { - "id": "-CaeYO4F-OPi" - }, - "source": [ - "# instalar as dependências\n", - "!apt-get install openjdk-8-jdk-headless -qq > /dev/null\n", - "!wget -q https://archive.apache.org/dist/spark/spark-2.4.4/spark-2.4.4-bin-hadoop2.7.tgz\n", - "!tar xf spark-2.4.4-bin-hadoop2.7.tgz\n", - "!pip install -q findspark" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "sOGzt4aB-fjg" - }, - "source": [ - "# configurar as variáveis de ambiente\n", - "import os\n", - "os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n", - "os.environ[\"SPARK_HOME\"] = \"/content/spark-2.4.4-bin-hadoop2.7\"\n", - "\n", - "# tornar o pyspark \"importável\"\n", - "import findspark\n", - "findspark.init('spark-2.4.4-bin-hadoop2.7')" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "uIfWkewv-mXF" - }, - "source": [ - "from __future__ import print_function\n", - "\n", - "import sys\n", - "if sys.version >= '3':\n", - " long = int\n", - "\n", - "from pyspark.sql import SparkSession\n", - "\n", - "from pyspark.ml.evaluation import RegressionEvaluator #evaluation é a biblioteca para verificação da qualidade do modelo\n", - "from pyspark.ml.recommendation import ALS # ALS é o modelo de recomendação que será utilizadp\n", - "from pyspark.sql import Row #row é o formato que o ALS trabalha, row conterá o id do usuario, id filme, nota e timestamp" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "o_MjmoGl_AaX" - }, - "source": [ - "spark = SparkSession.builder.master('local[*]').getOrCreate() #criar/iniciar a sessão spark" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "hbVUD2aqg8Gk" - }, - "source": [ - "lines = spark.read.text(\"sample_movielens_ratings.txt\").rdd #Carregar os dados. RDD é uma estrutura paralelizada do spark" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "eqqx9-H1_WOX" - }, - "source": [ - "parts = lines.map(lambda row: row.value.split(\"::\")) #pega os itens de lines e aplica map para quebrar em partes\n", - "#fez expressão lambda, nomeou cada linha como row e quebra cada row a cada \"::\" retorna um array com 4 itens" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "pWwOX1o4DrlY" - }, - "source": [ - "#ratingsRDD: pega cada parte do item acima e converte para formato Row, instanciando nome e posição\n", - "ratingsRDD = parts.map(lambda p: Row(userId=int(p[0]), movieId=int(p[1]), rating=float(p[2]), timestamp=long(p[3])))" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "W5T4NAi1nS2S" - }, - "source": [ - "ratings = spark.createDataFrame(ratingsRDD) #pega ratingsRDD e coloca em formato de tabela" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "He6QkNy9hXmz", - "outputId": "993b7a78-0593-43c4-dc99-da1649a8842d" - }, - "source": [ - "ratings.show()" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "+-------+------+----------+------+\n", - "|movieId|rating| timestamp|userId|\n", - "+-------+------+----------+------+\n", - "| 2| 3.0|1424380312| 0|\n", - "| 3| 1.0|1424380312| 0|\n", - "| 5| 2.0|1424380312| 0|\n", - "| 9| 4.0|1424380312| 0|\n", - "| 11| 1.0|1424380312| 0|\n", - "| 12| 2.0|1424380312| 0|\n", - "| 15| 1.0|1424380312| 0|\n", - "| 17| 1.0|1424380312| 0|\n", - "| 19| 1.0|1424380312| 0|\n", - "| 21| 1.0|1424380312| 0|\n", - "| 23| 1.0|1424380312| 0|\n", - "| 26| 3.0|1424380312| 0|\n", - "| 27| 1.0|1424380312| 0|\n", - "| 28| 1.0|1424380312| 0|\n", - "| 29| 1.0|1424380312| 0|\n", - "| 30| 1.0|1424380312| 0|\n", - "| 31| 1.0|1424380312| 0|\n", - "| 34| 1.0|1424380312| 0|\n", - "| 37| 1.0|1424380312| 0|\n", - "| 41| 2.0|1424380312| 0|\n", - "+-------+------+----------+------+\n", - "only showing top 20 rows\n", - "\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "-zpPhZ61hgon" - }, - "source": [ - "(training, test) = ratings.randomSplit([0.8, 0.2]) #divide o df em porções para treinamento e teste" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "jp9FAaBshqUh" - }, - "source": [ - "als = ALS(maxIter=5, regParam=0.01, userCol=\"userId\", itemCol=\"movieId\", ratingCol=\"rating\", coldStartStrategy=\"drop\")\n", - "#instancia o modelo ALS; maxIter é o máximo de iterações, regParam é coeficiente de aprendizado,\n", - "#coldstart é quando o usuário fez poucas iterações com o sistemas ou o sistema tem a matriz muito esparsa, drop: se algum usuário\n", - "#tiver problema de coldstart, não será considerado no sistema" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "jLt3j9WXhih9" - }, - "source": [ - "model = als.fit(training) #treina o modelo com o dataset de treinamento" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ooGOA8s8iLES", - "outputId": "958ee0da-8f26-4e4a-cc65-e04adf4ee1b4" - }, - "source": [ - "predictions = model.transform(test) #aplica o modelo no conjunto de teste para fazer predições\n", - "evaluator = RegressionEvaluator(metricName=\"rmse\", labelCol=\"rating\",\n", - " predictionCol=\"prediction\")\n", - "rmse = evaluator.evaluate(predictions)\n", - "print(\"Erro médio quadrático = \" + str(rmse))" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Erro médio quadrático = 1.82015383545805\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "1AayAItWmX53" - }, - "source": [ - "userRec = model.recommendForAllUsers(10) #pegar todos os usuários e gerar 10 recomendações" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "J2D3AX47mbno", - "outputId": "3d399616-f62d-4435-e038-532c6ec5061a" - }, - "source": [ - "userRec.show()" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "+------+--------------------+\n", - "|userId| recommendations|\n", - "+------+--------------------+\n", - "| 28|[[91, 7.199192], ...|\n", - "| 26|[[30, 6.8022966],...|\n", - "| 27|[[18, 4.345591], ...|\n", - "| 12|[[35, 5.1877465],...|\n", - "| 22|[[4, 5.187028], [...|\n", - "| 1|[[17, 4.4631], [9...|\n", - "| 13|[[2, 3.0728006], ...|\n", - "| 6|[[25, 4.814437], ...|\n", - "| 16|[[76, 5.6596413],...|\n", - "| 3|[[32, 5.3414116],...|\n", - "| 20|[[46, 5.877379], ...|\n", - "| 5|[[18, 4.877655], ...|\n", - "| 19|[[51, 5.3770857],...|\n", - "| 15|[[46, 4.7499933],...|\n", - "| 17|[[90, 5.0351977],...|\n", - "| 9|[[51, 5.090912], ...|\n", - "| 4|[[92, 5.3576517],...|\n", - "| 8|[[25, 5.7912273],...|\n", - "| 23|[[46, 6.087576], ...|\n", - "| 7|[[25, 4.92321], [...|\n", - "+------+--------------------+\n", - "only showing top 20 rows\n", - "\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "at1apd0lmhBk" - }, - "source": [ - "movieRecs = model.recommendForAllItems(10) #faz a transposta da matriz de ratings, a fim de recomendar usuários em potencial para itens específicos" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Al3u35R2mi_p", - "outputId": "37ff86e2-aff0-4436-fb6f-bdf585502ba0" - }, - "source": [ - "movieRecs.show()" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "+-------+--------------------+\n", - "|movieId| recommendations|\n", - "+-------+--------------------+\n", - "| 31|[[28, 4.4335637],...|\n", - "| 85|[[16, 4.4600816],...|\n", - "| 65|[[23, 4.8359885],...|\n", - "| 53|[[21, 4.8665752],...|\n", - "| 78|[[5, 1.405179], [...|\n", - "| 34|[[23, 5.4059834],...|\n", - "| 81|[[12, 5.1232557],...|\n", - "| 28|[[18, 5.062015], ...|\n", - "| 76|[[16, 5.6596413],...|\n", - "| 26|[[22, 4.0410576],...|\n", - "| 27|[[2, 5.0788355], ...|\n", - "| 44|[[11, 3.253865], ...|\n", - "| 12|[[28, 4.8772264],...|\n", - "| 91|[[28, 7.199192], ...|\n", - "| 22|[[26, 5.148896], ...|\n", - "| 93|[[27, 1.0710598],...|\n", - "| 47|[[8, 4.567012], [...|\n", - "| 1|[[16, 4.053723], ...|\n", - "| 52|[[8, 4.92321], [2...|\n", - "| 13|[[23, 4.0366826],...|\n", - "+-------+--------------------+\n", - "only showing top 20 rows\n", - "\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "otO0JDGRmmPd" - }, - "source": [ - "users = ratings.select(als.getUserCol()).distinct() #selecina os usuários que existem nesse universo" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "tM8z5hpumo-f", - "outputId": "bbcf1956-9af1-40ac-8f49-41308e775cb1" - }, - "source": [ - "users.show()" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "+------+\n", - "|userId|\n", - "+------+\n", - "| 26|\n", - "| 29|\n", - "| 19|\n", - "| 0|\n", - "| 22|\n", - "| 7|\n", - "| 25|\n", - "| 6|\n", - "| 9|\n", - "| 27|\n", - "| 17|\n", - "| 28|\n", - "| 5|\n", - "| 1|\n", - "| 10|\n", - "| 3|\n", - "| 12|\n", - "| 8|\n", - "| 11|\n", - "| 2|\n", - "+------+\n", - "only showing top 20 rows\n", - "\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "YeIDF41BmtFd" - }, - "source": [ - "UserRecsOnlyItemId = userRec.select(userRec['userId'], userRec['recommendations']['movieid'])" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "JFt5sr2voUbS", - "outputId": "d056b74c-d451-470f-f39a-1f6d06fd86f8" - }, - "source": [ - "UserRecsOnlyItemId.show(10, False) #mostra somente as recomendações por usuário" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "+------+----------------------------------------+\n", - "|userId|recommendations.movieid |\n", - "+------+----------------------------------------+\n", - "|28 |[91, 92, 12, 81, 79, 31, 89, 49, 35, 82]|\n", - "|26 |[30, 32, 94, 17, 22, 88, 7, 98, 90, 24] |\n", - "|27 |[18, 2, 48, 19, 55, 66, 23, 44, 7, 33] |\n", - "|12 |[35, 81, 17, 88, 79, 64, 69, 27, 31, 16]|\n", - "|22 |[4, 51, 75, 74, 52, 88, 30, 9, 85, 58] |\n", - "|1 |[17, 90, 62, 51, 69, 85, 28, 22, 38, 76]|\n", - "|13 |[2, 52, 29, 18, 53, 9, 43, 92, 58, 83] |\n", - "|6 |[25, 62, 51, 90, 76, 85, 58, 2, 95, 63] |\n", - "|16 |[76, 62, 90, 29, 51, 54, 85, 1, 53, 69] |\n", - "|3 |[32, 51, 30, 80, 7, 85, 76, 8, 29, 87] |\n", - "+------+----------------------------------------+\n", - "only showing top 10 rows\n", - "\n" - ], - "name": "stdout" - } - ] - } - ] -} \ No newline at end of file diff --git a/06- sample_movielens_ratings.txt b/06- sample_movielens_ratings.txt deleted file mode 100644 index 0889142..0000000 --- a/06- sample_movielens_ratings.txt +++ /dev/null @@ -1,1501 +0,0 @@ -0::2::3::1424380312 -0::3::1::1424380312 -0::5::2::1424380312 -0::9::4::1424380312 -0::11::1::1424380312 -0::12::2::1424380312 -0::15::1::1424380312 -0::17::1::1424380312 -0::19::1::1424380312 -0::21::1::1424380312 -0::23::1::1424380312 -0::26::3::1424380312 -0::27::1::1424380312 -0::28::1::1424380312 -0::29::1::1424380312 -0::30::1::1424380312 -0::31::1::1424380312 -0::34::1::1424380312 -0::37::1::1424380312 -0::41::2::1424380312 -0::44::1::1424380312 -0::45::2::1424380312 -0::46::1::1424380312 -0::47::1::1424380312 -0::48::1::1424380312 -0::50::1::1424380312 -0::51::1::1424380312 -0::54::1::1424380312 -0::55::1::1424380312 -0::59::2::1424380312 -0::61::2::1424380312 -0::64::1::1424380312 -0::67::1::1424380312 -0::68::1::1424380312 -0::69::1::1424380312 -0::71::1::1424380312 -0::72::1::1424380312 -0::77::2::1424380312 -0::79::1::1424380312 -0::83::1::1424380312 -0::87::1::1424380312 -0::89::2::1424380312 -0::91::3::1424380312 -0::92::4::1424380312 -0::94::1::1424380312 -0::95::2::1424380312 -0::96::1::1424380312 -0::98::1::1424380312 -0::99::1::1424380312 -1::2::2::1424380312 -1::3::1::1424380312 -1::4::2::1424380312 -1::6::1::1424380312 -1::9::3::1424380312 -1::12::1::1424380312 -1::13::1::1424380312 -1::14::1::1424380312 -1::16::1::1424380312 -1::19::1::1424380312 -1::21::3::1424380312 -1::27::1::1424380312 -1::28::3::1424380312 -1::33::1::1424380312 -1::36::2::1424380312 -1::37::1::1424380312 -1::40::1::1424380312 -1::41::2::1424380312 -1::43::1::1424380312 -1::44::1::1424380312 -1::47::1::1424380312 -1::50::1::1424380312 -1::54::1::1424380312 -1::56::2::1424380312 -1::57::1::1424380312 -1::58::1::1424380312 -1::60::1::1424380312 -1::62::4::1424380312 -1::63::1::1424380312 -1::67::1::1424380312 -1::68::4::1424380312 -1::70::2::1424380312 -1::72::1::1424380312 -1::73::1::1424380312 -1::74::2::1424380312 -1::76::1::1424380312 -1::77::3::1424380312 -1::78::1::1424380312 -1::81::1::1424380312 -1::82::1::1424380312 -1::85::3::1424380312 -1::86::2::1424380312 -1::88::2::1424380312 -1::91::1::1424380312 -1::92::2::1424380312 -1::93::1::1424380312 -1::94::2::1424380312 -1::96::1::1424380312 -1::97::1::1424380312 -2::4::3::1424380312 -2::6::1::1424380312 -2::8::5::1424380312 -2::9::1::1424380312 -2::10::1::1424380312 -2::12::3::1424380312 -2::13::1::1424380312 -2::15::2::1424380312 -2::18::2::1424380312 -2::19::4::1424380312 -2::22::1::1424380312 -2::26::1::1424380312 -2::28::1::1424380312 -2::34::4::1424380312 -2::35::1::1424380312 -2::37::5::1424380312 -2::38::1::1424380312 -2::39::5::1424380312 -2::40::4::1424380312 -2::47::1::1424380312 -2::50::1::1424380312 -2::52::2::1424380312 -2::54::1::1424380312 -2::55::1::1424380312 -2::57::2::1424380312 -2::58::2::1424380312 -2::59::1::1424380312 -2::61::1::1424380312 -2::62::1::1424380312 -2::64::1::1424380312 -2::65::1::1424380312 -2::66::3::1424380312 -2::68::1::1424380312 -2::71::3::1424380312 -2::76::1::1424380312 -2::77::1::1424380312 -2::78::1::1424380312 -2::80::1::1424380312 -2::83::5::1424380312 -2::85::1::1424380312 -2::87::2::1424380312 -2::88::1::1424380312 -2::89::4::1424380312 -2::90::1::1424380312 -2::92::4::1424380312 -2::93::5::1424380312 -3::0::1::1424380312 -3::1::1::1424380312 -3::2::1::1424380312 -3::7::3::1424380312 -3::8::3::1424380312 -3::9::1::1424380312 -3::14::1::1424380312 -3::15::1::1424380312 -3::16::1::1424380312 -3::18::4::1424380312 -3::19::1::1424380312 -3::24::3::1424380312 -3::26::1::1424380312 -3::29::3::1424380312 -3::33::1::1424380312 -3::34::3::1424380312 -3::35::1::1424380312 -3::36::3::1424380312 -3::37::1::1424380312 -3::38::2::1424380312 -3::43::1::1424380312 -3::44::1::1424380312 -3::46::1::1424380312 -3::47::1::1424380312 -3::51::5::1424380312 -3::52::3::1424380312 -3::56::1::1424380312 -3::58::1::1424380312 -3::60::3::1424380312 -3::62::1::1424380312 -3::65::2::1424380312 -3::66::1::1424380312 -3::67::1::1424380312 -3::68::2::1424380312 -3::70::1::1424380312 -3::72::2::1424380312 -3::76::3::1424380312 -3::79::3::1424380312 -3::80::4::1424380312 -3::81::1::1424380312 -3::83::1::1424380312 -3::84::1::1424380312 -3::86::1::1424380312 -3::87::2::1424380312 -3::88::4::1424380312 -3::89::1::1424380312 -3::91::1::1424380312 -3::94::3::1424380312 -4::1::1::1424380312 -4::6::1::1424380312 -4::8::1::1424380312 -4::9::1::1424380312 -4::10::1::1424380312 -4::11::1::1424380312 -4::12::1::1424380312 -4::13::1::1424380312 -4::14::2::1424380312 -4::15::1::1424380312 -4::17::1::1424380312 -4::20::1::1424380312 -4::22::1::1424380312 -4::23::1::1424380312 -4::24::1::1424380312 -4::29::4::1424380312 -4::30::1::1424380312 -4::31::1::1424380312 -4::34::1::1424380312 -4::35::1::1424380312 -4::36::1::1424380312 -4::39::2::1424380312 -4::40::3::1424380312 -4::41::4::1424380312 -4::43::2::1424380312 -4::44::1::1424380312 -4::45::1::1424380312 -4::46::1::1424380312 -4::47::1::1424380312 -4::49::2::1424380312 -4::50::1::1424380312 -4::51::1::1424380312 -4::52::4::1424380312 -4::54::1::1424380312 -4::55::1::1424380312 -4::60::3::1424380312 -4::61::1::1424380312 -4::62::4::1424380312 -4::63::3::1424380312 -4::65::1::1424380312 -4::67::2::1424380312 -4::69::1::1424380312 -4::70::4::1424380312 -4::71::1::1424380312 -4::73::1::1424380312 -4::78::1::1424380312 -4::84::1::1424380312 -4::85::1::1424380312 -4::87::3::1424380312 -4::88::3::1424380312 -4::89::2::1424380312 -4::96::1::1424380312 -4::97::1::1424380312 -4::98::1::1424380312 -4::99::1::1424380312 -5::0::1::1424380312 -5::1::1::1424380312 -5::4::1::1424380312 -5::5::1::1424380312 -5::8::1::1424380312 -5::9::3::1424380312 -5::10::2::1424380312 -5::13::3::1424380312 -5::15::1::1424380312 -5::19::1::1424380312 -5::20::3::1424380312 -5::21::2::1424380312 -5::23::3::1424380312 -5::27::1::1424380312 -5::28::1::1424380312 -5::29::1::1424380312 -5::31::1::1424380312 -5::36::3::1424380312 -5::38::2::1424380312 -5::39::1::1424380312 -5::42::1::1424380312 -5::48::3::1424380312 -5::49::4::1424380312 -5::50::3::1424380312 -5::51::1::1424380312 -5::52::1::1424380312 -5::54::1::1424380312 -5::55::5::1424380312 -5::56::3::1424380312 -5::58::1::1424380312 -5::60::1::1424380312 -5::61::1::1424380312 -5::64::3::1424380312 -5::65::2::1424380312 -5::68::4::1424380312 -5::70::1::1424380312 -5::71::1::1424380312 -5::72::1::1424380312 -5::74::1::1424380312 -5::79::1::1424380312 -5::81::2::1424380312 -5::84::1::1424380312 -5::85::1::1424380312 -5::86::1::1424380312 -5::88::1::1424380312 -5::90::4::1424380312 -5::91::2::1424380312 -5::95::2::1424380312 -5::99::1::1424380312 -6::0::1::1424380312 -6::1::1::1424380312 -6::2::3::1424380312 -6::5::1::1424380312 -6::6::1::1424380312 -6::9::1::1424380312 -6::10::1::1424380312 -6::15::2::1424380312 -6::16::2::1424380312 -6::17::1::1424380312 -6::18::1::1424380312 -6::20::1::1424380312 -6::21::1::1424380312 -6::22::1::1424380312 -6::24::1::1424380312 -6::25::5::1424380312 -6::26::1::1424380312 -6::28::1::1424380312 -6::30::1::1424380312 -6::33::1::1424380312 -6::38::1::1424380312 -6::39::1::1424380312 -6::43::4::1424380312 -6::44::1::1424380312 -6::45::1::1424380312 -6::48::1::1424380312 -6::49::1::1424380312 -6::50::1::1424380312 -6::53::1::1424380312 -6::54::1::1424380312 -6::55::1::1424380312 -6::56::1::1424380312 -6::58::4::1424380312 -6::59::1::1424380312 -6::60::1::1424380312 -6::61::3::1424380312 -6::63::3::1424380312 -6::66::1::1424380312 -6::67::3::1424380312 -6::68::1::1424380312 -6::69::1::1424380312 -6::71::2::1424380312 -6::73::1::1424380312 -6::75::1::1424380312 -6::77::1::1424380312 -6::79::1::1424380312 -6::81::1::1424380312 -6::84::1::1424380312 -6::85::3::1424380312 -6::86::1::1424380312 -6::87::1::1424380312 -6::88::1::1424380312 -6::89::1::1424380312 -6::91::2::1424380312 -6::94::1::1424380312 -6::95::2::1424380312 -6::96::1::1424380312 -7::1::1::1424380312 -7::2::2::1424380312 -7::3::1::1424380312 -7::4::1::1424380312 -7::7::1::1424380312 -7::10::1::1424380312 -7::11::2::1424380312 -7::14::2::1424380312 -7::15::1::1424380312 -7::16::1::1424380312 -7::18::1::1424380312 -7::21::1::1424380312 -7::22::1::1424380312 -7::23::1::1424380312 -7::25::5::1424380312 -7::26::1::1424380312 -7::29::4::1424380312 -7::30::1::1424380312 -7::31::3::1424380312 -7::32::1::1424380312 -7::33::1::1424380312 -7::35::1::1424380312 -7::37::2::1424380312 -7::39::3::1424380312 -7::40::2::1424380312 -7::42::2::1424380312 -7::44::1::1424380312 -7::45::2::1424380312 -7::47::4::1424380312 -7::48::1::1424380312 -7::49::1::1424380312 -7::53::1::1424380312 -7::54::1::1424380312 -7::55::1::1424380312 -7::56::1::1424380312 -7::59::1::1424380312 -7::61::2::1424380312 -7::62::3::1424380312 -7::63::2::1424380312 -7::66::1::1424380312 -7::67::3::1424380312 -7::74::1::1424380312 -7::75::1::1424380312 -7::76::3::1424380312 -7::77::1::1424380312 -7::81::1::1424380312 -7::82::1::1424380312 -7::84::2::1424380312 -7::85::4::1424380312 -7::86::1::1424380312 -7::92::2::1424380312 -7::96::1::1424380312 -7::97::1::1424380312 -7::98::1::1424380312 -8::0::1::1424380312 -8::2::4::1424380312 -8::3::2::1424380312 -8::4::2::1424380312 -8::5::1::1424380312 -8::7::1::1424380312 -8::9::1::1424380312 -8::11::1::1424380312 -8::15::1::1424380312 -8::18::1::1424380312 -8::19::1::1424380312 -8::21::1::1424380312 -8::29::5::1424380312 -8::31::3::1424380312 -8::33::1::1424380312 -8::35::1::1424380312 -8::36::1::1424380312 -8::40::2::1424380312 -8::44::1::1424380312 -8::45::1::1424380312 -8::50::1::1424380312 -8::51::1::1424380312 -8::52::5::1424380312 -8::53::5::1424380312 -8::54::1::1424380312 -8::55::1::1424380312 -8::56::1::1424380312 -8::58::4::1424380312 -8::60::3::1424380312 -8::62::4::1424380312 -8::64::1::1424380312 -8::67::3::1424380312 -8::69::1::1424380312 -8::71::1::1424380312 -8::72::3::1424380312 -8::77::3::1424380312 -8::78::1::1424380312 -8::79::1::1424380312 -8::83::1::1424380312 -8::85::5::1424380312 -8::86::1::1424380312 -8::88::1::1424380312 -8::90::1::1424380312 -8::92::2::1424380312 -8::95::4::1424380312 -8::96::3::1424380312 -8::97::1::1424380312 -8::98::1::1424380312 -8::99::1::1424380312 -9::2::3::1424380312 -9::3::1::1424380312 -9::4::1::1424380312 -9::5::1::1424380312 -9::6::1::1424380312 -9::7::5::1424380312 -9::9::1::1424380312 -9::12::1::1424380312 -9::14::3::1424380312 -9::15::1::1424380312 -9::19::1::1424380312 -9::21::1::1424380312 -9::22::1::1424380312 -9::24::1::1424380312 -9::25::1::1424380312 -9::26::1::1424380312 -9::30::3::1424380312 -9::32::4::1424380312 -9::35::2::1424380312 -9::36::2::1424380312 -9::37::2::1424380312 -9::38::1::1424380312 -9::39::1::1424380312 -9::43::3::1424380312 -9::49::5::1424380312 -9::50::3::1424380312 -9::53::1::1424380312 -9::54::1::1424380312 -9::58::1::1424380312 -9::59::1::1424380312 -9::60::1::1424380312 -9::61::1::1424380312 -9::63::3::1424380312 -9::64::3::1424380312 -9::68::1::1424380312 -9::69::1::1424380312 -9::70::3::1424380312 -9::71::1::1424380312 -9::73::2::1424380312 -9::75::1::1424380312 -9::77::2::1424380312 -9::81::2::1424380312 -9::82::1::1424380312 -9::83::1::1424380312 -9::84::1::1424380312 -9::86::1::1424380312 -9::87::4::1424380312 -9::88::1::1424380312 -9::90::3::1424380312 -9::94::2::1424380312 -9::95::3::1424380312 -9::97::2::1424380312 -9::98::1::1424380312 -10::0::3::1424380312 -10::2::4::1424380312 -10::4::3::1424380312 -10::7::1::1424380312 -10::8::1::1424380312 -10::10::1::1424380312 -10::13::2::1424380312 -10::14::1::1424380312 -10::16::2::1424380312 -10::17::1::1424380312 -10::18::1::1424380312 -10::21::1::1424380312 -10::22::1::1424380312 -10::24::1::1424380312 -10::25::3::1424380312 -10::28::1::1424380312 -10::35::1::1424380312 -10::36::1::1424380312 -10::37::1::1424380312 -10::38::1::1424380312 -10::39::1::1424380312 -10::40::4::1424380312 -10::41::2::1424380312 -10::42::3::1424380312 -10::43::1::1424380312 -10::49::3::1424380312 -10::50::1::1424380312 -10::51::1::1424380312 -10::52::1::1424380312 -10::55::2::1424380312 -10::56::1::1424380312 -10::58::1::1424380312 -10::63::1::1424380312 -10::66::1::1424380312 -10::67::2::1424380312 -10::68::1::1424380312 -10::75::1::1424380312 -10::77::1::1424380312 -10::79::1::1424380312 -10::86::1::1424380312 -10::89::3::1424380312 -10::90::1::1424380312 -10::97::1::1424380312 -10::98::1::1424380312 -11::0::1::1424380312 -11::6::2::1424380312 -11::9::1::1424380312 -11::10::1::1424380312 -11::11::1::1424380312 -11::12::1::1424380312 -11::13::4::1424380312 -11::16::1::1424380312 -11::18::5::1424380312 -11::19::4::1424380312 -11::20::1::1424380312 -11::21::1::1424380312 -11::22::1::1424380312 -11::23::5::1424380312 -11::25::1::1424380312 -11::27::5::1424380312 -11::30::5::1424380312 -11::32::5::1424380312 -11::35::3::1424380312 -11::36::2::1424380312 -11::37::2::1424380312 -11::38::4::1424380312 -11::39::1::1424380312 -11::40::1::1424380312 -11::41::1::1424380312 -11::43::2::1424380312 -11::45::1::1424380312 -11::47::1::1424380312 -11::48::5::1424380312 -11::50::4::1424380312 -11::51::3::1424380312 -11::59::1::1424380312 -11::61::1::1424380312 -11::62::1::1424380312 -11::64::1::1424380312 -11::66::4::1424380312 -11::67::1::1424380312 -11::69::5::1424380312 -11::70::1::1424380312 -11::71::3::1424380312 -11::72::3::1424380312 -11::75::3::1424380312 -11::76::1::1424380312 -11::77::1::1424380312 -11::78::1::1424380312 -11::79::5::1424380312 -11::80::3::1424380312 -11::81::4::1424380312 -11::82::1::1424380312 -11::86::1::1424380312 -11::88::1::1424380312 -11::89::1::1424380312 -11::90::4::1424380312 -11::94::2::1424380312 -11::97::3::1424380312 -11::99::1::1424380312 -12::2::1::1424380312 -12::4::1::1424380312 -12::6::1::1424380312 -12::7::3::1424380312 -12::8::1::1424380312 -12::14::1::1424380312 -12::15::2::1424380312 -12::16::4::1424380312 -12::17::5::1424380312 -12::18::2::1424380312 -12::21::1::1424380312 -12::22::2::1424380312 -12::23::3::1424380312 -12::24::1::1424380312 -12::25::1::1424380312 -12::27::5::1424380312 -12::30::2::1424380312 -12::31::4::1424380312 -12::35::5::1424380312 -12::38::1::1424380312 -12::41::1::1424380312 -12::44::2::1424380312 -12::45::1::1424380312 -12::50::4::1424380312 -12::51::1::1424380312 -12::52::1::1424380312 -12::53::1::1424380312 -12::54::1::1424380312 -12::56::2::1424380312 -12::57::1::1424380312 -12::60::1::1424380312 -12::63::1::1424380312 -12::64::5::1424380312 -12::66::3::1424380312 -12::67::1::1424380312 -12::70::1::1424380312 -12::72::1::1424380312 -12::74::1::1424380312 -12::75::1::1424380312 -12::77::1::1424380312 -12::78::1::1424380312 -12::79::3::1424380312 -12::82::2::1424380312 -12::83::1::1424380312 -12::84::1::1424380312 -12::85::1::1424380312 -12::86::1::1424380312 -12::87::1::1424380312 -12::88::1::1424380312 -12::91::3::1424380312 -12::92::1::1424380312 -12::94::4::1424380312 -12::95::2::1424380312 -12::96::1::1424380312 -12::98::2::1424380312 -13::0::1::1424380312 -13::3::1::1424380312 -13::4::2::1424380312 -13::5::1::1424380312 -13::6::1::1424380312 -13::12::1::1424380312 -13::14::2::1424380312 -13::15::1::1424380312 -13::17::1::1424380312 -13::18::3::1424380312 -13::20::1::1424380312 -13::21::1::1424380312 -13::22::1::1424380312 -13::26::1::1424380312 -13::27::1::1424380312 -13::29::3::1424380312 -13::31::1::1424380312 -13::33::1::1424380312 -13::40::2::1424380312 -13::43::2::1424380312 -13::44::1::1424380312 -13::45::1::1424380312 -13::49::1::1424380312 -13::51::1::1424380312 -13::52::2::1424380312 -13::53::3::1424380312 -13::54::1::1424380312 -13::62::1::1424380312 -13::63::2::1424380312 -13::64::1::1424380312 -13::68::1::1424380312 -13::71::1::1424380312 -13::72::3::1424380312 -13::73::1::1424380312 -13::74::3::1424380312 -13::77::2::1424380312 -13::78::1::1424380312 -13::79::2::1424380312 -13::83::3::1424380312 -13::85::1::1424380312 -13::86::1::1424380312 -13::87::2::1424380312 -13::88::2::1424380312 -13::90::1::1424380312 -13::93::4::1424380312 -13::94::1::1424380312 -13::98::1::1424380312 -13::99::1::1424380312 -14::1::1::1424380312 -14::3::3::1424380312 -14::4::1::1424380312 -14::5::1::1424380312 -14::6::1::1424380312 -14::7::1::1424380312 -14::9::1::1424380312 -14::10::1::1424380312 -14::11::1::1424380312 -14::12::1::1424380312 -14::13::1::1424380312 -14::14::3::1424380312 -14::15::1::1424380312 -14::16::1::1424380312 -14::17::1::1424380312 -14::20::1::1424380312 -14::21::1::1424380312 -14::24::1::1424380312 -14::25::2::1424380312 -14::27::1::1424380312 -14::28::1::1424380312 -14::29::5::1424380312 -14::31::3::1424380312 -14::34::1::1424380312 -14::36::1::1424380312 -14::37::2::1424380312 -14::39::2::1424380312 -14::40::1::1424380312 -14::44::1::1424380312 -14::45::1::1424380312 -14::47::3::1424380312 -14::48::1::1424380312 -14::49::1::1424380312 -14::51::1::1424380312 -14::52::5::1424380312 -14::53::3::1424380312 -14::54::1::1424380312 -14::55::1::1424380312 -14::56::1::1424380312 -14::62::4::1424380312 -14::63::5::1424380312 -14::67::3::1424380312 -14::68::1::1424380312 -14::69::3::1424380312 -14::71::1::1424380312 -14::72::4::1424380312 -14::73::1::1424380312 -14::76::5::1424380312 -14::79::1::1424380312 -14::82::1::1424380312 -14::83::1::1424380312 -14::88::1::1424380312 -14::93::3::1424380312 -14::94::1::1424380312 -14::95::2::1424380312 -14::96::4::1424380312 -14::98::1::1424380312 -15::0::1::1424380312 -15::1::4::1424380312 -15::2::1::1424380312 -15::5::2::1424380312 -15::6::1::1424380312 -15::7::1::1424380312 -15::13::1::1424380312 -15::14::1::1424380312 -15::15::1::1424380312 -15::17::2::1424380312 -15::19::2::1424380312 -15::22::2::1424380312 -15::23::2::1424380312 -15::25::1::1424380312 -15::26::3::1424380312 -15::27::1::1424380312 -15::28::2::1424380312 -15::29::1::1424380312 -15::32::1::1424380312 -15::33::2::1424380312 -15::34::1::1424380312 -15::35::2::1424380312 -15::36::1::1424380312 -15::37::1::1424380312 -15::39::1::1424380312 -15::42::1::1424380312 -15::46::5::1424380312 -15::48::2::1424380312 -15::50::2::1424380312 -15::51::1::1424380312 -15::52::1::1424380312 -15::58::1::1424380312 -15::62::1::1424380312 -15::64::3::1424380312 -15::65::2::1424380312 -15::72::1::1424380312 -15::73::1::1424380312 -15::74::1::1424380312 -15::79::1::1424380312 -15::80::1::1424380312 -15::81::1::1424380312 -15::82::2::1424380312 -15::85::1::1424380312 -15::87::1::1424380312 -15::91::2::1424380312 -15::96::1::1424380312 -15::97::1::1424380312 -15::98::3::1424380312 -16::2::1::1424380312 -16::5::3::1424380312 -16::6::2::1424380312 -16::7::1::1424380312 -16::9::1::1424380312 -16::12::1::1424380312 -16::14::1::1424380312 -16::15::1::1424380312 -16::19::1::1424380312 -16::21::2::1424380312 -16::29::4::1424380312 -16::30::2::1424380312 -16::32::1::1424380312 -16::34::1::1424380312 -16::36::1::1424380312 -16::38::1::1424380312 -16::46::1::1424380312 -16::47::3::1424380312 -16::48::1::1424380312 -16::49::1::1424380312 -16::50::1::1424380312 -16::51::5::1424380312 -16::54::5::1424380312 -16::55::1::1424380312 -16::56::2::1424380312 -16::57::1::1424380312 -16::60::1::1424380312 -16::63::2::1424380312 -16::65::1::1424380312 -16::67::1::1424380312 -16::72::1::1424380312 -16::74::1::1424380312 -16::80::1::1424380312 -16::81::1::1424380312 -16::82::1::1424380312 -16::85::5::1424380312 -16::86::1::1424380312 -16::90::5::1424380312 -16::91::1::1424380312 -16::93::1::1424380312 -16::94::3::1424380312 -16::95::2::1424380312 -16::96::3::1424380312 -16::98::3::1424380312 -16::99::1::1424380312 -17::2::1::1424380312 -17::3::1::1424380312 -17::6::1::1424380312 -17::10::4::1424380312 -17::11::1::1424380312 -17::13::2::1424380312 -17::17::5::1424380312 -17::19::1::1424380312 -17::20::5::1424380312 -17::22::4::1424380312 -17::28::1::1424380312 -17::29::1::1424380312 -17::33::1::1424380312 -17::34::1::1424380312 -17::35::2::1424380312 -17::37::1::1424380312 -17::38::1::1424380312 -17::45::1::1424380312 -17::46::5::1424380312 -17::47::1::1424380312 -17::49::3::1424380312 -17::51::1::1424380312 -17::55::5::1424380312 -17::56::3::1424380312 -17::57::1::1424380312 -17::58::1::1424380312 -17::59::1::1424380312 -17::60::1::1424380312 -17::63::1::1424380312 -17::66::1::1424380312 -17::68::4::1424380312 -17::69::1::1424380312 -17::70::1::1424380312 -17::72::1::1424380312 -17::73::3::1424380312 -17::78::1::1424380312 -17::79::1::1424380312 -17::82::2::1424380312 -17::84::1::1424380312 -17::90::5::1424380312 -17::91::3::1424380312 -17::92::1::1424380312 -17::93::1::1424380312 -17::94::4::1424380312 -17::95::2::1424380312 -17::97::1::1424380312 -18::1::1::1424380312 -18::4::3::1424380312 -18::5::2::1424380312 -18::6::1::1424380312 -18::7::1::1424380312 -18::10::1::1424380312 -18::11::4::1424380312 -18::12::2::1424380312 -18::13::1::1424380312 -18::15::1::1424380312 -18::18::1::1424380312 -18::20::1::1424380312 -18::21::2::1424380312 -18::22::1::1424380312 -18::23::2::1424380312 -18::25::1::1424380312 -18::26::1::1424380312 -18::27::1::1424380312 -18::28::5::1424380312 -18::29::1::1424380312 -18::31::1::1424380312 -18::32::1::1424380312 -18::36::1::1424380312 -18::38::5::1424380312 -18::39::5::1424380312 -18::40::1::1424380312 -18::42::1::1424380312 -18::43::1::1424380312 -18::44::4::1424380312 -18::46::1::1424380312 -18::47::1::1424380312 -18::48::1::1424380312 -18::51::2::1424380312 -18::55::1::1424380312 -18::56::1::1424380312 -18::57::1::1424380312 -18::62::1::1424380312 -18::63::1::1424380312 -18::66::3::1424380312 -18::67::1::1424380312 -18::70::1::1424380312 -18::75::1::1424380312 -18::76::3::1424380312 -18::77::1::1424380312 -18::80::3::1424380312 -18::81::3::1424380312 -18::82::1::1424380312 -18::83::5::1424380312 -18::84::1::1424380312 -18::97::1::1424380312 -18::98::1::1424380312 -18::99::2::1424380312 -19::0::1::1424380312 -19::1::1::1424380312 -19::2::1::1424380312 -19::4::1::1424380312 -19::6::2::1424380312 -19::11::1::1424380312 -19::12::1::1424380312 -19::14::1::1424380312 -19::23::1::1424380312 -19::26::1::1424380312 -19::31::1::1424380312 -19::32::4::1424380312 -19::33::1::1424380312 -19::34::1::1424380312 -19::37::1::1424380312 -19::38::1::1424380312 -19::41::1::1424380312 -19::43::1::1424380312 -19::45::1::1424380312 -19::48::1::1424380312 -19::49::1::1424380312 -19::50::2::1424380312 -19::53::2::1424380312 -19::54::3::1424380312 -19::55::1::1424380312 -19::56::2::1424380312 -19::58::1::1424380312 -19::61::1::1424380312 -19::62::1::1424380312 -19::63::1::1424380312 -19::64::1::1424380312 -19::65::1::1424380312 -19::69::2::1424380312 -19::72::1::1424380312 -19::74::3::1424380312 -19::76::1::1424380312 -19::78::1::1424380312 -19::79::1::1424380312 -19::81::1::1424380312 -19::82::1::1424380312 -19::84::1::1424380312 -19::86::1::1424380312 -19::87::2::1424380312 -19::90::4::1424380312 -19::93::1::1424380312 -19::94::4::1424380312 -19::95::2::1424380312 -19::96::1::1424380312 -19::98::4::1424380312 -20::0::1::1424380312 -20::1::1::1424380312 -20::2::2::1424380312 -20::4::2::1424380312 -20::6::1::1424380312 -20::8::1::1424380312 -20::12::1::1424380312 -20::21::2::1424380312 -20::22::5::1424380312 -20::24::2::1424380312 -20::25::1::1424380312 -20::26::1::1424380312 -20::29::2::1424380312 -20::30::2::1424380312 -20::32::2::1424380312 -20::39::1::1424380312 -20::40::1::1424380312 -20::41::2::1424380312 -20::45::2::1424380312 -20::48::1::1424380312 -20::50::1::1424380312 -20::51::3::1424380312 -20::53::3::1424380312 -20::55::1::1424380312 -20::57::2::1424380312 -20::60::1::1424380312 -20::61::1::1424380312 -20::64::1::1424380312 -20::66::1::1424380312 -20::70::2::1424380312 -20::72::1::1424380312 -20::73::2::1424380312 -20::75::4::1424380312 -20::76::1::1424380312 -20::77::4::1424380312 -20::78::1::1424380312 -20::79::1::1424380312 -20::84::2::1424380312 -20::85::2::1424380312 -20::88::3::1424380312 -20::89::1::1424380312 -20::90::3::1424380312 -20::91::1::1424380312 -20::92::2::1424380312 -20::93::1::1424380312 -20::94::4::1424380312 -20::97::1::1424380312 -21::0::1::1424380312 -21::2::4::1424380312 -21::3::1::1424380312 -21::7::2::1424380312 -21::11::1::1424380312 -21::12::1::1424380312 -21::13::1::1424380312 -21::14::3::1424380312 -21::17::1::1424380312 -21::19::1::1424380312 -21::20::1::1424380312 -21::21::1::1424380312 -21::22::1::1424380312 -21::23::1::1424380312 -21::24::1::1424380312 -21::27::1::1424380312 -21::29::5::1424380312 -21::30::2::1424380312 -21::38::1::1424380312 -21::40::2::1424380312 -21::43::3::1424380312 -21::44::1::1424380312 -21::45::1::1424380312 -21::46::1::1424380312 -21::48::1::1424380312 -21::51::1::1424380312 -21::53::5::1424380312 -21::54::1::1424380312 -21::55::1::1424380312 -21::56::1::1424380312 -21::58::3::1424380312 -21::59::3::1424380312 -21::64::1::1424380312 -21::66::1::1424380312 -21::68::1::1424380312 -21::71::1::1424380312 -21::73::1::1424380312 -21::74::4::1424380312 -21::80::1::1424380312 -21::81::1::1424380312 -21::83::1::1424380312 -21::84::1::1424380312 -21::85::3::1424380312 -21::87::4::1424380312 -21::89::2::1424380312 -21::92::2::1424380312 -21::96::3::1424380312 -21::99::1::1424380312 -22::0::1::1424380312 -22::3::2::1424380312 -22::5::2::1424380312 -22::6::2::1424380312 -22::9::1::1424380312 -22::10::1::1424380312 -22::11::1::1424380312 -22::13::1::1424380312 -22::14::1::1424380312 -22::16::1::1424380312 -22::18::3::1424380312 -22::19::1::1424380312 -22::22::5::1424380312 -22::25::1::1424380312 -22::26::1::1424380312 -22::29::3::1424380312 -22::30::5::1424380312 -22::32::4::1424380312 -22::33::1::1424380312 -22::35::1::1424380312 -22::36::3::1424380312 -22::37::1::1424380312 -22::40::1::1424380312 -22::41::3::1424380312 -22::44::1::1424380312 -22::45::2::1424380312 -22::48::1::1424380312 -22::51::5::1424380312 -22::55::1::1424380312 -22::56::2::1424380312 -22::60::3::1424380312 -22::61::1::1424380312 -22::62::4::1424380312 -22::63::1::1424380312 -22::65::1::1424380312 -22::66::1::1424380312 -22::68::4::1424380312 -22::69::4::1424380312 -22::70::3::1424380312 -22::71::1::1424380312 -22::74::5::1424380312 -22::75::5::1424380312 -22::78::1::1424380312 -22::80::3::1424380312 -22::81::1::1424380312 -22::82::1::1424380312 -22::84::1::1424380312 -22::86::1::1424380312 -22::87::3::1424380312 -22::88::5::1424380312 -22::90::2::1424380312 -22::92::3::1424380312 -22::95::2::1424380312 -22::96::2::1424380312 -22::98::4::1424380312 -22::99::1::1424380312 -23::0::1::1424380312 -23::2::1::1424380312 -23::4::1::1424380312 -23::6::2::1424380312 -23::10::4::1424380312 -23::12::1::1424380312 -23::13::4::1424380312 -23::14::1::1424380312 -23::15::1::1424380312 -23::18::4::1424380312 -23::22::2::1424380312 -23::23::4::1424380312 -23::24::1::1424380312 -23::25::1::1424380312 -23::26::1::1424380312 -23::27::5::1424380312 -23::28::1::1424380312 -23::29::1::1424380312 -23::30::4::1424380312 -23::32::5::1424380312 -23::33::2::1424380312 -23::36::3::1424380312 -23::37::1::1424380312 -23::38::1::1424380312 -23::39::1::1424380312 -23::43::1::1424380312 -23::48::5::1424380312 -23::49::5::1424380312 -23::50::4::1424380312 -23::53::1::1424380312 -23::55::5::1424380312 -23::57::1::1424380312 -23::59::1::1424380312 -23::60::1::1424380312 -23::61::1::1424380312 -23::64::4::1424380312 -23::65::5::1424380312 -23::66::2::1424380312 -23::67::1::1424380312 -23::68::3::1424380312 -23::69::1::1424380312 -23::72::1::1424380312 -23::73::3::1424380312 -23::77::1::1424380312 -23::82::2::1424380312 -23::83::1::1424380312 -23::84::1::1424380312 -23::85::1::1424380312 -23::87::3::1424380312 -23::88::1::1424380312 -23::95::2::1424380312 -23::97::1::1424380312 -24::4::1::1424380312 -24::6::3::1424380312 -24::7::1::1424380312 -24::10::2::1424380312 -24::12::1::1424380312 -24::15::1::1424380312 -24::19::1::1424380312 -24::24::1::1424380312 -24::27::3::1424380312 -24::30::5::1424380312 -24::31::1::1424380312 -24::32::3::1424380312 -24::33::1::1424380312 -24::37::1::1424380312 -24::39::1::1424380312 -24::40::1::1424380312 -24::42::1::1424380312 -24::43::3::1424380312 -24::45::2::1424380312 -24::46::1::1424380312 -24::47::1::1424380312 -24::48::1::1424380312 -24::49::1::1424380312 -24::50::1::1424380312 -24::52::5::1424380312 -24::57::1::1424380312 -24::59::4::1424380312 -24::63::4::1424380312 -24::65::1::1424380312 -24::66::1::1424380312 -24::67::1::1424380312 -24::68::3::1424380312 -24::69::5::1424380312 -24::71::1::1424380312 -24::72::4::1424380312 -24::77::4::1424380312 -24::78::1::1424380312 -24::80::1::1424380312 -24::82::1::1424380312 -24::84::1::1424380312 -24::86::1::1424380312 -24::87::1::1424380312 -24::88::2::1424380312 -24::89::1::1424380312 -24::90::5::1424380312 -24::91::1::1424380312 -24::92::1::1424380312 -24::94::2::1424380312 -24::95::1::1424380312 -24::96::5::1424380312 -24::98::1::1424380312 -24::99::1::1424380312 -25::1::3::1424380312 -25::2::1::1424380312 -25::7::1::1424380312 -25::9::1::1424380312 -25::12::3::1424380312 -25::16::3::1424380312 -25::17::1::1424380312 -25::18::1::1424380312 -25::20::1::1424380312 -25::22::1::1424380312 -25::23::1::1424380312 -25::26::2::1424380312 -25::29::1::1424380312 -25::30::1::1424380312 -25::31::2::1424380312 -25::33::4::1424380312 -25::34::3::1424380312 -25::35::2::1424380312 -25::36::1::1424380312 -25::37::1::1424380312 -25::40::1::1424380312 -25::41::1::1424380312 -25::43::1::1424380312 -25::47::4::1424380312 -25::50::1::1424380312 -25::51::1::1424380312 -25::53::1::1424380312 -25::56::1::1424380312 -25::58::2::1424380312 -25::64::2::1424380312 -25::67::2::1424380312 -25::68::1::1424380312 -25::70::1::1424380312 -25::71::4::1424380312 -25::73::1::1424380312 -25::74::1::1424380312 -25::76::1::1424380312 -25::79::1::1424380312 -25::82::1::1424380312 -25::84::2::1424380312 -25::85::1::1424380312 -25::91::3::1424380312 -25::92::1::1424380312 -25::94::1::1424380312 -25::95::1::1424380312 -25::97::2::1424380312 -26::0::1::1424380312 -26::1::1::1424380312 -26::2::1::1424380312 -26::3::1::1424380312 -26::4::4::1424380312 -26::5::2::1424380312 -26::6::3::1424380312 -26::7::5::1424380312 -26::13::3::1424380312 -26::14::1::1424380312 -26::16::1::1424380312 -26::18::3::1424380312 -26::20::1::1424380312 -26::21::3::1424380312 -26::22::5::1424380312 -26::23::5::1424380312 -26::24::5::1424380312 -26::27::1::1424380312 -26::31::1::1424380312 -26::35::1::1424380312 -26::36::4::1424380312 -26::40::1::1424380312 -26::44::1::1424380312 -26::45::2::1424380312 -26::47::1::1424380312 -26::48::1::1424380312 -26::49::3::1424380312 -26::50::2::1424380312 -26::52::1::1424380312 -26::54::4::1424380312 -26::55::1::1424380312 -26::57::3::1424380312 -26::58::1::1424380312 -26::61::1::1424380312 -26::62::2::1424380312 -26::66::1::1424380312 -26::68::4::1424380312 -26::71::1::1424380312 -26::73::4::1424380312 -26::76::1::1424380312 -26::81::3::1424380312 -26::85::1::1424380312 -26::86::3::1424380312 -26::88::5::1424380312 -26::91::1::1424380312 -26::94::5::1424380312 -26::95::1::1424380312 -26::96::1::1424380312 -26::97::1::1424380312 -27::0::1::1424380312 -27::9::1::1424380312 -27::10::1::1424380312 -27::18::4::1424380312 -27::19::3::1424380312 -27::20::1::1424380312 -27::22::2::1424380312 -27::24::2::1424380312 -27::25::1::1424380312 -27::27::3::1424380312 -27::28::1::1424380312 -27::29::1::1424380312 -27::31::1::1424380312 -27::33::3::1424380312 -27::40::1::1424380312 -27::42::1::1424380312 -27::43::1::1424380312 -27::44::3::1424380312 -27::45::1::1424380312 -27::51::3::1424380312 -27::52::1::1424380312 -27::55::3::1424380312 -27::57::1::1424380312 -27::59::1::1424380312 -27::60::1::1424380312 -27::61::1::1424380312 -27::64::1::1424380312 -27::66::3::1424380312 -27::68::1::1424380312 -27::70::1::1424380312 -27::71::2::1424380312 -27::72::1::1424380312 -27::75::3::1424380312 -27::78::1::1424380312 -27::80::3::1424380312 -27::82::1::1424380312 -27::83::3::1424380312 -27::86::1::1424380312 -27::87::2::1424380312 -27::90::1::1424380312 -27::91::1::1424380312 -27::92::1::1424380312 -27::93::1::1424380312 -27::94::2::1424380312 -27::95::1::1424380312 -27::98::1::1424380312 -28::0::3::1424380312 -28::1::1::1424380312 -28::2::4::1424380312 -28::3::1::1424380312 -28::6::1::1424380312 -28::7::1::1424380312 -28::12::5::1424380312 -28::13::2::1424380312 -28::14::1::1424380312 -28::15::1::1424380312 -28::17::1::1424380312 -28::19::3::1424380312 -28::20::1::1424380312 -28::23::3::1424380312 -28::24::3::1424380312 -28::27::1::1424380312 -28::29::1::1424380312 -28::33::1::1424380312 -28::34::1::1424380312 -28::36::1::1424380312 -28::38::2::1424380312 -28::39::2::1424380312 -28::44::1::1424380312 -28::45::1::1424380312 -28::49::4::1424380312 -28::50::1::1424380312 -28::52::1::1424380312 -28::54::1::1424380312 -28::56::1::1424380312 -28::57::3::1424380312 -28::58::1::1424380312 -28::59::1::1424380312 -28::60::1::1424380312 -28::62::3::1424380312 -28::63::1::1424380312 -28::65::1::1424380312 -28::75::1::1424380312 -28::78::1::1424380312 -28::81::5::1424380312 -28::82::4::1424380312 -28::83::1::1424380312 -28::85::1::1424380312 -28::88::2::1424380312 -28::89::4::1424380312 -28::90::1::1424380312 -28::92::5::1424380312 -28::94::1::1424380312 -28::95::2::1424380312 -28::98::1::1424380312 -28::99::1::1424380312 -29::3::1::1424380312 -29::4::1::1424380312 -29::5::1::1424380312 -29::7::2::1424380312 -29::9::1::1424380312 -29::10::3::1424380312 -29::11::1::1424380312 -29::13::3::1424380312 -29::14::1::1424380312 -29::15::1::1424380312 -29::17::3::1424380312 -29::19::3::1424380312 -29::22::3::1424380312 -29::23::4::1424380312 -29::25::1::1424380312 -29::29::1::1424380312 -29::31::1::1424380312 -29::32::4::1424380312 -29::33::2::1424380312 -29::36::2::1424380312 -29::38::3::1424380312 -29::39::1::1424380312 -29::42::1::1424380312 -29::46::5::1424380312 -29::49::3::1424380312 -29::51::2::1424380312 -29::59::1::1424380312 -29::61::1::1424380312 -29::62::1::1424380312 -29::67::1::1424380312 -29::68::3::1424380312 -29::69::1::1424380312 -29::70::1::1424380312 -29::74::1::1424380312 -29::75::1::1424380312 -29::79::2::1424380312 -29::80::1::1424380312 -29::81::2::1424380312 -29::83::1::1424380312 -29::85::1::1424380312 -29::86::1::1424380312 -29::90::4::1424380312 -29::93::1::1424380312 -29::94::4::1424380312 -29::97::1::1424380312 -29::99::1::1424380312 diff --git a/Fase 4/Analise de Negocios/Aula2 - Ligacao Banco de Dados/1_Ligacao_banco_dados.ipynb b/Analise de Negocios/Aula2 - Ligacao Banco de Dados/1_Ligacao_banco_dados.ipynb similarity index 100% rename from Fase 4/Analise de Negocios/Aula2 - Ligacao Banco de Dados/1_Ligacao_banco_dados.ipynb rename to Analise de Negocios/Aula2 - Ligacao Banco de Dados/1_Ligacao_banco_dados.ipynb diff --git a/Fase 4/Analise de Negocios/Aula2 - Ligacao Banco de Dados/PostgreSQL-logo.png b/Analise de Negocios/Aula2 - Ligacao Banco de Dados/PostgreSQL-logo.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula2 - Ligacao Banco de Dados/PostgreSQL-logo.png rename to Analise de Negocios/Aula2 - Ligacao Banco de Dados/PostgreSQL-logo.png diff --git a/Fase 4/Analise de Negocios/Aula2 - Ligacao Banco de Dados/anp.png b/Analise de Negocios/Aula2 - Ligacao Banco de Dados/anp.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula2 - Ligacao Banco de Dados/anp.png rename to Analise de Negocios/Aula2 - Ligacao Banco de Dados/anp.png diff --git a/Fase 4/Analise de Negocios/Aula2 - Ligacao Banco de Dados/knime.png b/Analise de Negocios/Aula2 - Ligacao Banco de Dados/knime.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula2 - Ligacao Banco de Dados/knime.png rename to Analise de Negocios/Aula2 - Ligacao Banco de Dados/knime.png diff --git a/Fase 4/Analise de Negocios/Aula2 - Ligacao Banco de Dados/knime2.png b/Analise de Negocios/Aula2 - Ligacao Banco de Dados/knime2.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula2 - Ligacao Banco de Dados/knime2.png rename to Analise de Negocios/Aula2 - Ligacao Banco de Dados/knime2.png diff --git a/Fase 4/Analise de Negocios/Aula2 - Ligacao Banco de Dados/knime3.png b/Analise de Negocios/Aula2 - Ligacao Banco de Dados/knime3.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula2 - Ligacao Banco de Dados/knime3.png rename to Analise de Negocios/Aula2 - Ligacao Banco de Dados/knime3.png diff --git a/Fase 4/Analise de Negocios/Aula2 - Ligacao Banco de Dados/postgre1.png b/Analise de Negocios/Aula2 - Ligacao Banco de Dados/postgre1.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula2 - Ligacao Banco de Dados/postgre1.png rename to Analise de Negocios/Aula2 - Ligacao Banco de Dados/postgre1.png diff --git a/Fase 4/Analise de Negocios/Aula2 - Ligacao Banco de Dados/preco_combustivel.jpeg b/Analise de Negocios/Aula2 - Ligacao Banco de Dados/preco_combustivel.jpeg similarity index 100% rename from Fase 4/Analise de Negocios/Aula2 - Ligacao Banco de Dados/preco_combustivel.jpeg rename to Analise de Negocios/Aula2 - Ligacao Banco de Dados/preco_combustivel.jpeg diff --git a/Fase 4/Analise de Negocios/Aula2 - Ligacao Banco de Dados/tabela_base_dados.png b/Analise de Negocios/Aula2 - Ligacao Banco de Dados/tabela_base_dados.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula2 - Ligacao Banco de Dados/tabela_base_dados.png rename to Analise de Negocios/Aula2 - Ligacao Banco de Dados/tabela_base_dados.png diff --git a/Fase 4/Analise de Negocios/Aula3 - Geracao Insights/2_Geracao_Insights.ipynb b/Analise de Negocios/Aula3 - Geracao Insights/2_Geracao_Insights.ipynb similarity index 100% rename from Fase 4/Analise de Negocios/Aula3 - Geracao Insights/2_Geracao_Insights.ipynb rename to Analise de Negocios/Aula3 - Geracao Insights/2_Geracao_Insights.ipynb diff --git a/Fase 4/Analise de Negocios/Aula3 - Geracao Insights/PostgreSQL-logo.png b/Analise de Negocios/Aula3 - Geracao Insights/PostgreSQL-logo.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula3 - Geracao Insights/PostgreSQL-logo.png rename to Analise de Negocios/Aula3 - Geracao Insights/PostgreSQL-logo.png diff --git a/Fase 4/Analise de Negocios/Aula3 - Geracao Insights/anp.png b/Analise de Negocios/Aula3 - Geracao Insights/anp.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula3 - Geracao Insights/anp.png rename to Analise de Negocios/Aula3 - Geracao Insights/anp.png diff --git a/Fase 4/Analise de Negocios/Aula3 - Geracao Insights/knime.png b/Analise de Negocios/Aula3 - Geracao Insights/knime.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula3 - Geracao Insights/knime.png rename to Analise de Negocios/Aula3 - Geracao Insights/knime.png diff --git a/Fase 4/Analise de Negocios/Aula3 - Geracao Insights/knime2.png b/Analise de Negocios/Aula3 - Geracao Insights/knime2.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula3 - Geracao Insights/knime2.png rename to Analise de Negocios/Aula3 - Geracao Insights/knime2.png diff --git a/Fase 4/Analise de Negocios/Aula3 - Geracao Insights/knime3.png b/Analise de Negocios/Aula3 - Geracao Insights/knime3.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula3 - Geracao Insights/knime3.png rename to Analise de Negocios/Aula3 - Geracao Insights/knime3.png diff --git a/Fase 4/Analise de Negocios/Aula3 - Geracao Insights/postgre1.png b/Analise de Negocios/Aula3 - Geracao Insights/postgre1.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula3 - Geracao Insights/postgre1.png rename to Analise de Negocios/Aula3 - Geracao Insights/postgre1.png diff --git a/Fase 4/Analise de Negocios/Aula3 - Geracao Insights/preco_combustivel.jpeg b/Analise de Negocios/Aula3 - Geracao Insights/preco_combustivel.jpeg similarity index 100% rename from Fase 4/Analise de Negocios/Aula3 - Geracao Insights/preco_combustivel.jpeg rename to Analise de Negocios/Aula3 - Geracao Insights/preco_combustivel.jpeg diff --git a/Fase 4/Analise de Negocios/Aula3 - Geracao Insights/tabela_base_dados.png b/Analise de Negocios/Aula3 - Geracao Insights/tabela_base_dados.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula3 - Geracao Insights/tabela_base_dados.png rename to Analise de Negocios/Aula3 - Geracao Insights/tabela_base_dados.png diff --git a/Fase 4/Analise de Negocios/Aula4 - Tecnicas Visualizacao/3_Tecnicas_Visualizacao.ipynb b/Analise de Negocios/Aula4 - Tecnicas Visualizacao/3_Tecnicas_Visualizacao.ipynb similarity index 100% rename from Fase 4/Analise de Negocios/Aula4 - Tecnicas Visualizacao/3_Tecnicas_Visualizacao.ipynb rename to Analise de Negocios/Aula4 - Tecnicas Visualizacao/3_Tecnicas_Visualizacao.ipynb diff --git a/Fase 4/Analise de Negocios/Aula4 - Tecnicas Visualizacao/PostgreSQL-logo.png b/Analise de Negocios/Aula4 - Tecnicas Visualizacao/PostgreSQL-logo.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula4 - Tecnicas Visualizacao/PostgreSQL-logo.png rename to Analise de Negocios/Aula4 - Tecnicas Visualizacao/PostgreSQL-logo.png diff --git a/Fase 4/Analise de Negocios/Aula4 - Tecnicas Visualizacao/anp.png b/Analise de Negocios/Aula4 - Tecnicas Visualizacao/anp.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula4 - Tecnicas Visualizacao/anp.png rename to Analise de Negocios/Aula4 - Tecnicas Visualizacao/anp.png diff --git a/Fase 4/Analise de Negocios/Aula4 - Tecnicas Visualizacao/knime.png b/Analise de Negocios/Aula4 - Tecnicas Visualizacao/knime.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula4 - Tecnicas Visualizacao/knime.png rename to Analise de Negocios/Aula4 - Tecnicas Visualizacao/knime.png diff --git a/Fase 4/Analise de Negocios/Aula4 - Tecnicas Visualizacao/knime2.png b/Analise de Negocios/Aula4 - Tecnicas Visualizacao/knime2.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula4 - Tecnicas Visualizacao/knime2.png rename to Analise de Negocios/Aula4 - Tecnicas Visualizacao/knime2.png diff --git a/Fase 4/Analise de Negocios/Aula4 - Tecnicas Visualizacao/knime3.png b/Analise de Negocios/Aula4 - Tecnicas Visualizacao/knime3.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula4 - Tecnicas Visualizacao/knime3.png rename to Analise de Negocios/Aula4 - Tecnicas Visualizacao/knime3.png diff --git a/Fase 4/Analise de Negocios/Aula4 - Tecnicas Visualizacao/postgre1.png b/Analise de Negocios/Aula4 - Tecnicas Visualizacao/postgre1.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula4 - Tecnicas Visualizacao/postgre1.png rename to Analise de Negocios/Aula4 - Tecnicas Visualizacao/postgre1.png diff --git a/Fase 4/Analise de Negocios/Aula4 - Tecnicas Visualizacao/preco_combustivel.jpeg b/Analise de Negocios/Aula4 - Tecnicas Visualizacao/preco_combustivel.jpeg similarity index 100% rename from Fase 4/Analise de Negocios/Aula4 - Tecnicas Visualizacao/preco_combustivel.jpeg rename to Analise de Negocios/Aula4 - Tecnicas Visualizacao/preco_combustivel.jpeg diff --git a/Fase 4/Analise de Negocios/Aula4 - Tecnicas Visualizacao/tabela_base_dados.png b/Analise de Negocios/Aula4 - Tecnicas Visualizacao/tabela_base_dados.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula4 - Tecnicas Visualizacao/tabela_base_dados.png rename to Analise de Negocios/Aula4 - Tecnicas Visualizacao/tabela_base_dados.png diff --git a/Fase 4/Analise de Negocios/Aula5 - Dashboard Storytelling/analise_negocios_fiap.pbix b/Analise de Negocios/Aula5 - Dashboard Storytelling/analise_negocios_fiap.pbix similarity index 100% rename from Fase 4/Analise de Negocios/Aula5 - Dashboard Storytelling/analise_negocios_fiap.pbix rename to Analise de Negocios/Aula5 - Dashboard Storytelling/analise_negocios_fiap.pbix diff --git a/Fase 4/Analise de Negocios/Aula5 - Dashboard Storytelling/combustivel-fossil.png b/Analise de Negocios/Aula5 - Dashboard Storytelling/combustivel-fossil.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula5 - Dashboard Storytelling/combustivel-fossil.png rename to Analise de Negocios/Aula5 - Dashboard Storytelling/combustivel-fossil.png diff --git a/Fase 4/Analise de Negocios/Aula5 - Dashboard Storytelling/combustivel.png b/Analise de Negocios/Aula5 - Dashboard Storytelling/combustivel.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula5 - Dashboard Storytelling/combustivel.png rename to Analise de Negocios/Aula5 - Dashboard Storytelling/combustivel.png diff --git a/Fase 4/Analise de Negocios/Aula5 - Dashboard Storytelling/combustivel1.png b/Analise de Negocios/Aula5 - Dashboard Storytelling/combustivel1.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula5 - Dashboard Storytelling/combustivel1.png rename to Analise de Negocios/Aula5 - Dashboard Storytelling/combustivel1.png diff --git a/Fase 4/Analise de Negocios/Aula5 - Dashboard Storytelling/filtro-limpo.png b/Analise de Negocios/Aula5 - Dashboard Storytelling/filtro-limpo.png similarity index 100% rename from Fase 4/Analise de Negocios/Aula5 - Dashboard Storytelling/filtro-limpo.png rename to Analise de Negocios/Aula5 - Dashboard Storytelling/filtro-limpo.png diff --git a/LICENSE b/LICENSE deleted file mode 100644 index be2367d..0000000 --- a/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2023 FIAP - Faculdade de Informática e Administração Paulista - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/PySpark_Column_Operations.ipynb b/PySpark_Column_Operations.ipynb deleted file mode 100644 index 18b6cf1..0000000 --- a/PySpark_Column_Operations.ipynb +++ /dev/null @@ -1,523 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# Creating New Column in PySpark DataFrame" - ], - "metadata": { - "id": "fp0aNsCIYV4Y" - } - }, - { - "cell_type": "markdown", - "source": [ - "## Installing Libraries" - ], - "metadata": { - "id": "lTIwmVXEYvqO" - } - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "cDM0Ro4QYPk6", - "outputId": "f8604fd9-62d7-4028-b878-22b19232e7e9" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Collecting pyspark\n", - " Downloading pyspark-3.4.1.tar.gz (310.8 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m310.8/310.8 MB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: py4j==0.10.9.7 in /usr/local/lib/python3.10/dist-packages (from pyspark) (0.10.9.7)\n", - "Building wheels for collected packages: pyspark\n", - " Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for pyspark: filename=pyspark-3.4.1-py2.py3-none-any.whl size=311285398 sha256=9d5e11cdda8c083c4d6eb0805f2784034981f8e4973d28bf226c6ddba9174b07\n", - " Stored in directory: /root/.cache/pip/wheels/0d/77/a3/ff2f74cc9ab41f8f594dabf0579c2a7c6de920d584206e0834\n", - "Successfully built pyspark\n", - "Installing collected packages: pyspark\n", - "Successfully installed pyspark-3.4.1\n" - ] - } - ], - "source": [ - "!pip install pyspark" - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Importing the Libraries" - ], - "metadata": { - "id": "1TZXyu1vYzEX" - } - }, - { - "cell_type": "code", - "source": [ - "from pyspark.sql import SparkSession" - ], - "metadata": { - "id": "J2RVV_40Ygnv" - }, - "execution_count": 2, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## Creating Spark Session" - ], - "metadata": { - "id": "3RmGsf2WY42f" - } - }, - { - "cell_type": "code", - "source": [ - "spark = SparkSession.builder.appName('PySpark Column Ops').getOrCreate()" - ], - "metadata": { - "id": "3gN4uCH6YkY8" - }, - "execution_count": 3, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## Reading the Dataset" - ], - "metadata": { - "id": "1CVP3cmxY6u7" - } - }, - { - "cell_type": "code", - "source": [ - "df = spark.read.csv('Fish.csv', sep = ',', inferSchema = True, header = True)" - ], - "metadata": { - "id": "sqAlMbX1YmpR" - }, - "execution_count": 5, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## Checking the Imported Dataset" - ], - "metadata": { - "id": "06IlRwQwY8uz" - } - }, - { - "cell_type": "code", - "source": [ - "df.show()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "2t_6cc-NY-KE", - "outputId": "00bfd217-ef19-493b-c56b-184ae7777c38" - }, - "execution_count": 6, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "+-------+------+-------+-------+-------+-------+------+\n", - "|Species|Weight|Length1|Length2|Length3| Height| Width|\n", - "+-------+------+-------+-------+-------+-------+------+\n", - "| Bream| 242.0| 23.2| 25.4| 30.0| 11.52| 4.02|\n", - "| Bream| 290.0| 24.0| 26.3| 31.2| 12.48|4.3056|\n", - "| Bream| 340.0| 23.9| 26.5| 31.1|12.3778|4.6961|\n", - "| Bream| 363.0| 26.3| 29.0| 33.5| 12.73|4.4555|\n", - "| Bream| 430.0| 26.5| 29.0| 34.0| 12.444| 5.134|\n", - "| Bream| 450.0| 26.8| 29.7| 34.7|13.6024|4.9274|\n", - "| Bream| 500.0| 26.8| 29.7| 34.5|14.1795|5.2785|\n", - "| Bream| 390.0| 27.6| 30.0| 35.0| 12.67| 4.69|\n", - "| Bream| 450.0| 27.6| 30.0| 35.1|14.0049|4.8438|\n", - "| Bream| 500.0| 28.5| 30.7| 36.2|14.2266|4.9594|\n", - "| Bream| 475.0| 28.4| 31.0| 36.2|14.2628|5.1042|\n", - "| Bream| 500.0| 28.7| 31.0| 36.2|14.3714|4.8146|\n", - "| Bream| 500.0| 29.1| 31.5| 36.4|13.7592| 4.368|\n", - "| Bream| 340.0| 29.5| 32.0| 37.3|13.9129|5.0728|\n", - "| Bream| 600.0| 29.4| 32.0| 37.2|14.9544|5.1708|\n", - "| Bream| 600.0| 29.4| 32.0| 37.2| 15.438| 5.58|\n", - "| Bream| 700.0| 30.4| 33.0| 38.3|14.8604|5.2854|\n", - "| Bream| 700.0| 30.4| 33.0| 38.5| 14.938|5.1975|\n", - "| Bream| 610.0| 30.9| 33.5| 38.6| 15.633|5.1338|\n", - "| Bream| 650.0| 31.0| 33.5| 38.7|14.4738|5.7276|\n", - "+-------+------+-------+-------+-------+-------+------+\n", - "only showing top 20 rows\n", - "\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Creating a New Column" - ], - "metadata": { - "id": "fCiQ5-e6Y_uD" - } - }, - { - "cell_type": "code", - "source": [ - "df = df.withColumn('Weight in Kg', df.Weight/1000)" - ], - "metadata": { - "id": "OoRAx0zKZD0r" - }, - "execution_count": 7, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## Checking the Updated DataFrame\n" - ], - "metadata": { - "id": "lhwSPTh-ZHGH" - } - }, - { - "cell_type": "code", - "source": [ - "df.show()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "1btXaw6vZJfv", - "outputId": "f0dc6e23-02e0-426c-f165-7eca346d5c95" - }, - "execution_count": 8, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "+-------+------+-------+-------+-------+-------+------+------------+\n", - "|Species|Weight|Length1|Length2|Length3| Height| Width|Weight in Kg|\n", - "+-------+------+-------+-------+-------+-------+------+------------+\n", - "| Bream| 242.0| 23.2| 25.4| 30.0| 11.52| 4.02| 0.242|\n", - "| Bream| 290.0| 24.0| 26.3| 31.2| 12.48|4.3056| 0.29|\n", - "| Bream| 340.0| 23.9| 26.5| 31.1|12.3778|4.6961| 0.34|\n", - "| Bream| 363.0| 26.3| 29.0| 33.5| 12.73|4.4555| 0.363|\n", - "| Bream| 430.0| 26.5| 29.0| 34.0| 12.444| 5.134| 0.43|\n", - "| Bream| 450.0| 26.8| 29.7| 34.7|13.6024|4.9274| 0.45|\n", - "| Bream| 500.0| 26.8| 29.7| 34.5|14.1795|5.2785| 0.5|\n", - "| Bream| 390.0| 27.6| 30.0| 35.0| 12.67| 4.69| 0.39|\n", - "| Bream| 450.0| 27.6| 30.0| 35.1|14.0049|4.8438| 0.45|\n", - "| Bream| 500.0| 28.5| 30.7| 36.2|14.2266|4.9594| 0.5|\n", - "| Bream| 475.0| 28.4| 31.0| 36.2|14.2628|5.1042| 0.475|\n", - "| Bream| 500.0| 28.7| 31.0| 36.2|14.3714|4.8146| 0.5|\n", - "| Bream| 500.0| 29.1| 31.5| 36.4|13.7592| 4.368| 0.5|\n", - "| Bream| 340.0| 29.5| 32.0| 37.3|13.9129|5.0728| 0.34|\n", - "| Bream| 600.0| 29.4| 32.0| 37.2|14.9544|5.1708| 0.6|\n", - "| Bream| 600.0| 29.4| 32.0| 37.2| 15.438| 5.58| 0.6|\n", - "| Bream| 700.0| 30.4| 33.0| 38.3|14.8604|5.2854| 0.7|\n", - "| Bream| 700.0| 30.4| 33.0| 38.5| 14.938|5.1975| 0.7|\n", - "| Bream| 610.0| 30.9| 33.5| 38.6| 15.633|5.1338| 0.61|\n", - "| Bream| 650.0| 31.0| 33.5| 38.7|14.4738|5.7276| 0.65|\n", - "+-------+------+-------+-------+-------+-------+------+------------+\n", - "only showing top 20 rows\n", - "\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Renaming an Existing Column in PySpark DataFrame\n" - ], - "metadata": { - "id": "4N0umJ_BZLz3" - } - }, - { - "cell_type": "markdown", - "source": [ - "# Checking the Current PySpark DataFrame" - ], - "metadata": { - "id": "vrEzPB7IZORL" - } - }, - { - "cell_type": "code", - "source": [ - "df.show()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "DqYVlcNUZN7p", - "outputId": "7103be5a-8881-40c3-e3aa-e3e8e0fe7705" - }, - "execution_count": 9, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "+-------+------+-------+-------+-------+-------+------+------------+\n", - "|Species|Weight|Length1|Length2|Length3| Height| Width|Weight in Kg|\n", - "+-------+------+-------+-------+-------+-------+------+------------+\n", - "| Bream| 242.0| 23.2| 25.4| 30.0| 11.52| 4.02| 0.242|\n", - "| Bream| 290.0| 24.0| 26.3| 31.2| 12.48|4.3056| 0.29|\n", - "| Bream| 340.0| 23.9| 26.5| 31.1|12.3778|4.6961| 0.34|\n", - "| Bream| 363.0| 26.3| 29.0| 33.5| 12.73|4.4555| 0.363|\n", - "| Bream| 430.0| 26.5| 29.0| 34.0| 12.444| 5.134| 0.43|\n", - "| Bream| 450.0| 26.8| 29.7| 34.7|13.6024|4.9274| 0.45|\n", - "| Bream| 500.0| 26.8| 29.7| 34.5|14.1795|5.2785| 0.5|\n", - "| Bream| 390.0| 27.6| 30.0| 35.0| 12.67| 4.69| 0.39|\n", - "| Bream| 450.0| 27.6| 30.0| 35.1|14.0049|4.8438| 0.45|\n", - "| Bream| 500.0| 28.5| 30.7| 36.2|14.2266|4.9594| 0.5|\n", - "| Bream| 475.0| 28.4| 31.0| 36.2|14.2628|5.1042| 0.475|\n", - "| Bream| 500.0| 28.7| 31.0| 36.2|14.3714|4.8146| 0.5|\n", - "| Bream| 500.0| 29.1| 31.5| 36.4|13.7592| 4.368| 0.5|\n", - "| Bream| 340.0| 29.5| 32.0| 37.3|13.9129|5.0728| 0.34|\n", - "| Bream| 600.0| 29.4| 32.0| 37.2|14.9544|5.1708| 0.6|\n", - "| Bream| 600.0| 29.4| 32.0| 37.2| 15.438| 5.58| 0.6|\n", - "| Bream| 700.0| 30.4| 33.0| 38.3|14.8604|5.2854| 0.7|\n", - "| Bream| 700.0| 30.4| 33.0| 38.5| 14.938|5.1975| 0.7|\n", - "| Bream| 610.0| 30.9| 33.5| 38.6| 15.633|5.1338| 0.61|\n", - "| Bream| 650.0| 31.0| 33.5| 38.7|14.4738|5.7276| 0.65|\n", - "+-------+------+-------+-------+-------+-------+------+------------+\n", - "only showing top 20 rows\n", - "\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Renaming the Column\n" - ], - "metadata": { - "id": "zjHt7IsIZS1A" - } - }, - { - "cell_type": "code", - "source": [ - "df = df.withColumnRenamed(\"Weight in Kg\", \"Weight in Kilograms\")" - ], - "metadata": { - "id": "vZDmqdEFZWOV" - }, - "execution_count": 17, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## Checking the updated PySpark DataFrame" - ], - "metadata": { - "id": "DIQa3FRBZYGb" - } - }, - { - "cell_type": "code", - "source": [ - "df.show()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "c_UHQ8XWZiVF", - "outputId": "b087cb0b-681e-4ac5-91aa-b8e32435c428" - }, - "execution_count": 11, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "+-------+------+-------+-------+-------+-------+------+-------------------+\n", - "|Species|Weight|Length1|Length2|Length3| Height| Width|Weight in Kilograms|\n", - "+-------+------+-------+-------+-------+-------+------+-------------------+\n", - "| Bream| 242.0| 23.2| 25.4| 30.0| 11.52| 4.02| 0.242|\n", - "| Bream| 290.0| 24.0| 26.3| 31.2| 12.48|4.3056| 0.29|\n", - "| Bream| 340.0| 23.9| 26.5| 31.1|12.3778|4.6961| 0.34|\n", - "| Bream| 363.0| 26.3| 29.0| 33.5| 12.73|4.4555| 0.363|\n", - "| Bream| 430.0| 26.5| 29.0| 34.0| 12.444| 5.134| 0.43|\n", - "| Bream| 450.0| 26.8| 29.7| 34.7|13.6024|4.9274| 0.45|\n", - "| Bream| 500.0| 26.8| 29.7| 34.5|14.1795|5.2785| 0.5|\n", - "| Bream| 390.0| 27.6| 30.0| 35.0| 12.67| 4.69| 0.39|\n", - "| Bream| 450.0| 27.6| 30.0| 35.1|14.0049|4.8438| 0.45|\n", - "| Bream| 500.0| 28.5| 30.7| 36.2|14.2266|4.9594| 0.5|\n", - "| Bream| 475.0| 28.4| 31.0| 36.2|14.2628|5.1042| 0.475|\n", - "| Bream| 500.0| 28.7| 31.0| 36.2|14.3714|4.8146| 0.5|\n", - "| Bream| 500.0| 29.1| 31.5| 36.4|13.7592| 4.368| 0.5|\n", - "| Bream| 340.0| 29.5| 32.0| 37.3|13.9129|5.0728| 0.34|\n", - "| Bream| 600.0| 29.4| 32.0| 37.2|14.9544|5.1708| 0.6|\n", - "| Bream| 600.0| 29.4| 32.0| 37.2| 15.438| 5.58| 0.6|\n", - "| Bream| 700.0| 30.4| 33.0| 38.3|14.8604|5.2854| 0.7|\n", - "| Bream| 700.0| 30.4| 33.0| 38.5| 14.938|5.1975| 0.7|\n", - "| Bream| 610.0| 30.9| 33.5| 38.6| 15.633|5.1338| 0.61|\n", - "| Bream| 650.0| 31.0| 33.5| 38.7|14.4738|5.7276| 0.65|\n", - "+-------+------+-------+-------+-------+-------+------+-------------------+\n", - "only showing top 20 rows\n", - "\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Creating a Column Alias in PySpark DataFrame" - ], - "metadata": { - "id": "fs6mwFAUZkrz" - } - }, - { - "cell_type": "code", - "source": [ - "df.show()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "HGG4KSgNZmnZ", - "outputId": "c8156606-03a5-47d8-ead6-14fbb9f6db48" - }, - "execution_count": 12, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "+-------+------+-------+-------+-------+-------+------+-------------------+\n", - "|Species|Weight|Length1|Length2|Length3| Height| Width|Weight in Kilograms|\n", - "+-------+------+-------+-------+-------+-------+------+-------------------+\n", - "| Bream| 242.0| 23.2| 25.4| 30.0| 11.52| 4.02| 0.242|\n", - "| Bream| 290.0| 24.0| 26.3| 31.2| 12.48|4.3056| 0.29|\n", - "| Bream| 340.0| 23.9| 26.5| 31.1|12.3778|4.6961| 0.34|\n", - "| Bream| 363.0| 26.3| 29.0| 33.5| 12.73|4.4555| 0.363|\n", - "| Bream| 430.0| 26.5| 29.0| 34.0| 12.444| 5.134| 0.43|\n", - "| Bream| 450.0| 26.8| 29.7| 34.7|13.6024|4.9274| 0.45|\n", - "| Bream| 500.0| 26.8| 29.7| 34.5|14.1795|5.2785| 0.5|\n", - "| Bream| 390.0| 27.6| 30.0| 35.0| 12.67| 4.69| 0.39|\n", - "| Bream| 450.0| 27.6| 30.0| 35.1|14.0049|4.8438| 0.45|\n", - "| Bream| 500.0| 28.5| 30.7| 36.2|14.2266|4.9594| 0.5|\n", - "| Bream| 475.0| 28.4| 31.0| 36.2|14.2628|5.1042| 0.475|\n", - "| Bream| 500.0| 28.7| 31.0| 36.2|14.3714|4.8146| 0.5|\n", - "| Bream| 500.0| 29.1| 31.5| 36.4|13.7592| 4.368| 0.5|\n", - "| Bream| 340.0| 29.5| 32.0| 37.3|13.9129|5.0728| 0.34|\n", - "| Bream| 600.0| 29.4| 32.0| 37.2|14.9544|5.1708| 0.6|\n", - "| Bream| 600.0| 29.4| 32.0| 37.2| 15.438| 5.58| 0.6|\n", - "| Bream| 700.0| 30.4| 33.0| 38.3|14.8604|5.2854| 0.7|\n", - "| Bream| 700.0| 30.4| 33.0| 38.5| 14.938|5.1975| 0.7|\n", - "| Bream| 610.0| 30.9| 33.5| 38.6| 15.633|5.1338| 0.61|\n", - "| Bream| 650.0| 31.0| 33.5| 38.7|14.4738|5.7276| 0.65|\n", - "+-------+------+-------+-------+-------+-------+------+-------------------+\n", - "only showing top 20 rows\n", - "\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Creating Column Column Alias\n", - "\n" - ], - "metadata": { - "id": "HfDsR4q4aAzS" - } - }, - { - "cell_type": "code", - "source": [ - "df.select(df['Weight in Kilograms'].alias(\"Kilograms\")).show()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ohVBLfInaC0J", - "outputId": "e42cbe8c-359f-403e-abed-0e23c793e4d0" - }, - "execution_count": 16, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "+---------+\n", - "|Kilograms|\n", - "+---------+\n", - "| 0.242|\n", - "| 0.29|\n", - "| 0.34|\n", - "| 0.363|\n", - "| 0.43|\n", - "| 0.45|\n", - "| 0.5|\n", - "| 0.39|\n", - "| 0.45|\n", - "| 0.5|\n", - "| 0.475|\n", - "| 0.5|\n", - "| 0.5|\n", - "| 0.34|\n", - "| 0.6|\n", - "| 0.6|\n", - "| 0.7|\n", - "| 0.7|\n", - "| 0.61|\n", - "| 0.65|\n", - "+---------+\n", - "only showing top 20 rows\n", - "\n" - ] - } - ] - } - ] -} \ No newline at end of file diff --git a/PySpark_DataManipulation.ipynb b/PySpark_DataManipulation.ipynb deleted file mode 100644 index 254a32f..0000000 --- a/PySpark_DataManipulation.ipynb +++ /dev/null @@ -1,1032 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "lihDEjAhyqca" - }, - "source": [ - "# Spark 101 - Data Manipulation using Spark\n", - "
" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "o9t08dFEeA9W" - }, - "source": [ - "## **1. Running Spark in Colab**" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "e8dsXbdzzHDc" - }, - "source": [ - "### 1.1 Initialize Spark" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "anLHk85lyQ5G", - "outputId": "e1b1f057-ac20-4a87-a1bb-8e910fe3dd22" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting pyspark\n", - " Downloading pyspark-3.4.1.tar.gz (310.8 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m310.8/310.8 MB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: py4j==0.10.9.7 in /usr/local/lib/python3.10/dist-packages (from pyspark) (0.10.9.7)\n", - "Building wheels for collected packages: pyspark\n", - " Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for pyspark: filename=pyspark-3.4.1-py2.py3-none-any.whl size=311285398 sha256=5377e2774d9b2079e8a446797edb3f0cd65734b9f8bdeb07ff2c99f7b9ac184b\n", - " Stored in directory: /root/.cache/pip/wheels/0d/77/a3/ff2f74cc9ab41f8f594dabf0579c2a7c6de920d584206e0834\n", - "Successfully built pyspark\n", - "Installing collected packages: pyspark\n", - "Successfully installed pyspark-3.4.1\n" - ] - } - ], - "source": [ - "!pip install pyspark" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "rt3m6PxECGIY", - "outputId": "698a4c0b-6fc9-4178-fb0b-9df581684e60" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting findspark\n", - " Downloading findspark-2.0.1-py2.py3-none-any.whl (4.4 kB)\n", - "Installing collected packages: findspark\n", - "Successfully installed findspark-2.0.1\n" - ] - } - ], - "source": [ - "!pip install findspark" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "id": "gJ6HCddh0rBI" - }, - "outputs": [], - "source": [ - "import findspark\n", - "findspark.init()\n", - "from pyspark.sql import SparkSession\n", - "spark = SparkSession.builder.master(\"local[*]\").getOrCreate()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "77fvmo9F0szJ", - "outputId": "abdb4099-e584-451e-f3fc-4cc075531564" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+-----+\n", - "|hello|\n", - "+-----+\n", - "|spark|\n", - "+-----+\n", - "\n" - ] - } - ], - "source": [ - "# Check if spark is initialized\n", - "df = spark.sql('''select 'Sucesso total, estamos online!' as hello''')\n", - "df.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "id": "QWbHuaova0PH" - }, - "outputs": [], - "source": [ - "# Import spark libraries\n", - "from pyspark.sql import Row, DataFrame\n", - "from pyspark.sql.types import StringType, StructType, StructField, IntegerType\n", - "from pyspark.sql.functions import col, expr, lit, substring, concat, concat_ws, when, coalesce\n", - "from pyspark.sql import functions as F # for more sql functions\n", - "from functools import reduce" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tDLaLZjzzpWg" - }, - "source": [ - "# Data manipulation using spark" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "1Ql57n4Iz4kX", - "outputId": "ed890508-2f5c-4de4-c1f7-c0f067821804" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "df.count : 561\n", - "df.col ct : 6\n", - "df.columns: ['Bank Name', 'City', 'ST', 'CERT', 'Acquiring Institution', 'Closing Date']\n" - ] - } - ], - "source": [ - "df = spark.read.csv('banklist.csv', sep = ',', inferSchema = True, header = True)\n", - "\n", - "print('df.count :', df.count())\n", - "print('df.col ct :', len(df.columns))\n", - "print('df.columns:', df.columns)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lviT9J99TMmr" - }, - "source": [ - "### **3. Using SQL in spark**" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Jce3V6YyT_tm", - "outputId": "3b1360dc-bbf9-4584-f501-f66baf608816" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------------------------+-------------+------------+\n", - "|Bank Name |City |Closing Date|\n", - "+--------------------------------+-------------+------------+\n", - "|The First State Bank |Barboursville|3-Apr-20 |\n", - "|Ericson State Bank |Ericson |14-Feb-20 |\n", - "|City National Bank of New Jersey|Newark |1-Nov-19 |\n", - "|Resolute Bank |Maumee |25-Oct-19 |\n", - "+--------------------------------+-------------+------------+\n", - "only showing top 4 rows\n", - "\n" - ] - } - ], - "source": [ - "df.createOrReplaceTempView(\"banklist\")\n", - "\n", - "df_check = spark.sql('''select `Bank Name`, City, `Closing Date` from banklist''')\n", - "df_check.show(4, truncate=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "A_h4nezoVoMP" - }, - "source": [ - "## **4 Dataframe Basic Operations**" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NLoqnwVgtucp" - }, - "source": [ - "### 4.1 Describe dataframe" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "LJSOwS4Eturw", - "outputId": "196643d7-35c5-4393-9ee0-95962684f637" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+-------+--------------------+-------+----+-----------------+---------------------+------------+\n", - "|summary| Bank Name| City| ST| CERT|Acquiring Institution|Closing Date|\n", - "+-------+--------------------+-------+----+-----------------+---------------------+------------+\n", - "| count| 561| 561| 561| 561| 561| 561|\n", - "| mean| null| null|null|31685.68449197861| null| null|\n", - "| stddev| null| null|null|16446.65659309965| null| null|\n", - "| min|1st American Stat...|Acworth| AL| 91| 1st United Bank| 1-Aug-08|\n", - "| max| ebank|Wyoming| WY| 58701| Your Community Bank| 9-Sep-11|\n", - "+-------+--------------------+-------+----+-----------------+---------------------+------------+\n", - "\n" - ] - } - ], - "source": [ - "df.describe().show()" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "FwVXN5fZn3Jz", - "outputId": "8a0fa666-647a-4939-934a-8460e06c5447" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+-------+-------+----+\n", - "|summary| City| ST|\n", - "+-------+-------+----+\n", - "| count| 561| 561|\n", - "| mean| null|null|\n", - "| stddev| null|null|\n", - "| min|Acworth| AL|\n", - "| max|Wyoming| WY|\n", - "+-------+-------+----+\n", - "\n" - ] - } - ], - "source": [ - "df.describe('City', 'ST').show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RDht33CnhVL5" - }, - "source": [ - "### 4.2 Counts, Columns and Schema" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "O63DzXi1VoaN", - "outputId": "0961aac2-74f3-4a70-c281-4ed74d575053" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "df.count\t\t: 561\n", - "df.columns\t: ['Bank Name', 'City', 'ST', 'CERT', 'Acquiring Institution', 'Closing Date']\n", - "df dtypes\t: [('Bank Name', 'string'), ('City', 'string'), ('ST', 'string'), ('CERT', 'int'), ('Acquiring Institution', 'string'), ('Closing Date', 'string')]\n", - "df schema 1: StructType([StructField('Bank Name', StringType(), True), StructField('City', StringType(), True), StructField('ST', StringType(), True), StructField('CERT', IntegerType(), True), StructField('Acquiring Institution', StringType(), True), StructField('Closing Date', StringType(), True)])\n" - ] - } - ], - "source": [ - "print('df.count\t\t:', df.count())\n", - "print('df.columns\t:', df.columns)\n", - "print('df dtypes\t:', df.dtypes)\n", - "print('df schema 1:', df.schema)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "AeG5OcIq1vfg", - "outputId": "97ffca11-b0e7-4887-c7ed-a8bd9e2ff5b2" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "df schema 1:\n", - "root\n", - " |-- Bank Name: string (nullable = true)\n", - " |-- City: string (nullable = true)\n", - " |-- ST: string (nullable = true)\n", - " |-- CERT: integer (nullable = true)\n", - " |-- Acquiring Institution: string (nullable = true)\n", - " |-- Closing Date: string (nullable = true)\n", - "\n" - ] - } - ], - "source": [ - "print('df schema 1:')\n", - "df.printSchema()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "OSHkkGVBhd-F" - }, - "source": [ - "### 4.3 Remove Duplicates" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "CcqzJ65IV0Fa", - "outputId": "3b77c35f-4010-4a6a-b409-c764a7000d2f" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "df.count\t\t: 561\n", - "df.columns\t: ['Bank Name', 'City', 'ST', 'CERT', 'Acquiring Institution', 'Closing Date']\n" - ] - } - ], - "source": [ - "df = df.dropDuplicates()\n", - "print('df.count\t\t:', df.count())\n", - "print('df.columns\t:', df.columns)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "olIfm2KIicOn" - }, - "source": [ - "### 4.4 Select specific columns" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "3wBMsReAWSUF", - "outputId": "2d75977f-77d8-4a3f-cb70-780a8ccfd05d" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------------+--------+\n", - "| Bank Name| City|\n", - "+--------------------+--------+\n", - "| First Bank of Idaho| Ketchum|\n", - "|Amcore Bank, Nati...|Rockford|\n", - "+--------------------+--------+\n", - "only showing top 2 rows\n", - "\n" - ] - } - ], - "source": [ - "df2 = df.select(*['Bank Name', 'City'])\n", - "df2.show(2)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WMvFVdP_oK7W" - }, - "source": [ - "### 4.5 Select multiple columns" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "fS3tpP6qoLVu", - "outputId": "73bcacce-8016-4ff8-bae0-0ce89be7f5cd" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+---------------------+--------+------------+--------------------+\n", - "|Acquiring Institution| City|Closing Date| Bank Name|\n", - "+---------------------+--------+------------+--------------------+\n", - "| U.S. Bank, N.A.| Ketchum| 24-Apr-09| First Bank of Idaho|\n", - "| Harris N.A.|Rockford| 23-Apr-10|Amcore Bank, Nati...|\n", - "+---------------------+--------+------------+--------------------+\n", - "only showing top 2 rows\n", - "\n" - ] - } - ], - "source": [ - "col_l = list(set(df.columns) - {'CERT','ST'})\n", - "df2 = df.select(*col_l)\n", - "df2.show(2)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Zfc5uWaoi_R2" - }, - "source": [ - "### 4.6 Rename columns" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "PdOkLYzlisvy", - "outputId": "5172404f-8533-4a28-bb9c-4cdb0576280d" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------------+--------+-----+-----+---------------+------------+\n", - "| bank_name| City|state| cert|acq_institution|closing_date|\n", - "+--------------------+--------+-----+-----+---------------+------------+\n", - "| First Bank of Idaho| Ketchum| ID|34396|U.S. Bank, N.A.| 24-Apr-09|\n", - "|Amcore Bank, Nati...|Rockford| IL| 3735| Harris N.A.| 23-Apr-10|\n", - "+--------------------+--------+-----+-----+---------------+------------+\n", - "only showing top 2 rows\n", - "\n" - ] - } - ], - "source": [ - "df2 = df \\\n", - " .withColumnRenamed('Bank Name' , 'bank_name') \\\n", - " .withColumnRenamed('Acquiring Institution', 'acq_institution') \\\n", - " .withColumnRenamed('Closing Date' , 'closing_date') \\\n", - " .withColumnRenamed('ST' , 'state') \\\n", - " .withColumnRenamed('CERT' , 'cert') #\\\n", - "\n", - "df2.show(2)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "veAUD18tl6oG" - }, - "source": [ - "### 4.7 Rename columns using loop" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "0gnKDBPjjpka", - "outputId": "d72eb7fa-e2cc-48c2-c487-f5eff71b6982" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------------+--------+---+-----+---------------------+------------+\n", - "| Bank_Name| City| ST| CERT|Acquiring_Institution|Closing_Date|\n", - "+--------------------+--------+---+-----+---------------------+------------+\n", - "| First Bank of Idaho| Ketchum| ID|34396| U.S. Bank, N.A.| 24-Apr-09|\n", - "|Amcore Bank, Nati...|Rockford| IL| 3735| Harris N.A.| 23-Apr-10|\n", - "+--------------------+--------+---+-----+---------------------+------------+\n", - "only showing top 2 rows\n", - "\n" - ] - } - ], - "source": [ - "rename_expr = [col(column).alias(column.replace(' ', '_')) for column in df.columns]\n", - "\n", - "df2 = df.select(*rename_expr)\n", - "df2.show(2)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "momxuupJnqPd" - }, - "source": [ - "### 4.8 Add columns" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "uDKwdHg5l_nm", - "outputId": "523ec74f-d6c1-43d2-bd6c-a1ff1b7dde78" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------------+--------+---+-----+---------------------+------------+-----+\n", - "| Bank Name| City| ST| CERT|Acquiring Institution|Closing Date|state|\n", - "+--------------------+--------+---+-----+---------------------+------------+-----+\n", - "| First Bank of Idaho| Ketchum| ID|34396| U.S. Bank, N.A.| 24-Apr-09| ID|\n", - "|Amcore Bank, Nati...|Rockford| IL| 3735| Harris N.A.| 23-Apr-10| IL|\n", - "+--------------------+--------+---+-----+---------------------+------------+-----+\n", - "only showing top 2 rows\n", - "\n" - ] - } - ], - "source": [ - "df2 = df.withColumn('state', col('ST'))\n", - "df2.show(2)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EjkXUUpvou7B" - }, - "source": [ - "### 4.9 Add constant column" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "0-gSou1GnpBd", - "outputId": "50611507-a1ac-4799-f9dd-35582acfd4ea" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------------+--------+---+-----+---------------------+------------+-------+\n", - "| Bank Name| City| ST| CERT|Acquiring Institution|Closing Date|country|\n", - "+--------------------+--------+---+-----+---------------------+------------+-------+\n", - "| First Bank of Idaho| Ketchum| ID|34396| U.S. Bank, N.A.| 24-Apr-09| US|\n", - "|Amcore Bank, Nati...|Rockford| IL| 3735| Harris N.A.| 23-Apr-10| US|\n", - "+--------------------+--------+---+-----+---------------------+------------+-------+\n", - "only showing top 2 rows\n", - "\n" - ] - } - ], - "source": [ - "df2 = df.withColumn('country', lit('US'))\n", - "df2.show(2)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wx7fAtmqZxSH" - }, - "source": [ - "### 4.10 Drop columns" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "fAKuWe7nZxmc", - "outputId": "77278d54-efe0-4c4f-989f-ad3a4f701fea" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------------+--------+---+---------------------+------------+\n", - "| Bank Name| City| ST|Acquiring Institution|Closing Date|\n", - "+--------------------+--------+---+---------------------+------------+\n", - "| First Bank of Idaho| Ketchum| ID| U.S. Bank, N.A.| 24-Apr-09|\n", - "|Amcore Bank, Nati...|Rockford| IL| Harris N.A.| 23-Apr-10|\n", - "+--------------------+--------+---+---------------------+------------+\n", - "only showing top 2 rows\n", - "\n" - ] - } - ], - "source": [ - "df2 = df.drop('CERT')\n", - "df2.show(2)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AG7tuEZpaGll" - }, - "source": [ - "### 4.11 Drop multiple columns" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "56E1uJPQpZJH", - "outputId": "8c3d1e2e-f984-44a5-d6fa-9aa0a013febf" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------------+--------+---------------------+------------+\n", - "| Bank Name| City|Acquiring Institution|Closing Date|\n", - "+--------------------+--------+---------------------+------------+\n", - "| First Bank of Idaho| Ketchum| U.S. Bank, N.A.| 24-Apr-09|\n", - "|Amcore Bank, Nati...|Rockford| Harris N.A.| 23-Apr-10|\n", - "+--------------------+--------+---------------------+------------+\n", - "only showing top 2 rows\n", - "\n" - ] - } - ], - "source": [ - "df2 = df.drop(*['CERT','ST'])\n", - "df2.show(2)" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "WuVEE3SqaFvC", - "outputId": "9b4c70e2-8812-4282-fdc9-84c0dd435bf3" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------------+--------+---------------------+------------+\n", - "| Bank Name| City|Acquiring Institution|Closing Date|\n", - "+--------------------+--------+---------------------+------------+\n", - "| First Bank of Idaho| Ketchum| U.S. Bank, N.A.| 24-Apr-09|\n", - "|Amcore Bank, Nati...|Rockford| Harris N.A.| 23-Apr-10|\n", - "+--------------------+--------+---------------------+------------+\n", - "only showing top 2 rows\n", - "\n" - ] - } - ], - "source": [ - "df2 = reduce(DataFrame.drop, ['CERT','ST'], df)\n", - "df2.show(2)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PhuwX3AwbRiu" - }, - "source": [ - "### 4.12 Filter data" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "yeA5BOpJbD5O", - "outputId": "95398d26-e396-4b64-fae1-feb8a3b56386" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "df.count : 561\n", - "df2.count : 4\n", - "df3.count : 9\n", - "df4.count : 73\n" - ] - } - ], - "source": [ - "# Equal to values\n", - "df2 = df.where(df['ST'] == 'NE')\n", - "\n", - "# Between values\n", - "df3 = df.where(df['CERT'].between('1000','2000'))\n", - "\n", - "# Is inside multiple values\n", - "df4 = df.where(df['ST'].isin('NE','IL'))\n", - "\n", - "print('df.count :', df.count())\n", - "print('df2.count :', df2.count())\n", - "print('df3.count :', df3.count())\n", - "print('df4.count :', df4.count())\n" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "k0fL-hmR7XAU", - "outputId": "0b6c1a97-fc04-48b9-b810-105bfd34df70" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "df2 sample below\n", - "+-------------------+---------+---+-----+---------------------+------------+\n", - "| Bank Name| City| ST| CERT|Acquiring Institution|Closing Date|\n", - "+-------------------+---------+---+-----+---------------------+------------+\n", - "| TierOne Bank| Lincoln| NE|29341| Great Western Bank| 4-Jun-10|\n", - "|Sherman County Bank|Loup City| NE| 5431| Heritage Bank| 13-Feb-09|\n", - "+-------------------+---------+---+-----+---------------------+------------+\n", - "only showing top 2 rows\n", - "\n" - ] - } - ], - "source": [ - "# Equal to values\n", - "df2 = df.where(df['ST'] == 'NE')\n", - "\n", - "print('\\ndf2 sample below')\n", - "df2.show(2)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WvyiT6Virnk0" - }, - "source": [ - "### 4.13 Filter data using logical operators" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "d2jx8gV3q70-", - "outputId": "6ae8b94b-6658-4bbf-bba8-4821dc291c1f" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+------------------+-------+---+-----+---------------------+------------+\n", - "| Bank Name| City| ST| CERT|Acquiring Institution|Closing Date|\n", - "+------------------+-------+---+-----+---------------------+------------+\n", - "|Ericson State Bank|Ericson| NE|18265| Farmers and Merch...| 14-Feb-20|\n", - "+------------------+-------+---+-----+---------------------+------------+\n", - "\n" - ] - } - ], - "source": [ - "df2 = df.where((df['ST'] == 'NE') & (df['City'] == 'Ericson'))\n", - "df2.show(3)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bImSbNBFcsNl" - }, - "source": [ - "### 4.14 Cast datatypes" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "5kUFFW4FcsNp", - "outputId": "36c86cf4-0c22-45ca-ed38-bbe129e5e560" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "==================================================\n", - "Pre cast\n", - "root\n", - " |-- Bank Name: string (nullable = true)\n", - " |-- City: string (nullable = true)\n", - " |-- ST: string (nullable = true)\n", - " |-- CERT: integer (nullable = true)\n", - " |-- Acquiring Institution: string (nullable = true)\n", - " |-- Closing Date: string (nullable = true)\n", - "\n", - "None\n", - "==================================================\n", - "Post cast\n", - "root\n", - " |-- Bank Name: string (nullable = true)\n", - " |-- City: string (nullable = true)\n", - " |-- ST: string (nullable = true)\n", - " |-- CERT: integer (nullable = true)\n", - " |-- Acquiring Institution: string (nullable = true)\n", - " |-- Closing Date: string (nullable = true)\n", - " |-- CERT_str1: string (nullable = true)\n", - " |-- CERT_str2: string (nullable = true)\n", - "\n", - "None\n" - ] - } - ], - "source": [ - "print('='*50)\n", - "print('Pre cast')\n", - "print(df.printSchema())\n", - "\n", - "df2 = df \\\n", - ".withColumn('CERT_str1', df['CERT'].cast('string')) \\\n", - ".withColumn('CERT_str2', df['CERT'].cast(StringType())) #\\\n", - "\n", - "print('='*50)\n", - "print('Post cast')\n", - "print(df2.printSchema())\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BOAWTJcIVLrs" - }, - "source": [ - "### 4.15 Replace values in dataframe" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "vJkLrgi-VMqM", - "outputId": "c9c09051-10ef-4234-d2e9-37a7cc202d87" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------------+--------+---+-----+---------------------+------------+\n", - "| Bank Name| City| ST| CERT|Acquiring Institution|Closing Date|\n", - "+--------------------+--------+---+-----+---------------------+------------+\n", - "| First Bank of Idaho| Ketchum| ID|34396| U.S. Bank, N.A.| 24-Apr-09|\n", - "|Amcore Bank, Nati...|Rockford| IL| 3735| Harris N.A.| 23-Apr-10|\n", - "+--------------------+--------+---+-----+---------------------+------------+\n", - "only showing top 2 rows\n", - "\n", - "Replace 7 in the above dataframe with 17 at all instances\n", - "+--------------------+--------+---+-----+---------------------+------------+\n", - "| Bank Name| City| ST| CERT|Acquiring Institution|Closing Date|\n", - "+--------------------+--------+---+-----+---------------------+------------+\n", - "| First Bank of Idaho| Ketchum| ID|34396| U.S. Bank, N.A.| 24-Apr-09|\n", - "|Amcore Bank, Nati...|Rockford| IL| 3735| Harris N.A.| 23-Apr-10|\n", - "+--------------------+--------+---+-----+---------------------+------------+\n", - "only showing top 2 rows\n", - "\n" - ] - } - ], - "source": [ - "# Pre replace\n", - "df.show(2)\n", - "\n", - "# Post replace\n", - "print('Replace 7 in the above dataframe with 17 at all instances')\n", - "df.na.replace(7,17).show(2)" - ] - } - ], - "metadata": { - "colab": { - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/PySpark_Dataframe_Commom_Data_Operatorios.ipynb b/PySpark_Dataframe_Commom_Data_Operatorios.ipynb deleted file mode 100644 index 1b69940..0000000 --- a/PySpark_Dataframe_Commom_Data_Operatorios.ipynb +++ /dev/null @@ -1,569 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# Creating New Column in PySpark DataFrame" - ], - "metadata": { - "id": "PgHnsi52fTxr" - } - }, - { - "cell_type": "markdown", - "source": [ - "## Installing Libraries, Importing the Libraries and Creating Spark Session" - ], - "metadata": { - "id": "xD2_W_08fXs_" - } - }, - { - "cell_type": "code", - "source": [ - "!pip install pyspark\n", - "import pyspark\n", - "from pyspark.sql import SparkSession\n", - "spark=SparkSession.builder.appName(\"pysparkdf\").getOrCreate()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "RP1guNIYe2U7", - "outputId": "b8f248e5-2ae2-47e5-b5df-f70117e3481d" - }, - "execution_count": 1, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Collecting pyspark\n", - " Downloading pyspark-3.4.1.tar.gz (310.8 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m310.8/310.8 MB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: py4j==0.10.9.7 in /usr/local/lib/python3.10/dist-packages (from pyspark) (0.10.9.7)\n", - "Building wheels for collected packages: pyspark\n", - " Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for pyspark: filename=pyspark-3.4.1-py2.py3-none-any.whl size=311285398 sha256=115e6c8daeb33de125d001c462a71282203f6c1fd4b93692ca6a751c13036e2e\n", - " Stored in directory: /root/.cache/pip/wheels/0d/77/a3/ff2f74cc9ab41f8f594dabf0579c2a7c6de920d584206e0834\n", - "Successfully built pyspark\n", - "Installing collected packages: pyspark\n", - "Successfully installed pyspark-3.4.1\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Importing Data" - ], - "metadata": { - "id": "7I3EIkpnfrrT" - } - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "0AMDPqUkesch" - }, - "outputs": [], - "source": [ - "df = spark.read.csv('cereal.csv', sep = ',', inferSchema = True, header = True)" - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Reading the Schema" - ], - "metadata": { - "id": "ucoOWQ1NfuZp" - } - }, - { - "cell_type": "code", - "source": [ - "df.printSchema()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "l9ZEXMCKfxoE", - "outputId": "65534ccd-829c-43a9-b419-d3316154d2a6" - }, - "execution_count": 3, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "root\n", - " |-- name: string (nullable = true)\n", - " |-- mfr: string (nullable = true)\n", - " |-- type: string (nullable = true)\n", - " |-- calories: integer (nullable = true)\n", - " |-- protein: integer (nullable = true)\n", - " |-- fat: integer (nullable = true)\n", - " |-- sodium: integer (nullable = true)\n", - " |-- fiber: double (nullable = true)\n", - " |-- carbo: double (nullable = true)\n", - " |-- sugars: integer (nullable = true)\n", - " |-- potass: integer (nullable = true)\n", - " |-- vitamins: integer (nullable = true)\n", - " |-- shelf: integer (nullable = true)\n", - " |-- weight: double (nullable = true)\n", - " |-- cups: double (nullable = true)\n", - " |-- rating: double (nullable = true)\n", - "\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# select()" - ], - "metadata": { - "id": "8CryY4XBg2Dy" - } - }, - { - "cell_type": "code", - "source": [ - "df.select('name', 'mfr', 'rating').show(10)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "4qEK9Hh-f5z5", - "outputId": "84b9939d-83bd-49cb-8b36-a87248aba6e0" - }, - "execution_count": 4, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "+--------------------+---+---------+\n", - "| name|mfr| rating|\n", - "+--------------------+---+---------+\n", - "| 100% Bran| N|68.402973|\n", - "| 100% Natural Bran| Q|33.983679|\n", - "| All-Bran| K|59.425505|\n", - "|All-Bran with Ext...| K|93.704912|\n", - "| Almond Delight| R|34.384843|\n", - "|Apple Cinnamon Ch...| G|29.509541|\n", - "| Apple Jacks| K|33.174094|\n", - "| Basic 4| G|37.038562|\n", - "| Bran Chex| R|49.120253|\n", - "| Bran Flakes| P|53.313813|\n", - "+--------------------+---+---------+\n", - "only showing top 10 rows\n", - "\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# withColumn()" - ], - "metadata": { - "id": "YFkz5fi3g4rN" - } - }, - { - "cell_type": "code", - "source": [ - "df.withColumn(\"Calories\",df['calories'].cast(\"Integer\")).printSchema()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Kym19ml9f9Ui", - "outputId": "72fc23fe-acd9-4c13-894d-651d012044e8" - }, - "execution_count": 5, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "root\n", - " |-- name: string (nullable = true)\n", - " |-- mfr: string (nullable = true)\n", - " |-- type: string (nullable = true)\n", - " |-- Calories: integer (nullable = true)\n", - " |-- protein: integer (nullable = true)\n", - " |-- fat: integer (nullable = true)\n", - " |-- sodium: integer (nullable = true)\n", - " |-- fiber: double (nullable = true)\n", - " |-- carbo: double (nullable = true)\n", - " |-- sugars: integer (nullable = true)\n", - " |-- potass: integer (nullable = true)\n", - " |-- vitamins: integer (nullable = true)\n", - " |-- shelf: integer (nullable = true)\n", - " |-- weight: double (nullable = true)\n", - " |-- cups: double (nullable = true)\n", - " |-- rating: double (nullable = true)\n", - "\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# groupBy()" - ], - "metadata": { - "id": "Avu4Pd5Qg7Fh" - } - }, - { - "cell_type": "code", - "source": [ - "df.groupBy(\"name\", \"calories\").count().show()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "RYs1skB4f_mB", - "outputId": "ad33c3f3-5626-45c7-8a0e-7a66d7015de4" - }, - "execution_count": 6, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "+--------------------+--------+-----+\n", - "| name|calories|count|\n", - "+--------------------+--------+-----+\n", - "|Just Right Fruit ...| 140| 1|\n", - "| Raisin Bran| 120| 1|\n", - "|Shredded Wheat sp...| 90| 1|\n", - "| Corn Pops| 110| 1|\n", - "| Honey Nut Cheerios| 110| 1|\n", - "|Muesli Raisins; D...| 150| 1|\n", - "| Fruity Pebbles| 110| 1|\n", - "| 100% Bran| 70| 1|\n", - "| Fruitful Bran| 120| 1|\n", - "| Puffed Rice| 50| 1|\n", - "| Raisin Squares| 90| 1|\n", - "| Total Raisin Bran| 140| 1|\n", - "| Golden Grahams| 110| 1|\n", - "| Nutri-grain Wheat| 90| 1|\n", - "| 100% Natural Bran| 120| 1|\n", - "|Apple Cinnamon Ch...| 110| 1|\n", - "|Mueslix Crispy Blend| 160| 1|\n", - "|Shredded Wheat 'n...| 90| 1|\n", - "| Smacks| 110| 1|\n", - "| Quaker Oatmeal| 100| 1|\n", - "+--------------------+--------+-----+\n", - "only showing top 20 rows\n", - "\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# orderBy()" - ], - "metadata": { - "id": "461dECcGg-Cc" - } - }, - { - "cell_type": "code", - "source": [ - "df.orderBy(\"protein\").show()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "mAsEHrKmgCC0", - "outputId": "73abc952-78f0-4b4f-d014-4ac92ffb4611" - }, - "execution_count": 7, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "+--------------------+---+----+--------+-------+---+------+-----+-----+------+------+--------+-----+------+----+---------+\n", - "| name|mfr|type|calories|protein|fat|sodium|fiber|carbo|sugars|potass|vitamins|shelf|weight|cups| rating|\n", - "+--------------------+---+----+--------+-------+---+------+-----+-----+------+------+--------+-----+------+----+---------+\n", - "| Cap'n'Crunch| Q| C| 120| 1| 2| 220| 0.0| 12.0| 12| 35| 25| 2| 1.0|0.75|18.042851|\n", - "|Cinnamon Toast Cr...| G| C| 120| 1| 3| 210| 0.0| 13.0| 9| 45| 25| 2| 1.0|0.75|19.823573|\n", - "| Cocoa Puffs| G| C| 110| 1| 1| 180| 0.0| 12.0| 13| 55| 25| 2| 1.0| 1.0|22.736446|\n", - "| Corn Pops| K| C| 110| 1| 0| 90| 1.0| 13.0| 12| 20| 25| 2| 1.0| 1.0|35.782791|\n", - "| Count Chocula| G| C| 110| 1| 1| 180| 0.0| 12.0| 13| 65| 25| 2| 1.0| 1.0|22.396513|\n", - "| Frosted Flakes| K| C| 110| 1| 0| 200| 1.0| 14.0| 11| 25| 25| 1| 1.0|0.75|31.435973|\n", - "| Fruity Pebbles| P| C| 110| 1| 1| 135| 0.0| 13.0| 12| 25| 25| 2| 1.0|0.75|28.025765|\n", - "| Golden Grahams| G| C| 110| 1| 1| 280| 0.0| 15.0| 9| 45| 25| 2| 1.0|0.75|23.804043|\n", - "| Honey Graham Ohs| Q| C| 120| 1| 2| 220| 1.0| 12.0| 11| 45| 25| 2| 1.0| 1.0|21.871292|\n", - "| Honey-comb| P| C| 110| 1| 0| 180| 0.0| 14.0| 11| 35| 25| 1| 1.0|1.33|28.742414|\n", - "| Puffed Rice| Q| C| 50| 1| 0| 0| 0.0| 13.0| 0| 15| 0| 3| 0.5| 1.0|60.756112|\n", - "| Rice Chex| R| C| 110| 1| 0| 240| 0.0| 23.0| 2| 30| 25| 1| 1.0|1.13|41.998933|\n", - "| Trix| G| C| 110| 1| 1| 140| 0.0| 13.0| 12| 25| 25| 2| 1.0| 1.0|27.753301|\n", - "| Froot Loops| K| C| 110| 2| 1| 125| 1.0| 11.0| 13| 30| 25| 2| 1.0| 1.0|32.207582|\n", - "|Apple Cinnamon Ch...| G| C| 110| 2| 2| 180| 1.5| 10.5| 10| 70| 25| 1| 1.0|0.75|29.509541|\n", - "| Crispix| K| C| 110| 2| 0| 220| 1.0| 21.0| 3| 30| 25| 3| 1.0| 1.0|46.895644|\n", - "| Apple Jacks| K| C| 110| 2| 0| 125| 1.0| 11.0| 14| 30| 25| 2| 1.0| 1.0|33.174094|\n", - "| Corn Flakes| K| C| 100| 2| 0| 290| 1.0| 21.0| 2| 35| 25| 1| 1.0| 1.0|45.863324|\n", - "| Golden Crisp| P| C| 100| 2| 0| 45| 0.0| 11.0| 15| 40| 25| 1| 1.0|0.88|35.252444|\n", - "|Crispy Wheat & Ra...| G| C| 100| 2| 1| 140| 2.0| 11.0| 10| 120| 25| 3| 1.0|0.75|36.176196|\n", - "+--------------------+---+----+--------+-------+---+------+-----+-----+------+------+--------+-----+------+----+---------+\n", - "only showing top 20 rows\n", - "\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# when()" - ], - "metadata": { - "id": "ZvoLjFLlhC2y" - } - }, - { - "cell_type": "code", - "source": [ - "from pyspark.sql.functions import when" - ], - "metadata": { - "id": "0BkjHZaLggtb" - }, - "execution_count": 14, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "df.select(\"name\", when(df.vitamins >= \"25\", \"rich in vitamins\")).show()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "5gta6F8fgeVO", - "outputId": "6efa5fd8-8d12-42b1-9d3d-5dcaebb09a06" - }, - "execution_count": 15, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "+--------------------+----------------------------------------------------+\n", - "| name|CASE WHEN (vitamins >= 25) THEN rich in vitamins END|\n", - "+--------------------+----------------------------------------------------+\n", - "| 100% Bran| rich in vitamins|\n", - "| 100% Natural Bran| null|\n", - "| All-Bran| rich in vitamins|\n", - "|All-Bran with Ext...| rich in vitamins|\n", - "| Almond Delight| rich in vitamins|\n", - "|Apple Cinnamon Ch...| rich in vitamins|\n", - "| Apple Jacks| rich in vitamins|\n", - "| Basic 4| rich in vitamins|\n", - "| Bran Chex| rich in vitamins|\n", - "| Bran Flakes| rich in vitamins|\n", - "| Cap'n'Crunch| rich in vitamins|\n", - "| Cheerios| rich in vitamins|\n", - "|Cinnamon Toast Cr...| rich in vitamins|\n", - "| Clusters| rich in vitamins|\n", - "| Cocoa Puffs| rich in vitamins|\n", - "| Corn Chex| rich in vitamins|\n", - "| Corn Flakes| rich in vitamins|\n", - "| Corn Pops| rich in vitamins|\n", - "| Count Chocula| rich in vitamins|\n", - "| Cracklin' Oat Bran| rich in vitamins|\n", - "+--------------------+----------------------------------------------------+\n", - "only showing top 20 rows\n", - "\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# filter()" - ], - "metadata": { - "id": "oCNE9jk7hF_f" - } - }, - { - "cell_type": "code", - "source": [ - "df.filter(df.calories == \"100\").show()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "kjVLOMBvgjwn", - "outputId": "7a689dff-5137-4630-950d-9a223b2946fa" - }, - "execution_count": 16, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "+--------------------+---+----+--------+-------+---+------+-----+-----+------+------+--------+-----+------+----+---------+\n", - "| name|mfr|type|calories|protein|fat|sodium|fiber|carbo|sugars|potass|vitamins|shelf|weight|cups| rating|\n", - "+--------------------+---+----+--------+-------+---+------+-----+-----+------+------+--------+-----+------+----+---------+\n", - "| Corn Flakes| K| C| 100| 2| 0| 290| 1.0| 21.0| 2| 35| 25| 1| 1.0| 1.0|45.863324|\n", - "|Cream of Wheat (Q...| N| H| 100| 3| 0| 80| 1.0| 21.0| 0| -1| 0| 2| 1.0| 1.0|64.533816|\n", - "|Crispy Wheat & Ra...| G| C| 100| 2| 1| 140| 2.0| 11.0| 10| 120| 25| 3| 1.0|0.75|36.176196|\n", - "| Double Chex| R| C| 100| 2| 0| 190| 1.0| 18.0| 5| 80| 25| 3| 1.0|0.75|44.330856|\n", - "| Frosted Mini-Wheats| K| C| 100| 3| 0| 0| 3.0| 14.0| 7| 100| 25| 2| 1.0| 0.8|58.345141|\n", - "| Golden Crisp| P| C| 100| 2| 0| 45| 0.0| 11.0| 15| 40| 25| 1| 1.0|0.88|35.252444|\n", - "| Grape Nuts Flakes| P| C| 100| 3| 1| 140| 3.0| 15.0| 5| 85| 25| 3| 1.0|0.88|52.076897|\n", - "| Life| Q| C| 100| 4| 2| 150| 2.0| 12.0| 6| 95| 25| 2| 1.0|0.67|45.328074|\n", - "| Maypo| A| H| 100| 4| 1| 0| 0.0| 16.0| 3| 95| 25| 2| 1.0| 1.0|54.850917|\n", - "|Multi-Grain Cheerios| G| C| 100| 2| 1| 220| 2.0| 15.0| 6| 90| 25| 1| 1.0| 1.0|40.105965|\n", - "| Product 19| K| C| 100| 3| 0| 320| 1.0| 20.0| 3| 45| 100| 3| 1.0| 1.0| 41.50354|\n", - "| Quaker Oat Squares| Q| C| 100| 4| 1| 135| 2.0| 14.0| 6| 110| 25| 3| 1.0| 0.5|49.511874|\n", - "| Quaker Oatmeal| Q| H| 100| 5| 2| 0| 2.7| -1.0| -1| 110| 0| 1| 1.0|0.67|50.828392|\n", - "| Raisin Nut Bran| G| C| 100| 3| 2| 140| 2.5| 10.5| 8| 140| 25| 3| 1.0| 0.5| 39.7034|\n", - "| Total Whole Grain| G| C| 100| 3| 1| 200| 3.0| 16.0| 3| 110| 100| 3| 1.0| 1.0|46.658844|\n", - "| Wheat Chex| R| C| 100| 3| 1| 230| 3.0| 17.0| 3| 115| 25| 1| 1.0|0.67|49.787445|\n", - "| Wheaties| G| C| 100| 3| 1| 200| 3.0| 17.0| 3| 110| 25| 1| 1.0| 1.0|51.592193|\n", - "+--------------------+---+----+--------+-------+---+------+-----+-----+------+------+--------+-----+------+----+---------+\n", - "\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# isNull()/isNotNull()" - ], - "metadata": { - "id": "-HRkuU4dhImn" - } - }, - { - "cell_type": "code", - "source": [ - "#isNotNull()\n", - "from pyspark.sql.functions import *\n", - "#filter data by null values\n", - "df.filter(df.name.isNotNull()).show()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "vnN4l9nTgnFL", - "outputId": "832d73ad-563f-45f3-e4f1-f0d3124c6f4d" - }, - "execution_count": 17, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "+--------------------+---+----+--------+-------+---+------+-----+-----+------+------+--------+-----+------+----+---------+\n", - "| name|mfr|type|calories|protein|fat|sodium|fiber|carbo|sugars|potass|vitamins|shelf|weight|cups| rating|\n", - "+--------------------+---+----+--------+-------+---+------+-----+-----+------+------+--------+-----+------+----+---------+\n", - "| 100% Bran| N| C| 70| 4| 1| 130| 10.0| 5.0| 6| 280| 25| 3| 1.0|0.33|68.402973|\n", - "| 100% Natural Bran| Q| C| 120| 3| 5| 15| 2.0| 8.0| 8| 135| 0| 3| 1.0| 1.0|33.983679|\n", - "| All-Bran| K| C| 70| 4| 1| 260| 9.0| 7.0| 5| 320| 25| 3| 1.0|0.33|59.425505|\n", - "|All-Bran with Ext...| K| C| 50| 4| 0| 140| 14.0| 8.0| 0| 330| 25| 3| 1.0| 0.5|93.704912|\n", - "| Almond Delight| R| C| 110| 2| 2| 200| 1.0| 14.0| 8| -1| 25| 3| 1.0|0.75|34.384843|\n", - "|Apple Cinnamon Ch...| G| C| 110| 2| 2| 180| 1.5| 10.5| 10| 70| 25| 1| 1.0|0.75|29.509541|\n", - "| Apple Jacks| K| C| 110| 2| 0| 125| 1.0| 11.0| 14| 30| 25| 2| 1.0| 1.0|33.174094|\n", - "| Basic 4| G| C| 130| 3| 2| 210| 2.0| 18.0| 8| 100| 25| 3| 1.33|0.75|37.038562|\n", - "| Bran Chex| R| C| 90| 2| 1| 200| 4.0| 15.0| 6| 125| 25| 1| 1.0|0.67|49.120253|\n", - "| Bran Flakes| P| C| 90| 3| 0| 210| 5.0| 13.0| 5| 190| 25| 3| 1.0|0.67|53.313813|\n", - "| Cap'n'Crunch| Q| C| 120| 1| 2| 220| 0.0| 12.0| 12| 35| 25| 2| 1.0|0.75|18.042851|\n", - "| Cheerios| G| C| 110| 6| 2| 290| 2.0| 17.0| 1| 105| 25| 1| 1.0|1.25|50.764999|\n", - "|Cinnamon Toast Cr...| G| C| 120| 1| 3| 210| 0.0| 13.0| 9| 45| 25| 2| 1.0|0.75|19.823573|\n", - "| Clusters| G| C| 110| 3| 2| 140| 2.0| 13.0| 7| 105| 25| 3| 1.0| 0.5|40.400208|\n", - "| Cocoa Puffs| G| C| 110| 1| 1| 180| 0.0| 12.0| 13| 55| 25| 2| 1.0| 1.0|22.736446|\n", - "| Corn Chex| R| C| 110| 2| 0| 280| 0.0| 22.0| 3| 25| 25| 1| 1.0| 1.0|41.445019|\n", - "| Corn Flakes| K| C| 100| 2| 0| 290| 1.0| 21.0| 2| 35| 25| 1| 1.0| 1.0|45.863324|\n", - "| Corn Pops| K| C| 110| 1| 0| 90| 1.0| 13.0| 12| 20| 25| 2| 1.0| 1.0|35.782791|\n", - "| Count Chocula| G| C| 110| 1| 1| 180| 0.0| 12.0| 13| 65| 25| 2| 1.0| 1.0|22.396513|\n", - "| Cracklin' Oat Bran| K| C| 110| 3| 3| 140| 4.0| 10.0| 7| 160| 25| 3| 1.0| 0.5|40.448772|\n", - "+--------------------+---+----+--------+-------+---+------+-----+-----+------+------+--------+-----+------+----+---------+\n", - "only showing top 20 rows\n", - "\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# isNull()" - ], - "metadata": { - "id": "UhlLFymIhNDd" - } - }, - { - "cell_type": "code", - "source": [ - "df.filter(df.name.isNull()).show()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "uFiatSsOgpUs", - "outputId": "9a655b50-cf12-436d-c2ed-e94cc11677e7" - }, - "execution_count": 18, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "+----+---+----+--------+-------+---+------+-----+-----+------+------+--------+-----+------+----+------+\n", - "|name|mfr|type|calories|protein|fat|sodium|fiber|carbo|sugars|potass|vitamins|shelf|weight|cups|rating|\n", - "+----+---+----+--------+-------+---+------+-----+-----+------+------+--------+-----+------+----+------+\n", - "+----+---+----+--------+-------+---+------+-----+-----+------+------+--------+-----+------+----+------+\n", - "\n" - ] - } - ] - } - ] -} \ No newline at end of file diff --git a/PySpark_RDD e DataFrame.ipynb b/PySpark_RDD e DataFrame.ipynb deleted file mode 100644 index 1d676b0..0000000 --- a/PySpark_RDD e DataFrame.ipynb +++ /dev/null @@ -1,496 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# Configuração de biblioteca do PySpark no Google" - ], - "metadata": { - "id": "kT5SPWFMVURY" - } - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Zdd7ESWeS3U1", - "outputId": "dba2e0d4-cfab-4d0f-dc73-26d1d212aefb" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Requirement already satisfied: pyspark in /usr/local/lib/python3.10/dist-packages (3.4.1)\n", - "Requirement already satisfied: py4j==0.10.9.7 in /usr/local/lib/python3.10/dist-packages (from pyspark) (0.10.9.7)\n" - ] - } - ], - "source": [ - "!pip install pyspark" - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Criando a sessão do SparkContext e SparkSession" - ], - "metadata": { - "id": "iXZzK_F0VW5R" - } - }, - { - "cell_type": "code", - "source": [ - "from pyspark import SparkContext\n", - "from pyspark.sql import SparkSession" - ], - "metadata": { - "id": "SVC51xpOTF4h" - }, - "execution_count": 29, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "sc = SparkContext.getOrCreate()" - ], - "metadata": { - "id": "uNjNxORcTKEb" - }, - "execution_count": 30, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "spark = SparkSession.builder.appName('PySpark DataFrame From RDD').getOrCreate()" - ], - "metadata": { - "id": "MgwbLZPBTMJF" - }, - "execution_count": 31, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Create PySpark DataFrame From an Existing RDD" - ], - "metadata": { - "id": "NqmeysoNTsw9" - } - }, - { - "cell_type": "code", - "source": [ - "rdd = sc.parallelize([('C',85,76,87,91), ('B',85,76,87,91), (\"A\", 85,78,96,92), (\"A\", 92,76,89,96)], 4)" - ], - "metadata": { - "id": "LbGDD240TOz3" - }, - "execution_count": 32, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "print(type(rdd))" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "5xUhfDpLTSFX", - "outputId": "d4b024c7-98cc-403a-a21c-c98bc79086fc" - }, - "execution_count": 33, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "sub = ['Division','English','Mathematics','Physics','Chemistry']\n", - "marks_df = spark.createDataFrame(rdd, schema=sub)" - ], - "metadata": { - "id": "NGzl22XDTUxf" - }, - "execution_count": 34, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "print(type(marks_df))" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "w9CrNRlPTW0b", - "outputId": "936c9592-4de1-4d71-9216-7a70d1ffe7a5" - }, - "execution_count": 35, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "marks_df.printSchema()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "H94oSH23TYp9", - "outputId": "b62ae417-1966-4d50-fef4-cf65799f53d8" - }, - "execution_count": 36, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "root\n", - " |-- Division: string (nullable = true)\n", - " |-- English: long (nullable = true)\n", - " |-- Mathematics: long (nullable = true)\n", - " |-- Physics: long (nullable = true)\n", - " |-- Chemistry: long (nullable = true)\n", - "\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "marks_df.show()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Sf4pHHZfTbD_", - "outputId": "863fc2a4-81f0-42d4-a68d-421278299e2d" - }, - "execution_count": 37, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "+--------+-------+-----------+-------+---------+\n", - "|Division|English|Mathematics|Physics|Chemistry|\n", - "+--------+-------+-----------+-------+---------+\n", - "| C| 85| 76| 87| 91|\n", - "| B| 85| 76| 87| 91|\n", - "| A| 85| 78| 96| 92|\n", - "| A| 92| 76| 89| 96|\n", - "+--------+-------+-----------+-------+---------+\n", - "\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Create PySpark DataFrame From an External File" - ], - "metadata": { - "id": "G7tzGYFoTvvu" - } - }, - { - "cell_type": "code", - "source": [ - "from pyspark.sql import SparkSession" - ], - "metadata": { - "id": "M_rIOv_-TxtX" - }, - "execution_count": 38, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "spark = SparkSession.builder.appName('PySpark DataFrame From External Files').getOrCreate()" - ], - "metadata": { - "id": "56SR03IGTzdx" - }, - "execution_count": 39, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "csv_file = spark.read.csv('Fish.csv', sep = ',', inferSchema = True, header = True)" - ], - "metadata": { - "id": "DgUKPm9bT1IQ" - }, - "execution_count": 40, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "txt_file = spark.read.text(\"example.txt\")" - ], - "metadata": { - "id": "B3KPNVERUEsl" - }, - "execution_count": 41, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "json_file = spark.read.json(\"sample.json\", multiLine=True)" - ], - "metadata": { - "id": "lnEuj94PUUNO" - }, - "execution_count": 42, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "print(type(csv_file))" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "B0E4RZcsUXDg", - "outputId": "57d37c63-eea9-441e-f897-1823b37b400d" - }, - "execution_count": 43, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "print(type(txt_file))" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "kmAUj9z_UYW_", - "outputId": "f86d43ee-3a4d-4186-fdbf-cad562cc0c81" - }, - "execution_count": 44, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "print(type(json_file))" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Miuoeo7VUaIN", - "outputId": "5499c487-e973-4d22-f7f8-86d84d5306e3" - }, - "execution_count": 45, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "csv_file.printSchema()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "iSoTE3sBUcXe", - "outputId": "5aa960ab-18d5-4178-cea6-e0bfa67c7f6b" - }, - "execution_count": 46, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "root\n", - " |-- Species: string (nullable = true)\n", - " |-- Weight: double (nullable = true)\n", - " |-- Length1: double (nullable = true)\n", - " |-- Length2: double (nullable = true)\n", - " |-- Length3: double (nullable = true)\n", - " |-- Height: double (nullable = true)\n", - " |-- Width: double (nullable = true)\n", - "\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "txt_file.printSchema()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "9tPb1wJqUdmk", - "outputId": "0b2869b6-05dc-4c2c-d063-8905e8f3bf17" - }, - "execution_count": 47, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "root\n", - " |-- value: string (nullable = true)\n", - "\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "json_file.printSchema()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "7WKa1jWrUfMw", - "outputId": "fffafedb-8551-4d4e-af6b-7a8dd4ae5de2" - }, - "execution_count": 48, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "root\n", - " |-- employees: array (nullable = true)\n", - " | |-- element: struct (containsNull = true)\n", - " | | |-- firstName: string (nullable = true)\n", - " | | |-- lastName: string (nullable = true)\n", - "\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# PySpark DataFrame to Pandas DataFrame" - ], - "metadata": { - "id": "TTYZQ1BuUm91" - } - }, - { - "cell_type": "code", - "source": [ - "df = csv_file.toPandas()" - ], - "metadata": { - "id": "-YkSgpyyUpLK" - }, - "execution_count": 49, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "type(df)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "_oaFwM6tUrI4", - "outputId": "823a8a51-c5e0-49a5-d290-e6e60811fcab" - }, - "execution_count": 50, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "pandas.core.frame.DataFrame" - ] - }, - "metadata": {}, - "execution_count": 50 - } - ] - } - ] -} \ No newline at end of file diff --git a/README.md b/README.md deleted file mode 100644 index 4a92e9d..0000000 --- a/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# Pos_Tech_DTAT -Repositório para todas as Fases contemplando os códigos utilizados nas disciplinas da Pós Tech em Data Analytics diff --git a/Transformations_and_Actions_PySpark.ipynb b/Transformations_and_Actions_PySpark.ipynb deleted file mode 100644 index ebb8a2e..0000000 --- a/Transformations_and_Actions_PySpark.ipynb +++ /dev/null @@ -1,701 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# Transformations PySpark" - ], - "metadata": { - "id": "DtPi3zvhItBb" - } - }, - { - "cell_type": "code", - "source": [ - "!pip install pyspark" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "gZ-JqwDpIzmN", - "outputId": "ff8dac73-54f7-4cac-be41-be333e7daff4" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Collecting pyspark\n", - " Downloading pyspark-3.4.1.tar.gz (310.8 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m310.8/310.8 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: py4j==0.10.9.7 in /usr/local/lib/python3.10/dist-packages (from pyspark) (0.10.9.7)\n", - "Building wheels for collected packages: pyspark\n", - " Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for pyspark: filename=pyspark-3.4.1-py2.py3-none-any.whl size=311285398 sha256=d66abc130d9c7d43c9c5033160ad176e04f57309bfe070942d8e05295de3a3af\n", - " Stored in directory: /root/.cache/pip/wheels/0d/77/a3/ff2f74cc9ab41f8f594dabf0579c2a7c6de920d584206e0834\n", - "Successfully built pyspark\n", - "Installing collected packages: pyspark\n", - "Successfully installed pyspark-3.4.1\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "!pip install findspark" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "f3Md3fv3I_XG", - "outputId": "93350244-ee93-405b-f2a9-88d3a103d553" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Collecting findspark\n", - " Downloading findspark-2.0.1-py2.py3-none-any.whl (4.4 kB)\n", - "Installing collected packages: findspark\n", - "Successfully installed findspark-2.0.1\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "from pyspark import SparkContext\n", - "from pyspark.sql import SparkSession" - ], - "metadata": { - "id": "Qy0Z0nZHI8Wp" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "sc = SparkContext.getOrCreate()" - ], - "metadata": { - "id": "5eQ7NvRzJ0uQ" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "spark = SparkSession.builder.appName('PySpark DataFrame From RDD').getOrCreate()" - ], - "metadata": { - "id": "UZJ896PgJ2K1" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## map()" - ], - "metadata": { - "id": "5pUVpU6yIwwQ" - } - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "kYKiwxjzImXY", - "outputId": "59ee24eb-61f1-4978-e805-b9d72e8e0dd2" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "[2, 4, 6, 8, 10]\n" - ] - } - ], - "source": [ - "data= [1, 2, 3, 4, 5]\n", - "myRDD= sc.parallelize(data)\n", - "#Returns a new RDD by multiplying all elements of parent RDD by 2\n", - "newRDD= myRDD.map(lambda x: x*2)\n", - "print(newRDD.collect())" - ] - }, - { - "cell_type": "markdown", - "source": [ - "## flatMap()" - ], - "metadata": { - "id": "8iz2mvvkKDMn" - } - }, - { - "cell_type": "code", - "source": [ - "data= [1, 2, 3]\n", - "myRDD= sc.parallelize(data)\n", - "#map() returns [[1], [1, 2], [1, 2, 3]]\n", - "mapRDD= myRDD.map(lambda x: range(1,x))\n", - "#flatmap() returns [1, 1, 2, 1, 2, 3]\n", - "flatMapRDD = myRDD.flatMap(lambda x: range(1,x))" - ], - "metadata": { - "id": "0EQYOAEjKCka" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## filter()" - ], - "metadata": { - "id": "EGkIstSWKHV_" - } - }, - { - "cell_type": "code", - "source": [ - "data= [1, 2, 3, 4, 5, 6]\n", - "myRDD= sc.parallelize(data)\n", - "#returns an RDD with only the elements that are divisible by 2\n", - "newRDD= myRDD.filter(lambda x: x%2 == 0)\n", - "print(newRDD.collect())" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "nsJ_5vZ4KIxm", - "outputId": "7bbec148-e2b6-4730-ea3e-525f5e677301" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "[2, 4, 6]\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## distinct()" - ], - "metadata": { - "id": "k66NCOyXKMGH" - } - }, - { - "cell_type": "code", - "source": [ - "data= [1, 2, 2, 3, 3, 3]\n", - "myRDD= sc.parallelize(data)\n", - "newRDD= myRDD.distinct()\n", - "print(newRDD.collect())" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "segwXsP4KNjO", - "outputId": "30491411-0585-47ab-d59d-758ba283d0a1" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "[2, 1, 3]\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## groupByKey()" - ], - "metadata": { - "id": "c4SOApzYKPC5" - } - }, - { - "cell_type": "code", - "source": [ - "myRDD = sc.parallelize([('a', 1), ('a', 2), ('a', 3), ('b', 1)])\n", - "#print result as list\n", - "resultList= myRDD.groupByKey().mapValues(list)\n", - "resultList.collect()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "N3-m37wMKQW1", - "outputId": "c4a2d202-08e2-4d59-8668-f555208d7a8b" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[('b', [1]), ('a', [1, 2, 3])]" - ] - }, - "metadata": {}, - "execution_count": 16 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## reduceByKey()" - ], - "metadata": { - "id": "-wwgB4nSKYVq" - } - }, - { - "cell_type": "code", - "source": [ - "from operator import add\n", - "myRDD = sc.parallelize([('a', 1), ('a', 2), ('a', 3), ('b', 1)])\n", - "#adds the values by keys\n", - "newRDD= myRDD.reduceByKey(add)\n", - "newRDD.collect()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "nyGxP_ufKZ6y", - "outputId": "e9dd5d08-1db5-4607-dfb0-1c9b88464c35" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[('b', 1), ('a', 6)]" - ] - }, - "metadata": {}, - "execution_count": 18 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## sortByKey()" - ], - "metadata": { - "id": "4pJjGTUrKkAi" - } - }, - { - "cell_type": "code", - "source": [ - "myRDD = sc.parallelize([('c', 1), ('d', 2), ('a', 3), ('b', 4)])\n", - "#sort by key\n", - "newRDD= myRDD.sortByKey()\n", - "newRDD.collect()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "nya6PABeKldr", - "outputId": "7e25d77a-786e-4d38-924b-3e159612c0a3" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[('a', 3), ('b', 4), ('c', 1), ('d', 2)]" - ] - }, - "metadata": {}, - "execution_count": 20 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## union()" - ], - "metadata": { - "id": "qQiAF9EpKq3n" - } - }, - { - "cell_type": "code", - "source": [ - "myRDD1 = sc.parallelize([1, 2, 3, 4])\n", - "myRDD2 = sc.parallelize([ 3, 4, 5, 6, 7])\n", - "#union of myRDD1 and myRDD2\n", - "newRDD = myRDD1.union(myRDD2)\n", - "newRDD.collect()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "LxfM314VKsnA", - "outputId": "ea8067a2-07f3-47ed-9c74-8ed96b99d51f" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[1, 2, 3, 4, 3, 4, 5, 6, 7]" - ] - }, - "metadata": {}, - "execution_count": 21 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Actions PySpark" - ], - "metadata": { - "id": "R5eqe2hPKugZ" - } - }, - { - "cell_type": "markdown", - "source": [ - "## count()" - ], - "metadata": { - "id": "MZ_WZU1EKzVL" - } - }, - { - "cell_type": "code", - "source": [ - "data= ['Scala', 'Python', 'Java', 'R']\n", - "myRDD= sc.parallelize(data)\n", - "#Returns 4 as output\n", - "myRDD.count()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "PIXVbiyAKySe", - "outputId": "c3ee0b44-b3cb-4712-84db-ab7835531916" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "4" - ] - }, - "metadata": {}, - "execution_count": 22 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## reduce()" - ], - "metadata": { - "id": "Hv1Ha7gEK4x3" - } - }, - { - "cell_type": "code", - "source": [ - "data= [1, 2, 3, 4, 5]\n", - "myRDD= sc.parallelize(data)\n", - "#returns the product of all the elements\n", - "myRDD.reduce(lambda x, y: x * y)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "01tur36zK6am", - "outputId": "6eda15d5-ea2e-4dde-de45-c47c2a5ce2f4" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "120" - ] - }, - "metadata": {}, - "execution_count": 23 - } - ] - }, - { - "cell_type": "code", - "source": [ - "data= ['Scala', 'Python', 'Java', 'R']\n", - "myRDD= sc.parallelize(data)\n", - "#Concatenate the string elements\n", - "myRDD.reduce( lambda x, y: x + y)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 36 - }, - "id": "O7jCKpFYK9XU", - "outputId": "308babc8-4054-49d4-b004-579fc6f5c582" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "'ScalaPythonJavaR'" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "string" - } - }, - "metadata": {}, - "execution_count": 24 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## foreach()" - ], - "metadata": { - "id": "QpULVIk4LBr6" - } - }, - { - "cell_type": "code", - "source": [ - "def fun(x):\n", - " print(x)\n", - "data= ['Scala', 'Python', 'Java', 'R']\n", - "myRDD= sc.parallelize(data)\n", - "#function applied to all the elements\n", - "myRDD.foreach(fun)" - ], - "metadata": { - "id": "Gm7iAf0YLDnP" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## countByValue()" - ], - "metadata": { - "id": "Yw6gTUqBLHF0" - } - }, - { - "cell_type": "code", - "source": [ - "data= ['Python', 'Scala', 'Python', 'R', 'Python', 'Java', 'R']\n", - "myRDD= sc.parallelize(data)\n", - "#items() returns a list with all the dictionary keys and values returned by countByValue()\n", - "myRDD.countByValue().items()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "u1ySKgxJLIRW", - "outputId": "895bc41f-2500-4161-a9db-2e87609a833b" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "dict_items([('Python', 3), ('Scala', 1), ('R', 2), ('Java', 1)])" - ] - }, - "metadata": {}, - "execution_count": 27 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## countByKey()" - ], - "metadata": { - "id": "9FhAg63XLcqr" - } - }, - { - "cell_type": "code", - "source": [ - "data= [('a', 1), ('b', 1), ('c', 1), ('a', 1)]\n", - "myRDD = sc.parallelize(data)\n", - "myRDD.countByKey().items()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "DrLHUTGaLfSX", - "outputId": "1ed0f052-5c7e-4f1b-8713-ddac4290bb99" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "dict_items([('a', 2), ('b', 1), ('c', 1)])" - ] - }, - "metadata": {}, - "execution_count": 28 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## take(n)" - ], - "metadata": { - "id": "2PI_T0PnLlVL" - } - }, - { - "cell_type": "code", - "source": [ - "data= [2, 5, 3, 8, 4]\n", - "myRDD= sc.parallelize(data)\n", - "#return the first 2 elements\n", - "myRDD.take(3)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "z2jLc1B5LmzG", - "outputId": "aff87ab8-8e86-44c0-ead1-ac936b11e43f" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[2, 5, 3]" - ] - }, - "metadata": {}, - "execution_count": 29 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## top(n)" - ], - "metadata": { - "id": "9ZHLsm5YLqCK" - } - }, - { - "cell_type": "code", - "source": [ - "data= [2, 5, 3, 8, 4]\n", - "myRDD= sc.parallelize(data)\n", - "#return the first 2 elements\n", - "myRDD.take(3)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "TNZCb9sOLrLG", - "outputId": "0a1b929d-637d-40a9-a6ac-b62696e3cf91" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[2, 5, 3]" - ] - }, - "metadata": {}, - "execution_count": 30 - } - ] - } - ] -} \ No newline at end of file diff --git a/cereal.csv b/cereal.csv deleted file mode 100644 index 43dc0a3..0000000 --- a/cereal.csv +++ /dev/null @@ -1,78 +0,0 @@ -name,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating -100% Bran,N,C,70,4,1,130,10,5,6,280,25,3,1,0.33,68.402973 -100% Natural Bran,Q,C,120,3,5,15,2,8,8,135,0,3,1,1,33.983679 -All-Bran,K,C,70,4,1,260,9,7,5,320,25,3,1,0.33,59.425505 -All-Bran with Extra Fiber,K,C,50,4,0,140,14,8,0,330,25,3,1,0.5,93.704912 -Almond Delight,R,C,110,2,2,200,1,14,8,-1,25,3,1,0.75,34.384843 -Apple Cinnamon Cheerios,G,C,110,2,2,180,1.5,10.5,10,70,25,1,1,0.75,29.509541 -Apple Jacks,K,C,110,2,0,125,1,11,14,30,25,2,1,1,33.174094 -Basic 4,G,C,130,3,2,210,2,18,8,100,25,3,1.33,0.75,37.038562 -Bran Chex,R,C,90,2,1,200,4,15,6,125,25,1,1,0.67,49.120253 -Bran Flakes,P,C,90,3,0,210,5,13,5,190,25,3,1,0.67,53.313813 -Cap'n'Crunch,Q,C,120,1,2,220,0,12,12,35,25,2,1,0.75,18.042851 -Cheerios,G,C,110,6,2,290,2,17,1,105,25,1,1,1.25,50.764999 -Cinnamon Toast Crunch,G,C,120,1,3,210,0,13,9,45,25,2,1,0.75,19.823573 -Clusters,G,C,110,3,2,140,2,13,7,105,25,3,1,0.5,40.400208 -Cocoa Puffs,G,C,110,1,1,180,0,12,13,55,25,2,1,1,22.736446 -Corn Chex,R,C,110,2,0,280,0,22,3,25,25,1,1,1,41.445019 -Corn Flakes,K,C,100,2,0,290,1,21,2,35,25,1,1,1,45.863324 -Corn Pops,K,C,110,1,0,90,1,13,12,20,25,2,1,1,35.782791 -Count Chocula,G,C,110,1,1,180,0,12,13,65,25,2,1,1,22.396513 -Cracklin' Oat Bran,K,C,110,3,3,140,4,10,7,160,25,3,1,0.5,40.448772 -Cream of Wheat (Quick),N,H,100,3,0,80,1,21,0,-1,0,2,1,1,64.533816 -Crispix,K,C,110,2,0,220,1,21,3,30,25,3,1,1,46.895644 -Crispy Wheat & Raisins,G,C,100,2,1,140,2,11,10,120,25,3,1,0.75,36.176196 -Double Chex,R,C,100,2,0,190,1,18,5,80,25,3,1,0.75,44.330856 -Froot Loops,K,C,110,2,1,125,1,11,13,30,25,2,1,1,32.207582 -Frosted Flakes,K,C,110,1,0,200,1,14,11,25,25,1,1,0.75,31.435973 -Frosted Mini-Wheats,K,C,100,3,0,0,3,14,7,100,25,2,1,0.8,58.345141 -Fruit & Fibre Dates; Walnuts; and Oats,P,C,120,3,2,160,5,12,10,200,25,3,1.25,0.67,40.917047 -Fruitful Bran,K,C,120,3,0,240,5,14,12,190,25,3,1.33,0.67,41.015492 -Fruity Pebbles,P,C,110,1,1,135,0,13,12,25,25,2,1,0.75,28.025765 -Golden Crisp,P,C,100,2,0,45,0,11,15,40,25,1,1,0.88,35.252444 -Golden Grahams,G,C,110,1,1,280,0,15,9,45,25,2,1,0.75,23.804043 -Grape Nuts Flakes,P,C,100,3,1,140,3,15,5,85,25,3,1,0.88,52.076897 -Grape-Nuts,P,C,110,3,0,170,3,17,3,90,25,3,1,0.25,53.371007 -Great Grains Pecan,P,C,120,3,3,75,3,13,4,100,25,3,1,0.33,45.811716 -Honey Graham Ohs,Q,C,120,1,2,220,1,12,11,45,25,2,1,1,21.871292 -Honey Nut Cheerios,G,C,110,3,1,250,1.5,11.5,10,90,25,1,1,0.75,31.072217 -Honey-comb,P,C,110,1,0,180,0,14,11,35,25,1,1,1.33,28.742414 -Just Right Crunchy Nuggets,K,C,110,2,1,170,1,17,6,60,100,3,1,1,36.523683 -Just Right Fruit & Nut,K,C,140,3,1,170,2,20,9,95,100,3,1.3,0.75,36.471512 -Kix,G,C,110,2,1,260,0,21,3,40,25,2,1,1.5,39.241114 -Life,Q,C,100,4,2,150,2,12,6,95,25,2,1,0.67,45.328074 -Lucky Charms,G,C,110,2,1,180,0,12,12,55,25,2,1,1,26.734515 -Maypo,A,H,100,4,1,0,0,16,3,95,25,2,1,1,54.850917 -Muesli Raisins; Dates; & Almonds,R,C,150,4,3,95,3,16,11,170,25,3,1,1,37.136863 -Muesli Raisins; Peaches; & Pecans,R,C,150,4,3,150,3,16,11,170,25,3,1,1,34.139765 -Mueslix Crispy Blend,K,C,160,3,2,150,3,17,13,160,25,3,1.5,0.67,30.313351 -Multi-Grain Cheerios,G,C,100,2,1,220,2,15,6,90,25,1,1,1,40.105965 -Nut&Honey Crunch,K,C,120,2,1,190,0,15,9,40,25,2,1,0.67,29.924285 -Nutri-Grain Almond-Raisin,K,C,140,3,2,220,3,21,7,130,25,3,1.33,0.67,40.692320 -Nutri-grain Wheat,K,C,90,3,0,170,3,18,2,90,25,3,1,1,59.642837 -Oatmeal Raisin Crisp,G,C,130,3,2,170,1.5,13.5,10,120,25,3,1.25,0.5,30.450843 -Post Nat. Raisin Bran,P,C,120,3,1,200,6,11,14,260,25,3,1.33,0.67,37.840594 -Product 19,K,C,100,3,0,320,1,20,3,45,100,3,1,1,41.503540 -Puffed Rice,Q,C,50,1,0,0,0,13,0,15,0,3,0.5,1,60.756112 -Puffed Wheat,Q,C,50,2,0,0,1,10,0,50,0,3,0.5,1,63.005645 -Quaker Oat Squares,Q,C,100,4,1,135,2,14,6,110,25,3,1,0.5,49.511874 -Quaker Oatmeal,Q,H,100,5,2,0,2.7,-1,-1,110,0,1,1,0.67,50.828392 -Raisin Bran,K,C,120,3,1,210,5,14,12,240,25,2,1.33,0.75,39.259197 -Raisin Nut Bran,G,C,100,3,2,140,2.5,10.5,8,140,25,3,1,0.5,39.703400 -Raisin Squares,K,C,90,2,0,0,2,15,6,110,25,3,1,0.5,55.333142 -Rice Chex,R,C,110,1,0,240,0,23,2,30,25,1,1,1.13,41.998933 -Rice Krispies,K,C,110,2,0,290,0,22,3,35,25,1,1,1,40.560159 -Shredded Wheat,N,C,80,2,0,0,3,16,0,95,0,1,0.83,1,68.235885 -Shredded Wheat 'n'Bran,N,C,90,3,0,0,4,19,0,140,0,1,1,0.67,74.472949 -Shredded Wheat spoon size,N,C,90,3,0,0,3,20,0,120,0,1,1,0.67,72.801787 -Smacks,K,C,110,2,1,70,1,9,15,40,25,2,1,0.75,31.230054 -Special K,K,C,110,6,0,230,1,16,3,55,25,1,1,1,53.131324 -Strawberry Fruit Wheats,N,C,90,2,0,15,3,15,5,90,25,2,1,1,59.363993 -Total Corn Flakes,G,C,110,2,1,200,0,21,3,35,100,3,1,1,38.839746 -Total Raisin Bran,G,C,140,3,1,190,4,15,14,230,100,3,1.5,1,28.592785 -Total Whole Grain,G,C,100,3,1,200,3,16,3,110,100,3,1,1,46.658844 -Triples,G,C,110,2,1,250,0,21,3,60,25,3,1,0.75,39.106174 -Trix,G,C,110,1,1,140,0,13,12,25,25,2,1,1,27.753301 -Wheat Chex,R,C,100,3,1,230,3,17,3,115,25,1,1,0.67,49.787445 -Wheaties,G,C,100,3,1,200,3,17,3,110,25,1,1,1,51.592193 -Wheaties Honey Gold,G,C,110,2,1,200,1,16,8,60,25,1,1,0.75,36.187559