diff --git a/ZawarKhan_khi_MongoDB.ipynb b/ZawarKhan_khi_MongoDB.ipynb new file mode 100644 index 0000000..b7c1eab --- /dev/null +++ b/ZawarKhan_khi_MongoDB.ipynb @@ -0,0 +1,334 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + " import pandas as pd\n", + "import numpy as np\n", + "import json\n", + "import csv\n", + "import ijson\n", + "import codecs" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "filename = \"rows.json\"\n", + "with open(filename, 'r') as f:\n", + " objects = ijson.items(f, 'meta.view.columns.item')\n", + " columns = list(objects)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{u'name': u'sid', u'format': {}, u'dataTypeName': u'meta_data', u'fieldName': u':sid', u'renderTypeName': u'meta_data', u'position': 0, u'id': -1, u'flags': [u'hidden']}\n" + ] + } + ], + "source": [ + "print(columns[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[u':sid',\n", + " u':id',\n", + " u':position',\n", + " u':created_at',\n", + " u':created_meta',\n", + " u':updated_at',\n", + " u':updated_meta',\n", + " u':meta',\n", + " u'date_of_stop',\n", + " u'time_of_stop',\n", + " u'agency',\n", + " u'subagency',\n", + " u'description',\n", + " u'location',\n", + " u'latitude',\n", + " u'longitude',\n", + " u'accident',\n", + " u'belts',\n", + " u'personal_injury',\n", + " u'property_damage',\n", + " u'fatal',\n", + " u'commercial_license',\n", + " u'hazmat',\n", + " u'commercial_vehicle',\n", + " u'alcohol',\n", + " u'work_zone',\n", + " u'state',\n", + " u'vehicle_type',\n", + " u'year',\n", + " u'make',\n", + " u'model',\n", + " u'color',\n", + " u'violation_type',\n", + " u'charge',\n", + " u'article',\n", + " u'contributed_to_accident',\n", + " u'race',\n", + " u'gender',\n", + " u'driver_city',\n", + " u'driver_state',\n", + " u'dl_state',\n", + " u'arrest_type',\n", + " u'geolocation']" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "column_names = [col[\"fieldName\"] for col in columns]\n", + "column_names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "good_columns = [\n", + " \"date_of_stop\", \n", + " \"time_of_stop\", \n", + " \"agency\", \n", + " \"subagency\",\n", + " \"description\",\n", + " \"location\", \n", + " \"latitude\", \n", + " \"longitude\", \n", + " \"vehicle_type\", \n", + " \"year\", \n", + " \"make\", \n", + " \"model\", \n", + " \"color\", \n", + " \"violation_type\",\n", + " \"race\", \n", + " \"gender\", \n", + " \"driver_state\", \n", + " \"driver_city\", \n", + " \"dl_state\",\n", + " \"arrest_type\"\n", + "]\n", + "data = []\n", + "with open(filename, 'r') as f:\n", + " objects = ijson.items(f, 'data.item')\n", + " for row in objects:\n", + " selected_row = []\n", + " for item in good_columns:\n", + " selected_row.append(row[column_names.index(item)])\n", + " data.append(selected_row)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "df[\"date_of_stop\"]=pd.to_datetime(df[\"date_of_stop\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "df = pd.DataFrame(data, columns=good_columns)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "df.to_csv(\"rows.csv\",sep=\",\", encoding='UTF-8')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "ds = pd.read_csv(\"rows.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from pymongo import MongoClient\n", + "from datetime import datetime\n", + "client = MongoClient()\n", + "client = MongoClient('localhost', 27017)\n", + "db = client['Assignment']\n", + "posts = db.collections\n", + "posts.insert_many(df.to_dict('record'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + " client.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "client = MongoClient('localhost', 27017)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "db = client['Assignment']\n", + "collections = db.collections" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "#Q5. how many stops are made by car color\n", + "car_stops = collections.aggregate([{\"$group\" : {\"_id\":\"$color\", \"count\":{\"$sum\":1}}}])\n", + "for No in car_stops:\n", + " print No" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Q6 Find the value counts of arrest type\n", + "arrest_type =collections.aggregate([{\"$group\" : {\"_id\":\"$arrest_type\", \"count\":{\"$sum\":1}}}])\n", + "for No in arrest_type:\n", + " print No" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "df[\"date_of_stop\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Q7 Find which days result in the most traffic stop\n", + "Days=collections.aggregate([{\"$project\":{\"No_of_daysRes\": { \"$dayOfWeek\": \"$date_of_stop\" },} },{\n", + " \"$group\": {\"_id\":\"$No_of_daysRes\", \"count\":{\"$sum\":1}}}])\n", + "\n", + "for doc in Days:\n", + " print doc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.11+" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}