Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
334 changes: 334 additions & 0 deletions ZawarKhan_khi_MongoDB.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,334 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import codecs\n",
"import csv\n",
"import json\n",
"from datetime import datetime\n",
"\n",
"import ijson\n",
"import numpy as np\n",
"import pandas as pd\n",
"from pymongo import MongoClient"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Stream only the column descriptors out of the JSON export.\n",
"filename = \"rows.json\"\n",
"# ijson expects a binary file object (text-mode input is deprecated).\n",
"with open(filename, 'rb') as f:\n",
"    objects = ijson.items(f, 'meta.view.columns.item')\n",
"    # Materialise inside the `with` block: the generator reads lazily from f.\n",
"    columns = list(objects)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{u'name': u'sid', u'format': {}, u'dataTypeName': u'meta_data', u'fieldName': u':sid', u'renderTypeName': u'meta_data', u'position': 0, u'id': -1, u'flags': [u'hidden']}\n"
]
}
],
"source": [
"# Peek at the first column descriptor to see which metadata keys it carries.\n",
"first_column = columns[0]\n",
"print(first_column)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[u':sid',\n",
" u':id',\n",
" u':position',\n",
" u':created_at',\n",
" u':created_meta',\n",
" u':updated_at',\n",
" u':updated_meta',\n",
" u':meta',\n",
" u'date_of_stop',\n",
" u'time_of_stop',\n",
" u'agency',\n",
" u'subagency',\n",
" u'description',\n",
" u'location',\n",
" u'latitude',\n",
" u'longitude',\n",
" u'accident',\n",
" u'belts',\n",
" u'personal_injury',\n",
" u'property_damage',\n",
" u'fatal',\n",
" u'commercial_license',\n",
" u'hazmat',\n",
" u'commercial_vehicle',\n",
" u'alcohol',\n",
" u'work_zone',\n",
" u'state',\n",
" u'vehicle_type',\n",
" u'year',\n",
" u'make',\n",
" u'model',\n",
" u'color',\n",
" u'violation_type',\n",
" u'charge',\n",
" u'article',\n",
" u'contributed_to_accident',\n",
" u'race',\n",
" u'gender',\n",
" u'driver_city',\n",
" u'driver_state',\n",
" u'dl_state',\n",
" u'arrest_type',\n",
" u'geolocation']"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Collect each descriptor's fieldName, in file order; a row's values are\n",
"# later looked up by position in this list.\n",
"column_names = []\n",
"for column_meta in columns:\n",
"    column_names.append(column_meta[\"fieldName\"])\n",
"column_names"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Columns kept for the analysis, in the order they will appear in the DataFrame.\n",
"good_columns = [\n",
"    \"date_of_stop\",\n",
"    \"time_of_stop\",\n",
"    \"agency\",\n",
"    \"subagency\",\n",
"    \"description\",\n",
"    \"location\",\n",
"    \"latitude\",\n",
"    \"longitude\",\n",
"    \"vehicle_type\",\n",
"    \"year\",\n",
"    \"make\",\n",
"    \"model\",\n",
"    \"color\",\n",
"    \"violation_type\",\n",
"    \"race\",\n",
"    \"gender\",\n",
"    \"driver_state\",\n",
"    \"driver_city\",\n",
"    \"dl_state\",\n",
"    \"arrest_type\"\n",
"]\n",
"\n",
"# Resolve each kept column's position once, instead of doing a linear\n",
"# list.index() search for every field of every row.\n",
"good_indexes = [column_names.index(name) for name in good_columns]\n",
"\n",
"data = []\n",
"# ijson expects a binary file object (text-mode input is deprecated).\n",
"with open(filename, 'rb') as f:\n",
"    # Stream the rows one at a time and keep only the selected fields.\n",
"    for row in ijson.items(f, 'data.item'):\n",
"        data.append([row[index] for index in good_indexes])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Build the DataFrame first: the datetime conversion in the next cell needs `df`.\n",
"df = pd.DataFrame(data, columns=good_columns)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Convert date_of_stop to datetime64 values.\n",
"df[\"date_of_stop\"] = pd.to_datetime(df[\"date_of_stop\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Export the selected columns to CSV.\n",
"# index=False keeps the row index out of the file, so reading it back\n",
"# does not produce a spurious \"Unnamed: 0\" column.\n",
"df.to_csv(\"rows.csv\", sep=\",\", encoding='UTF-8', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Read the exported CSV back into a DataFrame.\n",
"ds = pd.read_csv(filepath_or_buffer=\"rows.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Connect to the local MongoDB server and load the DataFrame rows into it.\n",
"client = MongoClient('localhost', 27017)\n",
"db = client['Assignment']\n",
"# The target collection is literally named \"collections\".\n",
"posts = db.collections\n",
"# 'records' yields one {column: value} dict per row, i.e. one document per stop.\n",
"# NOTE: insert_many appends, so re-running this cell inserts the rows again.\n",
"posts.insert_many(df.to_dict('records'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Close the connection used for the bulk load.\n",
"client.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Open a new connection to the local MongoDB server for the queries below.\n",
"client = MongoClient(host='localhost', port=27017)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Handles to the Assignment database and its \"collections\" collection.\n",
"db = client.Assignment\n",
"collections = db['collections']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Q5. How many stops are made by car color\n",
"# Group the documents by color and add 1 per document to get a count per color.\n",
"car_stops = collections.aggregate([\n",
"    {\"$group\": {\"_id\": \"$color\", \"count\": {\"$sum\": 1}}},\n",
"])\n",
"for color_count in car_stops:\n",
"    # print() with a single argument behaves the same on Python 2 and 3.\n",
"    print(color_count)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Q6 Find the value counts of arrest type\n",
"# Group the documents by arrest_type and add 1 per document to get each count.\n",
"arrest_type = collections.aggregate([\n",
"    {\"$group\": {\"_id\": \"$arrest_type\", \"count\": {\"$sum\": 1}}},\n",
"])\n",
"for type_count in arrest_type:\n",
"    # print() with a single argument behaves the same on Python 2 and 3.\n",
"    print(type_count)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Display the date_of_stop column.\n",
"df.loc[:, \"date_of_stop\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Q7 Find which days result in the most traffic stops\n",
"# $dayOfWeek maps a date to 1 (Sunday) .. 7 (Saturday); it needs\n",
"# date_of_stop to be stored as a date value, not a string.\n",
"Days = collections.aggregate([\n",
"    {\"$project\": {\"No_of_daysRes\": {\"$dayOfWeek\": \"$date_of_stop\"}}},\n",
"    {\"$group\": {\"_id\": \"$No_of_daysRes\", \"count\": {\"$sum\": 1}}},\n",
"    # $group output order is unspecified; sort so the busiest day comes first.\n",
"    {\"$sort\": {\"count\": -1}},\n",
"])\n",
"\n",
"for doc in Days:\n",
"    # print() with a single argument behaves the same on Python 2 and 3.\n",
"    print(doc)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11+"
}
},
"nbformat": 4,
"nbformat_minor": 2
}