From ac4c390784cec5de7b180acb52105d41aa7d7d3e Mon Sep 17 00:00:00 2001 From: Saifinbox Date: Wed, 22 Mar 2017 16:49:13 +0500 Subject: [PATCH] Add files via upload --- saif_khi_python_assignment1.ipynb | 110 ++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 saif_khi_python_assignment1.ipynb diff --git a/saif_khi_python_assignment1.ipynb b/saif_khi_python_assignment1.ipynb new file mode 100644 index 0000000..5626104 --- /dev/null +++ b/saif_khi_python_assignment1.ipynb @@ -0,0 +1,110 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "C:\\Windows\\system32\n", + "Index(['age', 'bp', 'sg', 'al', 'su', 'rbc', 'pc', 'pcc', 'ba', 'bgr', 'bu',\n", + " 'sc', 'sod', 'pot', 'hemo', 'pcv', 'wbcc', 'rbcc', 'htn', 'dm', 'cad',\n", + " 'appet', 'pe', 'ane', 'class'],\n", + " dtype='object')\n", + "[nan 'yes' 'no' ' yes' '\\tno' '\\tyes']\n", + "['nan' 'yes' 'no']\n", + "60\n", + "47\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEhCAYAAABx6WukAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEfxJREFUeJzt3XuMpfVdx/H3x10KvcitTFdkobvVtWahVOpI0TZtFW2p\nYJdoJUtrs1rMxgRbbJpU8BK0CQnGa71gsgrtahHEWgveate1N7WlLhS5LZRNgbK4y47WYltbKPD1\nj/NsHJbZnZlz5uwz85v3K5mc8/ye55nzSU7yOc95znNJVSFJatc39R1AkjReFr0kNc6il6TGWfSS\n1DiLXpIaZ9FLUuMseklqnEUvSY2z6CWpcSv7DgBwwgkn1Jo1a/qOIUlLyi233PKfVTUx23KLoujX\nrFnDjh07+o4hSUtKkgfnspy7biSpcRa9JDXOopekxln0ktQ4i16SGmfRS1LjLHpJapxFL0mNWxQn\nTB1uay79274jjNUDV57bdwRJi8isW/RJrkmyL8mdM8x7Z5JKcsK0scuS7Epyb5LXLXRgSdL8zGXX\nzfuAcw4cTHIy8FrgC9PG1gMbgVO7da5KsmJBkkqShjJr0VfVJ4AvzjDrt4F3ATVtbANwfVU9VlX3\nA7uAMxciqCRpOEP9GJtkA/BwVf37AbNOAh6aNr27G5Mk9WTeP8YmeQ7wCwx22wwtyWZgM8App5wy\nyr+SJB3CMFv03wasBf49yQPAauDWJN8CPAycPG3Z1d3YM1TVlqqarKrJiYlZL6csSRrSvIu+qu6o\nqhdU1ZqqWsNg98zLqmovcBOwMcmRSdYC64DPLGhiSdK8zOXwyuuATwEvTrI7yUUHW7aq7gJuAO4G\nPgxcXFVPLlRYSdL8zbqPvqounGX+mgOmrwCuGC2WJGmheAkESWqcRS9JjbPoJalxFr0kNc6il6TG\nWfSS1DiLXpIaZ9FLUuMseklqnEUvSY2z6CWpcRa9JDXOopekxln0ktQ4i16SGmfRS1LjLHpJapxF\nL0mNs+glqXEWvSQ1btaiT3JNkn1J7pw29utJ7klye5K/SnLstHmXJdmV5N4krxtXcEnS3Mxli/59\nwDkHjG0DTquq04HPAZcBJFkPbARO7da5KsmKBUsrSZq3WYu+qj4BfPGAsY9U1RPd5KeB1d3zDcD1\nVfVYVd0P7ALOXMC8kqR5Woh99G8F/r57fhLw0LR5u7sxSVJPRir6JL8IPAFcO8S6m5PsSLJjampq\nlBiSpEMYuuiT/CRwHvDmqqpu+GHg5GmLre7GnqGqtlTVZFVNTkxMDBtDkjSLoYo+yTnAu4A3VNX/\nTpt1E7AxyZFJ1gLrgM+MHlOSNKyVsy2Q5DrgNcAJSXYDlzM4yuZIYFsSgE9X1c9U1V1JbgDuZrBL\n5+KqenJc4SVJs5u16KvqwhmGrz7E8lcAV4wSSpK0cDwzVpIaZ9FLUuMseklqnEUvSY2z6CWpcRa9\nJDXOopekxln0ktQ4i16SGmfRS1LjLHpJapxFL0mNs+glqXEWvSQ1zqKXpMZZ9JLUOItekhpn0UtS\n4yx6SWqcRS9JjZu16JNck2RfkjunjR2fZFuS+7rH46bNuyzJriT3JnnduIJLkuZmLlv07wPOOWDs\nUmB7Va0DtnfTJFkPbARO7da5KsmKBUsrSZq3WYu+qj4BfPGA4Q3A1u75VuD8aePXV9VjVXU/sAs4\nc4GySpKGMOw++lVVtad7vhdY1T0/CXho2nK7u7FnSLI5yY4kO6ampoaMIUmazcg/xlZVATXEeluq\narKqJicmJkaNIUk6iGGL/pEkJwJ0j/u68YeBk6ctt7obkyT1ZNiivwnY1D3fBNw4bXxjkiOTrAXW\nAZ8ZLaIkaRQrZ1sgyXXAa4ATkuwGLgeuBG5IchHwIHABQFXdleQG4G7gCeDiqnpyTNklSXMwa9FX\n1YUHmXX2QZa/ArhilFCSpIXjmbGS1DiLXpIaZ9FLUuMseklqnEUvSY2z6CWpcRa9JDXOopekxln0\nktQ4i16SGmfRS1LjLHpJapxFL0mNs+glqXEWvSQ1zqKXpMZZ9JLUOItekhpn0UtS4yx6SWrcSEWf\n5B1J7kpyZ5LrkhyV5Pgk25Lc1z0et1BhJUnzN3TRJzkJeDswWVWnASuAjcClwPaqWgds76YlST0Z\nddfNSuDZSVYCzwH+A9gAbO3mbwXOH/E1JEkjGLroq+ph4DeALwB7gEer6iPAqqra0y22F1g10/pJ\nNifZkWTH1NTUsDEkSbMYZdfNcQy23tcC3wo8N8lPTF+mqgqomdavqi1VNVlVkxMTE8PGkCTNYpRd\nNz8I3F9VU1X1DeCDwPcBjyQ5EaB73Dd6TEnSsEYp+i8AZyV5TpIAZwM7gZuATd0ym4AbR4soSRrF\nymFXrKqbk3wAuBV4AvgssAV4HnBDkouAB4ELFiKoJGk4Qxc9QFVdDlx+wPBjDLbuJUmLgGfGSlLj\nLHpJapxFL0mNs+glqXEWvSQ1zqKXpMZZ9JLUOItekhpn0UtS4yx6SWqcRS9JjbPoJalxFr0kNc6i\nl6TGWfSS1DiLXpIaZ9FLUuMseklqnEUvSY2z6CWpcSMVfZJjk3wgyT1Jdib53iTHJ9mW5L7u8biF\nCitJmr9Rt+jfA3y4qr4TeCmwE7gU2F5V64Dt3bQkqSdDF32SY4BXAVcDVNXjVfUlYAOwtVtsK3D+\nqCElScNbOcK6a4Ep4L1JXgrcAlwCrKqqPd0ye4FVM62cZDOwGeCUU04ZIYaWmzWX/m3fEcbqgSvP\n7TuCGjPKrpuVwMuAP6yqM4CvcsBumqoqoGZauaq2VNVkVU1OTEyMEEOSdCijFP1uYHdV3dxNf4BB\n8T+S5ESA7nHfaBElSaMYuuirai/wUJIXd0NnA3cDNwGburFNwI0jJZQkjWSUffQAbwOuTfIs4PPA\nTzH48LghyUXAg8AFI76GJGkEIxV9Vd0GTM4w6+xR/q8kaeF4ZqwkNc6il6TGWfSS1DiLXpIaZ9FL\nUuMseklqnEUvSY2z6CWpcRa9JDXOopekxln0ktQ4i16SGmfRS1LjRr1MsSTNmbeB7Idb9JLUOIte\nkhpn0UtS4yx6SWqcRS9JjbPoJalxIxd9khVJPpvkb7rp45NsS3Jf93jc6DElScNaiC36S4Cd06Yv\nBbZX1TpgezctSerJSEWfZDVwLvDH04Y3AFu751uB80d5DUnSaEbdov8d4F3AU9PGVlXVnu75XmDV\nTCsm2ZxkR5IdU1NTI8aQJB3M0EWf5DxgX1XdcrBlqqqAOsi8LVU1WVWTExMTw8aQJM1ilGvdvAJ4\nQ5IfBo4Cjk7yfuCRJCdW1Z4kJwL7FiKoJGk4Q2/RV9VlVbW6qtYAG4F/qqqfAG4CNnWLbQJuHDml\nJGlo4ziO/krgh5LcB/xgNy1J6smCXKa4qj4GfKx7/l/A2QvxfyVJo/PMWElqnEUvSY2z6CWpcRa9\nJDXOopekxln0ktQ4i16SGmfRS1LjLHpJapxFL0mNs+glqXEWvSQ1zqKXpMZZ9JLUOItekhpn0UtS\n4yx6SWqcRS9JjbPoJalxFr0kNW7ook9ycpKPJrk7yV1JLunGj0+yLcl93eNxCxdXkjRfo2zRPwG8\ns6rWA2cBFydZD1wKbK+qdcD2blqS1JOhi76q9lTVrd3zLwM7gZOADcDWbrGtwPmjhpQkDW9B9tEn\nWQOcAdwMrKqqPd2svcCqg6yzOcmOJDumpqYWIoYkaQYjF32S5wF/CfxcVf3P9HlVVUDNtF5Vbamq\nyaqanJiYGDWGJOkgRir6JEcwKPlrq+qD3fAjSU7s5p8I7BstoiRpFKMcdRPgamBnVf3WtFk3AZu6\n55uAG4ePJ0ka1coR1n0F8BbgjiS3dWO/AFwJ3JDkIuBB4ILRIkqSRjF00VfVPwM5yOyzh/2/kqSF\n5ZmxktQ4i16SGmfRS1LjLHpJapxFL0mNs+glqXEWvSQ1zqKXpMZZ9JLUOItekhpn0UtS4yx6SWqc\nRS9JjbPoJalxFr0kNc6il6TGWfSS1DiLXpIaZ9FLUuMseklq3NiKPsk5Se5NsivJpeN6HUnSoY2l\n6JOsAP4AeD2wHrgwyfpxvJYk6dDGtUV/JrCrqj5fVY8D1wMbxvRakqRDWDmm/3sS8NC06d3Ay6cv\nkGQzsLmb/EqSe8eUZTE4AfjPw/Vi+bXD9UrLhu/f0tX6e/fCuSw0rqKfVVVtAbb09fqHU5IdVTXZ\ndw4Nx/dv6fK9GxjXrpuHgZOnTa/uxiRJh9m4iv7fgHVJ1iZ5FrARuGlMryVJOoSx7LqpqieS/Czw\nD8AK4Jqqumscr7VELItdVA3z/Vu6fO+AVFXfGSRJY+SZsZLUOItekhpn0UtS4yx6SWpcbydMSYtR\nkpcdan5V3Xq4skgLxaNuxiTJjwK/BrwASPdXVXV0r8F0SEk+eojZVVU/cNjCaF6S/B5w0EKrqrcf\nxjiLikU/Jkl2AT9SVTv7ziItB0k2HWp+VW09XFkWG4t+TJL8S1W9ou8cGl6S0xhcZvuo/WNV9Sf9\nJZKGY9GPSZL3AN8CfAh4bP94VX2wt1CasySXA69hUPR/x+DeCv9cVW/sM5dml2QC+Hme+SG9bHe7\nedTN+BwN/C/wWuBHur/zek2k+XgjcDawt6p+CngpcEy/kTRH1wI7gbXArwIPMLj+1rLlUTdj0pWD\nlq6vVdVTSZ5IcjSwj6dfkVWL1/Or6uokl1TVx4GPJ7HotfCSHAVcBJzK078+vrW3UJqPHUmOBf4I\nuAX4CvCpfiNpjr7RPe5Jci7wH8DxPebpnfvoxyTJXwD3AG8C3g28GdhZVZf0GkzzlmQNcHRV3d5z\nFM1BkvOATzL4BvZ7DHaj/mpVLdtLpVv0Y5Lks1V1RpLbq+r0JEcAn6yqs/rOprlJcjqwhmnffP0x\nXUuRu27GZ//Xxy91h+ntZXDylJaAJNcApwN3AU91wwVY9ItckrXA23jmh/Qb+srUN4t+fLYkOQ74\nJQZ313oe8Mv9RtI8nFVV6/sOoaF8CLga+Gv+/0N6WbPox+dPgR9jsFWx/4y8Vb2l0Xx9Ksn6qrq7\n7yCat69X1e/2HWIxcR/9mCT5MPAogyM2ntw/XlW/2VsozVmSVzP4JraXwQlv+69VdHqvwTSrJG8C\n1gEf4eknKy7bC9K5RT8+q6vqnL5DaGhXA28B7sCv/0vNSxi8dz/A039fWbZnxlr04/OvSV5SVXf0\nHURDmVrOh+MtcT8OvKiqHu87yGJh0Y/PK4GfTHI/fvVfij6b5M8Y/KDntYqWljuBYxmczSws+nF6\nfd8BNJJnMyj4104b8/DKpeFY4J7usgfTP6Q9vFILq6oe7DuDhpNkBXB7Vf1231k0lMv7DrDYeNSN\nNIMkn6mqM/vOofnpPqT/saq+v+8si4lb9NLM/iXJ7wN/Dnx1/+ByPkRvKaiqJ5M8leSYqnq07zyL\nhVv00gwOcu9Y7xm7BCS5ETgD2MbTP6S9Z6wkteBg9471nrGSnibJMQx+1HtVN/Rx4N3uDlgakjwL\n+I5u8t6q+sahlm+dtxKUZnYN8GXggu7vf4D39ppIc5LkNcB9wB8AVwGfS/KqQ67UOLfopRkkua2q\nvmu2MS0+SW4B3lRV93bT3wFcV1Xf3W+y/rhFL83sa0leuX8iySuAr/WYR3N3xP6SB6iqzwFH9Jin\nd27RSzNI8l0MLi99TDf038Ambye4+HU3jXkKeH839GZgxXK+X7NFL80gyZHAG4FvY3BK/aMMDq98\nd6/BNKvuvbuYwfWmYHD/2Kuq6rGDr9U2i16aQXc/gS8Bt+L9BLTEWfTSDJLcWVWn9Z1D89f9nvIr\nwAt5+j1jX9RXpr55CQRpZt5PYOm6GngHB9zdbTlzi16aQZK7gW8HvJ/AEpPk5qp6ed85FhOLXppB\nkhfONO7lpxe/JFcCKxjcO8B7xmLRS2qMF6R7JotekhrnmbGSmpLk+Ul+N8mtSW5J8p4kz+87V58s\nekmtuR6YAn6MwUlvUwxuILNsuetGUlNmOgciyR1V9ZK+MvXNLXpJrflIko1Jvqn7uwD4h75D9ckt\neklNSPJloBic8/Bc/v9kqRXAV6rq6L6y9c0zYyU1oaq+ef/zJMcD64Cj+ku0eFj0kpqS5KeBS4DV\nwG3AWcC/Amf3matP7qOX1JpLgO8BHqyq7wfOYHCZ6WXLopfUmq9X1ddhcG36qroHeHHPmXrlrhtJ\nrdmd5FjgQ8C2JP8NLOtrFHnUjaRmJXk1g9tBfriqHu87T18seklqnPvoJalxFr0kNc6il6TGWfSS\n1Lj/AwnUxn6BFjaHAAAAAElFTkSuQmCC\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "180.0\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import os\n", + "\n", + "print(os.getcwd())\n", + "\n", + "def print_column_unique(df):\n", + " for x in range(0,len(df.columns)):\n", + " print(str(df.columns[x])+\" \" +str(df.iloc[:,x].unique()))\n", + "def clean(df):\n", + " func=lambda x: str(x).strip()\n", + " df=df.applymap(func)\n", + " df=df.replace(\"?\",float(\"nan\"))\n", + " return df\n", + "\n", + "df = pd.read_csv(\"C:/Users/muhammad.saif/Documents/Updated Dataset/Dataset/chronic_kidney_disease_updated.csv\",sep=',', na_values=['?'])\n", + "print(df.columns)\n", + "#print_column_unique(df)\n", + "\n", + "print(df.dm.unique())\n", + "df=clean(df)\n", + "print(df.dm.unique())\n", + "\n", + "df[['age', 'bp', 'bgr', 'bu', 'sc', 'sod', 'pot', 'hemo', 'pcv', 'wbcc', 'rbcc']]=df[['age', 'bp', 'bgr', 'bu', 'sc', 'sod', 'pot', 'hemo', 'pcv', 'wbcc', 'rbcc']].astype(float)\n", + "print(len(df[(df[\"class\"]==\"ckd\") & (df[\"rbc\"]==\"normal\")]))\n", + "print(len(df[(df[\"class\"]==\"ckd\") & (df[\"rbc\"]==\"abnormal\")]))\n", + "df[df[\"class\"]==\"ckd\"].rbc.value_counts().plot(kind=\"bar\")\n", + "plt.show()\n", + "\n", + "print(df.bp.max())\n", + "#print_column_unique(df)\n", + "#print(df.dm.unique())\n", + "df.to_csv(\"clean_chronic_kidney_disease.csv\",sep=\",\",index=False,encoding=\"utf-8\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}