diff --git a/tmpo.ipynb b/tmpo.ipynb index 8855169..1e79c58 100644 --- a/tmpo.ipynb +++ b/tmpo.ipynb @@ -3,20 +3,21 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ - "%matplotlib inline" + "import pandas as pd\n", + "import os\n", + "import tmpo\n", + "\n", + "%matplotlib inline\n", + "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "token = \"b371402dc767cc83e41bc294b63f9586\"\n", @@ -27,17 +28,19 @@ "}" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Init session, add sensors and sync" + ] + }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ - "import os\n", - "import tmpo\n", - "\n", "s = tmpo.Session(path=os.getcwd())\n", "for room in house:\n", " for sensor in house[room]:\n", @@ -47,9 +50,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "s.debug = True" @@ -58,50 +59,78 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "s.sync()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get info about sensor" + ] + }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "start = pd.Timestamp('20160101')" + "sensor = house[room][\"electricity\"]\n", + "\n", + "type, data_type, subtype = s.get_types(sensor)\n", + "unit = s.get_unit(sensor)\n", + "\n", + "print(type, data_type, subtype, unit)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get raw data" ] }, { "cell_type": "code", "execution_count": null, - 
"metadata": { - "collapsed": false - }, + "metadata": {}, + "outputs": [], + "source": [ + "s.debug = False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "df = s.dataframe([house[room][\"electricity\"]], head=start)" + "start = pd.Timestamp('20160101')\n", + "end = pd.Timestamp('20160501')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = s.dataframe(sids=[house[room][\"electricity\"]], head=start, tail=end)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "scrolled": false }, "outputs": [], "source": [ - "import matplotlib.pyplot as plt\n", - "\n", "room = \"cellar\"\n", - "plt.figure(figsize=(16,10))\n", + "plt.figure(figsize=(16,5))\n", "ax1=plt.subplot()\n", "ax1.grid()\n", "ax1.set_ylabel(u'Wh')\n", @@ -110,11 +139,147 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { "collapsed": true }, + "source": [ + "## Get resampled data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The difference between raw and resampled data is that raw data has data with an uneven time axis. This makes it difficult to calculate power values or flow rates." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = s.get_data(sids=[house[room][\"electricity\"]], head=start, tail=end, resolution='1h')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As resolution you can pick from these resolutions, and you can precede them with a number:\n", + "* B business day frequency\n", + "* C custom business day frequency (experimental)\n", + "* D calendar day frequency\n", + "* W weekly frequency\n", + "* M month end frequency\n", + "* BM business month end frequency\n", + "* MS month start frequency\n", + "* BMS business month start frequency\n", + "* Q quarter end frequency\n", + "* BQ business quarter endfrequency\n", + "* QS quarter start frequency\n", + "* BQS business quarter start frequency\n", + "* A year end frequency\n", + "* BA business year end frequency\n", + "* AS year start frequency\n", + "* BAS business year start frequency\n", + "* H hourly frequency\n", + "* T minutely frequency\n", + "* S secondly frequency\n", + "* L milliseconds\n", + "* U microseconds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "room = \"cellar\"\n", + "plt.figure(figsize=(16,5))\n", + "ax1=plt.subplot()\n", + "ax1.grid()\n", + "ax1.set_ylabel(u'Wh')\n", + "plt.title(room + \" electricity\")\n", + "df.plot(ax=ax1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now you can easily calculate power by taking the difference between values." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "P = df.diff()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "room = \"cellar\"\n", + "plt.figure(figsize=(16,5))\n", + "ax1=plt.subplot()\n", + "ax1.grid()\n", + "ax1.set_ylabel(u'W')\n", + "plt.title(room + \" electricity\")\n", + "P.plot(ax=ax1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Mind your dimensions! Since we had Wh values resampled to hourly basis, the `diff` gets us Wh/h, or W! If you choose another resolution you need to bring a factor into account." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export to CSV, Excel..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "P.to_csv('tmpo_example.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "P.to_excel('tmpo_example.xlsx')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pandas has many other export methods, see https://pandas.pydata.org/pandas-docs/stable/io.html" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], "source": [] } @@ -139,5 +304,5 @@ } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 1 } diff --git a/tmpo/__init__.py b/tmpo/__init__.py index ab1e46c..4f278c3 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -1,5 +1,5 @@ __title__ = "tmpo" -__version__ = "0.2.10" +__version__ = "0.2.11" __build__ = 0x000100 __author__ = "Bart Van Der Meerssche" __license__ = "MIT" @@ -588,3 +588,121 @@ def _blocktail(self, lvl, bid): def _dprintf(self, fmt, *args): if self.debug: print(fmt % args) + + def get_types(self, sid): + """ + Get commodity and data type (counter or gauge), and subtype (optional) + + Parameters + ---------- + sid 
: str + + Returns + ------- + (str, str, str) + """ + last_block = self._last_block(sid=sid) + if last_block is None: + raise LookupError('Sensor {} has no data'.format(sid)) + config = last_block['h']['cfg'] + com = config['type'] + dtype = config['data_type'] + subtype = config.get('subtype') + return com, dtype, subtype + + def get_sensor_data(self, sid, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None, diff=False): + """ + Get data for a sensor in a format ready for analysis + + Parameters + ---------- + sid : str + head : int | pd.Timestamp + tail : int | pd.Timestamp + resolution : str + tz : str + IANA time zone + diff : bool + default False + if the sensor is a counter, return the difference between values + + Returns + ------- + pd.Series + """ + try: + commodity, data_type, subtype = self.get_types(sid=sid) + except LookupError: # no data + return pd.Series() + + ts = self.series(sid=sid, head=head, tail=tail) + if ts.dropna().empty: + return ts + if tz is not None: + ts = ts.tz_convert(tz=tz) + + if data_type == 'gauge': + ts = ts.resample(rule=resolution).mean() + + elif data_type == 'counter': + newindex = ts.resample(rule=resolution).first().index + ts = ts.reindex(ts.index.union(newindex)) + ts = ts.interpolate(method='time') + ts = ts.reindex(newindex) + if diff: + ts = ts.diff() + + else: + raise NotImplementedError("I don't know the data type {}".format(data_type)) + + return ts.dropna() + + def get_data(self, sids, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None, diff=False): + """ + Get data for multiple sensors in a format ready for analysis + + Parameters + ---------- + sids : [str] + head : int | pd.Timestamp + tail : int | pd.Timestamp + resolution : str + tz : str + IANA time zone + diff : bool + default False + if a sensor is a counter, return the difference between values + + Returns + ------- + pd.DataFrame + """ + series = (self.get_sensor_data(sid=sid, head=head, tail=tail, resolution=resolution, tz=tz, diff=diff) + for sid in sids) 
+ series = (s for s in series if not s.empty) + try: + df = pd.concat(series, axis=1) + except ValueError: + df = pd.DataFrame() + return df + + def get_unit(self, sid): + """ + Get unit of a sensor + + Parameters + ---------- + sid : str + + Returns + ------- + str + """ + last_block = self._last_block(sid=sid) + if last_block is None: + return None + config = last_block['h']['cfg'] + unit = config.get('unit') + if unit == '': + return None + return unit