From f22a51c8607d16ec877a846aefbd2f69fe9b7121 Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Mon, 18 Dec 2017 21:07:05 +0100 Subject: [PATCH 01/24] get types --- tmpo/__init__.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tmpo/__init__.py b/tmpo/__init__.py index 9acec67..f1a61e9 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -565,3 +565,22 @@ def _blocktail(self, lvl, bid): def _dprintf(self, fmt, *args): if self.debug: print(fmt % args) + + def get_types(self, sid): + """ + Get commodity and data type (counter or gauge), and subtype (optional) + + Parameters + ---------- + sid : str + + Returns + ------- + (str, str, str) + """ + last_block = self._last_block(sid=sid) + config = last_block['h']['cfg'] + com = config['type'] + dtype = config['data_type'] + subtype = config.get('subtype') + return com, dtype, subtype \ No newline at end of file From 80d95cb64bbffc9fd5e0032d9f433581c5810bdf Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Mon, 18 Dec 2017 21:48:08 +0100 Subject: [PATCH 02/24] `get data` and `get sensor data` --- tmpo/__init__.py | 63 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/tmpo/__init__.py b/tmpo/__init__.py index f1a61e9..a9676ee 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -583,4 +583,65 @@ def get_types(self, sid): com = config['type'] dtype = config['data_type'] subtype = config.get('subtype') - return com, dtype, subtype \ No newline at end of file + return com, dtype, subtype + + def get_sensor_data(self, sid, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None): + """ + Get data for a sensor in a format ready for analysis + + Parameters + ---------- + sid : str + head : int | pd.Timestamp + tail : int | pd.Timestamp + resolution : str + tz : str + IANA time zone + + Returns + ------- + pd.Series + """ + commodity, data_type, subtype = self.get_types(sid=sid) + + ts = self.series(sid=sid, head=head, tail=tail) + if ts.dropna().empty: + return ts + if tz is not None: + ts = ts.tz_convert(tz=tz) + + if data_type == 'gauge': + ts = ts.resample(rule=resolution).mean() + + elif data_type == 'counter': + newindex = ts.resample(rule=resolution).first().index + ts = ts.reindex(ts.index.union(newindex)) + ts = ts.interpolate(method='time') + ts = ts.reindex(newindex) + + else: + raise NotImplementedError("I don't know the data type {}".format(data_type)) + + return ts.dropna() + + def get_data(self, sids, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None): + """ + Get data for a sensor in a format ready for analysis + + Parameters + ---------- + sid : [str] + head : int | pd.Timestamp + tail : int | pd.Timestamp + resolution : str + tz : str + IANA time zone + + Returns + ------- + pd.DataFrame + """ + series = [self.get_sensor_data(sid=sid, head=head, tail=tail, resolution=resolution, tz=tz) + for sid in sids] + df = pd.concat(series, axis=1) + return df \ No newline at end of file From ea18ed1beccabaac6e42aa2d523a0a95c970a277 Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Mon, 18 Dec 2017 21:50:06 +0100 Subject: [PATCH 03/24] `get data` and `get sensor data` --- tmpo/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tmpo/__init__.py b/tmpo/__init__.py index a9676ee..ee338dd 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -1,5 +1,5 @@ __title__ = "tmpo" -__version__ = "0.2.8" +__version__ = "0.2.11" __build__ = 0x000100 __author__ = "Bart Van Der Meerssche" __license__ = "MIT" From 87906dff61db0bd1836be58d3c8ba144421ac654 Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Mon, 18 Dec 2017 21:53:36 +0100 Subject: [PATCH 04/24] typo --- tmpo/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tmpo/__init__.py b/tmpo/__init__.py index ee338dd..8681326 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -644,4 +644,4 @@ def get_data(self, sids, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None): series = [self.get_sensor_data(sid=sid, head=head, tail=tail, resolution=resolution, tz=tz) for sid in sids] df = pd.concat(series, axis=1) - return df \ No newline at end of file + return df From 8a1e386496f34b4c8e4a351ad43488ccd9ea6fb3 Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Thu, 21 Dec 2017 15:10:17 +0100 Subject: [PATCH 05/24] better demo --- tmpo.ipynb | 214 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 179 insertions(+), 35 deletions(-) diff --git a/tmpo.ipynb b/tmpo.ipynb index 8855169..db0624a 100644 --- a/tmpo.ipynb +++ b/tmpo.ipynb @@ -3,20 +3,21 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ - "%matplotlib inline" + "import pandas as pd\n", + "import os\n", + "import tmpo\n", + "\n", + "%matplotlib inline\n", + "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "token = \"b371402dc767cc83e41bc294b63f9586\"\n", @@ -27,17 +28,19 @@ "}" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Init session, add sensors and sync" + ] + }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ - "import os\n", - "import tmpo\n", - "\n", "s = tmpo.Session(path=os.getcwd())\n", "for room in house:\n", " for sensor in house[room]:\n", @@ -47,9 +50,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "s.debug = True" @@ -58,50 +59,57 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "s.sync()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get raw data" + ] + }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "start = pd.Timestamp('20160101')" + "s.debug = False" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ - "df = s.dataframe([house[room][\"electricity\"]], head=start)" + "start = pd.Timestamp('20160101')\n", + "end = pd.Timestamp('20160501')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = s.dataframe(sids=[house[room][\"electricity\"]], head=start, tail=end)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "scrolled": false }, "outputs": [], "source": [ - "import matplotlib.pyplot as plt\n", - "\n", "room = \"cellar\"\n", - "plt.figure(figsize=(16,10))\n", + "plt.figure(figsize=(16,5))\n", "ax1=plt.subplot()\n", "ax1.grid()\n", "ax1.set_ylabel(u'Wh')\n", @@ -110,11 +118,147 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { "collapsed": true }, + "source": [ + "## Get resampled data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The difference between raw and resampled data is that raw data has data with an uneven time axis. This makes it difficult to calculate power values or flow rates." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = s.get_data(sids=[house[room][\"electricity\"]], head=start, tail=end, resolution='1h')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As resolution you can pick from these resolutions, and you can precede them with a number:\n", + "* B business day frequency\n", + "* C custom business day frequency (experimental)\n", + "* D calendar day frequency\n", + "* W weekly frequency\n", + "* M month end frequency\n", + "* BM business month end frequency\n", + "* MS month start frequency\n", + "* BMS business month start frequency\n", + "* Q quarter end frequency\n", + "* BQ business quarter endfrequency\n", + "* QS quarter start frequency\n", + "* BQS business quarter start frequency\n", + "* A year end frequency\n", + "* BA business year end frequency\n", + "* AS year start frequency\n", + "* BAS business year start frequency\n", + "* H hourly frequency\n", + "* T minutely frequency\n", + "* S secondly frequency\n", + "* L milliseconds\n", + "* U microseconds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "room = \"cellar\"\n", + "plt.figure(figsize=(16,5))\n", + "ax1=plt.subplot()\n", + "ax1.grid()\n", + "ax1.set_ylabel(u'Wh')\n", + "plt.title(room + \" electricity\")\n", + "df.plot(ax=ax1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now you can easily calculate power by taking the difference between values." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "P = df.diff()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "room = \"cellar\"\n", + "plt.figure(figsize=(16,5))\n", + "ax1=plt.subplot()\n", + "ax1.grid()\n", + "ax1.set_ylabel(u'W')\n", + "plt.title(room + \" electricity\")\n", + "P.plot(ax=ax1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Mind your dimensions! Since we had Wh values resampled to hourly basis, the `diff` gets us Wh/h, or W! If you choose another resolution you need to bring a factor into account." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export to CSV, Excel..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "P.to_csv('tmpo_example.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "P.to_excel('tmpo_example.xlsx')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pandas has many other export methods, see https://pandas.pydata.org/pandas-docs/stable/io.html" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], "source": [] } @@ -139,5 +283,5 @@ } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 1 } From be07b609a97f33865d6d6ea4e1e3611cb50ddb67 Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Thu, 21 Dec 2017 16:06:59 +0100 Subject: [PATCH 06/24] deal with sensors without data --- tmpo/__init__.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tmpo/__init__.py b/tmpo/__init__.py index 83b5ba7..c9937cd 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -602,6 +602,8 @@ def get_types(self, sid): (str, str, str) """ last_block = self._last_block(sid=sid) + if last_block is None: + raise LookupError('Sensor {} has no data'.format(sid)) config = last_block['h']['cfg'] com = config['type'] dtype = config['data_type'] @@ -625,7 +627,10 @@ def get_sensor_data(self, sid, head=0, tail=EPOCHS_MAX, resolution='15min', tz=N ------- pd.Series """ - commodity, data_type, subtype = self.get_types(sid=sid) + try: + commodity, data_type, subtype = self.get_types(sid=sid) + except LookupError: # no data + return pd.Series() ts = self.series(sid=sid, head=head, tail=tail) if ts.dropna().empty: @@ -666,5 +671,6 @@ def get_data(self, sids, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None): """ series = [self.get_sensor_data(sid=sid, head=head, tail=tail, resolution=resolution, tz=tz) for sid in sids] + series = [s for s in series if not s.empty] df = pd.concat(series, axis=1) return df From a2e0fd977c11cf6daa63e66e3b5e62f03bca3add Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Thu, 21 Dec 2017 16:27:39 +0100 Subject: [PATCH 07/24] get unit --- tmpo.ipynb | 21 +++++++++++++++++++++ tmpo/__init__.py | 19 +++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/tmpo.ipynb b/tmpo.ipynb index db0624a..1e79c58 100644 --- a/tmpo.ipynb +++ b/tmpo.ipynb @@ -65,6 +65,27 @@ "s.sync()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get info about sensor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sensor = house[room][\"electricity\"]\n", + "\n", + "type, data_type, subtype = s.get_types(sensor)\n", + "unit = s.get_unit(sensor)\n", + "\n", + "print(type, data_type, subtype, unit)" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/tmpo/__init__.py b/tmpo/__init__.py index c9937cd..d2ee286 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -674,3 +674,22 @@ def get_data(self, sids, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None): series = [s for s in series if not s.empty] df = pd.concat(series, axis=1) return df + + def get_unit(self, sid): + """ + Get unit of a sensor + + Parameters + ---------- + sid : str + + Returns + ------- + str + """ + last_block = self._last_block(sid=sid) + if last_block is None: + return None + config = last_block['h']['cfg'] + unit = config.get('unit') + return unit From 6693b927782a49bd1b93b399c86bb89ea15e31a6 Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Thu, 21 Dec 2017 18:44:19 +0100 Subject: [PATCH 08/24] catch empty string unit --- tmpo/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tmpo/__init__.py b/tmpo/__init__.py index d2ee286..a1c1a9e 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -692,4 +692,6 @@ def get_unit(self, sid): return None config = last_block['h']['cfg'] unit = config.get('unit') + if unit == '': + return None return unit From 574ede0feb8f2e6b7a418a001268d8b7b71731bd Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Thu, 1 Mar 2018 19:41:28 +0100 Subject: [PATCH 09/24] Catch empty list of series before attempting concatenation --- tmpo/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tmpo/__init__.py b/tmpo/__init__.py index a1c1a9e..e691fe5 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -672,7 +672,10 @@ def get_data(self, sids, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None): series = [self.get_sensor_data(sid=sid, head=head, tail=tail, resolution=resolution, tz=tz) for sid in sids] series = [s for s in series if not s.empty] - df = pd.concat(series, axis=1) + if series: + df = pd.concat(series, axis=1) + else: + df = pd.DataFrame() return df def get_unit(self, sid): From 63df83165545336e6fa9c46e87f21a21990026c4 Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Thu, 1 Mar 2018 19:47:52 +0100 Subject: [PATCH 10/24] use generators to do lazy loading and be more memory efficient --- tmpo/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tmpo/__init__.py b/tmpo/__init__.py index e691fe5..3608b6f 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -669,9 +669,9 @@ def get_data(self, sids, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None): ------- pd.DataFrame """ - series = [self.get_sensor_data(sid=sid, head=head, tail=tail, resolution=resolution, tz=tz) - for sid in sids] - series = [s for s in series if not s.empty] + series = (self.get_sensor_data(sid=sid, head=head, tail=tail, resolution=resolution, tz=tz) + for sid in sids) + series = (s for s in series if not s.empty) if series: df = pd.concat(series, axis=1) else: From 11e0d93c2696f16e607637882777d8f55b5d57ce Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Thu, 1 Mar 2018 19:53:22 +0100 Subject: [PATCH 11/24] replace empty list check with try catch, because it is a generator so you can't check if it is empty or not. --- tmpo/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tmpo/__init__.py b/tmpo/__init__.py index 3608b6f..f1a6962 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -672,9 +672,9 @@ def get_data(self, sids, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None): series = (self.get_sensor_data(sid=sid, head=head, tail=tail, resolution=resolution, tz=tz) for sid in sids) series = (s for s in series if not s.empty) - if series: + try: df = pd.concat(series, axis=1) - else: + except ValueError: df = pd.DataFrame() return df From 5731656dd67ff8328cd38197e143709cf936f8e7 Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Thu, 12 Apr 2018 12:10:27 +0200 Subject: [PATCH 12/24] add option to automatically get the diff for counters --- tmpo/__init__.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tmpo/__init__.py b/tmpo/__init__.py index f1a6962..4f278c3 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -610,7 +610,7 @@ def get_types(self, sid): subtype = config.get('subtype') return com, dtype, subtype - def get_sensor_data(self, sid, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None): + def get_sensor_data(self, sid, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None, diff=False): """ Get data for a sensor in a format ready for analysis @@ -622,6 +622,9 @@ def get_sensor_data(self, sid, head=0, tail=EPOCHS_MAX, resolution='15min', tz=N resolution : str tz : str IANA time zone + diff : bool + default False + if the sensor is a counter, return the difference between values Returns ------- @@ -646,13 +649,15 @@ def get_sensor_data(self, sid, head=0, tail=EPOCHS_MAX, resolution='15min', tz=N ts = ts.reindex(ts.index.union(newindex)) ts = ts.interpolate(method='time') ts = ts.reindex(newindex) + if diff: + ts = ts.diff() else: raise NotImplementedError("I don't know the data type {}".format(data_type)) return ts.dropna() - def get_data(self, sids, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None): + def get_data(self, sids, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None, diff=False): """ Get data for a sensor in a format ready for analysis @@ -664,12 +669,15 @@ def get_data(self, sids, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None): resolution : str tz : str IANA time zone + diff : bool + default False + if the sensor is a counter, return the difference between values Returns ------- pd.DataFrame """ - series = (self.get_sensor_data(sid=sid, head=head, tail=tail, resolution=resolution, tz=tz) + series = (self.get_sensor_data(sid=sid, head=head, tail=tail, resolution=resolution, tz=tz, diff=diff) for sid in sids) series = (s for s in series if not s.empty) try: From 847c12fb7b3df2a39f9445f7164fcb3a1da74b8c Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Mon, 18 Dec 2017 21:07:05 +0100 Subject: [PATCH 13/24] get types --- tmpo/__init__.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tmpo/__init__.py b/tmpo/__init__.py index ab1e46c..5c8f6b3 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -588,3 +588,22 @@ def _blocktail(self, lvl, bid): def _dprintf(self, fmt, *args): if self.debug: print(fmt % args) + + def get_types(self, sid): + """ + Get commodity and data type (counter or gauge), and subtype (optional) + + Parameters + ---------- + sid : str + + Returns + ------- + (str, str, str) + """ + last_block = self._last_block(sid=sid) + config = last_block['h']['cfg'] + com = config['type'] + dtype = config['data_type'] + subtype = config.get('subtype') + return com, dtype, subtype \ No newline at end of file From 0db4738de713bea766a76e99395f628151d4fe40 Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Mon, 18 Dec 2017 21:48:08 +0100 Subject: [PATCH 14/24] `get data` and `get sensor data` --- tmpo/__init__.py | 63 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/tmpo/__init__.py b/tmpo/__init__.py index 5c8f6b3..9655cdf 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -606,4 +606,65 @@ def get_types(self, sid): com = config['type'] dtype = config['data_type'] subtype = config.get('subtype') - return com, dtype, subtype \ No newline at end of file + return com, dtype, subtype + + def get_sensor_data(self, sid, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None): + """ + Get data for a sensor in a format ready for analysis + + Parameters + ---------- + sid : str + head : int | pd.Timestamp + tail : int | pd.Timestamp + resolution : str + tz : str + IANA time zone + + Returns + ------- + pd.Series + """ + commodity, data_type, subtype = self.get_types(sid=sid) + + ts = self.series(sid=sid, head=head, tail=tail) + if ts.dropna().empty: + return ts + if tz is not None: + ts = ts.tz_convert(tz=tz) + + if data_type == 'gauge': + ts = ts.resample(rule=resolution).mean() + + elif data_type == 'counter': + newindex = ts.resample(rule=resolution).first().index + ts = ts.reindex(ts.index.union(newindex)) + ts = ts.interpolate(method='time') + ts = ts.reindex(newindex) + + else: + raise NotImplementedError("I don't know the data type {}".format(data_type)) + + return ts.dropna() + + def get_data(self, sids, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None): + """ + Get data for a sensor in a format ready for analysis + + Parameters + ---------- + sid : [str] + head : int | pd.Timestamp + tail : int | pd.Timestamp + resolution : str + tz : str + IANA time zone + + Returns + ------- + pd.DataFrame + """ + series = [self.get_sensor_data(sid=sid, head=head, tail=tail, resolution=resolution, tz=tz) + for sid in sids] + df = pd.concat(series, axis=1) + return df \ No newline at end of file From cb2b351b92e794253d8a1b6a73b8cd5fd50bba0d Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Mon, 18 Dec 2017 21:50:06 +0100 Subject: [PATCH 15/24] `get data` and `get sensor data` --- tmpo/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tmpo/__init__.py b/tmpo/__init__.py index 9655cdf..003c3db 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -1,5 +1,5 @@ __title__ = "tmpo" -__version__ = "0.2.10" +__version__ = "0.2.11" __build__ = 0x000100 __author__ = "Bart Van Der Meerssche" __license__ = "MIT" From ec6da77373b0cc70a9361927b377e6a1d4872ca4 Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Mon, 18 Dec 2017 21:53:36 +0100 Subject: [PATCH 16/24] typo --- tmpo/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tmpo/__init__.py b/tmpo/__init__.py index 003c3db..83b5ba7 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -667,4 +667,4 @@ def get_data(self, sids, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None): series = [self.get_sensor_data(sid=sid, head=head, tail=tail, resolution=resolution, tz=tz) for sid in sids] df = pd.concat(series, axis=1) - return df \ No newline at end of file + return df From 67cfe18116c9c14f8a78b0912de3849baf12e6cf Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Thu, 21 Dec 2017 15:10:17 +0100 Subject: [PATCH 17/24] better demo --- tmpo.ipynb | 214 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 179 insertions(+), 35 deletions(-) diff --git a/tmpo.ipynb b/tmpo.ipynb index 8855169..db0624a 100644 --- a/tmpo.ipynb +++ b/tmpo.ipynb @@ -3,20 +3,21 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ - "%matplotlib inline" + "import pandas as pd\n", + "import os\n", + "import tmpo\n", + "\n", + "%matplotlib inline\n", + "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "token = \"b371402dc767cc83e41bc294b63f9586\"\n", @@ -27,17 +28,19 @@ "}" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Init session, add sensors and sync" + ] + }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ - "import os\n", - "import tmpo\n", - "\n", "s = tmpo.Session(path=os.getcwd())\n", "for room in house:\n", " for sensor in house[room]:\n", @@ -47,9 +50,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "s.debug = True" @@ -58,50 +59,57 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "s.sync()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get raw data" + ] + }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "start = pd.Timestamp('20160101')" + "s.debug = False" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ - "df = s.dataframe([house[room][\"electricity\"]], head=start)" + "start = pd.Timestamp('20160101')\n", + "end = pd.Timestamp('20160501')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = s.dataframe(sids=[house[room][\"electricity\"]], head=start, tail=end)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "scrolled": false }, "outputs": [], "source": [ - "import matplotlib.pyplot as plt\n", - "\n", "room = \"cellar\"\n", - "plt.figure(figsize=(16,10))\n", + "plt.figure(figsize=(16,5))\n", "ax1=plt.subplot()\n", "ax1.grid()\n", "ax1.set_ylabel(u'Wh')\n", @@ -110,11 +118,147 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { "collapsed": true }, + "source": [ + "## Get resampled data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The difference between raw and resampled data is that raw data has data with an uneven time axis. This makes it difficult to calculate power values or flow rates." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = s.get_data(sids=[house[room][\"electricity\"]], head=start, tail=end, resolution='1h')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As resolution you can pick from these resolutions, and you can precede them with a number:\n", + "* B business day frequency\n", + "* C custom business day frequency (experimental)\n", + "* D calendar day frequency\n", + "* W weekly frequency\n", + "* M month end frequency\n", + "* BM business month end frequency\n", + "* MS month start frequency\n", + "* BMS business month start frequency\n", + "* Q quarter end frequency\n", + "* BQ business quarter endfrequency\n", + "* QS quarter start frequency\n", + "* BQS business quarter start frequency\n", + "* A year end frequency\n", + "* BA business year end frequency\n", + "* AS year start frequency\n", + "* BAS business year start frequency\n", + "* H hourly frequency\n", + "* T minutely frequency\n", + "* S secondly frequency\n", + "* L milliseconds\n", + "* U microseconds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "room = \"cellar\"\n", + "plt.figure(figsize=(16,5))\n", + "ax1=plt.subplot()\n", + "ax1.grid()\n", + "ax1.set_ylabel(u'Wh')\n", + "plt.title(room + \" electricity\")\n", + "df.plot(ax=ax1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now you can easily calculate power by taking the difference between values." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "P = df.diff()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "room = \"cellar\"\n", + "plt.figure(figsize=(16,5))\n", + "ax1=plt.subplot()\n", + "ax1.grid()\n", + "ax1.set_ylabel(u'W')\n", + "plt.title(room + \" electricity\")\n", + "P.plot(ax=ax1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Mind your dimensions! Since we had Wh values resampled to hourly basis, the `diff` gets us Wh/h, or W! If you choose another resolution you need to bring a factor into account." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export to CSV, Excel..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "P.to_csv('tmpo_example.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "P.to_excel('tmpo_example.xlsx')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pandas has many other export methods, see https://pandas.pydata.org/pandas-docs/stable/io.html" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], "source": [] } @@ -139,5 +283,5 @@ } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 1 } From 1c941e4ddc7b688298a95197cd50d1cb68931ff9 Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Thu, 21 Dec 2017 16:06:59 +0100 Subject: [PATCH 18/24] deal with sensors without data --- tmpo/__init__.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tmpo/__init__.py b/tmpo/__init__.py index 83b5ba7..c9937cd 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -602,6 +602,8 @@ def get_types(self, sid): (str, str, str) """ last_block = self._last_block(sid=sid) + if last_block is None: + raise LookupError('Sensor {} has no data'.format(sid)) config = last_block['h']['cfg'] com = config['type'] dtype = config['data_type'] @@ -625,7 +627,10 @@ def get_sensor_data(self, sid, head=0, tail=EPOCHS_MAX, resolution='15min', tz=N ------- pd.Series """ - commodity, data_type, subtype = self.get_types(sid=sid) + try: + commodity, data_type, subtype = self.get_types(sid=sid) + except LookupError: # no data + return pd.Series() ts = self.series(sid=sid, head=head, tail=tail) if ts.dropna().empty: @@ -666,5 +671,6 @@ def get_data(self, sids, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None): """ series = [self.get_sensor_data(sid=sid, head=head, tail=tail, resolution=resolution, tz=tz) for sid in sids] + series = [s for s in series if not s.empty] df = pd.concat(series, axis=1) return df From 8d7daf3e04cf1873b547b8f4ce9ceb7862d2c50f Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Thu, 21 Dec 2017 16:27:39 +0100 Subject: [PATCH 19/24] get unit --- tmpo.ipynb | 21 +++++++++++++++++++++ tmpo/__init__.py | 19 +++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/tmpo.ipynb b/tmpo.ipynb index db0624a..1e79c58 100644 --- a/tmpo.ipynb +++ b/tmpo.ipynb @@ -65,6 +65,27 @@ "s.sync()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get info about sensor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sensor = house[room][\"electricity\"]\n", + "\n", + "type, data_type, subtype = s.get_types(sensor)\n", + "unit = s.get_unit(sensor)\n", + "\n", + "print(type, data_type, subtype, unit)" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/tmpo/__init__.py b/tmpo/__init__.py index c9937cd..d2ee286 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -674,3 +674,22 @@ def get_data(self, sids, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None): series = [s for s in series if not s.empty] df = pd.concat(series, axis=1) return df + + def get_unit(self, sid): + """ + Get unit of a sensor + + Parameters + ---------- + sid : str + + Returns + ------- + str + """ + last_block = self._last_block(sid=sid) + if last_block is None: + return None + config = last_block['h']['cfg'] + unit = config.get('unit') + return unit From 45d75457bd5f3bea93985e34f8fa1cc53a76116e Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Thu, 21 Dec 2017 18:44:19 +0100 Subject: [PATCH 20/24] catch empty string unit --- tmpo/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tmpo/__init__.py b/tmpo/__init__.py index d2ee286..a1c1a9e 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -692,4 +692,6 @@ def get_unit(self, sid): return None config = last_block['h']['cfg'] unit = config.get('unit') + if unit == '': + return None return unit From 50ba56af338dd83c26337380ef204099c84bd7e0 Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Thu, 1 Mar 2018 19:41:28 +0100 Subject: [PATCH 21/24] Catch empty list of series before attempting concatenation --- tmpo/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tmpo/__init__.py b/tmpo/__init__.py index a1c1a9e..e691fe5 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -672,7 +672,10 @@ def get_data(self, sids, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None): series = [self.get_sensor_data(sid=sid, head=head, tail=tail, resolution=resolution, tz=tz) for sid in sids] series = [s for s in series if not s.empty] - df = pd.concat(series, axis=1) + if series: + df = pd.concat(series, axis=1) + else: + df = pd.DataFrame() return df def get_unit(self, sid): From 9117aaedf3c82b57ed76af3215c5fb0e0ecbba23 Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Thu, 1 Mar 2018 19:47:52 +0100 Subject: [PATCH 22/24] use generators to do lazy loading and be more memory efficient --- tmpo/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tmpo/__init__.py b/tmpo/__init__.py index e691fe5..3608b6f 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -669,9 +669,9 @@ def get_data(self, sids, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None): ------- pd.DataFrame """ - series = [self.get_sensor_data(sid=sid, head=head, tail=tail, resolution=resolution, tz=tz) - for sid in sids] - series = [s for s in series if not s.empty] + series = (self.get_sensor_data(sid=sid, head=head, tail=tail, resolution=resolution, tz=tz) + for sid in sids) + series = (s for s in series if not s.empty) if series: df = pd.concat(series, axis=1) else: From 1c9ebc9699854716d07f36b256edfd1fc1d6a432 Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Thu, 1 Mar 2018 19:53:22 +0100 Subject: [PATCH 23/24] replace empty list check with try catch, because it is a generator so you can't check if it is empty or not. --- tmpo/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tmpo/__init__.py b/tmpo/__init__.py index 3608b6f..f1a6962 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -672,9 +672,9 @@ def get_data(self, sids, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None): series = (self.get_sensor_data(sid=sid, head=head, tail=tail, resolution=resolution, tz=tz) for sid in sids) series = (s for s in series if not s.empty) - if series: + try: df = pd.concat(series, axis=1) - else: + except ValueError: df = pd.DataFrame() return df From b5f353a315ed1a7e7e4412fd66cefd3eac1551e4 Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Thu, 12 Apr 2018 12:10:27 +0200 Subject: [PATCH 24/24] add option to automatically get the diff for counters --- tmpo/__init__.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tmpo/__init__.py b/tmpo/__init__.py index f1a6962..4f278c3 100644 --- a/tmpo/__init__.py +++ b/tmpo/__init__.py @@ -610,7 +610,7 @@ def get_types(self, sid): subtype = config.get('subtype') return com, dtype, subtype - def get_sensor_data(self, sid, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None): + def get_sensor_data(self, sid, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None, diff=False): """ Get data for a sensor in a format ready for analysis @@ -622,6 +622,9 @@ def get_sensor_data(self, sid, head=0, tail=EPOCHS_MAX, resolution='15min', tz=N resolution : str tz : str IANA time zone + diff : bool + default False + if the sensor is a counter, return the difference between values Returns ------- @@ -646,13 +649,15 @@ def get_sensor_data(self, sid, head=0, tail=EPOCHS_MAX, resolution='15min', tz=N ts = ts.reindex(ts.index.union(newindex)) ts = ts.interpolate(method='time') ts = ts.reindex(newindex) + if diff: + ts = ts.diff() else: raise NotImplementedError("I don't know the data type {}".format(data_type)) return ts.dropna() - def get_data(self, sids, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None): + def get_data(self, sids, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None, diff=False): """ Get data for a sensor in a format ready for analysis @@ -664,12 +669,15 @@ def get_data(self, sids, head=0, tail=EPOCHS_MAX, resolution='15min', tz=None): resolution : str tz : str IANA time zone + diff : bool + default False + if the sensor is a counter, return the difference between values Returns ------- pd.DataFrame """ - series = (self.get_sensor_data(sid=sid, head=head, tail=tail, resolution=resolution, tz=tz) + series = (self.get_sensor_data(sid=sid, head=head, tail=tail, resolution=resolution, tz=tz, diff=diff) for sid in sids) series = (s for s in series if not s.empty) try: