diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 8f9d8bf..aa6cd24 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -29,6 +29,7 @@ jobs: - uses: actions/checkout@v2 + - run: python -m pip install -r requirements_dev.txt - run: python -m pip install .[test] - run: python -m pytest tests --cov=endaq.batch diff --git a/endaq/batch/analyzer.py b/endaq/batch/analyzer.py index 73ff8b5..0e733b6 100644 --- a/endaq/batch/analyzer.py +++ b/endaq/batch/analyzer.py @@ -1,4 +1,3 @@ -from functools import wraps import logging import sys import warnings @@ -10,44 +9,13 @@ import numpy as np -import scipy.signal import pandas as pd +import endaq.calc +from endaq.calc import psd, stats, shock, integrate, filters + from endaq.batch import quat from endaq.batch.utils import ide_utils -from endaq.batch.utils.calc import psd, stats, shock, integrate, filters - - -def as_series( - unit_type, - data_name, - *, - edit_axis_names=lambda axis_names: axis_names, - default_axis_names, -): - """Format method output as a pandas Series.""" - - def decorator(method): - @wraps(method) - def wrapper(self, *args, **kwargs): - data = method(self, *args, **kwargs) - - try: - ch_struct = self._channels[unit_type] - except KeyError: - axis_names = default_axis_names - else: - axis_names = edit_axis_names(ch_struct.axis_names) - - return pd.Series( - data, - index=pd.Index(axis_names, name="axis"), - name=data_name, - ) - - return wrapper - - return decorator class Analyzer: @@ -168,7 +136,11 @@ def _accelerationData(self): ch_struct = self._channels.get("acc", None) if ch_struct is None: logging.warning(f"no acceleration channel in {self._filename}") - return np.empty((3, 0), dtype=np.float) + return pd.DataFrame( + np.empty((0, 3), dtype=np.float), + index=pd.Series([], name="time"), + columns=pd.Series(["X", "Y", "Z"], name="axis"), + ) aUnits = ch_struct.units[1] try: @@ -182,41 +154,44 @@ def _accelerationData(self): self._accelerationName = ch_struct.channel.name self._accelerationFs = ch_struct.fs - times = ch_struct.eventarray.arraySlice()[0] * 1e-6 # us -> s - aData = conversionFactor * ch_struct.eventarray.arrayValues( - subchannels=ch_struct.sch_ids, - ) + aData = conversionFactor * ch_struct.to_pandas(time_mode="timedelta") if self._accel_start_margin is not None: - margin = int(np.ceil(ch_struct.fs * self._accel_start_margin)) - aData = aData[:, margin:] + margin = int( + np.ceil( + ch_struct.fs * self._accel_start_margin / np.timedelta64(1, "s") + ) + ) + aData = aData.iloc[margin:] elif self._accel_start_time is not None: - i = np.searchsorted(times, self._accel_start_time) - aData = aData[:, i:] + aData = aData.loc[self._accel_start_time :] if self._accel_end_margin is not None: - margin = int(np.ceil(ch_struct.fs * self._accel_end_margin)) - aData = aData[:, : (-margin or None)] + margin = int( + np.ceil(ch_struct.fs * self._accel_end_margin / np.timedelta64(1, "s")) + ) + aData = aData.iloc[: (-margin or None)] elif self._accel_end_time is not None: - i = np.searchsorted(times, self._accel_end_time) - aData = aData[:, :i] + aData = aData.loc[: self._accel_end_time] - if self._accel_highpass_cutoff: - aData = filters.highpass( - aData, fs=ch_struct.fs, cutoff=self._accel_highpass_cutoff, axis=-1 - ) + aData = filters.butterworth(aData, low_cutoff=self._accel_highpass_cutoff) + assert isinstance(aData, pd.DataFrame) return aData @cached_property - def _accelerationResultant(self): - return stats.L2_norm(self._accelerationData, axis=0) + 
def _accelerationResultantData(self): + return self._accelerationData.apply(stats.L2_norm, axis="columns").to_frame() @cached_property def _microphoneData(self): """Populate the _microphone* fields, including splitting and extending data.""" ch_struct = self._channels.get("mic", None) if ch_struct is None: - return np.empty(0, dtype=np.float) + return pd.DataFrame( + np.empty((0, 1), dtype=np.float), + index=pd.Series([], name="time"), + columns=pd.Series(["mic"], name="axis"), + ) units = ch_struct.units[1] if units.lower() != "a": @@ -224,7 +199,7 @@ def _microphoneData(self): self._micName = ch_struct.channel.name self._micFs = ch_struct.fs - data = ch_struct.eventarray.arrayValues(subchannels=ch_struct.sch_ids) + data = ch_struct.to_pandas(time_mode="timedelta") return data @@ -232,7 +207,7 @@ def _velocityData(self): aData = self._accelerationData if aData.size == 0: - return np.empty((3, 0), dtype=np.float) + return aData if not self._accel_highpass_cutoff: logging.warning( @@ -240,15 +215,13 @@ "velocity calculation may be unstable" ) - vData = integrate._integrate(aData, dt=1 / self._accelerationFs, axis=1) - - return vData + return integrate._integrate(aData) @cached_property def _displacementData(self): vData = self._velocityData if vData.size == 0: - return np.empty((3, 0), dtype=np.float) + return vData if not self._accel_highpass_cutoff: logging.warning( @@ -256,117 +229,95 @@ "displacement calculation may be unstable" ) - dData = integrate._integrate(vData, dt=1 / self._accelerationFs, axis=1) - - return dData + return integrate._integrate(vData) @cached_property def _PVSSData(self): aData = self._accelerationData if aData.size == 0: - return np.empty(0, dtype=np.float), self._accelerationData - - log2_f0 = np.log2(self._pvss_init_freq) - log2_f1 = np.log2(self._accelerationFs) - num_bins = np.floor( - self._pvss_bins_per_octave * (log2_f1 - 1 - log2_f0) - ).astype(int) - - freqs = np.logspace( - start=log2_f0, - stop=log2_f0 + num_bins / self._pvss_bins_per_octave, - num=num_bins + 1, - base=2, - endpoint=True, + pvss = aData[:] + pvss.index.name = "frequency (Hz)" + return pvss + + freqs = endaq.calc.logfreqs( + aData, self._pvss_init_freq, self._pvss_bins_per_octave + ) + freqs = freqs[ + (freqs >= self._accelerationFs / self._accelerationData.shape[0]) + ] + pv = shock.shock_spectrum( + self._accelerationData, + freqs, + damp=0.05, + mode="pvss", + ) + + return pv + + @cached_property + def _PVSSResultantData(self): + aData = self._accelerationData + if aData.size == 0: + pvss = self._accelerationResultantData[:] + pvss.index.name = "frequency (Hz)" + return pvss + + freqs = endaq.calc.logfreqs( + aData, self._pvss_init_freq, self._pvss_bins_per_octave ) freqs = freqs[ (freqs >= self._accelerationFs / self._accelerationData.shape[0]) ] - pv = shock.pseudo_velocity( + pv = shock.shock_spectrum( self._accelerationData, freqs, - dt=1 / self._accelerationFs, damp=0.05, - two_sided=False, - axis=-1, + mode="pvss", + aggregate_axes=True, ) - assert pv.ndim == 2 - assert 1 <= pv.shape[0] <= 3 - assert pv.shape[-1] == len(freqs) - return freqs, pv @cached_property def _PSDData(self): aData = self._accelerationData if aData.size == 0: - return np.empty(0, dtype=np.float), self._accelerationData + psdData = aData[:] + psdData.index.name = "frequency (Hz)" + return psdData - return scipy.signal.welch( + return endaq.calc.psd.welch( aData, - fs=self._accelerationFs, -
nperseg=int(np.ceil(self._accelerationFs / self._psd_freq_bin_width)), + bin_width=self._psd_freq_bin_width, window=self._psd_window, average="median", - axis=1, ) @cached_property def _VCCurveData(self): """Calculate Vibration Criteria (VC) Curves for the accelerometer.""" - aData = self._accelerationData - if aData.size == 0: - return np.empty(0, dtype=np.float), self._accelerationData - - """ - Theory behind the calculation: - - Let x(t) be a real-valued time-domain signal, and X(2πf) = F{x(t)}(2πf) - be the Fourier Transform of that signal. By Parseval's Theorem, - - ∫x(t)^2 dt = ∫|X(2πf)|^2 df - - (see https://en.wikipedia.org/wiki/Parseval%27s_theorem#Notation_used_in_physics) - - Rewriting the right side of that equation in the discrete form becomes - - ∫x(t)^2 dt ≈ ∑ |X[k]|^2 • ∆f - - where ∆f = fs/N = (1/∆t) / N = 1/T. - Limiting the right side to a range of discrete frequencies (k_0, k_1): - - ∫x(t)^2 dt ≈ [∑; k=k_0 -> k≤k_1] |X[k]|^2 • ∆f - - The VC curve calculation is the RMS over the time-domain. If T is the - duration of the time-domain signal, then: - - √((1/T) ∫x(t)^2 dt) - ≈ √((1/T) [∑; k=k_0 -> k≤k_1] |X[k]|^2 • ∆f) - = ∆f • √([∑; k=k_0 -> k≤k_1] |X[k]|^2) - - If the time-series data is acceleration, then the signal needs to first - be integrated into velocity. This can be done in the frequency domain - by replacing |X(2πf)|^2 with (1/2πf)^2 |X(2πf)|^2. - """ - f, a_psd = self._PSDData - f, v_psd = psd.differentiate(f, a_psd, n=-1) - f_oct, v_psd_oct = psd.to_octave( - f, - v_psd, + psdData = self._PSDData + if psdData.size == 0: + vcData = psdData[:] + vcData.index.name = "frequency (Hz)" + return vcData + + return psd.vc_curves( + self._PSDData, fstart=self._vc_init_freq, octave_bins=self._vc_bins_per_octave, - mode="sum", ) - v_vc = np.sqrt(f[1] * v_psd_oct) # the PSD must already scale by ∆f? 
- - return f_oct, v_vc @cached_property def _pressureData(self): """Populate the _pressure* fields, including splitting and extending data.""" ch_struct = self._channels.get("pre", None) if ch_struct is None: - return np.empty(0, dtype=np.float) + return pd.DataFrame( + np.empty((0, 1), dtype=np.float), + index=pd.Series([], name="time"), + columns=pd.Series(["Pressure"], name="axis"), + ) units = ch_struct.units[1] try: @@ -380,9 +331,7 @@ def _pressureData(self): self._preName = ch_struct.channel.name self._preFs = ch_struct.fs - data = conversionFactor * ch_struct.eventarray.arrayValues( - subchannels=ch_struct.sch_ids, - ) + data = conversionFactor * ch_struct.to_pandas(time_mode="timedelta") return data @@ -391,7 +340,11 @@ def _temperatureData(self): """Populate the _temperature* fields, including splitting and extending data.""" ch_struct = self._channels.get("tmp", None) if ch_struct is None: - return np.empty(0, dtype=np.float) + return pd.DataFrame( + np.empty((0, 1), dtype=np.float), + index=pd.Series([], name="time"), + columns=pd.Series(["Temperature"], name="axis"), + ) units = ch_struct.units[1] try: @@ -405,8 +358,8 @@ def _temperatureData(self): self._tmpName = ch_struct.channel.name self._tmpFs = ch_struct.fs - data = conversionOffset + conversionFactor * ch_struct.eventarray.arrayValues( - subchannels=ch_struct.sch_ids, + data = conversionOffset + conversionFactor * ch_struct.to_pandas( + time_mode="timedelta" ) return data @@ -416,23 +369,31 @@ def _gyroscopeData(self): """Populate the _gyro* fields, including splitting and extending data.""" ch_struct = self._channels.get("gyr", None) if ch_struct is None: - return np.empty((3, 0), dtype=np.float) + return pd.DataFrame( + np.empty((0, 3), dtype=np.float), + index=pd.Series([], name="time"), + columns=pd.Series(["X", "Y", "Z"], name="axis"), + ) self._gyroName = ch_struct.channel.name self._gyroFs = ch_struct.fs units = ch_struct.units[1] if units.lower() == "q": - quat_array = ch_struct.eventarray.arrayValues(subchannels=ch_struct.sch_ids) - quat_raw = quat_array[ - [3, 0, 1, 2] - ] # reorders to & strips out the "Acc" channel - - data = (180 / np.pi) * quat.quat_to_angvel(quat_raw.T, 1 / self._gyroFs).T + quat_df = ch_struct.to_pandas(time_mode="timedelta") + quat_raw = quat_df[["W", "X", "Y", "Z"]].to_numpy() + + data = pd.DataFrame( + (180 / np.pi) + * quat.quat_to_angvel(quat_raw, 1 / self._gyroFs, qaxis=1), + index=quat_df.index, + columns=pd.Series(["X", "Y", "Z"], name="axis"), + ) def strip_invalid_prefix(data, prefix_len): """Search prefix for invalid data and remove it (if any).""" - data_mag = stats.L2_norm(data[: 4 * prefix_len], axis=0) + data_array = data.to_numpy() + data_mag = stats.L2_norm(data_array[: 4 * prefix_len], axis=-1) # the derivative method for `quat_to_angvel` uses the *average* # of adjacent differences # -> any rotation spikes will result in two adjacent, @@ -443,7 +404,7 @@ def strip_invalid_prefix(data, prefix_len): # Prefix data is considered "anomalous" if it is much larger # than any surrounding data if data_agg[argmax_prefix] > 2 * data_mag[prefix_len:].max(): - data = data[..., argmax_prefix + 2 :] + data = data.iloc[argmax_prefix + 2 :] return data @@ -451,27 +412,27 @@ def strip_invalid_prefix(data, prefix_len): data, prefix_len=max(4, int(np.ceil(0.25 * self._gyroFs))) ) elif units.lower() in ("dps", "deg/s"): - data = ch_struct.eventarray.arrayValues(subchannels=ch_struct.sch_ids) + data = ch_struct.to_pandas(time_mode="timedelta") else: raise ValueError(f'unknown gyroscope 
channel units "{units}"') return data - def _processHumidity(self, channel): - """Populate the _humidity* fields, including splitting and extending data.""" - pass - @cached_property def _gpsPositionData(self): ch_struct = self._channels.get("gps", None) if ch_struct is None: - return np.empty((2, 0), dtype=np.float) + return pd.DataFrame( + np.empty((0, 2), dtype=np.float), + index=pd.Series([], name="time"), + columns=pd.Series(["Latitude", "Longitude"], name="axis"), + ) units = ch_struct.units[1] if units.lower() != "degrees": raise ValueError(f'unknown GPS position channel units "{units}"') - data = ch_struct.eventarray.arrayValues(subchannels=ch_struct.sch_ids) + data = ch_struct.to_pandas(time_mode="timedelta") self._gpsName = ch_struct.channel.name self._gpsFs = ch_struct.fs @@ -483,15 +444,17 @@ def _gpsPositionData(self): def _gpsSpeedData(self): ch_struct = self._channels.get("spd", None) if ch_struct is None: - return np.empty(0) + return pd.DataFrame( + np.empty((0, 1), dtype=np.float), + index=pd.Series([], name="time"), + columns=pd.Series(["GPS Speed"], name="axis"), + ) units = ch_struct.units[1] if units != "m/s": raise ValueError(f'unknown GPS ground speed channel units "{units}"') - data = self.MPS_TO_KMPH * ch_struct.eventarray.arrayValues( - subchannels=ch_struct.sch_ids - ) + data = self.MPS_TO_KMPH * ch_struct.to_pandas(time_mode="timedelta") self._gpsSpeedName = ch_struct.channel.name self._gpsSpeedFs = ch_struct.fs @@ -503,153 +466,133 @@ def _gpsSpeedData(self): # ========================================================================== @cached_property - @as_series( - "acc", - "RMS Acceleration", - edit_axis_names=lambda axis_names: axis_names + ["Resultant"], - default_axis_names=["X", "Y", "Z", "Resultant"], - ) def accRMSFull(self): """Accelerometer Tri-axial RMS.""" with warnings.catch_warnings(): warnings.simplefilter("ignore") - rms = stats.rms( - self._accelerationData, axis=1 + rms = self._accelerationData.apply( + stats.rms, axis="rows", raw=True ) # RuntimeWarning: Mean of empty slice. - return self.MPS2_TO_G * np.append(rms, stats.L2_norm(rms)) + + rms["Resultant"] = stats.L2_norm(rms.to_numpy()) + rms.name = "RMS Acceleration" + return self.MPS2_TO_G * rms @cached_property - @as_series( - "acc", - "RMS Velocity", - edit_axis_names=lambda axis_names: axis_names + ["Resultant"], - default_axis_names=["X", "Y", "Z", "Resultant"], - ) def velRMSFull(self): """Velocity Tri-axial RMS, after applying a 0.1Hz highpass filter.""" with warnings.catch_warnings(): warnings.simplefilter("ignore") - rms = stats.rms( - self._velocityData, axis=1 + rms = self._velocityData.apply( + stats.rms, axis="rows", raw=True ) # RuntimeWarning: Mean of empty slice. - return self.MPS_TO_MMPS * np.append(rms, stats.L2_norm(rms)) + + rms["Resultant"] = stats.L2_norm(rms.to_numpy()) + rms.name = "RMS Velocity" + return self.MPS_TO_MMPS * rms @cached_property - @as_series( - "acc", - "RMS Displacement", - edit_axis_names=lambda axis_names: axis_names + ["Resultant"], - default_axis_names=["X", "Y", "Z", "Resultant"], - ) def disRMSFull(self): """Displacement Tri-axial RMS, after applying a 0.1Hz highpass filter.""" with warnings.catch_warnings(): warnings.simplefilter("ignore") - rms = stats.rms( - self._displacementData, axis=1 + rms = self._displacementData.apply( + stats.rms, axis="rows", raw=True ) # RuntimeWarning: Mean of empty slice. 
- return self.M_TO_MM * np.append(rms, stats.L2_norm(rms)) + + rms["Resultant"] = stats.L2_norm(rms.to_numpy()) + rms.name = "RMS Displacement" + return self.M_TO_MM * rms @cached_property - @as_series( - "acc", - "Peak Absolute Acceleration", - edit_axis_names=lambda axis_names: axis_names + ["Resultant"], - default_axis_names=["X", "Y", "Z", "Resultant"], - ) def accPeakFull(self): """Peak instantaneous tri-axial acceleration""" - max_abs = stats.max_abs(self._accelerationData, axis=1) - max_abs_res = np.amax( - stats.L2_norm(self._accelerationData, axis=0), - initial=-np.inf, - axis=-1, - ) - return self.MPS2_TO_G * np.nan_to_num( - np.append(max_abs, max_abs_res), nan=np.nan, posinf=np.inf, neginf=np.nan - ) + max_abs = self._accelerationData.abs().max(axis="rows") + max_abs_res = self._accelerationResultantData.max(axis="rows") + max_abs["Resultant"] = max_abs_res.iloc[0] + max_abs.name = "Peak Absolute Acceleration" + return self.MPS2_TO_G * max_abs @cached_property - @as_series( - "acc", - "Peak Pseudo Velocity Shock Spectrum", - edit_axis_names=lambda axis_names: axis_names + ["Resultant"], - default_axis_names=["X", "Y", "Z", "Resultant"], - ) def pseudoVelPeakFull(self): """Peak Pseudo Velocity""" - if self._PVSSData[1].size == 0: - return np.full(self._PVSSData[1].shape[0] + 1, np.nan) - - pv = self._PVSSData[1] - max_pv = np.amax(pv, initial=-np.inf, axis=1) - max_pv_res = np.amax(stats.L2_norm(pv, axis=0), initial=-np.inf, axis=-1) - return self.MPS_TO_MMPS * np.nan_to_num( - np.append(max_pv, max_pv_res), nan=np.nan, posinf=np.inf, neginf=np.nan - ) + max_pv = self._PVSSData.max(axis="rows") + max_pv_res = self._PVSSResultantData.max(axis="rows") + max_pv["Resultant"] = max_pv_res.iloc[0] + max_pv.name = "Peak Pseudo Velocity Shock Spectrum" + return self.MPS_TO_MMPS * max_pv @cached_property - @as_series("gps", "GPS Position", default_axis_names=["Latitude", "Longitude"]) def gpsLocFull(self): """Average GPS location""" data = self._gpsPositionData # 0's occur when gps doesn't have a "lock" -> remove them - data = data[:, np.all(data != 0, axis=0)] + data = data.iloc[np.all(data.to_numpy() != 0, axis=1)] if data.size == 0: - return [np.nan, np.nan] + gpsPos = pd.Series( + [np.nan, np.nan], + index=pd.Series(["Latitude", "Longitude"], name="axis"), + ) + else: + gpsPos = data.iloc[-1] - return data[..., -1] + gpsPos.name = "GPS Position" + return gpsPos @cached_property - @as_series("spd", "GPS Speed", default_axis_names=["Ground"]) def gpsSpeedFull(self): """Average GPS speed""" data = self._gpsSpeedData # 0's occur when gps doesn't have a "lock" -> remove them - data = data[data != 0] + data = data.iloc[data.to_numpy()[:, 0] != 0] with warnings.catch_warnings(): warnings.simplefilter("ignore") - return np.mean(data) # RuntimeWarning: Mean of empty slice. + gpsSpeed = data.mean() # RuntimeWarning: Mean of empty slice. + + gpsSpeed.name = "GPS Speed" + return gpsSpeed @cached_property - @as_series( - "gyr", - "RMS Angular Velocity", - edit_axis_names=lambda axis_names: ["X", "Y", "Z", "Resultant"], - default_axis_names=["X", "Y", "Z", "Resultant"], - ) def gyroRMSFull(self): """Gyroscope RMS""" with warnings.catch_warnings(): warnings.simplefilter("ignore") - rms = stats.rms( - self._gyroscopeData, axis=1 + rms = self._gyroscopeData.apply( + stats.rms, axis="rows", raw=True ) # RuntimeWarning: Mean of empty slice.
- return np.append(rms, stats.L2_norm(rms)) + rms["Resultant"] = stats.L2_norm(rms.to_numpy()) + rms.name = "RMS Angular Velocity" + return rms @cached_property - @as_series("mic", "RMS Microphone", default_axis_names=[""]) def micRMSFull(self): """Microphone RMS""" with warnings.catch_warnings(): warnings.simplefilter("ignore") - return stats.rms( - self._microphoneData + mic = self._microphoneData.apply( + stats.rms, axis="rows", raw=True ) # RuntimeWarning: Mean of empty slice. + mic.name = "RMS Microphone" + return mic + @cached_property - @as_series("tmp", "Average Temperature", default_axis_names=[""]) def tempFull(self): """Average Temperature""" with warnings.catch_warnings(): warnings.simplefilter("ignore") - return self._temperatureData.mean() # RuntimeWarning: Mean of empty slice. + temp = self._temperatureData.mean() # RuntimeWarning: Mean of empty slice. + + temp.name = "Average Temperature" + return temp @cached_property - @as_series("pre", "Average Pressure", default_axis_names=[""]) def pressFull(self): """Average Pressure""" with warnings.catch_warnings(): warnings.simplefilter("ignore") - return self._pressureData.mean() # RuntimeWarning: Mean of empty slice. + press = self._pressureData.mean() # RuntimeWarning: Mean of empty slice. + + press.name = "Average Pressure" + return press diff --git a/endaq/batch/calc.py b/endaq/batch/calc.py index 269784e..8a5ae11 100644 --- a/endaq/batch/calc.py +++ b/endaq/batch/calc.py @@ -4,11 +4,12 @@ import numpy as np import pandas as pd -import idelib + +import endaq.ide +from endaq.calc import stats as calc_stats +from endaq.calc import psd as calc_psd from endaq.batch.analyzer import Analyzer -from endaq.batch.utils.calc import stats as utils_stats -from endaq.batch.utils.calc import psd as utils_psd def _make_meta(dataset): @@ -36,26 +37,19 @@ def _make_psd(analyzer, fstart=None, bins_per_octave=None): if accel_ch is None: return None - f, psd = analyzer._PSDData + df_psd = analyzer._PSDData if bins_per_octave is not None: - f, psd = utils_psd.to_octave( - f, - psd, + df_psd = calc_psd.to_octave( + df_psd, fstart=(fstart or 1), octave_bins=bins_per_octave, - axis=1, - mode="mean", + agg=np.mean, ) - df_psd = pd.DataFrame( - psd.T * analyzer.MPS2_TO_G ** 2, # (m/s^2)^2/Hz -> g^2/Hz - index=pd.Index(f, name="frequency"), - columns=pd.Index(accel_ch.axis_names, name="axis"), - ) - df_psd["Resultant"] = np.sum(df_psd.to_numpy(), axis=1) + df_psd = df_psd * analyzer.MPS2_TO_G ** 2 # (m/s^2)^2/Hz -> g^2/Hz - return df_psd.stack(level="axis").reorder_levels(["axis", "frequency"]) + return df_psd.stack(level="axis").reorder_levels(["axis", "frequency (Hz)"]) def _make_pvss(analyzer): @@ -68,17 +62,11 @@ if accel_ch is None: return None - f, pvss = analyzer._PVSSData - - df_pvss = pd.DataFrame( - pvss.T * analyzer.MPS_TO_MMPS, - index=pd.Index(f, name="frequency"), - columns=pd.Index(accel_ch.axis_names, name="axis"), - ) + df_pvss = analyzer._PVSSData + df_pvss["Resultant"] = analyzer._PVSSResultantData + df_pvss = df_pvss * analyzer.MPS_TO_MMPS - df_pvss["Resultant"] = utils_stats.L2_norm(df_pvss.to_numpy(), axis=1) - - return df_pvss.stack(level="axis").reorder_levels(["axis", "frequency"]) + return df_pvss.stack(level="axis").reorder_levels(["axis", "frequency (Hz)"]) def _make_metrics(analyzer): """ ... - temperature - degrees Celsius - pressure - kiloPascals """ - df = pd.DataFrame( + df = pd.concat( [ analyzer.accRMSFull, analyzer.velRMSFull, @@ -109,12 +97,13 @@ def
_make_metrics(analyzer): analyzer.micRMSFull, analyzer.tempFull, analyzer.pressFull, - ] + ], + axis="columns", ) # Format data into desired shape - df.index.name = "calculation" - series = df.stack() + df.columns.name = "calculation" + series = df.stack().reorder_levels(["calculation", "axis"]) return series @@ -131,44 +120,42 @@ def _make_peak_windows(analyzer, margin_len): if accel_ch is None: return None - data = analyzer.MPS2_TO_G * np.concatenate( # m/s^2 -> g - [analyzer._accelerationData, analyzer._accelerationResultant[None]], axis=0 - ) + data = analyzer._accelerationData[:] + data["Resultant"] = analyzer._accelerationResultantData + data = analyzer.MPS2_TO_G * data - window_len = 2 * margin_len + 1 - t = ( - accel_ch.eventarray.arraySlice()[0] - - accel_ch.channel.dataset.sessions[0].firstTime - ) dt = 1 / analyzer._accelerationFs - result_data = np.full((window_len, data.shape[0]), np.nan, dtype=data.dtype) - # Calculate ranges - i_max = np.argmax(np.abs(data), axis=1) - i_max_neg = i_max - data.shape[1] - for j in range(data.shape[0]): - result_data[-margin_len - 1 - i_max[j] : margin_len - i_max_neg[j], j] = data[ - j, i_max_neg[j] - margin_len : i_max[j] + margin_len + 1 - ] + data_noidx = data.reset_index(drop=True) + peak_indices = data_noidx.abs().idxmax(axis="rows") + aligned_peak_data = pd.concat( + [ + pd.Series( + data[col].to_numpy(), + index=(data_noidx.index - peak_indices[col]), + name=col, + ) + for col in data.columns + ], + axis="columns", + ) + aligned_peak_data = aligned_peak_data.loc[-margin_len : margin_len] + aligned_peak_data.index = pd.Series( + aligned_peak_data.index * dt, name="peak offset" + ) + aligned_peak_data.columns = pd.MultiIndex.from_frame( + pd.DataFrame( + { + "axis": aligned_peak_data.columns.values, + "peak time": data.index[peak_indices], + } + ) + ) # Format results result = ( - pd.DataFrame( - result_data, - index=pd.to_timedelta( - dt * pd.RangeIndex(-margin_len, margin_len + 1, name="peak offset"), - unit="s", - ), - columns=pd.MultiIndex.from_arrays( - [ - accel_ch.axis_names + ["Resultant"], - t[i_max].astype("timedelta64[us]"), - ], - names=["axis", "peak time"], - ), - ) - .stack(level="axis") - .stack(level="peak time") + aligned_peak_data.stack() .stack() .reorder_levels(["axis", "peak time", "peak offset"]) ) @@ -179,22 +166,14 @@ def _make_vc_curves(analyzer): """ Format the VC curves of the main accelerometer channel into a pandas object.
""" - accel_ch = analyzer._channels.get("acc", None) if accel_ch is None: return None - f, vc = analyzer._VCCurveData - - df_vc = pd.DataFrame( - vc.T * analyzer.MPS_TO_UMPS, # (m/s) -> (μm/s) - index=pd.Index(f, name="frequency"), - columns=pd.Index(accel_ch.axis_names, name="axis"), - ) - - df_vc["Resultant"] = utils_stats.L2_norm(df_vc.to_numpy(), axis=1) + df_vc = analyzer._VCCurveData * analyzer.MPS_TO_UMPS # (m/s) -> (μm/s) + df_vc["Resultant"] = calc_stats.L2_norm(df_vc.to_numpy(), axis=1) - return df_vc.stack(level="axis").reorder_levels(["axis", "frequency"]) + return df_vc.stack(level="axis").reorder_levels(["axis", "frequency (Hz)"]) class GetDataBuilder: @@ -393,7 +372,7 @@ def _get_data(self, filename): print(f"processing {filename}...") data = {} - with idelib.importFile(filename) as ds: + with endaq.ide.get_doc(filename) as ds: analyzer = Analyzer( ds, **self._analyzer_kwargs, @@ -510,7 +489,11 @@ def to_html_plots(self, folder_path=None, show=False): continue if k == "psd": fig = px.line( - df, x="frequency", y="value", color="filename", line_dash="axis" + df, + x="frequency (Hz)", + y="value", + color="filename", + line_dash="axis", ) fig.update_xaxes(type="log", title_text="frequency (Hz)") @@ -519,7 +502,11 @@ def to_html_plots(self, folder_path=None, show=False): elif k == "pvss": fig = px.line( - df, x="frequency", y="value", color="filename", line_dash="axis" + df, + x="frequency (Hz)", + y="value", + color="filename", + line_dash="axis", ) fig.update_xaxes(type="log", title_text="frequency (Hz)") fig.update_yaxes(type="log", title_text="Velocity (mm/s)") @@ -544,7 +531,11 @@ def to_html_plots(self, folder_path=None, show=False): elif k == "vc_curves": fig = px.line( - df, x="frequency", y="value", color="filename", line_dash="axis" + df, + x="frequency (Hz)", + y="value", + color="filename", + line_dash="axis", ) fig.update_xaxes(type="log", title_text="frequency (Hz)") diff --git a/endaq/batch/quat.py b/endaq/batch/quat.py index 6b163ba..f1a4e45 100644 --- a/endaq/batch/quat.py +++ b/endaq/batch/quat.py @@ -1,21 +1,26 @@ import numpy as np -def as_quat_array(q, force_copy=False): +def as_quat_array(q, force_copy=False, qaxis=-1): """Formats a quaternion array.""" q = (np.array if force_copy else np.asarray)(q) - if q.ndim == 0 or q.shape[-1] != 4: + if not (-q.ndim <= qaxis < q.ndim): + raise ValueError("invalid quaternion axis") + + if q.ndim == 0 or q.shape[qaxis] != 4: raise ValueError( - "improper shape for quaternion data; should have wxyz in last axis" + "improper shape for quaternion data;" + " specified axis should contain WXYZ coordinates" ) return q -def quat_mul(q1, q2): +def quat_mul(q1, q2, qaxis=-1): """Multiply two quaternion arrays.""" - q1, q2 = as_quat_array(q1), as_quat_array(q2) + q1, q2 = as_quat_array(q1, qaxis=qaxis), as_quat_array(q2, qaxis=qaxis) + q1, q2 = np.moveaxis(q1, qaxis, -1), np.moveaxis(q2, qaxis, -1) result = np.empty(np.broadcast(q1, q2).shape, dtype=np.result_type(q1, q2)) @@ -30,28 +35,29 @@ def quat_mul(q1, q2): q1 * q2[..., [3, 2, 1, 0]] * np.array([1, 1, -1, 1]), axis=-1 ) - return result + return np.moveaxis(result, -1, qaxis) -def quat_conj(q): +def quat_conj(q, qaxis=-1): """Conjugate an array of quaternion.""" - q = as_quat_array(q, force_copy=True) - q[..., 1:] *= -1 + q = as_quat_array(q, force_copy=True, qaxis=qaxis) - return q + q = np.moveaxis(q, qaxis, 0) + q[1:] *= -1 + return np.moveaxis(q, 0, qaxis) -def quat_inv(q): +def quat_inv(q, qaxis=-1): """Invert an array of quaternions.""" - return quat_conj(q) / 
np.sum(q ** 2, axis=-1, keepdims=True) + return quat_conj(q, qaxis=qaxis) / np.sum(q ** 2, axis=qaxis, keepdims=True) -def quat_div(q1, q2): +def quat_div(q1, q2, qaxis=-1): """Divide two quaternion arrays.""" - return quat_mul(q1, quat_inv(q2)) + return quat_mul(q1, quat_inv(q2, qaxis=qaxis), qaxis=qaxis) -def quat_to_angvel(q, *dt): +def quat_to_angvel(q, *dt, qaxis=-1): """ Calculate the angular velocity for an array of orientation quaternions. @@ -59,5 +65,8 @@ def quat_to_angvel(q, *dt): :param dt: the time corresponing to each quaternion sample :return: the angular velocity """ + q = np.moveaxis(q, qaxis, -1) q_prime = np.gradient(q, *dt, axis=range(q.ndim - 1), edge_order=2) - return 2 * quat_div(q_prime, q)[..., 1:] + ang_vel = 2 * quat_div(q_prime, q)[..., 1:] + + return np.moveaxis(ang_vel, -1, qaxis) diff --git a/endaq/batch/utils/calc/__init__.py b/endaq/batch/utils/calc/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/endaq/batch/utils/calc/filters.py b/endaq/batch/utils/calc/filters.py deleted file mode 100644 index f34d969..0000000 --- a/endaq/batch/utils/calc/filters.py +++ /dev/null @@ -1,27 +0,0 @@ -import numpy as np -import scipy.signal - - -def highpass(array, fs, cutoff=1.0, half_order=3, axis=-1): - """Apply a highpass filter to an array.""" - array = np.moveaxis(array, axis, -1) - - sos_coeffs = scipy.signal.butter( - N=half_order, - Wn=cutoff, - btype="highpass", - fs=fs, - output="sos", - ) - - # vvv - init_state = scipy.signal.sosfilt_zi(sos_coeffs) - for _ in range(2): - init_fwd = init_state * array[(Ellipsis, 0) + ((None,) * init_state.ndim)] - init_fwd = np.moveaxis(init_fwd, array.ndim - 1, 0) - array, _zo = scipy.signal.sosfilt(sos_coeffs, array, axis=-1, zi=init_fwd) - array = array[..., ::-1] - # ^^^ could alternatively do this (not as good though?): - # array = scipy.signal.sosfiltfilt(sos_coeffs, array, axis=-1) - - return np.moveaxis(array, -1, axis) diff --git a/endaq/batch/utils/calc/integrate.py b/endaq/batch/utils/calc/integrate.py deleted file mode 100644 index e27f28a..0000000 --- a/endaq/batch/utils/calc/integrate.py +++ /dev/null @@ -1,26 +0,0 @@ -import scipy.integrate -import scipy.signal - -from . import filters - - -def _integrate(array, dt, axis=-1): - """Integrate data over an axis.""" - result = scipy.integrate.cumtrapz(array, dx=dt, initial=0, axis=axis) - # In lieu of explicit initial offset, set integration bias to remove mean - # -> avoids trend artifacts after successive integrations - result = result - result.mean(axis=axis, keepdims=True) - - return result - - -def iter_integrals(array, dt, axis=-1, highpass_cutoff=1.0): - """Iterate over conditioned integrals of the given original data.""" - array = filters.highpass( - array, fs=1 / dt, half_order=3, cutoff=highpass_cutoff, axis=axis - ) - while True: - array.setflags(write=False) # should NOT mutate shared data - yield array - array.setflags(write=True) # array will be replaced below -> now ok to edit - array = _integrate(array, dt, axis=axis) diff --git a/endaq/batch/utils/calc/psd.py b/endaq/batch/utils/calc/psd.py deleted file mode 100644 index 7a3437b..0000000 --- a/endaq/batch/utils/calc/psd.py +++ /dev/null @@ -1,210 +0,0 @@ -import numbers -import warnings - -import numpy as np -import scipy.signal - - -def welch(array, nfft=1024, dt=None, axis=-1): - """ - Calculate a periodogram using a modified welch's method. - - The modified method is designed to maintain equivalence between the input - and the result as dictated by Parseval's Theorem. 
In order to achieve this, - this method is modified to: - - generate a spectrogram using a flat boxcar window - - use zero overlap in the STFT - - pad the signal with zeros when windows extend over the edge of the array - - sum each frequency bin across time (instead of averaging) - """ - warnings.warn( - "this method is deprecated; use instead `scipy.signal.welch`", - DeprecationWarning, - ) - - if not isinstance(nfft, numbers.Integral): - raise TypeError - if dt is not None: - if not isinstance(dt, numbers.Real): - raise TypeError - if dt <= 0: - raise ValueError - - array = np.moveaxis(array, axis, -1) - _f, _t, a_stft = scipy.signal.stft( - array, - fs=1, - window="boxcar", - nperseg=nfft, - noverlap=0, - nfft=nfft, - boundary="zeros", - padded=True, - axis=-1, - ) - assert a_stft.shape[-2] == nfft // 2 + 1 - assert a_stft.shape[-1] == -((array.shape[-1] - 1) // -nfft) + 1 - - a_psd = np.sum(np.abs(a_stft) ** 2, axis=-1) - a_psd[..., 1 : (nfft - 1) // 2 + 1] *= 2 - a_psd *= nfft ** 2 / array.shape[-1] - - a_psd = np.moveaxis(a_psd, -1, axis) - - if dt is None: - return a_psd - else: - freqs = np.fft.rfftfreq(nfft, d=dt) - return freqs, dt * a_psd - - -def rms(array, fs, dn=0, min_freq=0, max_freq=np.inf, nfft=1024, axis=-1): - """ - Multi-axis RMS of nth derivative. - - :param array: the source data - :param fs: the sampling frequency - :param dn: the derivative number (e.g. 1 = first derivative, 2 = second, - -1 = first anti-derivative) - :param highpass_freq: the frequency up to which all frequency content is - set to zero (inclusive; default to 0) - """ - array = np.moveaxis(array, axis, -1) - if fs <= 0: - raise ValueError - if not isinstance(dn, numbers.Integral): - raise TypeError - - a_psd = welch(array, nfft=nfft, axis=-1) - a_freqs = np.fft.rfftfreq(nfft, d=1 / fs) - - v_psd = a_psd - # Bandpass filtering - i_min_freq, i_max_freq = np.searchsorted( - a_freqs, [min_freq, max_freq], side="right" - ) - v_psd[..., :i_min_freq] = 0 - v_psd[..., i_max_freq:] = 0 - # Time-domain derivation/integration - v_psd[..., i_min_freq:i_max_freq] *= a_freqs[i_min_freq:i_max_freq] ** (2 * dn) - - return np.sqrt(np.sum(v_psd, axis=-1) / nfft) - - -def _np_histogram_nd(array, bins=10, weights=None, axis=-1, **kwargs): - """Compute histograms over a specified axis.""" - array = np.asarray(array) - bins = np.asarray(bins) - weights = np.asarray(weights) if weights is not None else None - - # Collect all ND inputs - nd_params = {} - (nd_params if array.ndim > 1 else kwargs)["a"] = array - (nd_params if bins.ndim > 1 else kwargs)["bins"] = bins - if weights is not None: - (nd_params if weights.ndim > 1 else kwargs)["weights"] = weights - - if len(nd_params) == 0: - return np.histogram(**kwargs)[0] - - # Move the desired axes to the back - for k, v in nd_params.items(): - nd_params[k] = np.moveaxis(v, axis, -1) - - # Broadcast ND arrays to the same shape - ks, vs = zip(*nd_params.items()) - vs_broadcasted = np.broadcast_arrays(*vs) - for k, v in zip(ks, vs_broadcasted): - nd_params[k] = v - - # Generate output - bins = nd_params.get("bins", bins) - - result_shape = () - if len(nd_params) != 0: - result_shape = v.shape[:-1] - if bins.ndim >= 1: - result_shape = result_shape + (bins.shape[-1] - 1,) - else: - result_shape = result_shape + (bins,) - - result = np.empty( - result_shape, - dtype=(weights.dtype if weights is not None else int), - ) - loop_shape = v.shape[:-1] - - for nd_i in np.ndindex(*loop_shape): - nd_kwargs = {k: v[nd_i] for k, v in nd_params.items()} - result[nd_i], edges = 
np.histogram(**nd_kwargs, **kwargs) - - result = np.moveaxis(result, -1, axis) - return result - - -def differentiate(f, psd, n=1): - """Perform time-domain differentiation on periodogram data.""" - # Involves a division by zero for n<0 - with warnings.catch_warnings(): - warnings.simplefilter("ignore", RuntimeWarning) - factor = (2 * np.pi * f) ** (2 * n) # divide by zero - if n < 0: - factor[f == 0] = 0 - - return f, psd * factor - - -def to_jagged(f, psd, freq_splits, axis=-1, mode="sum"): - """ - Calculate a periodogram over non-uniformly spaced frequency bins. - - :param f, psd: the returned values from `scipy.signal.welch` - :param freq_splits: the boundaries of the frequency bins; must be strictly - increasing - :param axis: same as the axis parameter provided to `scipy.signal.welch` - :param mode: the method for aggregating values into bins; 'mean' preserves - the PSD's area-under-the-curve, 'sum' preserves the PSD's "energy" - """ - if not np.all(np.diff(freq_splits, prepend=0) > 0): - raise ValueError - - # Check that PSD samples do not skip any frequency bins - spacing_test = np.diff(np.searchsorted(freq_splits, f)) - if np.any(spacing_test > 1): - warnings.warn( - "empty frequency bins in re-binned PSD; " - "original PSD's frequency spacing is too coarse", - RuntimeWarning, - ) - - psd_jagged = _np_histogram_nd(f, bins=freq_splits, weights=psd) - if mode == "mean": - with warnings.catch_warnings(): - warnings.simplefilter("ignore", category=RuntimeWarning) # x/0 - - psd_jagged = np.nan_to_num( # <- fix divisions by zero - psd_jagged / np.histogram(f, bins=freq_splits)[0] - ) - - f = (freq_splits[1:] + freq_splits[:-1]) / 2 - return f, psd_jagged - - -def to_octave(f, psd, fstart=1, octave_bins=12, **kwargs): - """Calculate a periodogram over log-spaced frequency bins.""" - max_f = f.max() - - octave_step = 1 / octave_bins - center_freqs = 2 ** np.arange( - np.log2(fstart), - np.log2(max_f) - octave_step / 2, - octave_step, - ) - freq_splits = 2 ** np.arange( - np.log2(fstart) - octave_step / 2, - np.log2(max_f), - octave_step, - ) - assert len(center_freqs) + 1 == len(freq_splits) - - return center_freqs, to_jagged(f, psd, freq_splits=freq_splits, **kwargs)[1] diff --git a/endaq/batch/utils/calc/shock.py b/endaq/batch/utils/calc/shock.py deleted file mode 100644 index 90d7fe5..0000000 --- a/endaq/batch/utils/calc/shock.py +++ /dev/null @@ -1,70 +0,0 @@ -from collections import namedtuple -import warnings - -import numpy as np -import scipy.signal - - -def rel_displ(accel, omega, dt=1, damp=0, axis=-1): - """Calculate the relative velocity for a SDOF system.""" - # Generate the transfer function - # H(s) = L{z(t)}(s) / L{y"(t)}(s) = (1/s²)(Z(s)/Y(s)) - # for the PDE - # z" + (2ζω)z' + (ω^2)z = -y" - with warnings.catch_warnings(): - warnings.simplefilter("ignore", scipy.signal.BadCoefficients) - - tf = scipy.signal.TransferFunction( - [-1], - [1, 2 * damp * omega, omega ** 2], - ).to_discrete(dt=dt) - - return scipy.signal.lfilter(tf.num, tf.den, accel, axis=axis) - - -def rolling_rel_vel(accel, freqs, dt=1, damp=0, nperseg=256, axis=-1): - """Calculate a rolling windowed relative velocity for a SDOF system.""" - axis = (axis % accel.ndim) - accel.ndim # index axis from end - accel = np.moveaxis(accel, axis, -1) - freqs = np.asarray(freqs) - - t = dt * np.arange(nperseg) - η = np.sqrt(1 - damp ** 2) - omega = 2 * np.pi * freqs[..., np.newaxis] - filt = -np.imag(np.exp(omega * t * (-damp + 1j * η))) / η - - result = np.empty(freqs.shape + accel.shape) - for i_nd in 
np.ndindex(*freqs.shape): - for j_nd in np.ndindex(*accel.shape[:-1]): - result[i_nd + j_nd] = scipy.signal.oaconvolve( - accel[j_nd], filt[i_nd], mode="full", axes=-1 - )[: (1 - nperseg) or None] - return np.moveaxis(result, -1, axis) - - -def pseudo_velocity(accel, freqs, dt=1, damp=0, two_sided=False, axis=-1): - """The pseudo velocity of an acceleration signal.""" - freqs = np.asarray(freqs) - omega = 2 * np.pi * freqs - - accel = np.moveaxis(accel, axis, -1) - - results = np.empty((2,) + freqs.shape + accel.shape[:-1], dtype=np.float) - - for i_nd in np.ndindex(freqs.shape): - rd = rel_displ(accel, omega[i_nd], dt, damp) - - results[(0,) + i_nd] = -omega[i_nd] * rd.min(axis=-1) - results[(1,) + i_nd] = omega[i_nd] * rd.max(axis=-1) - - # Move any frequency axes in place of the specified acceleration axis - results = np.moveaxis( - results, - np.arange(1, omega.ndim + 1), - np.arange(1, omega.ndim + 1) + (axis % accel.ndim), - ) - - if not two_sided: - return np.maximum(results[0], results[1]) - - return namedtuple("PseudoVelocityResults", "neg pos")(*results) diff --git a/endaq/batch/utils/calc/stats.py b/endaq/batch/utils/calc/stats.py deleted file mode 100644 index a019cdc..0000000 --- a/endaq/batch/utils/calc/stats.py +++ /dev/null @@ -1,44 +0,0 @@ -import numpy as np -import scipy.ndimage - - -def L2_norm(data, axis=None, keepdims=False): - """Compute the L2 norm (a.k.a. the Euclidean Norm).""" - return np.sqrt(np.sum(np.abs(data) ** 2, axis=axis, keepdims=keepdims)) - - -def max_abs(array, axis=None, keepdims=False): - """ - Compute the maximum of the absolute value of an array. - - This function should be equivalent to, but generally use less memory than - `np.amax(np.abs(array))`. - - Specifically, it generates the absolute-value maximum from `np.amax(array)` - and `-np.amin(array)`. Thus instead of allocating space for the intermediate - array `np.abs(array)`, it allocates for the axis-collapsed smaller arrays - `np.amax(array)` & `np.amin(array)`. - - Note - this method does not work on complex-valued arrays. 
- """ - # Forbid complex-valued data - if np.iscomplexobj(array): - raise ValueError("`max_abs` does not accept complex arrays") - - return np.maximum( - np.amax(array, initial=-np.inf, axis=axis, keepdims=keepdims), - -np.amin(array, initial=np.inf, axis=axis, keepdims=keepdims), - ) - - -def rms(data, axis=None, keepdims=False): - """Calculate the root-mean-square (RMS) along a given axis.""" - return np.sqrt(np.mean(np.abs(data) ** 2, axis=axis, keepdims=keepdims)) - - -def rolling_rms(array, nperseg=256, axis=-1): - """Calculate a rolling RMS along a given axis.""" - sq = array ** 2 - window = np.ones(nperseg, dtype=array.dtype) / nperseg - mean_sq = scipy.ndimage.convolve1d(sq, window, axis=axis, mode="mirror") - return np.sqrt(mean_sq) diff --git a/endaq/batch/utils/ide_utils.py b/endaq/batch/utils/ide_utils.py index 056531d..c83f6e6 100644 --- a/endaq/batch/utils/ide_utils.py +++ b/endaq/batch/utils/ide_utils.py @@ -1,6 +1,8 @@ from collections import defaultdict, namedtuple import warnings +from endaq.ide import to_pandas + # Constant Channel definitions UTYPE_GROUPS = { @@ -38,7 +40,12 @@ def units(self): @property def axis_names(self): - return [self.channel.subchannels[i].axisName for i in self.sch_ids] + return [self.channel.subchannels[i].name for i in self.sch_ids] + + def to_pandas(self, *args, **kwargs): + df = to_pandas(self.channel, *args, **kwargs).iloc[:, self.sch_ids] + df.columns.name = "axis" + return df def chs_by_utype(dataset): diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100644 index 0000000..746d34f --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1,2 @@ +git+https://github.com/MideTechnology/endaq-python-ide.git@development +git+https://github.com/MideTechnology/endaq-python-calc.git@development diff --git a/setup.py b/setup.py index deec2c6..208876e 100644 --- a/setup.py +++ b/setup.py @@ -7,6 +7,8 @@ "numpy", "scipy", "pandas", + "endaq-ide", + "endaq-calc", "idelib>=3.1.0", "backports.cached-property; python_version<'3.8'", ] diff --git a/tests/batch/test_analyzer.py b/tests/batch/test_analyzer.py index 466426d..82b9dcc 100644 --- a/tests/batch/test_analyzer.py +++ b/tests/batch/test_analyzer.py @@ -3,10 +3,11 @@ import idelib import numpy as np +import pandas as pd import pytest import endaq.batch.analyzer -from endaq.batch.utils.calc.stats import rms, L2_norm +from endaq.calc.stats import rms, L2_norm np.random.seed(0) @@ -47,16 +48,44 @@ def analyzer_bulk(analyzer_raw): } analyzer_mock._accelerationFs = 3000 - analyzer_mock._accelerationData = np.random.random((3, 21)) - analyzer_mock._accelerationResultant = L2_norm( - analyzer_mock._accelerationData, axis=0 + analyzer_mock._accelerationData = pd.DataFrame( + np.random.random((21, 3)), + index=pd.Series(np.arange(21) / 3000, name="time"), + columns=pd.Series(["X", "Y", "Z"], name="axis"), + ) + analyzer_mock._accelerationResultantData = analyzer_mock._accelerationData.apply( + L2_norm, axis="columns" + ).to_frame() + analyzer_mock._microphoneData = pd.DataFrame( + np.random.random(21), + index=pd.Series(np.arange(21) / 3000, name="time"), + columns=pd.Series(["Mic"], name="axis"), + ) + analyzer_mock._velocityData = pd.DataFrame( + np.random.random((21, 3)), + index=pd.Series(np.arange(21) / 3000, name="time"), + columns=pd.Series(["X", "Y", "Z"], name="axis"), + ) + analyzer_mock._displacementData = pd.DataFrame( + np.random.random((21, 3)), + index=pd.Series(np.arange(21) / 3000, name="time"), + columns=pd.Series(["X", "Y", "Z"], name="axis"), + ) + 
analyzer_mock._pressureData = pd.DataFrame( + np.random.random(5), + index=pd.Series(np.arange(5) / 5, name="time"), + columns=pd.Series(["Control"], name="axis"), + ) + analyzer_mock._temperatureData = pd.DataFrame( + np.random.random(5), + index=pd.Series(np.arange(5) / 5, name="time"), + columns=pd.Series(["Control"], name="axis"), + ) + analyzer_mock._gyroscopeData = pd.DataFrame( + np.random.random(11), + index=pd.Series(np.arange(11) / 5, name="time"), + columns=pd.Series(["Gyro"], name="axis"), ) - analyzer_mock._microphoneData = np.random.random((1, 21)) - analyzer_mock._velocityData = np.random.random((3, 21)) - analyzer_mock._displacementData = np.random.random((3, 21)) - analyzer_mock._pressureData = np.random.random(5) - analyzer_mock._temperatureData = np.random.random(5) - analyzer_mock._gyroscopeData = np.random.random(11) return analyzer_mock @@ -68,37 +97,43 @@ class TestAnalyzer: def test_accRMSFull(self, analyzer_bulk): - assert endaq.batch.analyzer.Analyzer.accRMSFull.func(analyzer_bulk)[ + calc_result = endaq.batch.analyzer.Analyzer.accRMSFull.func(analyzer_bulk)[ "Resultant" - ] == pytest.approx( - endaq.batch.analyzer.Analyzer.MPS2_TO_G - * rms(L2_norm(analyzer_bulk._accelerationData, axis=0)) + ] + expt_result = endaq.batch.analyzer.Analyzer.MPS2_TO_G * rms( + analyzer_bulk._accelerationData.apply(L2_norm, axis="columns") ) + assert calc_result == pytest.approx(expt_result) + def test_velRMSFull(self, analyzer_bulk): - assert endaq.batch.analyzer.Analyzer.velRMSFull.func(analyzer_bulk)[ + calc_result = endaq.batch.analyzer.Analyzer.velRMSFull.func(analyzer_bulk)[ "Resultant" - ] == pytest.approx( - endaq.batch.analyzer.Analyzer.MPS_TO_MMPS - * rms(L2_norm(analyzer_bulk._velocityData, axis=0)) + ] + expt_result = endaq.batch.analyzer.Analyzer.MPS_TO_MMPS * rms( + analyzer_bulk._velocityData.apply(L2_norm, axis="columns") ) + assert calc_result == pytest.approx(expt_result) def test_disRMSFull(self, analyzer_bulk): - assert endaq.batch.analyzer.Analyzer.disRMSFull.func(analyzer_bulk)[ + calc_result = endaq.batch.analyzer.Analyzer.disRMSFull.func(analyzer_bulk)[ "Resultant" - ] == pytest.approx( - endaq.batch.analyzer.Analyzer.M_TO_MM - * rms(L2_norm(analyzer_bulk._displacementData, axis=0)) + ] + expt_result = endaq.batch.analyzer.Analyzer.M_TO_MM * rms( + analyzer_bulk._displacementData.apply(L2_norm, axis="columns") ) + assert calc_result == pytest.approx(expt_result) def test_accPeakFull(self, analyzer_bulk): - assert endaq.batch.analyzer.Analyzer.accPeakFull.func(analyzer_bulk)[ + calc_result = endaq.batch.analyzer.Analyzer.accPeakFull.func(analyzer_bulk)[ "Resultant" - ] == pytest.approx( - endaq.batch.analyzer.Analyzer.MPS2_TO_G - * L2_norm(analyzer_bulk._accelerationData, axis=0).max() + ] + expt_result = endaq.batch.analyzer.Analyzer.MPS2_TO_G * ( + analyzer_bulk._accelerationData.apply(L2_norm, axis="columns").max() + ) + assert calc_result == pytest.approx(expt_result) + def test_pseudoVelPeakFull(self, analyzer_bulk): pass @@ -112,19 +147,28 @@ def test_gyroRMSFull(self, analyzer_bulk): pass def test_micRMSFull(self, analyzer_bulk): - assert endaq.batch.analyzer.Analyzer.micRMSFull.func(analyzer_bulk)[ + calc_result = endaq.batch.analyzer.Analyzer.micRMSFull.func(analyzer_bulk)[ "Mic" - ] == pytest.approx(rms(analyzer_bulk._microphoneData)) + ] + expt_result = rms(analyzer_bulk._microphoneData) + + assert calc_result == pytest.approx(expt_result) def test_pressFull(self, analyzer_bulk): - assert
endaq.batch.analyzer.Analyzer.pressFull.func(analyzer_bulk)[ + calc_result = endaq.batch.analyzer.Analyzer.pressFull.func(analyzer_bulk)[ "Control" - ] == pytest.approx(analyzer_bulk._pressureData.mean()) + ] + expt_result = analyzer_bulk._pressureData.mean() + + assert calc_result == pytest.approx(expt_result) def test_tempFull(self, analyzer_bulk): - assert endaq.batch.analyzer.Analyzer.tempFull.func(analyzer_bulk)[ + calc_result = endaq.batch.analyzer.Analyzer.tempFull.func(analyzer_bulk)[ "Control" - ] == pytest.approx(analyzer_bulk._temperatureData.mean()) + ] + expt_result = analyzer_bulk._temperatureData.mean() + + assert calc_result == pytest.approx(expt_result) ########################################################################### # Live File Tests diff --git a/tests/batch/test_calc.py b/tests/batch/test_calc.py index e9bdeb1..666b659 100644 --- a/tests/batch/test_calc.py +++ b/tests/batch/test_calc.py @@ -50,14 +50,13 @@ def test_make_peak_windows(filename): with idelib.importFile(filename) as ds: accel_ch = ide_utils.get_ch_type_best(ds, "acc") - data = accel_ch.eventarray.arraySlice() - t, data = data[0], data[1:] - utc_start_time = ds.sessions[0].utcStartTime + data = accel_ch.to_pandas(time_mode="timedelta") + utc_start_time = ds.lastUtcTime axis_names = accel_ch.axis_names analyzer = endaq.batch.analyzer.Analyzer( ds, - accel_highpass_cutoff=1, + accel_highpass_cutoff=None, accel_start_time=None, accel_end_time=None, accel_start_margin=None, @@ -70,7 +69,7 @@ def test_make_peak_windows(filename): ) calc_meta = endaq.batch.calc._make_meta(ds) calc_peaks = endaq.batch.calc._make_peak_windows(analyzer, margin_len=10) - i_max = np.argmax(np.abs(analyzer._accelerationData), axis=1) + i_max = np.argmax(np.abs(analyzer._accelerationData.to_numpy()), axis=0) assert calc_peaks.index.names == ["axis", "peak time", "peak offset"] assert np.all( @@ -78,16 +77,14 @@ def test_make_peak_windows(filename): == ["Resultant"] + axis_names ) - calc_peak_times = calc_meta.loc["start time"] + ( + calc_peak_times = ( calc_peaks.index.droplevel("peak offset") .unique() .to_frame() .droplevel("peak time") .loc[axis_names, "peak time"] ) - expt_peak_times = np.datetime64(utc_start_time, "s") + t[i_max].astype( - "timedelta64[us]" - ) + expt_peak_times = data.index[i_max].astype("timedelta64[ns]") assert np.all(calc_peak_times == expt_peak_times) @@ -155,7 +152,7 @@ def assert_output_is_valid(output: endaq.batch.calc.OutputStruct): == [ "filename", "axis", - "frequency", + "frequency (Hz)", "value", "serial number", "start time", @@ -168,7 +165,7 @@ def assert_output_is_valid(output: endaq.batch.calc.OutputStruct): == [ "filename", "axis", - "frequency", + "frequency (Hz)", "value", "serial number", "start time", @@ -208,7 +205,7 @@ def assert_output_is_valid(output: endaq.batch.calc.OutputStruct): == [ "filename", "axis", - "frequency", + "frequency (Hz)", "value", "serial number", "start time", @@ -249,16 +246,24 @@ def assert_output_is_valid(output: endaq.batch.calc.OutputStruct): ), # Test time restrictions on acceleration data endaq.batch.calc.GetDataBuilder( - accel_highpass_cutoff=1, accel_start_time=5, accel_end_time=10 + accel_highpass_cutoff=1, + accel_start_time=np.timedelta64(5, "s"), + accel_end_time=np.timedelta64(10, "s"), ).add_psd(freq_bin_width=1), endaq.batch.calc.GetDataBuilder( - accel_highpass_cutoff=1, accel_start_margin=2, accel_end_margin=2 + accel_highpass_cutoff=1, + accel_start_margin=np.timedelta64(2, "s"), + accel_end_margin=np.timedelta64(2, "s"), 
).add_psd(freq_bin_width=1), endaq.batch.calc.GetDataBuilder( - accel_highpass_cutoff=1, accel_start_time=5, accel_end_margin=2 + accel_highpass_cutoff=1, + accel_start_time=np.timedelta64(5, "s"), + accel_end_margin=np.timedelta64(2, "s"), ).add_psd(freq_bin_width=1), endaq.batch.calc.GetDataBuilder( - accel_highpass_cutoff=1, accel_start_margin=2, accel_end_time=10 + accel_highpass_cutoff=1, + accel_start_margin=np.timedelta64(2, "s"), + accel_end_time=np.timedelta64(10, "s"), ).add_psd(freq_bin_width=1), # Octave-spaced PSD parameters endaq.batch.calc.GetDataBuilder(accel_highpass_cutoff=1).add_psd( @@ -291,7 +296,22 @@ def test_aggregate_data(getdata_builder): def output_struct(): data = {} - RowStruct = namedtuple("RowStruct", ["filename", "serial_number", "start_time"]) + fieldname_mods = dict( + frequency="frequency (Hz)", + serial_number="serial number", + start_time="start time", + peak_time="peak time", + peak_offset="peak offset", + ) + + RowStruct = namedtuple( + "RowStruct", + [ + "filename", + "serial_number", + "start_time", + ], + ) data["meta"] = pd.DataFrame.from_records( [ RowStruct( @@ -306,7 +326,7 @@ def output_struct(): ), ], index="filename", - columns=[i.replace("_", " ") for i in RowStruct._fields], + columns=[fieldname_mods.get(i, i) for i in RowStruct._fields], ) RowStruct = namedtuple( @@ -355,7 +375,7 @@ def output_struct(): start_time=np.datetime64("2020-02-02 00:00:00"), ), ], - columns=[i.replace("_", " ") for i in RowStruct._fields], + columns=[fieldname_mods.get(i, i) for i in RowStruct._fields], ) RowStruct = namedtuple( @@ -404,7 +424,7 @@ def output_struct(): start_time=np.datetime64("2020-02-02 00:00:00"), ), ], - columns=[i.replace("_", " ") for i in RowStruct._fields], + columns=[fieldname_mods.get(i, i) for i in RowStruct._fields], ) RowStruct = namedtuple( @@ -437,7 +457,7 @@ def output_struct(): start_time=np.datetime64("2020-02-02 00:00:00"), ), ], - columns=[i.replace("_", " ") for i in RowStruct._fields], + columns=[fieldname_mods.get(i, i) for i in RowStruct._fields], ) RowStruct = namedtuple( @@ -509,7 +529,7 @@ def output_struct(): start_time=np.datetime64("2020-02-02 00:00:00"), ), ], - columns=[i.replace("_", " ") for i in RowStruct._fields], + columns=[fieldname_mods.get(i, i) for i in RowStruct._fields], ) RowStruct = namedtuple( @@ -558,7 +578,7 @@ def output_struct(): start_time=np.datetime64("2020-02-02 00:00:00"), ), ], - columns=[i.replace("_", " ") for i in RowStruct._fields], + columns=[fieldname_mods.get(i, i) for i in RowStruct._fields], ) result = endaq.batch.calc.OutputStruct(data)
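Note for reviewers: the refactored `accRMSFull`/`velRMSFull`/`disRMSFull` build their "Resultant" entry as `stats.L2_norm(...)` over the per-axis RMS Series, while the updated tests compare against the RMS of the row-wise L2 norm. The two agree by a simple identity: both collapse to the square root of the grand sum of squares over all samples and axes, scaled by the sample count. A self-contained numpy sketch (the `rms`/`L2_norm` helpers mirror `endaq.calc.stats` but are redefined here so the snippet runs on its own):

```python
import numpy as np

def rms(x, axis=None):
    # root-mean-square along an axis
    return np.sqrt(np.mean(np.abs(x) ** 2, axis=axis))

def L2_norm(x, axis=None):
    # Euclidean norm along an axis
    return np.sqrt(np.sum(np.abs(x) ** 2, axis=axis))

rng = np.random.default_rng(0)
accel = rng.random((21, 3))  # 21 samples x 3 axes, like the test fixture

# RMS of the per-sample resultant (norm across the axis columns)...
resultant_rms = rms(L2_norm(accel, axis=1))
# ...equals the norm of the per-axis RMS values: both reduce to
# sqrt(sum(x**2) / n_samples) taken over every sample of every axis.
per_axis_rms = rms(accel, axis=0)
assert np.isclose(resultant_rms, L2_norm(per_axis_rms))
```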
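Note for reviewers: the rewritten `_make_peak_windows` drops the manual slice arithmetic in favor of a concat/re-index trick: each column is re-labeled so its absolute peak sits at integer label 0, and `pd.concat` outer-joins the shifted indexes with NaN padding. A toy standalone sketch of the same idea in plain pandas (the column names and sizes are made up, and this is not the endaq API):

```python
import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
data = pd.DataFrame(rng.standard_normal((100, 2)), columns=["X", "Y"])
margin_len = 3

data_noidx = data.reset_index(drop=True)
# positional index of each column's absolute peak
peak_indices = data_noidx.abs().idxmax(axis=0)

# Shift each column's integer index so the peak sample lands at label 0;
# concat outer-joins the shifted indexes and pads the gaps with NaN.
aligned = pd.concat(
    [
        pd.Series(
            data[col].to_numpy(),
            index=data_noidx.index - peak_indices[col],
            name=col,
        )
        for col in data.columns
    ],
    axis=1,
)
assert (aligned.abs().idxmax() == 0).all()

# .loc slicing is inclusive on both ends, so [-margin_len, margin_len]
# keeps 2 * margin_len + 1 rows centered on the peaks.
print(aligned.loc[-margin_len:margin_len])
```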
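Note for reviewers: the quat.py changes thread a `qaxis` argument through every function so that the WXYZ coordinates no longer have to sit on the last axis. A small sanity sketch exercising the new signature against the default; the constant-rate rotation data is fabricated for illustration, and it assumes `quat_to_angvel` returns the world-frame angular velocity in rad/s:

```python
import numpy as np
from endaq.batch import quat

# A short quaternion series: constant 0.5 rad/s rotation about Z, WXYZ order.
fs = 100.0
t = np.arange(200) / fs
half_angle = 0.5 * t / 2
q = np.stack(
    [np.cos(half_angle), np.zeros_like(t), np.zeros_like(t), np.sin(half_angle)],
    axis=-1,
)  # shape (N, 4), quaternion coordinates on the last axis

# Default behavior: quaternions on the last axis
ang_vel = quat.quat_to_angvel(q, 1 / fs)  # -> shape (N, 3)

# Same data transposed, with qaxis pointing at the WXYZ axis
ang_vel_T = quat.quat_to_angvel(q.T, 1 / fs, qaxis=0)  # -> shape (3, N)

assert np.allclose(ang_vel, ang_vel_T.T)
# The recovered rate is ~0.5 rad/s about Z (gradient edge effects aside)
assert np.allclose(ang_vel[:, 2], 0.5, atol=1e-4)
```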