4 changes: 4 additions & 0 deletions .gitignore
@@ -68,3 +68,7 @@ local.properties
/example/*/build
/buildSrc/build
/buildSrc/subprojects/*/build

# Ignore python build files, e.g. egg-info, pycache, etc.
*.egg-info/
__pycache__/
14 changes: 14 additions & 0 deletions metrics/setup.py
@@ -0,0 +1,14 @@
from setuptools import setup, find_packages

setup(
    name='lst-metrics',
    description='LST-Bench metrics and analysis',
    url='https://github.com/microsoft/lst-bench',
    license='Apache License 2.0',
    version='0.1',
    packages=find_packages(),
    install_requires=[
        'pandas>=1.5.3',
Review comment (Contributor):

I'm not that familiar with Python conventions, but the project contains a list of required packages that were added by @poojanilangekar:
https://github.com/microsoft/lst-bench/blob/main/metrics/notebooks/requirements.txt
Shouldn't this list match that one (and have a single source of truth rather than two as well)?

    ],
)
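In response to the review comment above, one possible way to keep a single source of truth (a sketch, not part of this PR, under the assumption that the pinned list stays in notebooks/requirements.txt) is to have setup.py parse that file:

# Hypothetical sketch, not part of this PR.
from pathlib import Path

from setuptools import find_packages, setup

# Assumption: setup.py is run from the metrics/ directory and
# notebooks/requirements.txt lists one requirement specifier per line.
requirements = [
    line.strip()
    for line in Path("notebooks/requirements.txt").read_text().splitlines()
    if line.strip() and not line.strip().startswith("#")
]

setup(
    name="lst-metrics",
    description="LST-Bench metrics and analysis",
    url="https://github.com/microsoft/lst-bench",
    license="Apache License 2.0",
    version="0.1",
    packages=find_packages(),
    install_requires=requirements,
)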

9 changes: 9 additions & 0 deletions metrics/stability/__init__.py
@@ -0,0 +1,9 @@
# stability/__init__.py
"""Module for stability analysis."""

# Import public functions or classes
from stability.degradation_analysis import DegradationEvaluatorBase
from stability.degradation_analysis import DegradationAnalysisHelper

# Define __all__ to control what gets imported with wildcard import *
__all__ = ['DegradationEvaluatorBase', 'DegradationAnalysisHelper']
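As a quick illustration of what the `__all__` declaration controls (illustrative snippet, not part of the PR):

# With __all__ set as above, a wildcard import exposes exactly these two names.
from stability import *

evaluator = DegradationEvaluatorBase()
helper_cls = DegradationAnalysisHelper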
83 changes: 83 additions & 0 deletions metrics/stability/degradation_analysis.py
@@ -0,0 +1,83 @@
Review comment (Contributor):

Shouldn't this file be under metrics/utils/ to match the existing folder structure of the Python project? It seems that all the other metrics (cluster, storage) were committed there. If we want to have a folder per metric type, maybe the others should be moved into their own folder (cc @anjagruenheid)? WDYT?

"""
Copyright (c) Microsoft Corporation.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from typing import List
import pandas as pd

class DegradationEvaluatorBase:
    """Provides a basic implementation for evaluating degradation of a metric in a long-running
    experiment on an LST (table format). The evaluator takes as input an ordered list of metric
    values collected at various points in time and returns a single value that represents the
    degradation of the metric over time.
    This implementation evaluates and returns the average degradation rate, evaluated as
        summation( (Mi - Mi-1) / Mi-1 ) / (n-1)
    where,
        Mi is the metric value at time i,
        n is the number of metric values provided,
        and n-1 total differences are calculated
    """

    def evaluate(self, metric_values: List[float]) -> float:
        """Evaluates the degradation rate for the input list of metric values."""

        if len(metric_values) < 2:
            raise ValueError(
                "At least two metric values are needed to evaluate degradation"
            )

        # Calculate the degradation rate
        degradation_rate = 0.0
        for i in range(1, len(metric_values)):
            degradation_rate += (
                metric_values[i] - metric_values[i - 1]
            ) / metric_values[i - 1]

        return degradation_rate / (len(metric_values) - 1)


class DegradationAnalysisHelper:
    """This helper class provides utility methods to compute degradation of a metric for a set of
    operators. It takes as input a DataFrame of metric values of operators of an experiment
    (e.g. phases), the desired metric for which the degradation is to be computed, and an
    evaluator instance. The metric values DataFrame is expected to have the following columns:
        operator_id: unique identifier for the operator (e.g. phase),
        metric_name: name of the metric (e.g. latency),
        and metric_value: value of the metric
    """

    def __init__(
        self, metric_values: pd.DataFrame, evaluator: DegradationEvaluatorBase
    ):
        self.metric_values = metric_values
        self.evaluator = evaluator

    def evaluate(self, metric: str, operators: List[str]) -> float:
        """Evaluates the degradation of the provided metric name for the input ordered list of
        operator ids. It filters the values from the initial DataFrame based on metric name
        and operators, orders them based on the ordered list of operators, and computes the
        degradation using the initialized evaluator.
        """
        # Filter the metric values for the desired metric and operators; copy the
        # slice so that adding the index column does not trigger a SettingWithCopyWarning
        metric_values = self.metric_values[self.metric_values["metric_name"] == metric]
        metric_values = metric_values[metric_values["operator_id"].isin(operators)].copy()
        metric_values["operator_index"] = metric_values["operator_id"].apply(
            operators.index
        )
        metric_values = metric_values.sort_values(by="operator_index")
        metric_values = metric_values["metric_value"].tolist()

        # Evaluate the degradation
        return self.evaluator.evaluate(metric_values)
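
A minimal usage sketch of the two classes above (illustrative data, not from the PR): for latency values [10, 12, 16], the average degradation rate is ((12 - 10)/10 + (16 - 12)/12) / 2 ≈ 0.267.

# Minimal usage sketch with illustrative data, not part of this PR.
import pandas as pd

from stability import DegradationAnalysisHelper, DegradationEvaluatorBase

df = pd.DataFrame(
    [
        ["p1", "latency", 10.0],
        ["p2", "latency", 12.0],
        ["p3", "latency", 16.0],
    ],
    columns=["operator_id", "metric_name", "metric_value"],
)

helper = DegradationAnalysisHelper(df, DegradationEvaluatorBase())
# ((12-10)/10 + (16-12)/12) / 2 = (0.2 + 0.333...) / 2 ≈ 0.267
print(helper.evaluate("latency", ["p1", "p2", "p3"]))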
76 changes: 76 additions & 0 deletions metrics/tests/stability/degradation_analysis_test.py
@@ -0,0 +1,76 @@
"""
Copyright (c) Microsoft Corporation.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import unittest
import pandas as pd
from stability import DegradationEvaluatorBase, DegradationAnalysisHelper

class DegradationEvaluatorBaseTest(unittest.TestCase):
    """Tests for the DegradationEvaluatorBase class"""

    def setUp(self):
        self.evaluator = DegradationEvaluatorBase()

    def test_sdr_with_positive_degradation(self):
        """Test that the evaluator returns a positive degradation rate"""
        values = [47, 75, 106, 131, 163, 157]
        expected_dr = 0.29
        degradation_rate = self.evaluator.evaluate(values)
        self.assertAlmostEqual(degradation_rate, expected_dr, delta=0.001)

    def test_sdr_with_insufficient_metrics(self):
        """Test that the evaluator throws an exception when the list of values is insufficient"""
        values = [47]
        # assert that evaluating the values throws an exception
        with self.assertRaises(ValueError):
            self.evaluator.evaluate(values)


class DegradationAnalysisHelperTest(unittest.TestCase):
    """Tests for the DegradationAnalysisHelper class"""

    def test_helper_filters(self):
        """Test that the helper returns the correct degradation rate for a metric and phases"""

        # test data
        operator_ids = ["p1", "r1", "p2", "r2", "p3"]
        metric_names = ["latency", "api"]
        metric_values = [10, 1_000, 15, 1_500, 12, 10_000, 35, 35_000, 16, 100_000]

        data = []
        for operator_id in operator_ids:
            for metric_name in metric_names:
                metric_value = metric_values.pop(0)
                data.append([operator_id, metric_name, metric_value])

        dataframe = pd.DataFrame(data, columns=["operator_id", "metric_name", "metric_value"])
        helper = DegradationAnalysisHelper(dataframe, DegradationEvaluatorBase())

        d_r = helper.evaluate("latency", ["p1", "p2", "p3"])
        self.assertAlmostEqual(d_r, 0.266, delta=0.001)

        d_r = helper.evaluate("latency", ["p1", "r2"])
        self.assertAlmostEqual(d_r, 2.5, delta=0.001)

        d_r = helper.evaluate("api", ["p1", "p2", "p3"])
        self.assertAlmostEqual(d_r, 9.0, delta=0.001)

        d_r = helper.evaluate("api", ["r1", "r2"])
        self.assertAlmostEqual(d_r, 22.333, delta=0.001)


if __name__ == "__main__":
    unittest.main()
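
Note that these tests import the stability package directly, so they assume the metrics/ directory is on PYTHONPATH. With that in place, the file can be executed directly, or the suite can be run from metrics/ with the standard runner, e.g. python -m unittest discover -s tests -p "*_test.py" (the explicit pattern is needed because the file name does not match unittest's default test*.py pattern).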