From 8a2b4fb63f87457521e18a03b894737407878062 Mon Sep 17 00:00:00 2001 From: Ashvin Agrawal Date: Wed, 28 Jun 2023 08:57:21 -0700 Subject: [PATCH 1/5] Add degradation metric computation implementation This change adds a basic implementation for computing degradation metric as described in the paper. The implementation, along with the helper class, can compute degradation from metric points provided in input, where each metric point is annotated with the operator (e.g. phase) it is associated with and the type of metric. Additionally, tests have been created to ensure the functionality is working as expected. The project structure has also been updated to accommodate these new additions. --- metrics/setup.py | 14 ++++ metrics/stability/__init__.py | 10 +++ metrics/stability/degradation_analysis.py | 69 ++++++++++++++++++ .../stability/degradation_analysis_tests.py | 71 +++++++++++++++++++ 4 files changed, 164 insertions(+) create mode 100644 metrics/setup.py create mode 100644 metrics/stability/__init__.py create mode 100644 metrics/stability/degradation_analysis.py create mode 100644 metrics/tests/stability/degradation_analysis_tests.py diff --git a/metrics/setup.py b/metrics/setup.py new file mode 100644 index 00000000..076160f8 --- /dev/null +++ b/metrics/setup.py @@ -0,0 +1,14 @@ +from setuptools import setup, find_packages + +setup( + name='lst-metrics', + description='LST-Bench Metrics and analysis', + url='https://github.com/microsoft/lst-bench', + license='Apache License, 2.0', + version='0.1', + packages=find_packages(), + install_requires=[ + 'pandas>=1.5.3', + ], +) + diff --git a/metrics/stability/__init__.py b/metrics/stability/__init__.py new file mode 100644 index 00000000..d0040bba --- /dev/null +++ b/metrics/stability/__init__.py @@ -0,0 +1,10 @@ +#stability/__init__.py +# module for stability analysis + +# Import public functions or classes +from stability.degradation_analysis import DegradationEvaluatorBase +from stability.degradation_analysis
import DegradationAnalysisHelper + +# Define __all__ to control what gets imported with wildcard import * +__all__ = ['DegradationEvaluatorBase', 'DegradationAnalysisHelper'] + diff --git a/metrics/stability/degradation_analysis.py b/metrics/stability/degradation_analysis.py new file mode 100644 index 00000000..6536af05 --- /dev/null +++ b/metrics/stability/degradation_analysis.py @@ -0,0 +1,69 @@ +""" +Copyright (c) Microsoft Corporation. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import pandas as pd +from typing import List + +# Provides a basic implementation for evaluating degradation of a metric in a long running experiment +# on an LST (table format). The evaluator takes as in put an ordered list of metric's values collected +# at various points in time and returns a single value that represents the degradation of the metric +# over time. 
+# This implementation evaluates and returns the average degradation rate, evaluated as +# ( summation( (Mi - Mi-1) / Mi-1 ) ) / (n-1) +# where, +# Mi is the metric value at time i +# n is the number of metric values provided, +# and n-1 total differences are calculated +class DegradationEvaluatorBase: + def evaluate(self, metricValues: List[float]) -> float: + if len(metricValues) < 2: + raise ValueError("At least two metric values are needed to evaluate degradation") + + # Calculate the degradation rate + degradationRate = 0.0 + for i in range(1, len(metricValues)): + degradationRate += (metricValues[i] - metricValues[i-1]) / metricValues[i-1] + + return degradationRate / (len(metricValues) - 1) + + +# This helper class provides utility methods to compute degradation of a metric for a set of +# operators. It takes as input a DataFrame of metric values of operators of an experiment +# (e.g. phases), desired metric for which the degradation is to be computed and an evaluator's +# instance. The metric values DataFrame is expected to have the following columns: +# operator_id: unique identifier for the operator (e.g. phase) +# metric_name: name of the metric (e.g. latency) +# and metric_value: value of the metric +class DegradationAnalysisHelper: + def __init__(self, metric_values: pd.DataFrame, evaluator: DegradationEvaluatorBase): + self.metric_values = metric_values + self.evaluator = evaluator + + # Evaluates the degradation of the provided metric name for the input ordered-list of + # operators ids. It filters the values from the intial DataFrame based on metric name + # and operators, orders them baed on ordered list of operators and computes the + # degradation using the initialized evaluator. 
+ def evaluate(self, metric: str, operators: List[str]) -> float: + # Filter the metric values for the desired metric and operators + metric_values = self.metric_values[self.metric_values['metric_name'] == metric] + metric_values = metric_values[metric_values['operator_id'].isin(operators)] + metric_values['operator_index'] = metric_values['operator_id'].apply(lambda x: operators.index(x)) + metric_values = metric_values.sort_values(by='operator_index') + metric_values = metric_values['metric_value'].tolist() + print(metric_values) + + # Evaluate the degradation + return self.evaluator.evaluate(metric_values) \ No newline at end of file diff --git a/metrics/tests/stability/degradation_analysis_tests.py b/metrics/tests/stability/degradation_analysis_tests.py new file mode 100644 index 00000000..5620f0e2 --- /dev/null +++ b/metrics/tests/stability/degradation_analysis_tests.py @@ -0,0 +1,71 @@ +""" +Copyright (c) Microsoft Corporation. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +import unittest +import pandas as pd +from stability import * + +class DegradationEvaluatorBaseTest(unittest.TestCase): + def setUp(self): + self.evaluator = DegradationEvaluatorBase() + + # Test that the evaluator returns positive degradation rate + def test_sdr_with_positive_degradation(self): + values = [47, 75, 106, 131, 163, 157] + expected_DR = 0.29 + degradation_rate = self.evaluator.evaluate(values) + self.assertAlmostEqual(degradation_rate, expected_DR, delta=0.001) + + # Test that the evaluator throws an exception when the list of values is insufficient + def test_sdr_with_insufficient_metrics(self): + values = [47] + #assert evaluation of values throws an exception + with self.assertRaises(ValueError): + self.evaluator.evaluate(values) + +class DegradationAnalysisHelperTest(unittest.TestCase): + def test_xxx(self): + # test data + operator_ids = ['p1', 'r1', 'p2', 'r2', 'p3'] + metric_names = ['latency', 'api'] + metric_values = [10, 1_000, 15, 1_500, + 12, 10_000, 35, 35_000, + 16, 100_000] + + data = [] + for operator_id in operator_ids: + for metric_name in metric_names: + metric_value = metric_values.pop(0) + data.append([operator_id, metric_name, metric_value]) + + df = pd.DataFrame(data, columns=['operator_id', 'metric_name', 'metric_value']) + helper = DegradationAnalysisHelper(df, DegradationEvaluatorBase()) + + d_r = helper.evaluate('latency', ['p1', 'p2', 'p3']) + self.assertAlmostEqual(d_r, .266, delta=0.001) + + d_r = helper.evaluate('latency', ['p1', 'r2']) + self.assertAlmostEqual(d_r, 2.5, delta=0.001) + + d_r = helper.evaluate('api', ['p1', 'p2', 'p3']) + self.assertAlmostEqual(d_r, 9.0, delta=0.001) + + d_r = helper.evaluate('api', ['r1', 'r2']) + self.assertAlmostEqual(d_r, 22.333, delta=0.001) + +if __name__ == '__main__': + unittest.main() + \ No newline at end of file From 96cd742350da03de0879e04ec73014f7fdce8260 Mon Sep 17 00:00:00 2001 From: Ashvin Agrawal Date: Wed, 28 Jun 2023 09:37:38 -0700 Subject: [PATCH 2/5] Remove 
noisy print message --- metrics/stability/degradation_analysis.py | 1 - metrics/tests/stability/degradation_analysis_tests.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/metrics/stability/degradation_analysis.py b/metrics/stability/degradation_analysis.py index 6536af05..97a3b19f 100644 --- a/metrics/stability/degradation_analysis.py +++ b/metrics/stability/degradation_analysis.py @@ -63,7 +63,6 @@ def evaluate(self, metric: str, operators: List[str]) -> float: metric_values['operator_index'] = metric_values['operator_id'].apply(lambda x: operators.index(x)) metric_values = metric_values.sort_values(by='operator_index') metric_values = metric_values['metric_value'].tolist() - print(metric_values) # Evaluate the degradation return self.evaluator.evaluate(metric_values) \ No newline at end of file diff --git a/metrics/tests/stability/degradation_analysis_tests.py b/metrics/tests/stability/degradation_analysis_tests.py index 5620f0e2..495fc09a 100644 --- a/metrics/tests/stability/degradation_analysis_tests.py +++ b/metrics/tests/stability/degradation_analysis_tests.py @@ -41,8 +41,8 @@ def test_xxx(self): # test data operator_ids = ['p1', 'r1', 'p2', 'r2', 'p3'] metric_names = ['latency', 'api'] - metric_values = [10, 1_000, 15, 1_500, - 12, 10_000, 35, 35_000, + metric_values = [10, 1_000, 15, 1_500, + 12, 10_000, 35, 35_000, 16, 100_000] data = [] From 693a81e63d86f92dbf02d8f8c37096a86dc0e730 Mon Sep 17 00:00:00 2001 From: Ashvin Agrawal Date: Wed, 28 Jun 2023 14:12:20 -0700 Subject: [PATCH 3/5] Ignore python build files --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index bee554f5..3fed747b 100644 --- a/.gitignore +++ b/.gitignore @@ -68,3 +68,7 @@ local.properties /example/*/build /buildSrc/build /buildSrc/subprojects/*/build + +# Ignore python build files, e.g. egg-info, pycache, etc. 
+*.egg-info/ +__pycache__/ From bca604e450bc5008084495a930526ef2837786ca Mon Sep 17 00:00:00 2001 From: Ashvin Agrawal Date: Wed, 28 Jun 2023 14:30:56 -0700 Subject: [PATCH 4/5] Apply pylint code style --- metrics/stability/degradation_analysis.py | 91 +++++++++++-------- .../stability/degradation_analysis_tests.py | 49 +++++----- 2 files changed, 80 insertions(+), 60 deletions(-) diff --git a/metrics/stability/degradation_analysis.py b/metrics/stability/degradation_analysis.py index 97a3b19f..cde5d7b0 100644 --- a/metrics/stability/degradation_analysis.py +++ b/metrics/stability/degradation_analysis.py @@ -14,55 +14,70 @@ limitations under the License. """ -import pandas as pd from typing import List +from typing import List +import pandas as pd -# Provides a basic implementation for evaluating degradation of a metric in a long running experiment -# on an LST (table format). The evaluator takes as in put an ordered list of metric's values collected -# at various points in time and returns a single value that represents the degradation of the metric -# over time. -# This implementation evaluates and returns the average degradation rate, evaluated as -# ( summation( (Mi - Mi-1) / Mi-1 ) ) / (n-1) -# where, -# Mi is the metric value at time i -# n is the number of metric values provided, -# and n-1 total differences are calculated class DegradationEvaluatorBase: - def evaluate(self, metricValues: List[float]) -> float: - if len(metricValues) < 2: - raise ValueError("At least two metric values are needed to evaluate degradation") - + """Provides a basic implementation for evaluating degradation of a metric in a long running + experiment on an LST (table format). The evaluator takes as input an ordered list of metric's + values collected at various points in time and returns a single value that represents the + degradation of the metric over time.
+ This implementation evaluates and returns the average degradation rate, evaluated as + summation( (Mi - Mi-1) / Mi-1 ) ) / (n-1) + where, + Mi is the metric value at time i + n is the number of metric values provided, + and n-1 total differences are calculated + """ + + def evaluate(self, metric_values: List[float]) -> float: + """Evaluates the degradation rate for the input list of metric values.""" + + if len(metric_values) < 2: + raise ValueError( + "At least two metric values are needed to evaluate degradation" + ) + # Calculate the degradation rate - degradationRate = 0.0 - for i in range(1, len(metricValues)): - degradationRate += (metricValues[i] - metricValues[i-1]) / metricValues[i-1] - - return degradationRate / (len(metricValues) - 1) + degradation_rate = 0.0 + for i in range(1, len(metric_values)): + degradation_rate += ( + metric_values[i] - metric_values[i - 1] + ) / metric_values[i - 1] + + return degradation_rate / (len(metric_values) - 1) -# This helper class provides utility methods to compute degradation of a metric for a set of -# operators. It takes as input a DataFrame of metric values of operators of an experiment -# (e.g. phases), desired metric for which the degradation is to be computed and an evaluator's -# instance. The metric values DataFrame is expected to have the following columns: -# operator_id: unique identifier for the operator (e.g. phase) -# metric_name: name of the metric (e.g. latency) -# and metric_value: value of the metric class DegradationAnalysisHelper: - def __init__(self, metric_values: pd.DataFrame, evaluator: DegradationEvaluatorBase): + """This helper class provides utility methods to compute degradation of a metric for a set of + operators. It takes as input a DataFrame of metric values of operators of an experiment + (e.g. phases), desired metric for which the degradation is to be computed and an evaluator's + instance. 
The metric values DataFrame is expected to have the following columns: + operator_id: unique identifier for the operator (e.g. phase) + metric_name: name of the metric (e.g. latency) + and metric_value: value of the metric + """ + + def __init__( + self, metric_values: pd.DataFrame, evaluator: DegradationEvaluatorBase + ): self.metric_values = metric_values self.evaluator = evaluator - # Evaluates the degradation of the provided metric name for the input ordered-list of - # operators ids. It filters the values from the intial DataFrame based on metric name - # and operators, orders them baed on ordered list of operators and computes the - # degradation using the initialized evaluator. def evaluate(self, metric: str, operators: List[str]) -> float: + """Evaluates the degradation of the provided metric name for the input ordered-list of + operators ids. It filters the values from the initial DataFrame based on metric name + and operators, orders them based on ordered list of operators and computes the + degradation using the initialized evaluator.
+ """ # Filter the metric values for the desired metric and operators - metric_values = self.metric_values[self.metric_values['metric_name'] == metric] - metric_values = metric_values[metric_values['operator_id'].isin(operators)] - metric_values['operator_index'] = metric_values['operator_id'].apply(lambda x: operators.index(x)) - metric_values = metric_values.sort_values(by='operator_index') - metric_values = metric_values['metric_value'].tolist() + metric_values = self.metric_values[self.metric_values["metric_name"] == metric] + metric_values = metric_values[metric_values["operator_id"].isin(operators)] + metric_values["operator_index"] = metric_values["operator_id"].apply( + operators.index + ) + metric_values = metric_values.sort_values(by="operator_index") + metric_values = metric_values["metric_value"].tolist() # Evaluate the degradation - return self.evaluator.evaluate(metric_values) \ No newline at end of file + return self.evaluator.evaluate(metric_values) diff --git a/metrics/tests/stability/degradation_analysis_tests.py b/metrics/tests/stability/degradation_analysis_tests.py index 495fc09a..c2391b88 100644 --- a/metrics/tests/stability/degradation_analysis_tests.py +++ b/metrics/tests/stability/degradation_analysis_tests.py @@ -16,34 +16,39 @@ import unittest import pandas as pd -from stability import * +from stability import DegradationEvaluatorBase, DegradationAnalysisHelper class DegradationEvaluatorBaseTest(unittest.TestCase): + """Tests for the DegradationEvaluatorBase class""" + def setUp(self): self.evaluator = DegradationEvaluatorBase() - # Test that the evaluator returns positive degradation rate def test_sdr_with_positive_degradation(self): + """Test that the evaluator returns positive degradation rate""" values = [47, 75, 106, 131, 163, 157] - expected_DR = 0.29 + expected_dr = 0.29 degradation_rate = self.evaluator.evaluate(values) - self.assertAlmostEqual(degradation_rate, expected_DR, delta=0.001) + self.assertAlmostEqual(degradation_rate, 
expected_dr, delta=0.001) - # Test that the evaluator throws an exception when the list of values is insufficient def test_sdr_with_insufficient_metrics(self): + """Test that the evaluator throws an exception when the list of values is insufficient""" values = [47] - #assert evaluation of values throws an exception + # assert evaluation of values throws an exception with self.assertRaises(ValueError): self.evaluator.evaluate(values) + class DegradationAnalysisHelperTest(unittest.TestCase): - def test_xxx(self): + """Tests for the DegradationAnalysisHelper class""" + + def test_helper_filters(self): + """Test that the helper returns correct degradation rate for a metric and phases""" + # test data - operator_ids = ['p1', 'r1', 'p2', 'r2', 'p3'] - metric_names = ['latency', 'api'] - metric_values = [10, 1_000, 15, 1_500, - 12, 10_000, 35, 35_000, - 16, 100_000] + operator_ids = ["p1", "r1", "p2", "r2", "p3"] + metric_names = ["latency", "api"] + metric_values = [10, 1_000, 15, 1_500, 12, 10_000, 35, 35_000, 16, 100_000] data = [] for operator_id in operator_ids: @@ -51,21 +56,21 @@ def test_xxx(self): metric_value = metric_values.pop(0) data.append([operator_id, metric_name, metric_value]) - df = pd.DataFrame(data, columns=['operator_id', 'metric_name', 'metric_value']) - helper = DegradationAnalysisHelper(df, DegradationEvaluatorBase()) - - d_r = helper.evaluate('latency', ['p1', 'p2', 'p3']) - self.assertAlmostEqual(d_r, .266, delta=0.001) + dataframe = pd.DataFrame(data, columns=["operator_id", "metric_name", "metric_value"]) + helper = DegradationAnalysisHelper(dataframe, DegradationEvaluatorBase()) + + d_r = helper.evaluate("latency", ["p1", "p2", "p3"]) + self.assertAlmostEqual(d_r, 0.266, delta=0.001) - d_r = helper.evaluate('latency', ['p1', 'r2']) + d_r = helper.evaluate("latency", ["p1", "r2"]) self.assertAlmostEqual(d_r, 2.5, delta=0.001) - d_r = helper.evaluate('api', ['p1', 'p2', 'p3']) + d_r = helper.evaluate("api", ["p1", "p2", "p3"]) 
self.assertAlmostEqual(d_r, 9.0, delta=0.001) - d_r = helper.evaluate('api', ['r1', 'r2']) + d_r = helper.evaluate("api", ["r1", "r2"]) self.assertAlmostEqual(d_r, 22.333, delta=0.001) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() - \ No newline at end of file From 5664571dca2cdcd89ec979d61d3ddd7edb30860b Mon Sep 17 00:00:00 2001 From: Ashvin Agrawal Date: Wed, 28 Jun 2023 20:05:06 -0700 Subject: [PATCH 5/5] Refactor file name to match test naming convention --- metrics/stability/__init__.py | 3 +-- ...radation_analysis_tests.py => degradation_analysis_test.py} | 0 2 files changed, 1 insertion(+), 2 deletions(-) rename metrics/tests/stability/{degradation_analysis_tests.py => degradation_analysis_test.py} (100%) diff --git a/metrics/stability/__init__.py b/metrics/stability/__init__.py index d0040bba..b4186ba1 100644 --- a/metrics/stability/__init__.py +++ b/metrics/stability/__init__.py @@ -1,5 +1,5 @@ #stability/__init__.py -# module for stability analysis +"""module for stability analysis""" # Import public functions or classes from stability.degradation_analysis import DegradationEvaluatorBase @@ -7,4 +7,3 @@ # Define __all__ to control what gets imported with wildcard import * __all__ = ['DegradationEvaluatorBase', 'DegradationAnalysisHelper'] - diff --git a/metrics/tests/stability/degradation_analysis_tests.py b/metrics/tests/stability/degradation_analysis_test.py similarity index 100% rename from metrics/tests/stability/degradation_analysis_tests.py rename to metrics/tests/stability/degradation_analysis_test.py