diff --git a/.gitignore b/.gitignore
index bee554f5..3fed747b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -68,3 +68,7 @@ local.properties
 /example/*/build
 /buildSrc/build
 /buildSrc/subprojects/*/build
+
+# Ignore python build files, e.g. egg-info, pycache, etc.
+*.egg-info/
+__pycache__/
diff --git a/metrics/setup.py b/metrics/setup.py
new file mode 100644
index 00000000..076160f8
--- /dev/null
+++ b/metrics/setup.py
@@ -0,0 +1,14 @@
+from setuptools import setup, find_packages
+
+setup(
+    name='lst-metrics',
+    description='LST-Bench Metrics and analysis',
+    url='https://github.com/microsoft/lst-bench',
+    license='Apache License, 2.0',
+    version='0.1',
+    packages=find_packages(),
+    install_requires=[
+        'pandas>=1.5.3',
+    ],
+)
+
diff --git a/metrics/stability/__init__.py b/metrics/stability/__init__.py
new file mode 100644
index 00000000..b4186ba1
--- /dev/null
+++ b/metrics/stability/__init__.py
@@ -0,0 +1,9 @@
+# stability/__init__.py
+"""module for stability analysis"""
+
+# Import public functions or classes
+from stability.degradation_analysis import DegradationEvaluatorBase
+from stability.degradation_analysis import DegradationAnalysisHelper
+
+# Define __all__ to control what gets imported with wildcard import *
+__all__ = ['DegradationEvaluatorBase', 'DegradationAnalysisHelper']
diff --git a/metrics/stability/degradation_analysis.py b/metrics/stability/degradation_analysis.py
new file mode 100644
index 00000000..cde5d7b0
--- /dev/null
+++ b/metrics/stability/degradation_analysis.py
@@ -0,0 +1,83 @@
+"""
+Copyright (c) Microsoft Corporation.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from typing import List
+import pandas as pd
+
+class DegradationEvaluatorBase:
+    """Provides a basic implementation for evaluating degradation of a metric in a long running
+    experiment on an LST (table format). The evaluator takes as input an ordered list of metric's
+    values collected at various points in time and returns a single value that represents the
+    degradation of the metric over time.
+    This implementation evaluates and returns the average degradation rate, evaluated as
+        summation( (Mi - Mi-1) / Mi-1 ) / (n-1)
+    where,
+        Mi is the metric value at time i
+        n is the number of metric values provided,
+        and n-1 total differences are calculated
+    """
+
+    def evaluate(self, metric_values: List[float]) -> float:
+        """Evaluates the degradation rate for the input list of metric values."""
+
+        if len(metric_values) < 2:
+            raise ValueError(
+                "At least two metric values are needed to evaluate degradation"
+            )
+
+        # Calculate the degradation rate
+        degradation_rate = 0.0
+        for i in range(1, len(metric_values)):
+            degradation_rate += (
+                metric_values[i] - metric_values[i - 1]
+            ) / metric_values[i - 1]
+
+        return degradation_rate / (len(metric_values) - 1)
+
+
+class DegradationAnalysisHelper:
+    """This helper class provides utility methods to compute degradation of a metric for a set of
+    operators. It takes as input a DataFrame of metric values of operators of an experiment
+    (e.g. phases), desired metric for which the degradation is to be computed and an evaluator's
+    instance. The metric values DataFrame is expected to have the following columns:
+        operator_id: unique identifier for the operator (e.g. phase)
+        metric_name: name of the metric (e.g. latency)
+        and metric_value: value of the metric
+    """
+
+    def __init__(
+        self, metric_values: pd.DataFrame, evaluator: DegradationEvaluatorBase
+    ):
+        self.metric_values = metric_values
+        self.evaluator = evaluator
+
+    def evaluate(self, metric: str, operators: List[str]) -> float:
+        """Evaluates the degradation of the provided metric name for the input ordered-list of
+        operators ids. It filters the values from the initial DataFrame based on metric name
+        and operators, orders them based on the ordered list of operators and computes the
+        degradation using the initialized evaluator.
+        """
+        # Filter the metric values for the desired metric and operators
+        metric_values = self.metric_values[self.metric_values["metric_name"] == metric]
+        metric_values = metric_values[metric_values["operator_id"].isin(operators)]
+        metric_values["operator_index"] = metric_values["operator_id"].apply(
+            operators.index
+        )
+        metric_values = metric_values.sort_values(by="operator_index")
+        metric_values = metric_values["metric_value"].tolist()
+
+        # Evaluate the degradation
+        return self.evaluator.evaluate(metric_values)
diff --git a/metrics/tests/stability/degradation_analysis_test.py b/metrics/tests/stability/degradation_analysis_test.py
new file mode 100644
index 00000000..c2391b88
--- /dev/null
+++ b/metrics/tests/stability/degradation_analysis_test.py
@@ -0,0 +1,76 @@
+"""
+Copyright (c) Microsoft Corporation.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import unittest
+import pandas as pd
+from stability import DegradationEvaluatorBase, DegradationAnalysisHelper
+
+class DegradationEvaluatorBaseTest(unittest.TestCase):
+    """Tests for the DegradationEvaluatorBase class"""
+
+    def setUp(self):
+        self.evaluator = DegradationEvaluatorBase()
+
+    def test_sdr_with_positive_degradation(self):
+        """Test that the evaluator returns positive degradation rate"""
+        values = [47, 75, 106, 131, 163, 157]
+        expected_dr = 0.29
+        degradation_rate = self.evaluator.evaluate(values)
+        self.assertAlmostEqual(degradation_rate, expected_dr, delta=0.001)
+
+    def test_sdr_with_insufficient_metrics(self):
+        """Test that the evaluator throws an exception when the list of values is insufficient"""
+        values = [47]
+        # assert evaluation of values throws an exception
+        with self.assertRaises(ValueError):
+            self.evaluator.evaluate(values)
+
+
+class DegradationAnalysisHelperTest(unittest.TestCase):
+    """Tests for the DegradationAnalysisHelper class"""
+
+    def test_helper_filters(self):
+        """Test that the helper returns correct degradation rate for a metric and phases"""
+
+        # test data
+        operator_ids = ["p1", "r1", "p2", "r2", "p3"]
+        metric_names = ["latency", "api"]
+        metric_values = [10, 1_000, 15, 1_500, 12, 10_000, 35, 35_000, 16, 100_000]
+
+        data = []
+        for operator_id in operator_ids:
+            for metric_name in metric_names:
+                metric_value = metric_values.pop(0)
+                data.append([operator_id, metric_name, metric_value])
+
+        dataframe = pd.DataFrame(data, columns=["operator_id", "metric_name", "metric_value"])
+        helper = DegradationAnalysisHelper(dataframe, DegradationEvaluatorBase())
+
+        d_r = helper.evaluate("latency", ["p1", "p2", "p3"])
+        self.assertAlmostEqual(d_r, 0.266, delta=0.001)
+
+        d_r = helper.evaluate("latency", ["p1", "r2"])
+        self.assertAlmostEqual(d_r, 2.5, delta=0.001)
+
+        d_r = helper.evaluate("api", ["p1", "p2", "p3"])
+        self.assertAlmostEqual(d_r, 9.0, delta=0.001)
+
+        d_r = helper.evaluate("api", ["r1", "r2"])
+        self.assertAlmostEqual(d_r, 22.333, delta=0.001)
+
+
+if __name__ == "__main__":
+    unittest.main()