4 changes: 4 additions & 0 deletions .gitignore
@@ -68,3 +68,7 @@ local.properties
/example/*/build
/buildSrc/build
/buildSrc/subprojects/*/build

# Ignore python build files, e.g. egg-info, pycache, etc.
*.egg-info/
__pycache__/
14 changes: 14 additions & 0 deletions metrics/setup.py
@@ -0,0 +1,14 @@
from setuptools import setup, find_packages

setup(
    name='lst-metrics',
    description='LST-Bench metrics and analysis',
    url='https://github.com/microsoft/lst-bench',
    license='Apache License 2.0',
    version='0.1',
    packages=find_packages(),
    install_requires=[
        'pandas>=1.5.3',
Review comment (Contributor):

I'm not that familiar with Python conventions, but the project contains a list of required packages that were added by @poojanilangekar:
https://github.com/microsoft/lst-bench/blob/main/metrics/notebooks/requirements.txt
Shouldn't this list match that one (and have a single source of truth rather than two as well)?

    ],
)
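In response to the review comment above, one possible way to keep a single source of truth (a sketch, not part of this PR, under the assumption that the pinned list stays in notebooks/requirements.txt) is to have setup.py parse that file:

# Hypothetical sketch, not part of this PR.
from pathlib import Path

from setuptools import find_packages, setup

# Assumption: setup.py is run from the metrics/ directory and
# notebooks/requirements.txt lists one requirement specifier per line.
requirements = [
    line.strip()
    for line in Path("notebooks/requirements.txt").read_text().splitlines()
    if line.strip() and not line.strip().startswith("#")
]

setup(
    name="lst-metrics",
    description="LST-Bench metrics and analysis",
    url="https://github.com/microsoft/lst-bench",
    license="Apache License 2.0",
    version="0.1",
    packages=find_packages(),
    install_requires=requirements,
)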

9 changes: 9 additions & 0 deletions metrics/stability/__init__.py
@@ -0,0 +1,9 @@
# stability/__init__.py
"""Module for stability analysis."""

# Import public functions or classes
from stability.degradation_analysis import DegradationEvaluatorBase
from stability.degradation_analysis import DegradationAnalysisHelper

# Define __all__ to control what gets imported with wildcard import *
__all__ = ['DegradationEvaluatorBase', 'DegradationAnalysisHelper']
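As a quick illustration of what the `__all__` declaration controls (illustrative snippet, not part of the PR):

# With __all__ set as above, a wildcard import exposes exactly these two names.
from stability import *

evaluator = DegradationEvaluatorBase()
helper_cls = DegradationAnalysisHelper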
83 changes: 83 additions & 0 deletions metrics/stability/degradation_analysis.py
@@ -0,0 +1,83 @@
Review comment (Contributor):

Shouldn't this file be under metrics/utils/ to match the existing folder structure of the Python project? It seems that all the other metrics (cluster, storage) were committed there. If we want to have a folder per metric type, maybe the others should be moved into their own folder (cc @anjagruenheid)? WDYT?

"""
Copyright (c) Microsoft Corporation.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from typing import List
import pandas as pd

class DegradationEvaluatorBase:
    """Provides a basic implementation for evaluating degradation of a metric in a long-running
    experiment on an LST (table format). The evaluator takes as input an ordered list of metric
    values collected at various points in time and returns a single value that represents the
    degradation of the metric over time.
    This implementation evaluates and returns the average degradation rate, evaluated as
        summation( (Mi - Mi-1) / Mi-1 ) / (n-1)
    where,
        Mi is the metric value at time i,
        n is the number of metric values provided,
        and n-1 total differences are calculated
    """

    def evaluate(self, metric_values: List[float]) -> float:
        """Evaluates the degradation rate for the input list of metric values."""

        if len(metric_values) < 2:
            raise ValueError(
                "At least two metric values are needed to evaluate degradation"
            )

        # Calculate the degradation rate
        degradation_rate = 0.0
        for i in range(1, len(metric_values)):
            degradation_rate += (
                metric_values[i] - metric_values[i - 1]
            ) / metric_values[i - 1]

        return degradation_rate / (len(metric_values) - 1)


class DegradationAnalysisHelper:
    """This helper class provides utility methods to compute degradation of a metric for a set of
    operators. It takes as input a DataFrame of metric values of operators of an experiment
    (e.g. phases), the desired metric for which the degradation is to be computed, and an
    evaluator instance. The metric values DataFrame is expected to have the following columns:
        operator_id: unique identifier for the operator (e.g. phase),
        metric_name: name of the metric (e.g. latency),
        and metric_value: value of the metric
    """

    def __init__(
        self, metric_values: pd.DataFrame, evaluator: DegradationEvaluatorBase
    ):
        self.metric_values = metric_values
        self.evaluator = evaluator

    def evaluate(self, metric: str, operators: List[str]) -> float:
        """Evaluates the degradation of the provided metric name for the input ordered list of
        operator ids. It filters the values from the initial DataFrame based on metric name
        and operators, orders them based on the ordered list of operators, and computes the
        degradation using the initialized evaluator.
        """
        # Filter the metric values for the desired metric and operators; copy the
        # slice so that adding the index column does not trigger a SettingWithCopyWarning
        metric_values = self.metric_values[self.metric_values["metric_name"] == metric]
        metric_values = metric_values[metric_values["operator_id"].isin(operators)].copy()
        metric_values["operator_index"] = metric_values["operator_id"].apply(
            operators.index
        )
        metric_values = metric_values.sort_values(by="operator_index")
        metric_values = metric_values["metric_value"].tolist()

        # Evaluate the degradation
        return self.evaluator.evaluate(metric_values)
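
A minimal usage sketch of the two classes above (illustrative data, not from the PR): for latency values [10, 12, 16], the average degradation rate is ((12 - 10)/10 + (16 - 12)/12) / 2 ≈ 0.267.

# Minimal usage sketch with illustrative data, not part of this PR.
import pandas as pd

from stability import DegradationAnalysisHelper, DegradationEvaluatorBase

df = pd.DataFrame(
    [
        ["p1", "latency", 10.0],
        ["p2", "latency", 12.0],
        ["p3", "latency", 16.0],
    ],
    columns=["operator_id", "metric_name", "metric_value"],
)

helper = DegradationAnalysisHelper(df, DegradationEvaluatorBase())
# ((12-10)/10 + (16-12)/12) / 2 = (0.2 + 0.333...) / 2 ≈ 0.267
print(helper.evaluate("latency", ["p1", "p2", "p3"]))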
76 changes: 76 additions & 0 deletions metrics/tests/stability/degradation_analysis_test.py
@@ -0,0 +1,76 @@
"""
Copyright (c) Microsoft Corporation.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import unittest
import pandas as pd
from stability import DegradationEvaluatorBase, DegradationAnalysisHelper

class DegradationEvaluatorBaseTest(unittest.TestCase):
    """Tests for the DegradationEvaluatorBase class"""

    def setUp(self):
        self.evaluator = DegradationEvaluatorBase()

    def test_sdr_with_positive_degradation(self):
        """Test that the evaluator returns a positive degradation rate"""
        values = [47, 75, 106, 131, 163, 157]
        expected_dr = 0.29
        degradation_rate = self.evaluator.evaluate(values)
        self.assertAlmostEqual(degradation_rate, expected_dr, delta=0.001)

    def test_sdr_with_insufficient_metrics(self):
        """Test that the evaluator throws an exception when the list of values is insufficient"""
        values = [47]
        # assert that evaluating the values throws an exception
        with self.assertRaises(ValueError):
            self.evaluator.evaluate(values)


class DegradationAnalysisHelperTest(unittest.TestCase):
    """Tests for the DegradationAnalysisHelper class"""

    def test_helper_filters(self):
        """Test that the helper returns the correct degradation rate for a metric and phases"""

        # test data
        operator_ids = ["p1", "r1", "p2", "r2", "p3"]
        metric_names = ["latency", "api"]
        metric_values = [10, 1_000, 15, 1_500, 12, 10_000, 35, 35_000, 16, 100_000]

        data = []
        for operator_id in operator_ids:
            for metric_name in metric_names:
                metric_value = metric_values.pop(0)
                data.append([operator_id, metric_name, metric_value])

        dataframe = pd.DataFrame(data, columns=["operator_id", "metric_name", "metric_value"])
        helper = DegradationAnalysisHelper(dataframe, DegradationEvaluatorBase())

        d_r = helper.evaluate("latency", ["p1", "p2", "p3"])
        self.assertAlmostEqual(d_r, 0.266, delta=0.001)

        d_r = helper.evaluate("latency", ["p1", "r2"])
        self.assertAlmostEqual(d_r, 2.5, delta=0.001)

        d_r = helper.evaluate("api", ["p1", "p2", "p3"])
        self.assertAlmostEqual(d_r, 9.0, delta=0.001)

        d_r = helper.evaluate("api", ["r1", "r2"])
        self.assertAlmostEqual(d_r, 22.333, delta=0.001)


if __name__ == "__main__":
    unittest.main()
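
Note that these tests import the stability package directly, so they assume the metrics/ directory is on PYTHONPATH. With that in place, the file can be executed directly, or the suite can be run from metrics/ with the standard runner, e.g. python -m unittest discover -s tests -p "*_test.py" (the explicit pattern is needed because the file name does not match unittest's default test*.py pattern).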