diff --git a/lifetime_value/metrics.py b/lifetime_value/metrics.py
index 5bb7c7b..946561d 100644
--- a/lifetime_value/metrics.py
+++ b/lifetime_value/metrics.py
@@ -18,7 +18,7 @@
 
 import numpy as np
 import pandas as pd
-from sklearn import metrics
+import sklearn.metrics as sk_metrics
 
 
 def cumulative_true(
@@ -62,11 +62,11 @@ def gini_from_gain(df: pd.DataFrame) -> pd.DataFrame:
 
 
 def _normalized_rmse(y_true, y_pred):
-  return np.sqrt(metrics.mean_squared_error(y_true, y_pred)) / y_true.mean()
+  return np.sqrt(sk_metrics.mean_squared_error(y_true, y_pred)) / y_true.mean()
 
 
 def _normalized_mae(y_true, y_pred):
-  return metrics.mean_absolute_error(y_true, y_pred) / y_true.mean()
+  return sk_metrics.mean_absolute_error(y_true, y_pred) / y_true.mean()
 
 
 def _aggregate_fn(df):
diff --git a/lifetime_value/metrics_test.py b/lifetime_value/metrics_test.py
--- a/lifetime_value/metrics_test.py
+++ b/lifetime_value/metrics_test.py
@@ -14,7 +14,15 @@
 # ============================================================================
 # Lint as: python3
 # Dependency imports
-
+import io
+import os
+import sys
+
+# Extend sys.path with the current working directory and its lifetime_value/
+# subdirectory (presumably so the test can be run from the repository root).
+current_path = os.getcwd()
+sys.path.append(current_path)
+sys.path.append(os.path.join(current_path, "lifetime_value"))
 from lifetime_value import metrics
 import numpy as np
 import pandas as pd
@@ -63,3 +71,30 @@ def test_decile_stats(self):
 
+  def test_gini_negative(self):
+    """Raw Gini is negative when the top-value user is ranked last."""
+    # User 'd' has by far the largest label but the smallest predicted score.
+    csv_rows = '\n'.join([
+        'a,0.1,0.115',
+        'b,0.1,0.112',
+        'c,0.1,0.1151',
+        'd,0.9,0.01',
+    ])
+    df = pd.read_csv(
+        io.StringIO(csv_rows),
+        header=None,
+        names=['uid', 'label1', 'pred_scores'])
+    df = df.sort_values(by='label1', ascending=False)
+
+    # Cumulative share of the total label, rows ordered by label descending.
+    gain_truth = np.cumsum(df['label1']) / np.sum(df['label1'])
+    # Model gain as returned by metrics.cumulative_true(labels, scores).
+    gain_model = metrics.cumulative_true(df['label1'], df['pred_scores'])
+    gain = pd.DataFrame({
+        'ground_truth': gain_truth,
+        'random_model': gain_model,
+    })
+
+    gini = metrics.gini_from_gain(gain)
+    self.assertLess(gini.loc['random_model', 'raw'], 0)
+
 
 if __name__ == '__main__':