From de0e43326d8e964f0df3dd3dd39f0c5594985102 Mon Sep 17 00:00:00 2001
From: artemkuzmenko2501-del <artemkuzmenko2501@gmail.com>
Date: Fri, 13 Mar 2026 18:20:31 +0300
Subject: [PATCH 1/4] Add custom metric functions in Tester and linearization
 in Preprocessor

- Tester now accepts metric_funcs dict mapping metric names to callables,
  enabling ratio and composite metrics without pre-computing columns
- LinearizationTransformer added for ratio metrics (e.g. revenue/orders):
  linearized_i = numerator_i - ratio * denominator_i, where ratio is
  estimated on reference data passed to fit()
- Preprocessor.linearize() integrates linearization into the existing
  chain pattern with full serialization/replay support
- 8 new tests covering metric_funcs constructor/run/override behaviour
  and linearize formula, chaining, serialization

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 CLAUDE.md                              |  88 ++++++++++++++++
 ambrosia/preprocessing/__init__.py     |   3 +-
 ambrosia/preprocessing/preprocessor.py |  46 ++++++++-
 ambrosia/preprocessing/transformers.py | 133 ++++++++++++++++++++++++-
 ambrosia/tester/handlers.py            |  19 +++-
 ambrosia/tester/tester.py              |  20 +++-
 tests/test_preprocessor.py             |  69 +++++++++++++
 tests/test_tester.py                   |  58 +++++++++++
 8 files changed, 427 insertions(+), 9 deletions(-)
 create mode 100644 CLAUDE.md

diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..c322566
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,88 @@
+# Ambrosia
+
+A/B testing framework for experiment design, group splitting, and results evaluation.
+Supports both pandas and Spark DataFrames.
+
+## Commands
+
+```bash
+make install      # create .venv via Poetry (poetry install --all-extras)
+make test         # run pytest with coverage
+make lint         # isort + black + pylint + flake8 (checks only)
+make autoformat   # isort + black (fix in place)
+make clean        # remove .venv, build artifacts, reports/
+```
+
+Single test: `PYTHONPATH=. pytest tests/path/test_file.py::test_fn`
+
+Line length: **120**.
+
+## Architecture
+
+### Three-stage pipeline
+
+`Designer` → `Splitter` → `Tester` are independent, stateless-ish classes.
+No shared state between stages; each takes a DataFrame and parameters.
+
+### Pandas/Spark dispatch
+
+Never subclass for pandas vs. Spark. Instead use `DataframeHandler` or the
+free function `choose_on_table(alternatives, dataframe)` in
+`ambrosia/tools/ab_abstract_component.py`:
+
+```python
+choose_on_table([pandas_func, spark_func], dataframe)
+```
+
+`DataframeHandler._handle_cases` / `_handle_on_table` wrap this pattern for
+method dispatch in handlers (e.g. `TheoryHandler`, `EmpiricHandler`).
+
+### ABMetaClass
+
+`ABMetaClass(ABCMeta, YAMLObjectMetaclass)` in `ab_abstract_component.py`
+resolves the metaclass conflict between `ABCMeta` and PyYAML's
+`YAMLObjectMetaclass`. Any class that inherits from `ABToolAbstract` **and**
+needs YAML serialization must set `metaclass=ABMetaClass`.
+
+### ABToolAbstract._prepare_arguments()
+
+Constructor args are "saved" defaults; `run()` args can override them at
+call time. `_prepare_arguments` resolves the priority:
+run-time arg → constructor arg → `ValueError` if both are None.
+
+```python
+chosen = _prepare_arguments({"alpha": [self._alpha, given_alpha]})
+```
+
+### Stat criteria strategy pattern
+
+Hierarchy: `StatCriterion` (abstract, just `calculate_pvalue`) →
+`ABStatCriterion` (adds `calculate_effect`, `calculate_conf_interval`,
+`get_results`).
+
+Concrete implementations in `ambrosia/tools/stat_criteria.py`:
+`TtestIndCriterion`, `TtestRelCriterion`, `MannWhitneyCriterion`,
+`WilcoxonCriterion`.
+
+`Tester` dispatches by string alias via `AVAILABLE_AB_CRITERIA` dict — duck
+typing, not isinstance checks. To add a criterion: subclass `ABStatCriterion`,
+set `alias` and `implemented_effect_types` class attributes, register in the
+dict.
+
+### Preprocessor chain
+
+`Preprocessor` (pandas only) uses method chaining — each method returns
+`self`. Each step appends a fitted `AbstractFittableTransformer` to
+`self.transformers`. The transformer list supports serialization
+(`store_transformations` / `load_transformations` → JSON) and replay
+(`apply_transformations`) for consistent train/test preprocessing.
+
+### Theoretical vs empirical design
+
+Two design philosophies plug into the same `SimpleDesigner` interface:
+
+- **Theoretical** (`TheoryHandler`): closed-form power/sample-size formulas
+- **Empirical** (`EmpiricHandler`): bootstrap/simulation-based estimates
+
+Both implement `size_design`, `effect_design`, `power_design` and dispatch
+pandas vs. Spark internally via `DataframeHandler`.
diff --git a/ambrosia/preprocessing/__init__.py b/ambrosia/preprocessing/__init__.py
index 82561ab..e6d33e8 100644
--- a/ambrosia/preprocessing/__init__.py
+++ b/ambrosia/preprocessing/__init__.py
@@ -21,7 +21,7 @@
 from .ml_var_reducer import MLVarianceReducer
 from .preprocessor import Preprocessor
 from .robust import IQRPreprocessor, RobustPreprocessor
-from .transformers import BoxCoxTransformer, LogTransformer
+from .transformers import BoxCoxTransformer, LinearizationTransformer, LogTransformer
 
 __all__ = [
     "AggregatePreprocessor",
@@ -32,5 +32,6 @@
     "RobustPreprocessor",
     "IQRPreprocessor",
     "BoxCoxTransformer",
+    "LinearizationTransformer",
     "LogTransformer",
 ]
diff --git a/ambrosia/preprocessing/preprocessor.py b/ambrosia/preprocessing/preprocessor.py
index c865008..6d7b205 100644
--- a/ambrosia/preprocessing/preprocessor.py
+++ b/ambrosia/preprocessing/preprocessor.py
@@ -34,7 +34,7 @@
 from ambrosia.preprocessing.aggregate import AggregatePreprocessor
 from ambrosia.preprocessing.cuped import Cuped, MultiCuped
 from ambrosia.preprocessing.robust import IQRPreprocessor, RobustPreprocessor
-from ambrosia.preprocessing.transformers import BoxCoxTransformer, LogTransformer
+from ambrosia.preprocessing.transformers import BoxCoxTransformer, LinearizationTransformer, LogTransformer
 
 
 class Preprocessor:
@@ -378,6 +378,50 @@ def multicuped(
         self.transformers.append(transformer)
         return self
 
+    def linearize(
+        self,
+        numerator: types.ColumnNameType,
+        denominator: types.ColumnNameType,
+        transformed_name: Optional[types.ColumnNameType] = None,
+        load_path: Optional[Path] = None,
+    ) -> Preprocessor:
+        """
+        Linearize a ratio metric for use in A/B testing.
+
+        Computes a per-unit linearized value, so that a standard t-test on
+        unit-level observations can be used for ratio metrics:
+
+            linearized_i = numerator_i - ratio * denominator_i
+
+        where ratio = mean(numerator) / mean(denominator) is estimated on
+        the data passed to this ``Preprocessor`` instance (reference / control data).
+
+        Parameters
+        ----------
+        numerator : ColumnNameType
+            Column name of the ratio numerator (e.g. ``"revenue"``).
+        denominator : ColumnNameType
+            Column name of the ratio denominator (e.g. ``"orders"``).
+        transformed_name : ColumnNameType, optional
+            Name for the new linearized column. Defaults to
+            ``"{numerator}_lin"``.
+        load_path : Path, optional
+            Path to a json file with pre-fitted parameters. If given, the column name arguments are ignored.
+
+        Returns
+        -------
+        self : Preprocessor
+            Instance object.
+        """
+        transformer = LinearizationTransformer()
+        if load_path is None:
+            transformer.fit_transform(self.dataframe, numerator, denominator, transformed_name, inplace=True)
+        else:
+            transformer.load_params(load_path)
+            transformer.transform(self.dataframe, inplace=True)
+        self.transformers.append(transformer)
+        return self
+
     def transformations(self) -> List:
         """
         List of all transformations which were called.
diff --git a/ambrosia/preprocessing/transformers.py b/ambrosia/preprocessing/transformers.py
index 6397641..1649e86 100644
--- a/ambrosia/preprocessing/transformers.py
+++ b/ambrosia/preprocessing/transformers.py
@@ -16,7 +16,7 @@
 Module contains tools for metrics transformations during a
 preprocessing task.
 """
-from typing import Dict, Union
+from typing import Dict, Optional, Union
 
 import numpy as np
 import pandas as pd
@@ -386,3 +386,134 @@ def inverse_transform(self, dataframe: pd.DataFrame, inplace: bool = False) -> U
         transformed: pd.DataFrame = dataframe if inplace else dataframe.copy()
         transformed[self.column_names] = np.exp(transformed[self.column_names].values)
         return None if inplace else transformed
+
+
+class LinearizationTransformer(AbstractFittableTransformer):
+    """
+    Linearization transformer for ratio metrics.
+
+    Converts a ratio metric (numerator / denominator) into a per-unit linearized
+    metric, so that a standard t-test on unit-level observations can be used:
+
+        linearized_i = numerator_i - ratio * denominator_i
+
+    where ratio = mean(numerator) / mean(denominator), estimated on the reference
+    (control group / historical) data passed to fit().
+
+    Attributes
+    ----------
+    numerator : str
+        Column name of the ratio numerator (e.g. "revenue").
+    denominator : str
+        Column name of the ratio denominator (e.g. "orders").
+    transformed_name : str, optional
+        Name for the new column. Defaults to ``"{numerator}_lin"``.
+
+    Examples
+    --------
+    >>> transformer = LinearizationTransformer()
+    >>> transformer.fit(control_df, "revenue", "orders", "arpu_lin")
+    >>> transformer.transform(experiment_df, inplace=True)
+    """
+
+    def __str__(self) -> str:
+        return "Linearization transformation"
+
+    def __init__(self) -> None:
+        self.numerator: Optional[str] = None
+        self.denominator: Optional[str] = None
+        self.transformed_name: Optional[str] = None
+        self.ratio: Optional[float] = None
+        super().__init__()
+
+    def get_params_dict(self) -> Dict:
+        self._check_fitted()
+        return {
+            "numerator": self.numerator,
+            "denominator": self.denominator,
+            "transformed_name": self.transformed_name,
+            "ratio": self.ratio,
+        }
+
+    def load_params_dict(self, params: Dict) -> None:
+        for key in ("numerator", "denominator", "transformed_name", "ratio"):
+            if key not in params:
+                raise TypeError(f"params argument must contain: {key}")
+            setattr(self, key, params[key])
+        self.fitted = True
+
+    def fit(
+        self,
+        dataframe: pd.DataFrame,
+        numerator: str,
+        denominator: str,
+        transformed_name: Optional[str] = None,
+    ):
+        """
+        Estimate ratio = mean(numerator) / mean(denominator) on reference data.
+
+        Parameters
+        ----------
+        dataframe : pd.DataFrame
+            Reference dataframe (typically control group or historical data).
+        numerator : str
+            Column name of the ratio numerator.
+        denominator : str
+            Column name of the ratio denominator.
+        transformed_name : str, optional
+            Name for the linearized column. Defaults to ``"{numerator}_lin"``.
+        """
+        self._check_cols(dataframe, [numerator, denominator])
+        denom_mean = dataframe[denominator].mean()
+        if denom_mean == 0:
+            raise ValueError(f"Mean of denominator column '{denominator}' is zero; cannot compute ratio.")
+        self.numerator = numerator
+        self.denominator = denominator
+        self.transformed_name = transformed_name if transformed_name is not None else f"{numerator}_lin"
+        self.ratio = dataframe[numerator].mean() / denom_mean
+        self.fitted = True
+        return self
+
+    def transform(self, dataframe: pd.DataFrame, inplace: bool = False) -> Union[pd.DataFrame, None]:
+        """
+        Apply linearization: transformed = numerator - ratio * denominator.
+
+        Parameters
+        ----------
+        dataframe : pd.DataFrame
+            Dataframe to transform.
+        inplace : bool, default: ``False``
+            If ``True`` modifies dataframe in place, otherwise returns a copy.
+        """
+        self._check_fitted()
+        self._check_cols(dataframe, [self.numerator, self.denominator])
+        df = dataframe if inplace else dataframe.copy()
+        df[self.transformed_name] = df[self.numerator] - self.ratio * df[self.denominator]
+        return None if inplace else df
+
+    def fit_transform(
+        self,
+        dataframe: pd.DataFrame,
+        numerator: str,
+        denominator: str,
+        transformed_name: Optional[str] = None,
+        inplace: bool = False,
+    ) -> Union[pd.DataFrame, None]:
+        """
+        Fit and transform in one step.
+
+        Parameters
+        ----------
+        dataframe : pd.DataFrame
+            Reference dataframe for fitting and transformation.
+        numerator : str
+            Column name of the ratio numerator.
+        denominator : str
+            Column name of the ratio denominator.
+        transformed_name : str, optional
+            Name for the linearized column.
+        inplace : bool, default: ``False``
+            If ``True`` modifies dataframe in place.
+        """
+        self.fit(dataframe, numerator, denominator, transformed_name)
+        return self.transform(dataframe, inplace)
diff --git a/ambrosia/tester/handlers.py b/ambrosia/tester/handlers.py
index 7d65c35..5c97779 100644
--- a/ambrosia/tester/handlers.py
+++ b/ambrosia/tester/handlers.py
@@ -51,7 +51,15 @@ class SparkCriteria(enum.Enum):
 
 class TheoreticalTesterHandler:
     def __init__(
-        self, group_a, group_b, column: str, alpha: np.ndarray, effect_type: str, criterion: StatCriterion, **kwargs
+        self,
+        group_a,
+        group_b,
+        column: str,
+        alpha: np.ndarray,
+        effect_type: str,
+        criterion: StatCriterion,
+        metric_func=None,
+        **kwargs,
     ):
         self.group_a = group_a
         self.group_b = group_b
@@ -59,6 +67,7 @@ def __init__(
         self.alpha = alpha
         self.effect_type = effect_type
         self.criterion = criterion
+        self.metric_func = metric_func
         self.kwargs = kwargs
 
     def _correct_criterion(self, criterion: tp.Any) -> bool:
@@ -79,8 +88,12 @@ def get_criterion(self, criterion: str, data_example: types.SparkOrPandas):
 
     def _set_kwargs(self):
         if isinstance(self.group_a, pd.DataFrame):
-            self.group_a = self.group_a[self.column].values
-            self.group_b = self.group_b[self.column].values
+            if self.metric_func is not None:
+                self.group_a = np.asarray(self.metric_func(self.group_a))
+                self.group_b = np.asarray(self.metric_func(self.group_b))
+            else:
+                self.group_a = self.group_a[self.column].values
+                self.group_b = self.group_b[self.column].values
         elif isinstance(self.group_a, types.SparkDataFrame):
             self.kwargs["column"] = self.column
         self.kwargs["alpha"] = self.alpha
diff --git a/ambrosia/tester/tester.py b/ambrosia/tester/tester.py
index 386d6a5..304cdbe 100644
--- a/ambrosia/tester/tester.py
+++ b/ambrosia/tester/tester.py
@@ -29,7 +29,7 @@
 """
 import itertools
 from copy import deepcopy
-from typing import Dict, List, Optional, Union
+from typing import Callable, Dict, List, Optional, Union
 from warnings import warn
 
 import numpy as np
@@ -241,6 +241,7 @@ def __init__(
         id_column: Optional[types.ColumnNameType] = None,
         first_type_errors: types.StatErrorType = 0.05,
         metrics: Optional[types.MetricNamesType] = None,
+        metric_funcs: Optional[Dict[str, Callable]] = None,
     ):
         """
         Tester class constructor to initialize the object.
@@ -257,6 +258,7 @@ def __init__(
             self.set_experiment_results(experiment_results=experiment_results)
         self.set_errors(first_type_errors)
         self.set_metrics(metrics)
+        self.__metric_funcs = metric_funcs or {}
 
     @staticmethod
     def __filter_data(
@@ -372,9 +374,15 @@ def __pre_run(method: str, args: types._UsageArgumentsType, **kwargs) -> types.T
         if method not in accepted_methods:
             raise ValueError(f'Choose method from {", ".join(accepted_methods)}')
         result: types.TesterResult = {}
+        metric_funcs: Dict = args.get("metric_funcs", {})
         for metric in args["metrics"]:
-            a_values: np.ndarray = args["data_a_group"][metric].values
-            b_values: np.ndarray = args["data_b_group"][metric].values
+            metric_func = metric_funcs.get(metric)
+            if metric_func is not None:
+                a_values: np.ndarray = np.asarray(metric_func(args["data_a_group"]))
+                b_values: np.ndarray = np.asarray(metric_func(args["data_b_group"]))
+            else:
+                a_values = args["data_a_group"][metric].values
+                b_values = args["data_b_group"][metric].values
             if method == "theory":
                 # TODO: Make it SolverClass ~ method
                 # solver = SolverClass(...)
@@ -386,6 +394,7 @@ def __pre_run(method: str, args: types._UsageArgumentsType, **kwargs) -> types.T
                     alpha=np.array(args["alpha"]),
                     effect_type=args["effect_type"],
                     criterion=args["criterion"],
+                    metric_func=metric_func,
                     **kwargs,
                 )
                 sub_result = solver.solve()
@@ -473,6 +482,7 @@ def run(
         criterion: Optional[ABStatCriterion] = None,
         correction_method: Union[str, None] = "bonferroni",
         as_table: bool = True,
+        metric_funcs: Optional[Dict[str, Callable]] = None,
         **kwargs,
     ) -> types.TesterResult:
         """
@@ -556,6 +566,8 @@ def run(
         chosen_args: types._UsageArgumentsType = Tester._prepare_arguments(arguments_choice)
         chosen_args["effect_type"] = effect_type
         chosen_args["criterion"] = criterion
+        effective_metric_funcs = {**self.__metric_funcs, **(metric_funcs or {})}
+        chosen_args["metric_funcs"] = effective_metric_funcs
 
         hypothesis_num: int = len(list(itertools.combinations(chosen_args["experiment_results"], 2))) * len(
             chosen_args["metrics"]
@@ -602,6 +614,7 @@ def test(
     criterion: Optional[ABStatCriterion] = None,
     correction_method: Union[str, None] = "bonferroni",
     as_table: bool = True,
+    metric_funcs: Optional[Dict[str, Callable]] = None,
     **kwargs,
 ) -> types.TesterResult:
     """
@@ -673,5 +686,6 @@ def test(
         criterion=criterion,
         correction_method=correction_method,
         as_table=as_table,
+        metric_funcs=metric_funcs,
         **kwargs,
     )
diff --git a/tests/test_preprocessor.py b/tests/test_preprocessor.py
index 9106336..e2954b9 100644
--- a/tests/test_preprocessor.py
+++ b/tests/test_preprocessor.py
@@ -1,5 +1,6 @@
 import os
 
+import numpy as np
 import pandas as pd
 import pytest
 
@@ -117,3 +118,71 @@ def test_store_load_config(data_for_agg):
     transformed_by_config: pd.DataFrame = loaded_preprocessor.apply_transformations()
     os.remove(store_path)
     assert (transformed == transformed_by_config).all(None)
+
+
+@pytest.mark.smoke()
+def test_linearize_basic(data_nonlin_var):
+    """
+    Test that linearize creates new column and returns self.
+    """
+    preprocessor = Preprocessor(data_nonlin_var, verbose=False)
+    result = preprocessor.linearize("target", "feature_1", transformed_name="target_lin")
+    assert result is preprocessor  # method chaining
+    assert "target_lin" in preprocessor.data().columns
+
+
+@pytest.mark.unit()
+def test_linearize_formula(data_nonlin_var):
+    """
+    Test that linearized values satisfy: linearized = num - ratio * denom.
+    """
+    preprocessor = Preprocessor(data_nonlin_var, verbose=False)
+    preprocessor.linearize("target", "feature_1", transformed_name="target_lin")
+    df = preprocessor.data()
+    transformer = preprocessor.transformations()[-1]
+    ratio = transformer.ratio
+    expected = data_nonlin_var["target"] - ratio * data_nonlin_var["feature_1"]
+    np.testing.assert_allclose(df["target_lin"].values, expected.values, rtol=1e-10)
+
+
+@pytest.mark.unit()
+def test_linearize_in_chain(data_nonlin_var):
+    """
+    Test linearize as part of a preprocessing chain.
+    """
+    preprocessor = Preprocessor(data_nonlin_var, verbose=False)
+    result = (
+        preprocessor.robust("feature_1", alpha=0.01)
+        .linearize("target", "feature_1", transformed_name="target_lin")
+        .data()
+    )
+    assert "target_lin" in result.columns
+
+
+@pytest.mark.unit()
+def test_linearize_load_store(data_nonlin_var):
+    """
+    Test that linearization transformer can be serialized and replayed.
+    """
+    store_path = "tests/configs/linearize_config.json"
+    preprocessor = Preprocessor(data_nonlin_var, verbose=False)
+    preprocessor.linearize("target", "feature_1", transformed_name="target_lin")
+    preprocessor.store_transformations(store_path)
+
+    loaded_preprocessor = Preprocessor(data_nonlin_var, verbose=False)
+    loaded_preprocessor.load_transformations(store_path)
+
+    os.remove(store_path)
+
+    for t, lt in zip(preprocessor.transformations(), loaded_preprocessor.transformations()):
+        assert t.get_params_dict() == lt.get_params_dict()
+
+
+@pytest.mark.unit()
+def test_linearize_default_name(data_nonlin_var):
+    """
+    Test that default transformed_name is '{numerator}_lin'.
+    """
+    preprocessor = Preprocessor(data_nonlin_var, verbose=False)
+    preprocessor.linearize("target", "feature_1")
+    assert "target_lin" in preprocessor.data().columns
diff --git a/tests/test_tester.py b/tests/test_tester.py
index c4ba892..a1597e8 100644
--- a/tests/test_tester.py
+++ b/tests/test_tester.py
@@ -384,3 +384,61 @@ def test_paired_bootstrap(effect_type, alternative):
     )
     assert test_results_dep[0]["pvalue"] < test_results_ind[0]["pvalue"]
     assert test_results_dep[0]["confidence_interval"][0] > test_results_ind[0]["confidence_interval"][0]
+
+
+@pytest.mark.unit
+def test_metric_func_constructor(results_ltv_retention_conversions):
+    """
+    Test that metric_funcs passed to constructor are used when metric name matches.
+    """
+    # ratio metric: ltv / retention (arbitrary, just to test callable path)
+    ratio_func = lambda df: (df["ltv"] / (df["retention"] + 1e-6)).values
+    tester = Tester(
+        dataframe=results_ltv_retention_conversions,
+        column_groups="group",
+        metrics=["ratio_metric"],
+        metric_funcs={"ratio_metric": ratio_func},
+    )
+    result = tester.run(as_table=False)
+    assert len(result) == 1
+    assert "pvalue" in result[0]
+
+
+@pytest.mark.unit
+@pytest.mark.parametrize("method", ["theory", "empiric"])
+def test_metric_func_run(method, results_ltv_retention_conversions):
+    """
+    Test that metric_funcs passed to run() work for theory and empiric methods.
+    """
+    double_ltv = lambda df: (df["ltv"] * 2).values
+    tester = Tester(
+        dataframe=results_ltv_retention_conversions,
+        column_groups="group",
+        metrics=["ltv"],
+    )
+    result_normal = tester.run(method=method, metrics=["ltv"], as_table=False)
+    result_func = tester.run(
+        method=method,
+        metrics=["custom"],
+        metric_funcs={"custom": double_ltv},
+        as_table=False,
+    )
+    # Doubling values keeps the t-test p-value (scale-invariant statistic), but the effect should be doubled
+    assert abs(result_func[0]["effect"]) == pytest.approx(abs(result_normal[0]["effect"]) * 2, rel=1e-4)
+
+
+@pytest.mark.unit
+def test_metric_func_overrides_constructor(results_ltv_retention_conversions):
+    """
+    Test that metric_funcs in run() override those set in constructor.
+    """
+    func_a = lambda df: df["ltv"].values
+    func_b = lambda df: (df["ltv"] * 3).values
+    tester = Tester(
+        dataframe=results_ltv_retention_conversions,
+        column_groups="group",
+        metric_funcs={"my_metric": func_a},
+    )
+    result_a = tester.run(metrics=["my_metric"], as_table=False)
+    result_b = tester.run(metrics=["my_metric"], metric_funcs={"my_metric": func_b}, as_table=False)
+    assert abs(result_b[0]["effect"]) == pytest.approx(abs(result_a[0]["effect"]) * 3, rel=1e-4)

From 65b823cdf5c05a552f31a7428bb391b6f80c109f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=90=D1=81=D0=BB=D0=B0=D0=BD=20=D0=91=D0=B0=D0=B9=D1=80?=
 =?UTF-8?q?=D0=B0=D0=BC=D0=BA=D1=83=D0=BB=D0=BE=D0=B2?=
 <aslan_bm@MacBook-Pro-Aslan.local>
Date: Thu, 26 Mar 2026 21:13:45 +0300
Subject: [PATCH 2/4] Add metric_funcs docstrings, bootstrap test, and resolve
 CLAUDE.md

- Document metric_funcs parameter in Tester class, run(), and test()
  docstrings with type hints, behavior description, and pandas-only note
- Add test_metric_func_bootstrap to verify metric_funcs works with
  empiric (bootstrap) method
- Replace PR's CLAUDE.md with the more complete project version

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CLAUDE.md                 | 137 +++++++++++++++++++-------------------
 ambrosia/tester/tester.py |  15 +++++
 tests/test_tester.py      |  19 ++++++
 3 files changed, 101 insertions(+), 70 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index c322566..aaa2144 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -1,88 +1,85 @@
 # Ambrosia
 
-A/B testing framework for experiment design, group splitting, and results evaluation.
-Supports both pandas and Spark DataFrames.
+Python-библиотека для A/B-тестирования: дизайн экспериментов, разбиение на группы, оценка эффекта. Поддержка pandas и PySpark.
 
-## Commands
+## Команды
 
 ```bash
-make install      # create .venv via Poetry (poetry install --all-extras)
-make test         # run pytest with coverage
-make lint         # isort + black + pylint + flake8 (checks only)
-make autoformat   # isort + black (fix in place)
-make clean        # remove .venv, build artifacts, reports/
-```
-
-Single test: `PYTHONPATH=. pytest tests/path/test_file.py::test_fn`
-
-Line length: **120**.
-
-## Architecture
+# Установка
+make install                  # poetry install + extras
 
-### Three-stage pipeline
+# Тесты
+make test                     # pytest
+poetry run pytest tests/ -x   # с остановкой на первом падении
+poetry run pytest tests/test_designer.py -x  # конкретный файл
 
-`Designer` → `Splitter` → `Tester` are independent, stateless-ish classes.
-No shared state between stages; each takes a DataFrame and parameters.
+# Линтеры (проверка)
+make lint                     # isort + black + pylint + flake8
 
-### Pandas/Spark dispatch
+# Форматирование (авто-исправление)
+make autoformat               # isort + black
 
-Never subclass for pandas vs. Spark. Instead use `DataframeHandler` or the
-free function `choose_on_table(alternatives, dataframe)` in
-`ambrosia/tools/ab_abstract_component.py`:
-
-```python
-choose_on_table([pandas_func, spark_func], dataframe)
+# Coverage
+make coverage
 ```
 
-`DataframeHandler._handle_cases` / `_handle_on_table` wrap this pattern for
-method dispatch in handlers (e.g. `TheoryHandler`, `EmpiricHandler`).
+## Архитектура
 
-### ABMetaClass
+Три основных модуля образуют пайплайн:
+- `ambrosia/designer/` — расчёт параметров эксперимента (размер выборки, MDE, мощность)
+- `ambrosia/splitter/` — разбиение пользователей на группы (simple, hash, metric, stratification)
+- `ambrosia/tester/` — оценка эффекта и статзначимости (t-test, Mann-Whitney, Wilcoxon, bootstrap)
 
-`ABMetaClass(ABCMeta, YAMLObjectMetaclass)` in `ab_abstract_component.py`
-resolves the metaclass conflict between `ABCMeta` and PyYAML's
-`YAMLObjectMetaclass`. Any class that inherits from `ABToolAbstract` **and**
-needs YAML serialization must set `metaclass=ABMetaClass`.
+Предобработка:
+- `ambrosia/preprocessing/` — агрегация, outlier removal, Box-Cox, Log, CUPED, ML variance reduction
 
-### ABToolAbstract._prepare_arguments()
+Ядро:
+- `ambrosia/tools/` — абстрактные классы, стат. критерии, KNN, утилиты
+- `ambrosia/spark_tools/` — PySpark-реализации (опциональная зависимость)
 
-Constructor args are "saved" defaults; `run()` args can override them at
-call time. `_prepare_arguments` resolves the priority:
-run-time arg → constructor arg → `ValueError` if both are None.
+### Иерархия абстракций
 
-```python
-chosen = _prepare_arguments({"alpha": [self._alpha, given_alpha]})
+```
+ABToolAbstract          — базовый класс для Designer, Splitter, Tester
+AbstractFittableTransformer — базовый для трансформеров (BoxCox, Log, Robust, IQR, Aggregate, Cuped)
+AbstractVarianceReducer     — базовый для Cuped, MultiCuped, MLVarianceReducer
+ABStatCriterion             — базовый для TtestIndCriterion, MannWhitneyCriterion и др.
 ```
 
-### Stat criteria strategy pattern
-
-Hierarchy: `StatCriterion` (abstract, just `calculate_pvalue`) →
-`ABStatCriterion` (adds `calculate_effect`, `calculate_conf_interval`,
-`get_results`).
-
-Concrete implementations in `ambrosia/tools/stat_criteria.py`:
-`TtestIndCriterion`, `TtestRelCriterion`, `MannWhitneyCriterion`,
-`WilcoxonCriterion`.
-
-`Tester` dispatches by string alias via `AVAILABLE_AB_CRITERIA` dict — duck
-typing, not isinstance checks. To add a criterion: subclass `ABStatCriterion`,
-set `alias` and `implemented_effect_types` class attributes, register in the
-dict.
-
-### Preprocessor chain
-
-`Preprocessor` (pandas only) uses method chaining — each method returns
-`self`. Each step appends a fitted `AbstractFittableTransformer` to
-`self.transformers`. The transformer list supports serialization
-(`store_transformations` / `load_transformations` → JSON) and replay
-(`apply_transformations`) for consistent train/test preprocessing.
-
-### Theoretical vs empirical design
-
-Two design philosophies plug into the same `SimpleDesigner` interface:
-
-- **Theoretical** (`TheoryHandler`): closed-form power/sample-size formulas
-- **Empirical** (`EmpiricHandler`): bootstrap/simulation-based estimates
-
-Both implement `size_design`, `effect_design`, `power_design` and dispatch
-pandas vs. Spark internally via `DataframeHandler`.
+Каждый основной класс (Designer, Splitter, Tester) реализует паттерн:
+- Конфигурация через `set_*()` методы или конструктор
+- Запуск через `run()` метод
+- Поддержка YAML-сериализации
+
+## Код-стайл
+
+- **Line length:** 120 символов (black, isort, flake8 — всё настроено на 120)
+- **Formatter:** black
+- **Import sort:** isort (trailing comma, parentheses, case-sensitive)
+- **Docstrings:** NumPy convention
+- **Лицензионный заголовок** в каждом .py файле:
+  ```python
+  #  Copyright 2022 MTS (Mobile Telesystems)
+  #
+  #  Licensed under the Apache License, Version 2.0 (the "License");
+  #  ...
+  ```
+- **Type hints:** используются через `ambrosia/types.py` — единый модуль типов
+- **Flake8 игнорирует:** D200, D205, D400, D105, D100, E203, W503
+- **Pylint:** конфигурация в `.pylintrc`, игнорирует `tests/`
+
+## Тестирование
+
+- Фреймворк: pytest
+- Маркеры: `@pytest.mark.unit`, `@pytest.mark.smoke`
+- Фикстуры: `tests/conftest.py` (включая local Spark session)
+- Тестовые данные: `tests/test_data/`
+- Паттерн именования: `test_*.py`, функции `test_*`
+
+## Важные соглашения
+
+- PySpark — опциональная зависимость (`pip install ambrosia[spark]`). Импорт Spark-модулей защищён через `ambrosia/tools/import_tools.py`
+- KNN использует nmslib (primary) с fallback на hnswlib (для macOS ARM)
+- Python 3.9–3.13, PySpark >= 3.4
+- Управление зависимостями: Poetry (pyproject.toml)
+- CI: GitHub Actions (lint + test matrix по версиям Python)
diff --git a/ambrosia/tester/tester.py b/ambrosia/tester/tester.py
index 304cdbe..38264e0 100644
--- a/ambrosia/tester/tester.py
+++ b/ambrosia/tester/tester.py
@@ -88,6 +88,12 @@ class Tester(ABToolAbstract):
     metrics : MetricNameType, optional
         Metrics (columns of dataframe) which is used to calculate
         experiment result.
+    metric_funcs : Dict[str, Callable], optional
+        Dictionary mapping metric names to callable functions.
+        Each function receives a ``pd.DataFrame`` (group data) and must
+        return an array-like of numeric values. When provided, the
+        function is used instead of column lookup for the corresponding
+        metric name. Only supported for pandas DataFrames.
 
     Attributes
     ----------
@@ -525,6 +531,11 @@ def run(
         as_table : bool, default: ``True``
             Return the test results as a pandas dataframe.
             If ``False``, a list of dicts with results will be returned.
+        metric_funcs : Dict[str, Callable], optional
+            Dictionary mapping metric names to callable functions.
+            Each function receives a group ``pd.DataFrame`` and returns
+            array-like values. Overrides functions set in constructor
+            for matching metric names. Only pandas DataFrames are supported.
         **kwargs : Dict
             Other keyword arguments.
 
@@ -662,6 +673,10 @@ def test(
     as_table : bool, default: ``True``
         Return the test results as a pandas dataframe.
         If ``False``, a list of dicts with results will be returned.
+    metric_funcs : Dict[str, Callable], optional
+        Dictionary mapping metric names to callable functions.
+        Each function receives a group ``pd.DataFrame`` and returns
+        array-like values. Only pandas DataFrames are supported.
     **kwargs : Dict
         Other keyword arguments.
 
diff --git a/tests/test_tester.py b/tests/test_tester.py
index a1597e8..b8eab6b 100644
--- a/tests/test_tester.py
+++ b/tests/test_tester.py
@@ -442,3 +442,22 @@ def test_metric_func_overrides_constructor(results_ltv_retention_conversions):
     result_a = tester.run(metrics=["my_metric"], as_table=False)
     result_b = tester.run(metrics=["my_metric"], metric_funcs={"my_metric": func_b}, as_table=False)
     assert abs(result_b[0]["effect"]) == pytest.approx(abs(result_a[0]["effect"]) * 3, rel=1e-4)
+
+
+@pytest.mark.unit
+def test_metric_func_bootstrap(results_ltv_retention_conversions):
+    """
+    Check that a metric from ``metric_funcs`` runs with the empiric (bootstrap) method and reports pvalue, effect, CI.
+    """
+    double_ltv = lambda df: (df["ltv"] * 2).values  # custom metric: the "ltv" column doubled
+    tester = Tester(
+        dataframe=results_ltv_retention_conversions,
+        column_groups="group",
+        metrics=["custom"],
+        metric_funcs={"custom": double_ltv},
+    )
+    result = tester.run(method="empiric", as_table=False)  # as_table=False -> list of result dicts
+    assert len(result) == 1
+    assert "pvalue" in result[0]
+    assert "effect" in result[0]
+    assert "confidence_interval" in result[0]

From 560ea615edf2029b31b9cb5f507a4bff07d4b522 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=90=D1=81=D0=BB=D0=B0=D0=BD=20=D0=91=D0=B0=D0=B9=D1=80?=
 =?UTF-8?q?=D0=B0=D0=BC=D0=BA=D1=83=D0=BB=D0=BE=D0=B2?=
 <aslan_bm@MacBook-Pro-Aslan.local>
Date: Thu, 26 Mar 2026 21:15:39 +0300
Subject: [PATCH 3/4] Add CLAUDE.md to .gitignore and remove from tracking

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .gitignore |  3 ++
 CLAUDE.md  | 85 ------------------------------------------------------
 2 files changed, 3 insertions(+), 85 deletions(-)
 delete mode 100644 CLAUDE.md

diff --git a/.gitignore b/.gitignore
index c0adba5..5b741e9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,6 +44,9 @@ settings.json
 .mypy_cache/
 .pytest_cache/
 
+# Claude Code
+CLAUDE.md
+
 # Tests artifacts
 reports/
 coverage.xml
diff --git a/CLAUDE.md b/CLAUDE.md
deleted file mode 100644
index aaa2144..0000000
--- a/CLAUDE.md
+++ /dev/null
@@ -1,85 +0,0 @@
-# Ambrosia
-
-Python-библиотека для A/B-тестирования: дизайн экспериментов, разбиение на группы, оценка эффекта. Поддержка pandas и PySpark.
-
-## Команды
-
-```bash
-# Установка
-make install                  # poetry install + extras
-
-# Тесты
-make test                     # pytest
-poetry run pytest tests/ -x   # с остановкой на первом падении
-poetry run pytest tests/test_designer.py -x  # конкретный файл
-
-# Линтеры (проверка)
-make lint                     # isort + black + pylint + flake8
-
-# Форматирование (авто-исправление)
-make autoformat               # isort + black
-
-# Coverage
-make coverage
-```
-
-## Архитектура
-
-Три основных модуля образуют пайплайн:
-- `ambrosia/designer/` — расчёт параметров эксперимента (размер выборки, MDE, мощность)
-- `ambrosia/splitter/` — разбиение пользователей на группы (simple, hash, metric, stratification)
-- `ambrosia/tester/` — оценка эффекта и статзначимости (t-test, Mann-Whitney, Wilcoxon, bootstrap)
-
-Предобработка:
-- `ambrosia/preprocessing/` — агрегация, outlier removal, Box-Cox, Log, CUPED, ML variance reduction
-
-Ядро:
-- `ambrosia/tools/` — абстрактные классы, стат. критерии, KNN, утилиты
-- `ambrosia/spark_tools/` — PySpark-реализации (опциональная зависимость)
-
-### Иерархия абстракций
-
-```
-ABToolAbstract          — базовый класс для Designer, Splitter, Tester
-AbstractFittableTransformer — базовый для трансформеров (BoxCox, Log, Robust, IQR, Aggregate, Cuped)
-AbstractVarianceReducer     — базовый для Cuped, MultiCuped, MLVarianceReducer
-ABStatCriterion             — базовый для TtestIndCriterion, MannWhitneyCriterion и др.
-```
-
-Каждый основной класс (Designer, Splitter, Tester) реализует паттерн:
-- Конфигурация через `set_*()` методы или конструктор
-- Запуск через `run()` метод
-- Поддержка YAML-сериализации
-
-## Код-стайл
-
-- **Line length:** 120 символов (black, isort, flake8 — всё настроено на 120)
-- **Formatter:** black
-- **Import sort:** isort (trailing comma, parentheses, case-sensitive)
-- **Docstrings:** NumPy convention
-- **Лицензионный заголовок** в каждом .py файле:
-  ```python
-  #  Copyright 2022 MTS (Mobile Telesystems)
-  #
-  #  Licensed under the Apache License, Version 2.0 (the "License");
-  #  ...
-  ```
-- **Type hints:** используются через `ambrosia/types.py` — единый модуль типов
-- **Flake8 игнорирует:** D200, D205, D400, D105, D100, E203, W503
-- **Pylint:** конфигурация в `.pylintrc`, игнорирует `tests/`
-
-## Тестирование
-
-- Фреймворк: pytest
-- Маркеры: `@pytest.mark.unit`, `@pytest.mark.smoke`
-- Фикстуры: `tests/conftest.py` (включая local Spark session)
-- Тестовые данные: `tests/test_data/`
-- Паттерн именования: `test_*.py`, функции `test_*`
-
-## Важные соглашения
-
-- PySpark — опциональная зависимость (`pip install ambrosia[spark]`). Импорт Spark-модулей защищён через `ambrosia/tools/import_tools.py`
-- KNN использует nmslib (primary) с fallback на hnswlib (для macOS ARM)
-- Python 3.9–3.13, PySpark >= 3.4
-- Управление зависимостями: Poetry (pyproject.toml)
-- CI: GitHub Actions (lint + test matrix по версиям Python)

From d3c2366d56c9ae0ddef9a5ea34b7bcbdf73e3479 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=90=D1=81=D0=BB=D0=B0=D0=BD=20=D0=91=D0=B0=D0=B9=D1=80?=
 =?UTF-8?q?=D0=B0=D0=BC=D0=BA=D1=83=D0=BB=D0=BE=D0=B2?=
 <aslan_bm@MacBook-Pro-Aslan.local>
Date: Thu, 26 Mar 2026 21:22:04 +0300
Subject: [PATCH 4/4] Pin setuptools<82 to fix pkg_resources removal breaking
 hyperopt

setuptools 82.0 removed pkg_resources, which hyperopt 0.2.7 imports
at module level. This breaks `pip install ambrosia` on any Python
version when setuptools>=82 is resolved.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 poetry.lock    | 2 +-
 pyproject.toml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 8f87c88..2d6307d 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -3064,4 +3064,4 @@ spark = ["pyspark"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.9, <3.14"
-content-hash = "ff83ae995d12b9539fa734edc21d97d6a3744d7318724696293ade95a6080488"
+content-hash = "290e95a6811bbc92479feec17207b508f1a29cb65bf76f454f05d3a3ea0fcf54"
diff --git a/pyproject.toml b/pyproject.toml
index 2df01d5..426c63e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,7 +53,7 @@ scikit-learn = ">=1.3.0, <2.0.0"
 scipy = ">=1.10.0, <2.0.0"
 tqdm = ">=4.65.0, <5.0.0"
 hyperopt = ">=0.2.7, <0.3.0"
-setuptools = ">=65.0.0"
+setuptools = ">=65.0.0, <82.0.0"
 catboost = ">=1.2.0, <2.0.0"
 statsmodels = ">=0.14.0, <1.0.0"