diff --git a/experimental/README.md b/experimental/README.md
new file mode 100644
index 0000000..de36d79
--- /dev/null
+++ b/experimental/README.md
@@ -0,0 +1,6 @@
+# Experimental
+
+This folder contains experimental work on dpmm.
+
+## `audit_dpmm`  
+Contains the source code and experiments for the paper *Tight Auditing of Differential Privacy in MST and AIM*.
\ No newline at end of file
diff --git a/experimental/audit_dpmm/.gitignore b/experimental/audit_dpmm/.gitignore
new file mode 100644
index 0000000..992b3f4
--- /dev/null
+++ b/experimental/audit_dpmm/.gitignore
@@ -0,0 +1,212 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+#poetry.toml
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#   pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+#   https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+#pdm.lock
+#pdm.toml
+.pdm-python
+.pdm-build/
+
+# pixi
+#   Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+#pixi.lock
+#   Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+#   in the .venv directory. It is recommended not to include this directory in version control.
+.pixi
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Abstra
+# Abstra is an AI-powered process automation framework.
+# Ignore directories containing user credentials, local state, and settings.
+# Learn more at https://abstra.io/docs
+.abstra/
+
+# Visual Studio Code
+#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+#  that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+#  and can be added to the global gitignore or merged into this file. However, if you prefer,
+#  you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
+
+# Cursor
+#  Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+#  exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+#  refer to https://docs.cursor.com/context/ignore-files
+.cursorignore
+.cursorindexingignore
+
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/
+
+# DS_Store
+*.DS_Store
+
+data/features_2_rows.pkl
\ No newline at end of file
diff --git a/experimental/audit_dpmm/README.md b/experimental/audit_dpmm/README.md
new file mode 100644
index 0000000..918f8ad
--- /dev/null
+++ b/experimental/audit_dpmm/README.md
@@ -0,0 +1,14 @@
+# `audit-dpmm`
+
+This folder contains the source code and experiments for the paper *Tight Auditing of Differential Privacy in MST and AIM* by G. Ganev, M.S.M.S. Annamalai, B. Kulynych.
+
+## Installation
+
+The experiments require Python 3.11.
+All necessary dependencies are listed in environment.yaml.
+
+## Source Code Structure
+
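+To replicate the experiments and the plots from the paper, work from inside the `code/` directory:
+1. Run `run_attack.py` (it writes the attack features to `../data/features.pkl`)
+2. Run `run_audit.ipynb` (it reads `../data/features.pkl`)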
\ No newline at end of file
diff --git a/experimental/audit_dpmm/code/audit_utils.py b/experimental/audit_dpmm/code/audit_utils.py
new file mode 100644
index 0000000..3ace325
--- /dev/null
+++ b/experimental/audit_dpmm/code/audit_utils.py
@@ -0,0 +1,357 @@
+import numpy as np
+
+from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
+from sklearn.metrics import roc_auc_score
+
+from scipy import integrate
+from scipy.stats import chi2
+from scipy.optimize import root_scalar
+from scipy.stats import norm, binomtest
+from scipy.stats import beta as beta_dist
+
+from riskcal.analysis import get_beta_from_gdp
+
+
+def mu_lower_from_two_groups(y_D, y_Dp, alpha=0.1, sens=1.0):
+    """
+    Lower-bound mu = Δ / sigma from repeated releases on two neighboring datasets.
+
+    y_D, y_Dp: arrays of releases on D and on neighboring D' (same query, mechanism rerun)
+    alpha: one-sided error; the bound holds with confidence ~1-alpha
+    sens: query sensitivity Δ (1.0 in our case)
+
+    Model: Y = q(D) + N(0, sigma^2) and Y' = q(D') + N(0, sigma^2)
+    Returns a tuple (mu_lo, sigma_up) on every path; raises ValueError on < 2 samples.
+    """
+    y_D = np.asarray(y_D, dtype=float)
+    y_Dp = np.asarray(y_Dp, dtype=float)
+
+    m = len(y_D)
+    n = len(y_Dp)
+    if m < 2 or n < 2:
+        raise ValueError("Need at least 2 samples in each group to estimate sigma.")
+
+    nu = (m - 1) + (n - 1)  # degrees of freedom
+    # pooled sample variance of the two groups
+    sp2 = ((m - 1) * np.var(y_D, ddof=1) + (n - 1) * np.var(y_Dp, ddof=1)) / nu
+
+    # One-sided upper bound on sigma^2 via chi-square (use LOWER quantile!)
+    chi2_lower = chi2.ppf(alpha, nu)
+    if chi2_lower <= 0:
+        # degenerate quantile (e.g. alpha=0): sigma cannot be bounded, so mu_lo is 0;
+        # keep the same (mu_lo, sigma_up) shape as the normal path so callers can unpack
+        return 0.0, float("inf")
+
+    sigma_up = np.sqrt(nu * sp2 / chi2_lower)
+
+    mu_lo = sens / sigma_up
+    return float(max(mu_lo, 0.0)), float(sigma_up)
+
+
+def _conf_upper_binom_cp(k, n, alpha_one_sided=0.05):
+    """
+    Clopper–Pearson upper confidence bound for a binomial proportion k/n.
+
+    The bound is the upper endpoint of the exact two-sided interval at
+    confidence_level = 1 - alpha_one_sided, which is a conservative stand-in
+    for a one-sided bound with error rate alpha_one_sided. Returns 1.0 if n <= 0.
+    """
+    if n > 0:
+        result = binomtest(int(k), int(n))
+        return result.proportion_ci(confidence_level=1 - alpha_one_sided, method="exact").high
+    return 1.0
+
+
+######
+class JointBetaMu:
+    """
+    Joint-beta (Jeffreys) posterior over (FPR, FNR), inverted to a μ-GDP bound.
+
+    Inspired from Bayesian Estimation of Differential Privacy (https://arxiv.org/abs/2206.05199)
+    and https://github.com/microsoft/responsible-ai-toolbox-privacy/blob/66d2d45b8f57683b0390cfa63774abb70235e5da/privacy_estimates/joint_density.py#L118
+
+    Independent posteriors built from the confusion counts:
+      FPR ~ Beta(0.5+FP, 0.5+TN)
+      FNR ~ Beta(0.5+FN, 0.5+TP)
+    """
+
+    def __init__(self, fp, tn, fn, tp):
+        self.fpr_post = beta_dist(0.5 + fp, 0.5 + tn)
+        self.fnr_post = beta_dist(0.5 + fn, 0.5 + tp)
+
+    def prob_mu_private(self, mu, epsabs=1e-6):
+        """
+        Posterior probability that (FPR, FNR) lies in the μ-GDP feasible region,
+        i.e. fnr >= get_beta_from_gdp(fpr, mu), clipped to [0, 1].
+        """
+        def mass_at(fpr):
+            # density of FPR times the posterior mass of FNR above the trade-off value
+            fnr_floor = get_beta_from_gdp(fpr, mu)
+            return self.fpr_post.pdf(fpr) * (1.0 - self.fnr_post.cdf(fnr_floor))
+
+        total, _abserr = integrate.quad(mass_at, 0.0, 1.0, epsabs=epsabs)
+        return float(np.clip(total, 0.0, 1.0))
+
+    def mu_lo(self, alpha=0.1, xtol=1e-3, max_mu=50.0):
+        """
+        Returns μ_lo such that the μ-GDP region contains alpha posterior mass.
+        This mirrors the epsilon_estimation.DensityModel().eps_lo convention.
+        """
+        assert 0 < alpha < 1
+        quad_tol = max(xtol/5, 1e-6)
+
+        def excess_mass(mu):
+            return self.prob_mu_private(mu, epsabs=quad_tol) - alpha
+
+        # μ=0 already holds at least alpha mass -> the lower bound is 0
+        if excess_mass(0.0) >= 0.0:
+            return 0.0
+
+        # double the upper end until it holds enough mass, capped at max_mu
+        lo, hi = 0.0, 1.0
+        while excess_mass(hi) < 0.0:
+            hi *= 2
+            if hi >= max_mu:
+                hi = max_mu
+                break
+
+        # not enough mass even at the cap: report max_mu (very conservative)
+        if excess_mass(hi) < 0.0:
+            return float(max_mu)
+        return float(root_scalar(excess_mass, bracket=[lo, hi], xtol=xtol, method="brentq").root)
+######
+
+
+def _threshold_grid_from_scores(
+    valid_scores,
+    mode="quantiles",  # ["all_unique", "quantiles"]
+    n_thresholds=200,
+):
+    """Build a sorted, de-duplicated grid of candidate thresholds from scores."""
+    if mode not in ("all_unique", "quantiles"):
+        raise ValueError("threshold_mode must be 'all_unique' or 'quantiles'")
+    if mode == "all_unique":
+        return np.unique(valid_scores)
+    # n_thresholds evenly spaced quantile levels in [0, 1]; equal quantiles collapse
+    levels = np.linspace(0, 1, n_thresholds)
+    return np.unique(np.quantile(valid_scores, levels))
+
+
+def _eval_thresholds(scores_out, scores_in, thresholds, ci_method="bonferroni_cp", alpha=0.1):
+    """Evaluate every candidate threshold: confusion counts, rates and μ estimates.
+
+    Each t in `thresholds` is scored with _confusion_at_threshold,
+    _rates_from_confusion, _mu_from_fpr_fnr and _mu_lo_from_counts.
+
+    Returns a dict of per-threshold arrays (same order as `thresholds`) plus the
+    scalar group sizes "P" = len(scores_in) and "N" = len(scores_out).
+    """
+    n_pos = len(scores_in)
+    n_neg = len(scores_out)
+
+    # counts are stored as int arrays, rates and μ values as float arrays
+    int_keys = ("TP", "FN", "FP", "TN")
+    float_keys = ("FPR", "FNR", "advantage", "mu_hat", "mu_lower")
+    columns = {key: [] for key in int_keys + float_keys}
+
+    for t in thresholds:
+        tp, fn, fp, tn = _confusion_at_threshold(scores_out, scores_in, float(t))
+        fpr, fnr, adv = _rates_from_confusion(tp, fn, fp, tn)
+        mu_hat = _mu_from_fpr_fnr(fpr, fnr)
+        mu_lower = _mu_lo_from_counts(tp=tp, fn=fn, fp=fp, tn=tn, ci_method=ci_method, alpha=alpha)
+
+        # same order as int_keys + float_keys
+        row = (tp, fn, fp, tn, fpr, fnr, adv, mu_hat, mu_lower)
+        for key, value in zip(int_keys + float_keys, row):
+            columns[key].append(value)
+
+    curve = {"thresholds": thresholds.astype(float)}
+    for key in int_keys:
+        curve[key] = np.array(columns[key], dtype=int)
+    for key in float_keys:
+        curve[key] = np.array(columns[key], dtype=float)
+
+    # group sizes as numpy integer scalars
+    curve["P"] = np.array([n_pos], dtype=int)[0]
+    curve["N"] = np.array([n_neg], dtype=int)[0]
+    return curve
+
+
+def _confusion_at_threshold(scores_out, scores_in, t):
+    """Count (TP, FN, FP, TN) for the rule: predict 'in' when score >= t."""
+    # 'in' scores are the positives, 'out' scores the negatives
+    hits_in = scores_in >= t
+    hits_out = scores_out >= t
+    tp = int(np.sum(hits_in))
+    fp = int(np.sum(hits_out))
+    fn, tn = int(len(scores_in) - tp), int(len(scores_out) - fp)
+    return tp, fn, fp, tn
+
+
+def _rates_from_confusion(tp, fn, fp, tn):
+    """Turn confusion counts into (FPR, FNR, advantage), advantage = TPR - FPR."""
+    # an empty group falls back to a denominator of 1, so its rates are 0
+    pos_total = max(tp + fn, 1)
+    neg_total = max(tn + fp, 1)
+    false_pos_rate = fp / neg_total
+    false_neg_rate = fn / pos_total
+    advantage = tp / pos_total - false_pos_rate
+    return float(false_pos_rate), float(false_neg_rate), float(advantage)
+
+
+def _mu_from_fpr_fnr(fpr, fnr):
+    """Point estimate of μ from one (FPR, FNR) pair, floored at 0."""
+    tiny = 1e-6
+    # keep both rates strictly inside (0, 1) so norm.ppf stays finite
+    fpr_c = np.clip(fpr, tiny, 1 - tiny)
+    fnr_c = np.clip(fnr, tiny, 1 - tiny)
+    gap = norm.ppf(1 - fpr_c) - norm.ppf(fnr_c)
+    return np.clip(gap, 0, None)
+
+
+def _select_optimal_threshold(curve, threshold_selection):
+    """Pick the threshold that maximizes the chosen criterion on a (validation) curve."""
+    # selection name -> key of the curve array to maximize
+    criterion_key = {
+        "max_advantage": "advantage",
+        "max_mu_hat": "mu_hat",
+        "max_mu_lower": "mu_lower",
+    }
+    if threshold_selection not in criterion_key:
+        raise ValueError("threshold_selection must be 'max_advantage', 'max_mu_hat' or 'max_mu_lower'")
+
+    best = int(np.argmax(curve[criterion_key[threshold_selection]]))
+    return float(curve["thresholds"][best])
+
+
+def _mu_lo_from_counts(tp, fn, fp, tn,
+    ci_method="bonferroni_cp",    # "bonferroni_cp", "joint_beta"
+    alpha=0.1,
+):
+    """Lower bound on μ from a single confusion tuple (TP, FN, FP, TN).
+
+    bonferroni_cp: Clopper–Pearson upper bounds on FPR and FNR, alpha/2 each,
+    plugged into the point formula. joint_beta: JointBetaMu(...).mu_lo(alpha).
+    """
+    if ci_method == "joint_beta":
+        return JointBetaMu(fp=fp, tn=tn, fn=fn, tp=tp).mu_lo(alpha=alpha)
+
+    if ci_method != "bonferroni_cp":
+        raise ValueError("ci_method must be 'bonferroni_cp' or 'joint_beta'")
+
+    # Bonferroni across (FPR,FNR): each rate gets half of the error budget
+    per_rate_alpha = alpha / 2
+    n_pos, n_neg = tp + fn, tn + fp
+    fpr_upper = _conf_upper_binom_cp(fp, n_neg, alpha_one_sided=per_rate_alpha)
+    fnr_upper = _conf_upper_binom_cp(fn, n_pos, alpha_one_sided=per_rate_alpha)
+    return _mu_from_fpr_fnr(fpr_upper, fnr_upper)
+
+
+def run_audit(
+    out_data,
+    in_data,
+    n_train,
+    n_valid,
+    n_test,
+    classifier="xgboost",                 # "xgboost", "random_forest"
+    threshold_mode="quantiles",           # "all_unique", "quantiles"
+    n_thresholds=200,
+    threshold_selection="max_advantage",  # "max_advantage", "max_mu_hat", "max_mu_lower"
+    ci_method="joint_beta",               # "bonferroni_cp", "joint_beta"
+    alpha=0.1,
+    random_state=None,
+):
+    """
+    1) Train attack model on TRAIN split.
+    2) Compute scores on VALID and TEST (but DO NOT use TEST for threshold selection).
+    3) Choose threshold t* using VALID only by objective:
+       - max_advantage: maximize TPR - FPR
+       - max_mu_hat: maximize μ_hat computed from (FPR,FNR)
+       - max_mu_lower: maximize the μ lower bound computed with `ci_method`
+    4) Return artifacts + full VALID diagnostics curves for plotting.
+
+    out_data / in_data: feature rows, sliced in order into TRAIN, VALID, TEST; rows
+    beyond n_train + n_valid + n_test are ignored. Raises ValueError if either has
+    fewer rows than that. classifier="xgboost" selects sklearn's
+    GradientBoostingClassifier (the xgboost package is not used).
+    """
+    n_used = n_train + n_valid + n_test
+    if len(out_data) < n_used or len(in_data) < n_used:
+        raise ValueError("out_data and in_data need at least n_train + n_valid + n_test rows")
+
+    # --- train attack model ---
+    X_train = np.concatenate([out_data[:n_train], in_data[:n_train]])
+    y_train = np.array([0] * n_train + [1] * n_train)
+
+    if classifier == "xgboost":
+        clf = GradientBoostingClassifier(random_state=random_state)
+    elif classifier == "random_forest":
+        clf = RandomForestClassifier(random_state=random_state)
+    else:
+        raise ValueError("classifier must be 'xgboost' or 'random_forest'")
+    clf.fit(X_train, y_train)
+
+    # --- compute scores for valid+test (only the rows that belong to those splits) ---
+    out_scores_all = clf.predict_proba(out_data[n_train:n_used])[:, 1]
+    in_scores_all = clf.predict_proba(in_data[n_train:n_used])[:, 1]
+
+    out_scores_valid = out_scores_all[:n_valid]
+    out_scores_test = out_scores_all[n_valid:n_valid + n_test]
+    in_scores_valid = in_scores_all[:n_valid]
+    in_scores_test = in_scores_all[n_valid:n_valid + n_test]
+
+    # --- AUC diagnostics (do NOT use for selection) ---
+    # valid auc
+    y_valid = np.array([0] * n_valid + [1] * n_valid)
+    scores_valid = np.concatenate([out_scores_valid, in_scores_valid])
+    auc_valid = roc_auc_score(y_valid, scores_valid)
+    auc_valid = max(auc_valid, 1 - auc_valid)
+
+    # test auc
+    y_test = np.array([0] * n_test + [1] * n_test)
+    scores_test = np.concatenate([out_scores_test, in_scores_test])
+    auc_test = roc_auc_score(y_test, scores_test)
+    auc_test = max(auc_test, 1 - auc_test)
+
+    # valid/test auc (scores were cut to n_valid + n_test rows above, matching y_vt)
+    y_vt = np.array([0] * (n_valid + n_test) + [1] * (n_valid + n_test))
+    scores_vt = np.concatenate([out_scores_all, in_scores_all])
+    auc_vt = roc_auc_score(y_vt, scores_vt)
+    auc_vt = max(auc_vt, 1 - auc_vt)
+
+    # --- extract thresholds grid from VALID only ---
+    valid_scores = np.concatenate([out_scores_valid, in_scores_valid])
+    thresholds = _threshold_grid_from_scores(valid_scores, mode=threshold_mode, n_thresholds=n_thresholds)
+
+    # --- evaluate curve and select threshold on VALID only ---
+    valid_curve = _eval_thresholds(out_scores_valid, in_scores_valid, thresholds, ci_method=ci_method, alpha=alpha)
+    opt_t = _select_optimal_threshold(valid_curve, threshold_selection)
+    valid_curve["opt_t"] = opt_t
+
+    # --- evaluate/estimate on TEST ---
+    tp, fn, fp, tn = _confusion_at_threshold(out_scores_test, in_scores_test, opt_t)
+    fpr, fnr, adv = _rates_from_confusion(tp, fn, fp, tn)
+    mu_hat = _mu_from_fpr_fnr(fpr, fnr)
+
+    mu_lower = _mu_lo_from_counts(tp=tp, fn=fn, fp=fp, tn=tn, ci_method=ci_method, alpha=alpha)
+
+    return {
+        "valid_test": {"auc": auc_vt},
+        "valid": {"auc": auc_valid, "curve": valid_curve},
+        "test": {
+            "auc": auc_test,
+            "point": {
+                "TP": tp,
+                "FN": fn,
+                "FP": fp,
+                "TN": tn,
+                "FPR": fpr,
+                "FNR": fnr,
+                "advantage": adv,
+                "mu_hat": mu_hat,
+                "mu_lower": mu_lower,
+            },
+        },
+    }
+
+
diff --git a/experimental/audit_dpmm/code/mst/__init__.py b/experimental/audit_dpmm/code/mst/__init__.py
new file mode 100644
index 0000000..600bfde
--- /dev/null
+++ b/experimental/audit_dpmm/code/mst/__init__.py
@@ -0,0 +1,2 @@
+from mst.adp2gdp import mu_from_eps_delta
+from mst.mst import MST
diff --git a/experimental/audit_dpmm/code/mst/adp2gdp.py b/experimental/audit_dpmm/code/mst/adp2gdp.py
new file mode 100644
index 0000000..a768839
--- /dev/null
+++ b/experimental/audit_dpmm/code/mst/adp2gdp.py
@@ -0,0 +1,21 @@
+import numpy as np
+from scipy.stats import norm
+from scipy.optimize import brentq
+
+
+# delta at a given eps for a mu-GDP mechanism, Equation (6) from Tight Auditing DPML paper
+def delta_from_eps_mu(eps, mu):
+    return norm.cdf(mu / 2 - eps / mu) - np.exp(eps) * norm.cdf(-(eps / mu + mu / 2))
+
+
+def mu_from_eps_delta(eps, delta):
+    """Find mu such that delta_from_eps_mu(eps, mu) equals delta (root found by brentq)."""
+    def gap(mu):
+        return delta_from_eps_mu(eps, mu) - delta
+    # root bracket; double the upper end while it still gives less than delta
+    lo, hi = 1e-6, 50.0
+    while gap(hi) < 0:
+        hi *= 2
+        if hi > 1e6:
+            raise RuntimeError("Failed to bracket μ")
+    return brentq(gap, lo, hi)
diff --git a/experimental/audit_dpmm/code/mst/mst.py b/experimental/audit_dpmm/code/mst/mst.py
new file mode 100644
index 0000000..7c05daf
--- /dev/null
+++ b/experimental/audit_dpmm/code/mst/mst.py
@@ -0,0 +1,123 @@
+# A generative model training algorithm based on
+# "Winning the NIST Contest: A scalable and general approach to differentially private synthetic data"
+# by Ryan McKenna, Gerome Miklau, Daniel Sheldon
+# Adapted from: https://github.com/ryan112358/private-pgm/blob/1da21c8b38149b05f1385b8e54116568b700b4fa/mechanisms/mst.py
+# and
+# Adapted from: https://github.com/sassoftware/dpmm/blob/752fd57480ec593a3b2b5950fd445e98cdedd7e3/src/dpmm/models/mst.py
+
+
+import numpy as np
+from logging import getLogger
+from typing import Tuple, Optional
+from numpy.random import RandomState
+
+from dpmm.models.base.mbi import Dataset, Domain
+from dpmm.models.base.mechanisms import cdp_rho
+from dpmm.models.base.memory import model_size
+from dpmm.models.base.mechanisms import Mechanism
+
+from mst import mu_from_eps_delta
+
+
+"""
+This is a generalization of the winning mechanism from the
+2018 NIST Differential Privacy Synthetic Data Competition.
+
+Unlike the original implementation, this one can work for any discrete dataset,
+and does not rely on public provisional data for measurement selection.
+"""
+
+
+logger = getLogger("dpmm")
+
+
+class MST(Mechanism):
+    """
+    Experimental, audit-oriented variant of the Maximum Spanning Tree (MST) mechanism:
+    `_fit` measures only the 1-way marginal of every column (no higher-order selection).
+    `self.sigma` is derived via `cdp_rho` or, when `GDP` is True, via `mu_from_eps_delta`.
+
+    Ref: https://arxiv.org/pdf/2108.04978
+
+    :param domain: The domain of the data.
+    :type domain: Domain
+    :param epsilon: Privacy budget.
+    :type epsilon: float, optional
+    :param delta: Privacy parameter.
+    :type delta: float, optional
+    :param n_iters: Number of iterations for inference.
+    :type n_iters: int
+    :param compress: Whether to compress the data.
+    :type compress: bool
+    :param GDP: If True, set sigma = 1 / mu_from_eps_delta(epsilon, delta) instead of using cdp_rho.
+    :type GDP: bool
+    :param prng: Random state for reproducibility.
+    :type prng: RandomState, optional
+    :param max_model_size: Maximum model size in MB.
+    :type max_model_size: int, optional
+    :param structural_zeros: Structural zeros in the data.
+    :type structural_zeros: dict, optional
+    :param n_jobs: Number of parallel jobs.
+    :type n_jobs: int
+    """
+
+    def __init__(
+        self,
+        domain: Domain,
+        epsilon: Optional[float] = None,
+        delta: Optional[float] = None,
+        n_iters: int = 5000,
+        compress: bool = False,
+        GDP: bool = False,
+        prng: Optional[RandomState] = None,
+        max_model_size: Optional[int] = None,
+        structural_zeros: Optional[dict] = None,
+        n_jobs: int = -1,
+    ):
+        super().__init__(
+            epsilon=epsilon,
+            delta=delta,
+            prng=prng,
+            max_model_size=max_model_size,
+            compress=compress,
+            domain=domain,
+            structural_zeros=structural_zeros,
+            n_jobs=n_jobs,
+        )
+
+        if GDP:
+            # HARDCODED -- 2 (convert ADP directly to GDP): sigma = 1 / mu, rho is left unset
+            self.rho = None
+            mu = mu_from_eps_delta(self.epsilon, self.delta)
+            self.sigma = 1 / mu
+        else:
+            self.rho = cdp_rho(self.epsilon, self.delta)
+            # HARDCODED -- 1 (use all DP budget on 1-way marginals measurement)
+            self.sigma = np.sqrt(1 / (2 * self.rho))
+
+        self.n_iters = n_iters
+
+    def _fit(self, data: Dataset, public: bool = False) -> Tuple[Dataset, list]:
+        """
+        Fit the MST mechanism to the data.
+
+        :param data: The dataset.
+        :type data: Dataset
+        :param public: Whether the data is public. Defaults to False.
+        :type public: bool, optional
+        :return: The (compressed, if `self.compress`) dataset and the 1-way measurement log.
+        :rtype: Tuple[Dataset, list]
+        """
+        # select all 1-way marginals (one single-column clique per domain attribute)
+        self.cliques = cliques_1 = [(col,) for col in data.domain]
+
+        log1 = self.measure(data, cliques=cliques_1, public=public)
+        # compress domain of all 1-way marginals
+        if self.compress:
+            log1 = self.compressor.fit(log1)
+            data = self.compressor.transform(data)
+
+        self.model_size = model_size(data, cliques_1)
+        # HARDCODED - 3 (only select all 1-way marginals)
+
+        return data, log1
diff --git a/experimental/audit_dpmm/code/run_attack.py b/experimental/audit_dpmm/code/run_attack.py
new file mode 100644
index 0000000..81951bf
--- /dev/null
+++ b/experimental/audit_dpmm/code/run_attack.py
@@ -0,0 +1,94 @@
+import string
+import pickle
+import numpy as np
+import pandas as pd
+from tqdm import tqdm
+from itertools import product
+from multiprocessing import Pool, cpu_count
+
+from mst import MST
+
+
+N_ROWS = 10
+N_COLS = 3
+N_ALL = 5000
+LEN_SYNTH = 25
+
+EPSILON = 1
+DELTA = 1e-2
+
+
+def featurize_df_queries(df, queries):
+    """Count, per query, the rows of df equal to that query in every column."""
+    matches = [(df == query).all(axis=1).sum() for query in queries]
+    # counts are held as floats first and then cast to int
+    return np.array(matches, dtype=float).astype(int)
+
+
+def featurize_model(model, columns):
+    meas = model.measures
+    # flatten model.measures into one vector with two slots per column (assumes binary columns — TODO confirm)
+    measures = np.zeros(2 * len(columns))
+    for col_idx, col in enumerate(columns):
+        col_proj = sorted([_meas for _meas in meas if col in _meas[3]], key=lambda x: len(x[3]))
+        # col_proj[0]: the measurement containing col whose index-3 entry is shortest
+        proj = col_proj[0][3]
+        _meas = col_proj[0][1]
+        _meas = _meas.reshape(*[_meas.size // 2**(len(proj) - 1) for _ in proj])
+        # multi-column measurement: sum out every axis except col's own
+        if len(col_proj[0][3]) > 1:
+            axis = col_proj[0][3].index(col)
+            _meas = np.sum(_meas, axis=tuple([i for i in range(len(_meas.shape)) if i != axis]))
+        # write the per-column vector into this column's slot of the output
+        measures[2 * col_idx: (2 * col_idx) + _meas.shape[0]] = _meas
+
+    return measures
+
+
+def one_iteration(args):
+    """Fit one fresh MST on df_out and one on df_in; return (i, out_feats, in_feats)."""
+    i, df_out, df_in, columns, domain, queries, epsilon, delta, len_synth = args
+
+    def features_for(df):
+        # new generator per dataset: fit, sample len_synth rows, then featurize
+        gen = MST(epsilon=epsilon, delta=delta, domain=domain, compress=False, n_jobs=1)
+        gen.fit(df)
+        synth = gen.generate(len_synth)
+        # query counts on the synthetic data followed by the model's own measurements
+        return np.concatenate([featurize_df_queries(synth, queries), featurize_model(gen, columns)])
+
+    # out data first, then in data
+    out_feats = features_for(df_out)
+    in_feats = features_for(df_in)
+    return i, out_feats, in_feats
+
+
+if __name__ == "__main__":
+    # toy binary data: df_out is all zeros; df_in is one all-ones row stacked on the same zeros
+    columns = list(string.ascii_uppercase[:N_COLS])
+    domain = dict.fromkeys(columns, 2)
+
+    df_out = pd.DataFrame(np.zeros((N_ROWS, N_COLS), dtype=int), columns=columns)
+    df_in = pd.DataFrame(np.vstack([np.ones((1, N_COLS), dtype=int), np.zeros((N_ROWS, N_COLS), dtype=int)]), columns=columns)
+
+    # black-box (one count per possible binary record) + white-box (2 per column) features
+    queries = np.array(list(product([0, 1], repeat=N_COLS)))
+    n_features = len(queries) + 2 * len(columns)
+    data = {world: np.zeros([N_ALL, n_features]) for world in ("out", "in")}
+
+    # one task per repetition; use all but one CPU (at least one worker)
+    tasks = [(i, df_out, df_in, columns, domain, queries, EPSILON, DELTA, LEN_SYNTH) for i in range(N_ALL)]
+    n_workers = max(1, cpu_count() - 1)
+
+    # maxtasksperchild=1: each worker process is replaced after a single task
+    with Pool(processes=n_workers, maxtasksperchild=1) as pool:
+        results = pool.imap_unordered(one_iteration, tasks, chunksize=1)
+        # results arrive in arbitrary order, so each row is placed by its index i
+        for i, out_row, in_row in tqdm(results, total=N_ALL, desc="it", leave=False):
+            data["out"][i, :] = out_row
+            data["in"][i, :] = in_row
+
+    # the path is relative to the current working directory;
+    # open() does not create ../data, so that directory must already exist
+    with open('../data/features.pkl', 'wb') as fh:
+        pickle.dump(data, fh, protocol=pickle.HIGHEST_PROTOCOL)
diff --git a/experimental/audit_dpmm/code/run_audit.ipynb b/experimental/audit_dpmm/code/run_audit.ipynb
new file mode 100644
index 0000000..0a5a0ae
--- /dev/null
+++ b/experimental/audit_dpmm/code/run_audit.ipynb
@@ -0,0 +1,572 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dc57a5b6-6745-4cbe-88c6-7da26eb7ba74",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pickle\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "from riskcal.analysis import get_beta_from_adp, get_beta_from_zcdp, get_beta_from_gdp, get_advantage_from_gdp\n",
+    "from dpmm.models.base.mechanisms import cdp_rho\n",
+    "from mst import mu_from_eps_delta\n",
+    "from audit_utils import run_audit, mu_lower_from_two_groups"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f7626c21",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "EPSILON = 1.0\n",
+    "DELTA = 1e-2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9c4f6b01",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "N_TRAIN = 2000\n",
+    "N_VALID = 1000\n",
+    "N_TEST = 2000"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5320fcef",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e9931c23",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "THEORY_RHO= cdp_rho(EPSILON, DELTA)\n",
+    "IMPLIED_MU = np.sqrt(2*THEORY_RHO)\n",
+    "print(f\"Implied mu: {IMPLIED_MU} <---\")\n",
+    "\n",
+    "\n",
+    "THEORY_MU = mu_from_eps_delta(EPSILON, DELTA)\n",
+    "print(f\"Theory mu: {THEORY_MU}\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "05bc4f2b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "62395d6f",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ab427b5d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open('../data/features.pkl', 'rb') as handle:\n",
+    "    features = pickle.load(handle)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cace5da0-acde-43a3-8edd-efd41826350a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "default_results = run_audit(features[\"out\"],\n",
+    "                            features[\"in\"],\n",
+    "                            n_train=N_TRAIN,\n",
+    "                            n_valid=N_VALID,\n",
+    "                            n_test=N_TEST,\n",
+    "                            random_state=13)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2b6388eb-0304-4888-b59f-f983595e9be1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f\"Empirical mu: {default_results['test']['point']['mu_lower']} <--\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c7aa8b52-3664-4640-9988-7069c5c8ffdd",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0e124a42-0704-443d-ba5f-e043a66566c5",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0347e26c-e25d-40ac-a404-ea5b3518a1b1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def adp_frontier_from_eps_delta(epsilon, delta, n_points=500):\n",
+    "    \"\"\"\n",
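+    "    Returns the theoretical ADP, i.e. (epsilon, delta)-DP, trade-off (frontier) curve as (alpha, beta) arrays on a grid of n_points alpha values\n",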
+    "    \"\"\"\n",
+    "    clip_eps = 1e-6\n",
+    "    alpha = np.linspace(clip_eps, 1 - clip_eps, n_points)\n",
+    "    beta = get_beta_from_adp(epsilon, delta, alpha)\n",
+    "    return alpha, beta\n",
+    "\n",
+    "\n",
+    "def zcdp_frontier_from_rho(rho, n_points=500):\n",
+    "    \"\"\"\n",
+    "    Returns the theoretical rho-zCDP trade-off (frontier) curve as (alpha, beta) arrays on a grid of n_points alpha values\n",
+    "    \"\"\"\n",
+    "    clip_eps = 1e-6\n",
+    "    alpha = np.linspace(clip_eps, 1 - clip_eps, n_points)\n",
+    "    beta = get_beta_from_zcdp(rho, alpha)\n",
+    "    return alpha, beta\n",
+    "\n",
+    "\n",
+    "def gdp_frontier_from_mu(mu, n_points=500):\n",
+    "    \"\"\"\n",
+    "    Returns the theoretical mu-GDP trade-off (frontier) curve as (alpha, beta) arrays on a grid of n_points alpha values\n",
+    "    \"\"\"\n",
+    "    clip_eps = 1e-6\n",
+    "    alpha = np.linspace(clip_eps, 1 - clip_eps, n_points)\n",
+    "    beta = get_beta_from_gdp(mu, alpha)\n",
+    "    return alpha, beta\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f91bbc97-491d-4579-8a86-c4a8345f3288",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "val_curve = default_results[\"valid\"][\"curve\"]\n",
+    "fpr = val_curve[\"FPR\"]\n",
+    "fnr = val_curve[\"FNR\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4860c03e-354b-4746-ac8d-5e1ec92820b6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "alpha_th_eps, beta_th_eps = adp_frontier_from_eps_delta(EPSILON, DELTA)\n",
+    "alpha_th_rho, beta_th_rho = zcdp_frontier_from_rho(THEORY_RHO)\n",
+    "alpha_th_mu_imp, beta_th_mu_imp = gdp_frontier_from_mu(IMPLIED_MU)\n",
+    "alpha_th_mu, beta_th_mu = gdp_frontier_from_mu(THEORY_MU)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6a7ac340-cebf-44b6-815c-8320ae0de57d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig = plt.figure(figsize=(6,6))\n",
+    "plt.gca().set_aspect('equal', adjustable='box')\n",
+    "\n",
+    "# Empirical audit (primary)\n",
+    "plt.plot(\n",
+    "    fpr, fnr,\n",
+    "    color=\"black\",\n",
+    "    linewidth=3.0,\n",
+    "    alpha=0.95,\n",
+    "    label=\"Empirical audit\"\n",
+    ")\n",
+    "\n",
+    "# μ-GDP via zCDP (primary theory)\n",
+    "plt.plot(\n",
+    "    alpha_th_mu_imp, beta_th_mu_imp,\n",
+    "    color=\"red\",\n",
+    "    linewidth=3.0,\n",
+    "    alpha=0.95,\n",
+    "    label=r\"$\\mu$-GDP (via $\\rho$-zCDP)\"\n",
+    ")\n",
+    "\n",
+    "# μ-GDP direct (secondary theory)\n",
+    "plt.plot(\n",
+    "    alpha_th_mu, beta_th_mu,\n",
+    "    color=\"red\",\n",
+    "    linestyle=\"--\",\n",
+    "    linewidth=2.0,\n",
+    "    alpha=0.65,\n",
+    "    label=r\"$\\mu$-GDP (via $(\\epsilon,\\delta)$-DP)\"\n",
+    ")\n",
+    "\n",
+    "# zCDP frontier (context)\n",
+    "plt.plot(\n",
+    "    alpha_th_rho, beta_th_rho,\n",
+    "    color=\"royalblue\",\n",
+    "    linestyle=\"-.\",\n",
+    "    linewidth=2.0,\n",
+    "    alpha=0.65,\n",
+    "    label=r\"$\\rho$-zCDP (context)\"\n",
+    ")\n",
+    "\n",
+    "# (ε,δ)-DP frontier (context)\n",
+    "plt.plot(\n",
+    "    alpha_th_eps, beta_th_eps,\n",
+    "    color=\"gray\",\n",
+    "    linestyle=\":\",\n",
+    "    linewidth=2.0,\n",
+    "    alpha=0.5,\n",
+    "    label=r\"$(\\epsilon,\\delta)$-DP (context)\"\n",
+    ")\n",
+    "\n",
+    "# 45-degree random-guess baseline: β = 1 − α\n",
+    "alpha_diag = np.linspace(0, 1, 200)\n",
+    "plt.plot(\n",
+    "    alpha_diag, 1 - alpha_diag,\n",
+    "    color=\"gray\",\n",
+    "    linestyle=\"--\",\n",
+    "    linewidth=2.0,\n",
+    "    alpha=0.5,\n",
+    "    label=\"Random guess\"\n",
+    ")\n",
+    "\n",
+    "plt.xlabel(\"FPR (α)\", fontsize=12)\n",
+    "plt.ylabel(\"FNR (β)\", fontsize=12)\n",
+    "plt.xlim(0, 1)\n",
+    "plt.ylim(0, 1)\n",
+    "\n",
+    "plt.legend(loc=\"upper right\", fontsize=12)\n",
+    "\n",
+    "plt.grid(alpha=0.12)\n",
+    "plt.tight_layout()\n",
+    "plt.show()\n",
+    "# fig.savefig(\"../data/tradeoff.pdf\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2586db4f-3bb5-42cb-9314-db821e0aaf11",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9a179941-78da-46f2-9488-0ae27463d978",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0c9c2c8d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "t = val_curve[\"thresholds\"]\n",
+    "fpr = val_curve[\"FPR\"]\n",
+    "fnr = val_curve[\"FNR\"]\n",
+    "tpr = 1 - fnr\n",
+    "adv = val_curve[\"advantage\"]\n",
+    "mu_hat = val_curve[\"mu_hat\"]\n",
+    "\n",
+    "# selected threshold index\n",
+    "t_sel = val_curve[\"opt_t\"]\n",
+    "idx_sel = int(np.argmin(np.abs(t - t_sel)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ee19b711",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# --- Figure & axes ---\n",
+    "fig, ax = plt.subplots()\n",
+    "\n",
+    "# FORCE square plotting box\n",
+    "# ax.set_box_aspect(1)\n",
+    "\n",
+    "# --- Baseline ---\n",
+    "# ax.axhline(y=0.0, color=\"lightgray\", linestyle=\":\", linewidth=1.5, label=\"Random guess (adv.=0)\")\n",
+    "\n",
+    "# --- FPR / FNR ---\n",
+    "ax.plot(t, fpr, color=\"tab:blue\", linewidth=2, alpha=0.65, label=\"FPR (α)\")\n",
+    "\n",
+    "ax.plot(t, fnr, color=\"tab:orange\", linewidth=2, alpha=0.65, label=\"FNR (β)\")\n",
+    "\n",
+    "# --- Advantage (primary signal) ---\n",
+    "ax.plot(t, adv, color=\"darkgreen\", linewidth=3, alpha=0.95, label=\"Empirical advantage\")\n",
+    "\n",
+    "# --- Theory reference ---\n",
+    "adv_theory = get_advantage_from_gdp(IMPLIED_MU)\n",
+    "\n",
+    "ax.axhline(y=adv_theory, color=\"gray\", linestyle=\"-.\", linewidth=2, alpha=0.5, label=r\"Theory advantage ($\\mu$-GDP)\")\n",
+    "\n",
+    "# --- Selected threshold ---\n",
+    "ax.axvline(t_sel, color=\"black\", linestyle=\"--\", linewidth=2, alpha=0.95, label=r\"Selected $\\tau^*$\")\n",
+    "\n",
+    "# Star marker\n",
+    "# ax.scatter([t_sel], [adv[idx_sel]], s=160, marker=\"*\", color=\"black\", zorder=5)\n",
+    "\n",
+    "# Annotation\n",
+    "# ax.text(t_sel + 0.015, adv[idx_sel], rf\"$\\tau^\\star={t_sel:.2f}$\", fontsize=11, va=\"center\")\n",
+    "\n",
+    "# --- Axes styling ---\n",
+    "ax.set_xlabel(r\"Threshold $\\tau$\", fontsize=12)\n",
+    "ax.set_ylabel(\"Rate\", fontsize=12)\n",
+    "\n",
+    "ax.set_xlim(0,1)\n",
+    "ax.set_ylim(0,1)\n",
+    "\n",
+    "# --- Legend (inside, clean) ---\n",
+    "handles, labels = ax.get_legend_handles_labels()\n",
+    "order = [\n",
+    "    labels.index(r\"Selected $\\tau^*$\"),\n",
+    "    labels.index(\"Empirical advantage\"),\n",
+    "    labels.index(r\"Theory advantage ($\\mu$-GDP)\"),\n",
+    "    labels.index(\"FPR (α)\"),\n",
+    "    labels.index(\"FNR (β)\"),\n",
+    "]\n",
+    "ax.legend([handles[i] for i in order], [labels[i] for i in order],\n",
+    "          loc=\"upper right\", fontsize=11)\n",
+    "\n",
+    "plt.grid(alpha=0.12)\n",
+    "plt.tight_layout()\n",
+    "plt.show()\n",
+    "# fig.savefig(\"../data/valid.pdf\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0094d407",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d17410be",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fd890bce-a532-4cb2-9a49-0574abd917aa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results_all = {}\n",
+    "\n",
+    "# baseline\n",
+    "baseline_ww_out, baseline_ww_in = features[\"out\"][:, 9], features[\"in\"][:, 9]\n",
+    "results, _ = mu_lower_from_two_groups(baseline_ww_out, baseline_ww_in)\n",
+    "results_all[\"Baseline\"] = results\n",
+    "\n",
+    "# threshold_selection -- max_mu_hat\n",
+    "results = run_audit(features[\"out\"],\n",
+    "                    features[\"in\"],\n",
+    "                    n_train=N_TRAIN,\n",
+    "                    n_valid=N_VALID,\n",
+    "                    n_test=N_TEST,\n",
+    "                    threshold_selection=\"max_mu_hat\",\n",
+    "                    random_state=13)\n",
+    "results_all[r\"$\\hat{\\mu}$\"] = results['test']['point']['mu_lower']\n",
+    "\n",
+    "# ci_method -- bonferroni_cp\n",
+    "results = run_audit(features[\"out\"],\n",
+    "                    features[\"in\"],\n",
+    "                    n_train=N_TRAIN,\n",
+    "                    n_valid=N_VALID,\n",
+    "                    n_test=N_TEST,\n",
+    "                    ci_method=\"bonferroni_cp\",\n",
+    "                    random_state=13)\n",
+    "results_all[\"Clopper–Pearson\"] = results['test']['point']['mu_lower']\n",
+    "\n",
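+    "# D_out size -- 2 (mu_lower is hard-coded below; the commented-out code would recompute it from ../data/features_2.pkl)\n",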
+    "# with open('../data/features_2.pkl', 'rb') as handle:\n",
+    "#     features_2 = pickle.load(handle)\n",
+    "# results = run_audit(features_2[\"out\"],\n",
+    "#                     features_2[\"in\"],\n",
+    "#                     n_train=N_TRAIN,\n",
+    "#                     n_valid=N_VALID,\n",
+    "#                     n_test=N_TEST,\n",
+    "#                     random_state=13)\n",
+    "# results_all[\"$|D_{out}| = 2$\"] = results['test']['point']['mu_lower']\n",
+    "results_all[\"$|D_{out}| = 2$\"] = 0.29832043950664605\n",
+    "\n",
+    "# classifier -- random_forest\n",
+    "results = run_audit(features[\"out\"],\n",
+    "                    features[\"in\"],\n",
+    "                    n_train=N_TRAIN,\n",
+    "                    n_valid=N_VALID,\n",
+    "                    n_test=N_TEST,\n",
+    "                    classifier=\"random_forest\",\n",
+    "                    random_state=13)\n",
+    "results_all[\"Random Forest\"] = results['test']['point']['mu_lower']\n",
+    "\n",
+    "# threat model -- black-box\n",
+    "results = run_audit(features[\"out\"][:, :8],\n",
+    "                    features[\"in\"][:, :8],\n",
+    "                    n_train=N_TRAIN,\n",
+    "                    n_valid=N_VALID,\n",
+    "                    n_test=N_TEST,\n",
+    "                    random_state=13)\n",
+    "results_all[\"Black-box\"] = results['test']['point']['mu_lower']\n",
+    "\n",
+    "# threat model -- white-box\n",
+    "results = run_audit(features[\"out\"][:, 8:],\n",
+    "                    features[\"in\"][:, 8:],\n",
+    "                    n_train=N_TRAIN,\n",
+    "                    n_valid=N_VALID,\n",
+    "                    n_test=N_TEST,\n",
+    "                    random_state=13)\n",
+    "results_all[\"White-box\"] = results['test']['point']['mu_lower']\n",
+    "\n",
+    "# default\n",
+    "results_all[\"Default\"] = default_results['test']['point']['mu_lower']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2aeac441-8509-40e0-953b-a59fbcbbfeb0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# results = {\n",
+    "#     \"Baseline\": 0.2601335097836669,\n",
+    "#     r\"$\\hat{\\mu}$\": 0.0,\n",
+    "#     \"Clopper–Pearson\": 0.2796166719825678,\n",
+    "#     r\"$|D_{out}| = 2$\": 0.29832043950664605,\n",
+    "#     \"Random Forest\": 0.32397972624803917,\n",
+    "#     \"Black-box\": 0.3901927720079095,\n",
+    "#     \"White-box\": 0.4150704207293557,\n",
+    "#     \"Default\": 0.42617713009324837,\n",
+    "# }"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0f775da6-6557-4b71-8dd5-39046d05e422",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "names = list(results_all.keys())\n",
+    "values = list(results_all.values())\n",
+    "\n",
+    "fig = plt.figure(figsize=(7, 5))\n",
+    "\n",
+    "bars = plt.bar(names, values, color=\"black\", label=\"Empirical audit\")\n",
+    "\n",
+    "# ---- highlight the default configuration ----\n",
+    "for bar, name in zip(bars, names):\n",
+    "    if name == \"Default\":\n",
+    "        bar.set_edgecolor(\"red\")\n",
+    "        bar.set_linewidth(5)\n",
+    "\n",
+    "for bar in bars:\n",
+    "    h = bar.get_height()\n",
+    "    plt.text(bar.get_x() + bar.get_width()/2, h + 0.003, f\"{h:.2f}\",\n",
+    "             ha=\"center\", va=\"bottom\", fontsize=11)\n",
+    "\n",
+    "# ---- theoretical line ----\n",
+    "plt.axhline(\n",
+    "    IMPLIED_MU,\n",
+    "    linestyle=\"-\",\n",
+    "    linewidth=3.0,\n",
+    "    color=\"red\",\n",
+    "    alpha=0.95,\n",
+    "    label=r\"Theory $\\mu$ (via $\\rho$-zCDP)\",\n",
+    ")\n",
+    "\n",
+    "plt.legend(\n",
+    "    loc=\"upper left\",\n",
+    "    bbox_to_anchor=(0.01, 0.95),  # move down a bit\n",
+    "    fontsize=12\n",
+    ")\n",
+    "\n",
+    "plt.ylabel(r\"$\\mu_{emp}$\", fontsize=12)\n",
+    "# plt.xlabel(\"Ablation setting\")\n",
+    "\n",
+    "plt.grid(axis=\"y\", linestyle=\"--\", alpha=0.12)\n",
+    "plt.xticks(rotation=30, fontsize=12)\n",
+    "\n",
+    "plt.tight_layout()\n",
+    "plt.show()\n",
+    "# fig.savefig(\"../data/abl.pdf\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "51dc3a22-1a31-46e9-bdd7-79c99d30f5e5",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "pgm-audit",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/experimental/audit_dpmm/data/abl.pdf b/experimental/audit_dpmm/data/abl.pdf
new file mode 100644
index 0000000..96bdff2
Binary files /dev/null and b/experimental/audit_dpmm/data/abl.pdf differ
diff --git a/experimental/audit_dpmm/data/features.pkl b/experimental/audit_dpmm/data/features.pkl
new file mode 100644
index 0000000..ac41a5d
Binary files /dev/null and b/experimental/audit_dpmm/data/features.pkl differ
diff --git a/experimental/audit_dpmm/data/tradeoff.pdf b/experimental/audit_dpmm/data/tradeoff.pdf
new file mode 100644
index 0000000..b4527b6
Binary files /dev/null and b/experimental/audit_dpmm/data/tradeoff.pdf differ
diff --git a/experimental/audit_dpmm/data/valid.pdf b/experimental/audit_dpmm/data/valid.pdf
new file mode 100644
index 0000000..ea9c12b
Binary files /dev/null and b/experimental/audit_dpmm/data/valid.pdf differ
diff --git a/experimental/audit_dpmm/environment.yml b/experimental/audit_dpmm/environment.yml
new file mode 100644
index 0000000..ef0713f
--- /dev/null
+++ b/experimental/audit_dpmm/environment.yml
@@ -0,0 +1,14 @@
+name: pgm-audit
+channels:
+  - defaults
+dependencies:
+  - python=3.11
+  - tqdm
+  - scikit-learn
+  - pandas
+  - matplotlib
+  - jupyterlab
+  - pip
+  - pip:
+      - dpmm
+      - riskcal