From e3308aeff2c3f7923eae140565dd7386d1f67aad Mon Sep 17 00:00:00 2001 From: Fangyi Zhu Date: Tue, 27 Jan 2026 21:31:55 -0800 Subject: [PATCH 1/2] Add progress bar to all permutation tests Progress bars can become overwhelming when pted is called repeatedly inside a loop, so the new prog_bar option defaults to False. --- .gitignore | 1 + README.md | 4 ++++ src/pted/pted.py | 16 +++++++++++++--- src/pted/utils.py | 14 +++++++++----- tests/test_pted.py | 10 ++++++++++ 5 files changed, 37 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 5751abe..a0e1a41 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ _build build **.ipynb_checkpoints src/pted/_version.py +.idea diff --git a/README.md b/README.md index 6493799..941692c 100644 --- a/README.md +++ b/README.md @@ -269,6 +269,7 @@ def pted( chunk_size: Optional[int] = None, chunk_iter: Optional[int] = None, two_tailed: bool = True, + prog_bar: bool = False, ) -> Union[float, tuple[float, np.ndarray, float]]: ``` @@ -280,6 +281,7 @@ def pted( * **chunk_size** *(Optional[int])*: if not None, use chunked energy distance estimation. This is useful for large datasets. The chunk size is the number of samples to use for each chunk. If None, use the full dataset. * **chunk_iter** *(Optional[int])*: The chunk iter is the number of iterations to use with the given chunk size. * **two_tailed** *(bool)*: if True, compute a two-tailed p-value. This is useful if you want to reject the null hypothesis when x and y are either too similar or too different. If False, only checks for dissimilarity but is more sensitive. Default is True. +* **prog_bar** *(bool)*: if True, show a progress bar to track the progress of permutation tests. Default is False. 
### Coverage test @@ -295,6 +297,7 @@ def pted_coverage_test( chunk_iter: Optional[int] = None, sbc_histogram: Optional[str] = None, sbc_bins: Optional[int] = None, + prog_bar: bool = False, ) -> Union[float, tuple[np.ndarray, np.ndarray, float]]: ``` @@ -307,6 +310,7 @@ def pted_coverage_test( * **chunk_iter** *(Optional[int])*: The chunk iter is the number of iterations to use with the given chunk size. * **sbc_histogram** *(Optional[str])*: If given, the path/filename to save a Simulation-Based-Calibration histogram. * **sbc_bins** *(Optional[int])*: If given, force the histogram to have the provided number of bins. Otherwise, select an appropriate size: ~sqrt(N). +* **prog_bar** *(bool)*: if True, show a progress bar to track the progress of permutation tests. Default is False. ## GPU Compatibility diff --git a/src/pted/pted.py b/src/pted/pted.py index f92fcf6..4b55861 100644 --- a/src/pted/pted.py +++ b/src/pted/pted.py @@ -24,6 +24,7 @@ def pted( chunk_size: Optional[int] = None, chunk_iter: Optional[int] = None, two_tailed: bool = True, + prog_bar: bool = False, ) -> Union[float, tuple[float, np.ndarray, float]]: """ Two sample null hypothesis test using a permutation test on the energy @@ -90,6 +91,9 @@ def pted( two_tailed (bool): if True, compute a two-tailed p-value. This is useful if you want to reject the null hypothesis when x and y are either too similar or too different. Default is True. + prog_bar (bool): if True, show a progress bar to track the progress + of permutation tests. Default is False. 
+ Note ---- @@ -131,9 +135,10 @@ def pted( metric=metric, chunk_size=int(chunk_size), chunk_iter=int(chunk_iter), + prog_bar=prog_bar, ) elif is_torch_tensor(x): - test, permute = pted_torch(x, y, permutations=permutations, metric=metric) + test, permute = pted_torch(x, y, permutations=permutations, metric=metric, prog_bar=prog_bar) elif chunk_size is not None: test, permute = pted_chunk_numpy( x, @@ -142,9 +147,10 @@ def pted( metric=metric, chunk_size=int(chunk_size), chunk_iter=int(chunk_iter), + prog_bar=prog_bar, ) else: - test, permute = pted_numpy(x, y, permutations=permutations, metric=metric) + test, permute = pted_numpy(x, y, permutations=permutations, metric=metric, prog_bar=prog_bar) permute = np.array(permute) @@ -173,6 +179,7 @@ def pted_coverage_test( chunk_iter: Optional[int] = None, sbc_histogram: Optional[str] = None, sbc_bins: Optional[int] = None, + prog_bar: bool = False, ) -> Union[float, tuple[np.ndarray, np.ndarray, float]]: """ Coverage test using a permutation test on the energy distance. @@ -231,7 +238,7 @@ def pted_coverage_test( return_all (bool): if True, return the test statistic and the permuted statistics with the p-value. If False, just return the p-value. bool (default: False) - chunk_size (Optional[int]): if not None, use chunked energy distance + chunk_size (Optional[int]): If not None, use chunked energy distance estimation. This is useful for large datasets. The chunk size is the number of samples to use for each chunk. If None, use the full dataset. @@ -241,6 +248,8 @@ def pted_coverage_test( Simulation-Based-Calibration histogram. sbc_bins (Optional[int]): If given, force the histogram to have the provided number of bins. Otherwise, select an appropriate size: ~sqrt(N). + prog_bar (bool): If True, show a progress bar to track the progress + of permutation tests. Default is False. 
Note ---- @@ -282,6 +291,7 @@ def pted_coverage_test( two_tailed=False, chunk_size=chunk_size, chunk_iter=chunk_iter, + prog_bar=prog_bar, ) test_stats.append(test) permute_stats.append(permute) diff --git a/src/pted/utils.py b/src/pted/utils.py index b166656..d00e0ec 100644 --- a/src/pted/utils.py +++ b/src/pted/utils.py @@ -5,6 +5,7 @@ from scipy.spatial.distance import cdist from scipy.stats import chi2 as chi2_dist, binom from scipy.optimize import root_scalar +from tqdm.auto import trange try: import torch @@ -116,13 +117,14 @@ def pted_chunk_numpy( metric: str = "euclidean", chunk_size: int = 100, chunk_iter: int = 10, + prog_bar: bool = False, ) -> tuple[float, list[float]]: assert np.all(np.isfinite(x)) and np.all(np.isfinite(y)), "Input contains NaN or Inf!" nx = len(x) test_stat = _energy_distance_estimate_numpy(x, y, chunk_size, chunk_iter, metric=metric) permute_stats = [] - for _ in range(permutations): + for _ in trange(permutations, disable=not prog_bar): z = np.concatenate((x, y), axis=0) z = z[np.random.permutation(len(z))] x, y = z[:nx], z[nx:] @@ -139,6 +141,7 @@ def pted_chunk_torch( metric: Union[str, float] = "euclidean", chunk_size: int = 100, chunk_iter: int = 10, + prog_bar: bool = False, ) -> tuple[float, list[float]]: assert torch.__version__ != "null", "PyTorch is not installed! 
try: `pip install torch`" assert torch.all(torch.isfinite(x)) and torch.all( @@ -148,7 +151,7 @@ def pted_chunk_torch( test_stat = _energy_distance_estimate_torch(x, y, chunk_size, chunk_iter, metric=metric) permute_stats = [] - for _ in range(permutations): + for _ in trange(permutations, disable=not prog_bar): z = torch.cat((x, y), dim=0) z = z[torch.randperm(len(z))] x, y = z[:nx], z[nx:] @@ -159,7 +162,7 @@ def pted_chunk_torch( def pted_numpy( - x: np.ndarray, y: np.ndarray, permutations: int = 100, metric: str = "euclidean" + x: np.ndarray, y: np.ndarray, permutations: int = 100, metric: str = "euclidean", prog_bar: bool = False, ) -> tuple[float, list[float]]: z = np.concatenate((x, y), axis=0) assert np.all(np.isfinite(z)), "Input contains NaN or Inf!" @@ -172,7 +175,7 @@ def pted_numpy( test_stat = _energy_distance_precompute(dmatrix, nx, ny) permute_stats = [] - for _ in range(permutations): + for _ in trange(permutations, disable=not prog_bar): I = np.random.permutation(len(z)) dmatrix = dmatrix[I][:, I] permute_stats.append(_energy_distance_precompute(dmatrix, nx, ny)) @@ -184,6 +187,7 @@ def pted_torch( y: torch.Tensor, permutations: int = 100, metric: Union[str, float] = "euclidean", + prog_bar: bool = False, ) -> tuple[float, list[float]]: assert torch.__version__ != "null", "PyTorch is not installed! 
try: `pip install torch`" z = torch.cat((x, y), dim=0) @@ -199,7 +203,7 @@ def pted_torch( test_stat = _energy_distance_precompute(dmatrix, nx, ny).item() permute_stats = [] - for _ in range(permutations): + for _ in trange(permutations, disable=not prog_bar): I = torch.randperm(len(z)) dmatrix = dmatrix[I][:, I] permute_stats.append(_energy_distance_precompute(dmatrix, nx, ny).item()) diff --git a/tests/test_pted.py b/tests/test_pted.py index 2cc2795..d794a2e 100644 --- a/tests/test_pted.py +++ b/tests/test_pted.py @@ -32,6 +32,16 @@ def test_pted_main(): pted.test() +def test_pted_progress_bar(capsys): + pted.pted(np.array([[1,2],[3,4]]), np.array([[3,2],[1,4]]), permutations=42) + captured = capsys.readouterr().err + assert "42/42" not in captured, "progress bar showed up when prog_bar is set to False by default" + + pted.pted(np.array([[1,2],[3,4]]), np.array([[3,2],[1,4]]), permutations=42, prog_bar=True) + captured = capsys.readouterr().err + assert "42/42" in captured, "progress bar did not show when prog_bar is set to True" + + def test_pted_torch(): if torch is None: pytest.skip("torch not installed") From ae44e8debb036fd90ba203569471a7092d09535f Mon Sep 17 00:00:00 2001 From: Fangyi Zhu Date: Wed, 28 Jan 2026 22:15:26 -0800 Subject: [PATCH 2/2] Move progress bar to outer loop for coverage tests --- README.md | 2 +- requirements.txt | 3 ++- src/pted/pted.py | 6 +++--- tests/test_pted.py | 12 ++++++++++++ 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 941692c..fcddd20 100644 --- a/README.md +++ b/README.md @@ -310,7 +310,7 @@ def pted_coverage_test( * **chunk_iter** *(Optional[int])*: The chunk iter is the number of iterations to use with the given chunk size. * **sbc_histogram** *(Optional[str])*: If given, the path/filename to save a Simulation-Based-Calibration histogram. * **sbc_bins** *(Optional[int])*: If given, force the histogram to have the provided number of bins. 
Otherwise, select an appropriate size: ~sqrt(N). -* **prog_bar** *(bool)*: if True, show a progress bar to track the progress of permutation tests. Default is False. +* **prog_bar** *(bool)*: if True, show a progress bar to track the progress of simulations. Default is False. ## GPU Compatibility diff --git a/requirements.txt b/requirements.txt index 5576e19..53e852f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ numpy -scipy \ No newline at end of file +scipy +tqdm \ No newline at end of file diff --git a/src/pted/pted.py b/src/pted/pted.py index 4b55861..9d401ee 100644 --- a/src/pted/pted.py +++ b/src/pted/pted.py @@ -1,3 +1,4 @@ +from tqdm.auto import trange from typing import Union, Optional import numpy as np @@ -249,7 +250,7 @@ def pted_coverage_test( sbc_bins (Optional[int]): If given, force the histogram to have the provided number of bins. Otherwise, select an appropriate size: ~sqrt(N). prog_bar (bool): If True, show a progress bar to track the progress - of permutation tests. Default is False. + of simulations. Default is False. 
Note ---- @@ -281,7 +282,7 @@ def pted_coverage_test( test_stats = [] permute_stats = [] pvals = [] - for i in range(nsim): + for i in trange(nsim, disable=not prog_bar): test, permute, p = pted( g[:, i], s[:, i], @@ -291,7 +292,6 @@ def pted_coverage_test( two_tailed=False, chunk_size=chunk_size, chunk_iter=chunk_iter, - prog_bar=prog_bar, ) test_stats.append(test) permute_stats.append(permute) diff --git a/tests/test_pted.py b/tests/test_pted.py index d794a2e..abaaa04 100644 --- a/tests/test_pted.py +++ b/tests/test_pted.py @@ -122,6 +122,18 @@ def test_pted_coverage_edgecase(): assert p > 1e-2 and p < 0.99, f"p-value {p} is not in the expected range (U(0,1))" +def test_pted_coverage_progress_bar(capsys): + g = np.random.normal(size=(42, 10)) + s = np.random.normal(size=(100, 42, 10)) + pted.pted_coverage_test(g, s) + captured = capsys.readouterr().err + assert "42/42" not in captured, "progress bar showed up when prog_bar is set to False by default" + + pted.pted_coverage_test(g, s, prog_bar=True) + captured = capsys.readouterr().err + assert "42/42" in captured, "progress bar did not show when prog_bar is set to True" + + def test_pted_coverage_overunder(): if torch is None: pytest.skip("torch not installed")