Draft
Changes from all commits
82 commits
56cc1cc
refactor MLP to LoRAMLP
mcgibbon Jan 13, 2026
5204ae1
add lora to Conv2d in conditional sfno
mcgibbon Jan 13, 2026
0a6ab69
add lora for spectral convolutions, add to csfno config
mcgibbon Jan 14, 2026
044168a
avoid crash on super init
mcgibbon Jan 15, 2026
3dd7aa9
fix change to model output
mcgibbon Jan 15, 2026
f6d62d4
Merge branch 'main' into feature/sfno_lora
mcgibbon Jan 15, 2026
d35f5f0
Merge branch 'main' into feature/sfno_lora
mcgibbon Jan 16, 2026
ca1c39a
Merge branch 'main' into feature/sfno_lora
mcgibbon Jan 22, 2026
e203eb9
Merge branch 'feature/sfno_lora' into feature/grouped_spectral_conv
mcgibbon Jan 22, 2026
7f6cc52
update regression target to use dhconv
mcgibbon Jan 22, 2026
71aac8b
use dhconv directly, disable other options
mcgibbon Jan 22, 2026
c09fd5a
delete unused code
mcgibbon Jan 22, 2026
2f7ca0f
enable grouped convolutions for linear filter type
mcgibbon Jan 22, 2026
bc6b789
restore MLP checkpointing
mcgibbon Jan 23, 2026
4ee5be4
Merge branch 'main' into feature/sfno_lora
mcgibbon Jan 26, 2026
5159ed8
Merge branch 'main' into feature/sfno_lora
mcgibbon Jan 29, 2026
3529e1e
Merge branch 'feature/sfno_lora' into feature/grouped_spectral_conv
mcgibbon Jan 29, 2026
9f21b52
Merge branch 'feature/grouped_spectral_conv' into feature/grouped_spe…
mcgibbon Jan 29, 2026
08323ba
enforce not implemented features at config level
mcgibbon Jan 29, 2026
c3f65fe
update sfno init to use updated makani scheme
mcgibbon Jan 29, 2026
29cd0ff
use correctly shaped scale
mcgibbon Jan 29, 2026
88c2e13
Merge branch 'main' into feature/grouped_spectral_conv
mcgibbon Feb 3, 2026
f460da7
Merge branch 'feature/grouped_spectral_conv' into feature/grouped_spe…
mcgibbon Feb 3, 2026
16813fc
default to linear filter type, disallow non-linear
mcgibbon Feb 3, 2026
30f3afb
Merge branch 'main' into feature/grouped_spectral_conv
mcgibbon Feb 3, 2026
15abf2c
allow makani-linear filter
mcgibbon Feb 3, 2026
5bef726
Merge branch 'feature/grouped_spectral_conv' of github.com:ai2cm/ace …
mcgibbon Feb 3, 2026
39dec0d
update sfnonet regression target to match primary code path
mcgibbon Feb 3, 2026
30d9a8a
Merge branch 'feature/update_reference' into feature/grouped_spectral…
mcgibbon Feb 3, 2026
2787426
Merge branch 'main' into feature/grouped_spectral_conv
mcgibbon Feb 3, 2026
f615a26
update diffusion regression test to latest settings
mcgibbon Feb 3, 2026
1147499
Merge branch 'feature/grouped_spectral_conv' of github.com:ai2cm/ace …
mcgibbon Feb 3, 2026
5acc370
Merge branch 'feature/grouped_spectral_conv' into feature/grouped_spe…
mcgibbon Feb 3, 2026
54fecc6
incorporate review comments
mcgibbon Feb 3, 2026
577dd2d
Merge branch 'main' into feature/grouped_spectral_conv
mcgibbon Feb 3, 2026
b93ab14
Merge branch 'main' into feature/grouped_spectral_conv
mcgibbon Feb 4, 2026
788aac0
Merge branch 'feature/grouped_spectral_conv' into feature/grouped_spe…
mcgibbon Feb 4, 2026
7acb412
Merge branch 'main' into feature/makani_sfno_init
mcgibbon Feb 4, 2026
f43e8af
remove overwrite of conv2d weights
mcgibbon Feb 4, 2026
cbbc0b6
use varname makani is using
mcgibbon Feb 4, 2026
965765e
update regression target
mcgibbon Feb 4, 2026
8c18465
update diffusion regression targets
mcgibbon Feb 4, 2026
aef7ce3
Merge branch 'main' into feature/grouped_spectral_conv_2
mcgibbon Feb 4, 2026
e0fc1b4
remove second copy of _contract_dhconv
mcgibbon Feb 4, 2026
4db57d3
Merge branch 'feature/makani_sfno_init' into feature/grouped_spectral…
mcgibbon Feb 6, 2026
aa03c6d
add unit test that dhconv is faster when using groups
mcgibbon Feb 6, 2026
fbe3f9d
Merge branch 'main' into feature/grouped_spectral_conv_2
mcgibbon Feb 6, 2026
460198f
move test to correct file
mcgibbon Feb 6, 2026
7780916
Merge branch 'main' into feature/grouped_spectral_conv_2
mcgibbon Feb 6, 2026
0bc701a
add test with profiling for sfno
mcgibbon Feb 6, 2026
ad0d7b4
Merge branch 'main' into feature/grouped_spectral_conv_2
mcgibbon Feb 6, 2026
f788fae
update docstrings
mcgibbon Feb 6, 2026
0a861b9
Merge branch 'feature/grouped_spectral_conv_2' of github.com:ai2cm/ac…
mcgibbon Feb 9, 2026
768fdd9
add CUDATimer
mcgibbon Feb 9, 2026
db2c174
Merge commit '0bc701af46cd1a679dac36a09db24134ee01c9d7' into feature/…
mcgibbon Feb 9, 2026
1bbbf57
Merge branch 'feature/cuda_timing' into feature/profile_sfno_no_changes
mcgibbon Feb 9, 2026
dd9d85d
use core timer
mcgibbon Feb 9, 2026
5f6ae94
Merge branch 'feature/grouped_spectral_conv_2' into feature/profile_s…
mcgibbon Feb 9, 2026
25a9fdb
add timing for sht operations
mcgibbon Feb 9, 2026
7a5b7c3
remove redundant context timers
mcgibbon Feb 9, 2026
88f682a
add memory benchmarking context
mcgibbon Feb 9, 2026
7eb9d85
implement combining results
mcgibbon Feb 9, 2026
122c1df
refactor sfno profile to use central benchmark
mcgibbon Feb 9, 2026
83a302c
define a way to run benchmarks
mcgibbon Feb 9, 2026
66ca2aa
add regressions
mcgibbon Feb 9, 2026
76fe452
use ms for times
mcgibbon Feb 9, 2026
d61eefa
add code to plot benchmark results
mcgibbon Feb 9, 2026
5d82867
remove unit from unitless axis
mcgibbon Feb 9, 2026
594f78d
add benchmark regression test, remove low-time timers
mcgibbon Feb 10, 2026
34d0353
make timing regression gpu specific
mcgibbon Feb 10, 2026
69bb606
Merge branch 'main' into feature/profile_sfno_no_changes
mcgibbon Feb 10, 2026
0adb4ef
avoid instantiating cudatimer when might not be on gpu
mcgibbon Feb 10, 2026
e36e53e
skip more tests on cpu
mcgibbon Feb 10, 2026
7dfd05b
update regression target from main merge
mcgibbon Feb 10, 2026
c73aac3
accept timer in the rest of the filters
mcgibbon Feb 10, 2026
8ba5341
skip memory tests on cpu
mcgibbon Feb 10, 2026
ba14a9c
seed benchmark before regression
mcgibbon Feb 10, 2026
6aa6708
add automatic gpu benchmarking
mcgibbon Feb 10, 2026
c347c7c
push to branch
mcgibbon Feb 10, 2026
bc68f45
use PAT to push
mcgibbon Feb 10, 2026
7f4305c
remove benchmarking performance regression
mcgibbon Feb 10, 2026
e764f97
Merge branch 'main' into feature/profile_sfno_no_changes
mcgibbon Feb 10, 2026
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -12,7 +12,7 @@ repos:
args: [--maxkb=250]
exclude: |
(?x)^(
fme/ace/aggregator/inference/testdata/.*-regression.pt
fme/ace/aggregator/inference/testdata/.*-regression\.pt |
)$
- id: trailing-whitespace
- id: file-contents-sorter
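The exclude value above is a Python regex written in verbose ((?x)) mode, so the line breaks and indentation inside it are ignored; escaping the dot and leaving a trailing | inside the group makes it easy to append further test-data paths to the alternation later. A minimal sketch of what the pattern matches (the file paths are only illustrative; pre-commit applies its own matching to each staged filename):

import re

pattern = re.compile(
    r"""(?x)^(
        fme/ace/aggregator/inference/testdata/.*-regression\.pt |
    )$"""
)

# Regression-target tensors are excluded from the max-size check,
# while ordinary source files are not.
print(bool(pattern.match("fme/ace/aggregator/inference/testdata/foo-regression.pt")))  # True
print(bool(pattern.match("fme/core/benchmark/benchmark.py")))  # False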
13 changes: 13 additions & 0 deletions conftest.py
@@ -1,10 +1,23 @@
import os

os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" # required for determinism

import gc
import signal
from unittest import mock

import pytest
import torch

from fme.core.rand import set_seed


@pytest.fixture(autouse=True, scope="session")
def deterministic_pytorch():
torch.use_deterministic_algorithms(True)
torch.backends.cudnn.benchmark = False
set_seed(0)


def pytest_addoption(parser):
parser.addoption(
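Setting CUBLAS_WORKSPACE_CONFIG at the very top of conftest.py, before torch is imported, matters because torch.use_deterministic_algorithms(True) requires that variable to be in place before cuBLAS is initialized. The session-scoped autouse fixture then enables deterministic algorithms, disables cuDNN benchmarking, and seeds the RNGs once for the whole test session. A minimal sketch of the kind of seeding helper this assumes; the actual fme.core.rand.set_seed may differ:

import random

import numpy as np
import torch


def set_seed(seed: int) -> None:
    # Seed the Python, NumPy, and PyTorch generators (CPU and, if present, GPU).
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)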
3 changes: 3 additions & 0 deletions fme/core/__init__.py
@@ -1,3 +1,4 @@
from . import models as _ # to trigger registrations
from .atmosphere_data import AtmosphereData
from .device import get_device, using_gpu
from .gridded_ops import GriddedOperations
@@ -14,6 +15,8 @@
from .rand import set_seed
from .registry import Registry

del _

__all__ = [
"spherical_area_weights",
"weighted_mean",
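The `from . import models as _  # to trigger registrations` line imports the models subpackage purely for its side effects: module-level registration code runs at import time, and the throwaway alias is then deleted so it does not leak into the package namespace. A minimal sketch of the register-on-import pattern this relies on (the names here are hypothetical, not the actual fme registry API):

_REGISTRY: dict[str, type] = {}


def register(name: str):
    # Decorator that records a class in the registry as a side effect of import.
    def _decorator(cls: type) -> type:
        _REGISTRY[name] = cls
        return cls

    return _decorator


@register("example_model")  # runs when the module is imported, so importing is enough
class ExampleModel:
    ...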
1 change: 1 addition & 0 deletions fme/core/benchmark/.gitignore
@@ -0,0 +1 @@
results
Empty file added fme/core/benchmark/__init__.py
305 changes: 305 additions & 0 deletions fme/core/benchmark/benchmark.py
@@ -0,0 +1,305 @@
import abc
import dataclasses
import pathlib
from collections.abc import Callable
from typing import Self, TypeVar

import dacite
import matplotlib.pyplot as plt
import torch

from fme.core.benchmark.memory import MemoryResult, benchmark_memory
from fme.core.benchmark.timer import CUDATimer, NullTimer, Timer, TimerResult
from fme.core.typing_ import TensorDict


@dataclasses.dataclass
class BenchmarkResult:
memory: MemoryResult
timer: TimerResult

def __repr__(self) -> str:
return f"BenchmarkResult(memory={self.memory}, timer={self.timer})"

def asdict(self) -> dict:
return dataclasses.asdict(self)

@classmethod
def from_dict(cls, d: dict) -> "BenchmarkResult":
return dacite.from_dict(cls, d, config=dacite.Config(strict=True))

def assert_close(
self, other: "BenchmarkResult", rtol=0.02, children_rtol=0.02
) -> None:
try:
self.timer.assert_close(other.timer, rtol=rtol, children_rtol=children_rtol)
except AssertionError as e:
raise AssertionError(f"Timer results differ: {e}") from e
try:
self.memory.assert_close(other.memory, rtol=rtol)
except AssertionError as e:
raise AssertionError(f"Memory results differ: {e}") from e

def to_png(
self, path: str | pathlib.Path, label: str, child: str | None = None
) -> None:
# note this function was generated with AI
def avg_time(t: TimerResult) -> float:
return float(t.avg_time)

def self_time(t: TimerResult) -> float:
t_avg = avg_time(t)
c_avg = sum(avg_time(c) for c in t.children.values())
return max(t_avg - c_avg, 0.0)

def fmt_time(ms: float) -> str:
if ms >= 1000.0:
return f"{ms/1000.0:.2f}s"
if ms >= 10.0:
return f"{ms:.1f}ms"
return f"{ms:.2f}ms"

def label_ok(name: str, ms: float, frac_of_root: float) -> bool:
if not name:
return False
return frac_of_root >= 0.05

def sorted_children(t: TimerResult) -> list[tuple[str, TimerResult]]:
return sorted(
t.children.items(), key=lambda kv: avg_time(kv[1]), reverse=True
)

def blend_with_white(
rgb: tuple[float, float, float], amount: float
) -> tuple[float, float, float]:
# amount in [0,1]: 0 -> original, 1 -> white
return (
rgb[0] + (1.0 - rgb[0]) * amount,
rgb[1] + (1.0 - rgb[1]) * amount,
rgb[2] + (1.0 - rgb[2]) * amount,
)

root = self.timer
if child is not None:
for part in child.split("."):
if part not in root.children:
raise ValueError(f"Child '{child}' not found in timer results.")
root = root.children[part]
root_avg = avg_time(root)

max_alloc_mb = self.memory.max_alloc / (1024.0 * 1024.0)

fig = plt.figure(figsize=(8, 6), constrained_layout=True)
if root_avg <= 0.0:
fig.suptitle(
f"Benchmark for {label}\ntotal=0.00s, max_alloc={max_alloc_mb:.1f} MB",
fontsize=14,
)
ax0 = fig.add_subplot(1, 1, 1)
ax0.text(0.5, 0.5, "No timing data", ha="center", va="center")
ax0.axis("off")
fig.savefig(path, dpi=200)
plt.close(fig)
return

fig.suptitle(
f"Benchmark for {label}\ntotal={fmt_time(root_avg)}, "
f"max_alloc={max_alloc_mb:.1f} MB",
fontsize=14,
)

ax = fig.add_subplot(1, 1, 1)
ax.set_xlim(0, 2)
ax.set_ylim(0, root_avg)
ax.set_xticks([0.5, 1.5])
ax.set_xticklabels(["Level 1", "Level 2"])
ax.set_ylabel("Avg time")
ax.set_yticks([])
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

gray = (0.85, 0.85, 0.85, 1.0)
cmap = plt.get_cmap("tab20")

lvl1 = sorted_children(root)
lvl1_names = [n for n, _ in lvl1]
lvl1_index = {n: i for i, n in enumerate(lvl1_names)}

# Level 1 stack (root children + root self in gray, unlabeled)
lvl1_segments: list[tuple[str, float, tuple[float, float, float, float]]] = []
for n1, t1 in lvl1:
base = cmap(lvl1_index[n1] % cmap.N)
lvl1_segments.append((n1, avg_time(t1), base))
r_self = self_time(root)
if r_self > 0.0:
lvl1_segments.append(("", r_self, gray))

def draw_stack(
x_center: float,
segments: list[tuple[str, float, tuple[float, float, float, float]]],
) -> None:
width = 0.86
y = 0.0
for name, sec, color in segments:
if sec <= 0.0:
continue
ax.bar(
x_center,
sec,
bottom=y,
width=width,
align="center",
color=color,
edgecolor="white",
linewidth=1.0,
)
frac = sec / root_avg
if label_ok(name, sec, frac):
ax.text(
x_center,
y + sec / 2.0,
f"{name}\n{fmt_time(sec)}",
ha="center",
va="center",
fontsize=9,
rotation=0, # keep horizontal to avoid cross-column overlap
clip_on=True,
)
y += sec
if y < root_avg:
ax.bar(
x_center,
root_avg - y,
bottom=y,
width=width,
align="center",
color=gray,
edgecolor="white",
linewidth=1.0,
)

draw_stack(0.5, lvl1_segments)

# Level 2 stack:
# For each level-1 slice, stack its children
# (colored as parent hue variants) + self in gray.
lvl2_segments: list[tuple[str, float, tuple[float, float, float, float]]] = []
for n1, t1 in lvl1:
parent_rgba = cmap(lvl1_index[n1] % cmap.N)
parent_rgb = (parent_rgba[0], parent_rgba[1], parent_rgba[2])

children = sorted_children(t1)
k = len(children)
for i, (n2, t2) in enumerate(children):
# Same “type” of color as parent: lighten progressively per child.
# First child is closest to parent; later children are lighter.
lighten = 0.10 + (0.55 * (i / max(k - 1, 1)))
rgb = blend_with_white(parent_rgb, lighten)
lvl2_segments.append((n2, avg_time(t2), (rgb[0], rgb[1], rgb[2], 1.0)))

s1 = self_time(t1)
if s1 > 0.0:
lvl2_segments.append(("", s1, gray))

draw_stack(1.5, lvl2_segments)

fig.tight_layout(rect=(0.02, 0.02, 0.98, 0.98))
fig.savefig(path, dpi=200, bbox_inches="tight")
plt.close(fig)


T = TypeVar("T")


class BenchmarkABC(abc.ABC):
@classmethod
def new_from_fn(
cls,
fn: Callable[[Timer], TensorDict],
) -> "BenchmarkABC":
class FnBenchmark(BenchmarkABC):
@classmethod
def new(cls) -> "FnBenchmark":
return FnBenchmark()

def run_instance(self, timer: Timer) -> TensorDict:
return fn(timer)

return FnBenchmark()

@classmethod
@abc.abstractmethod
def new(cls: type[Self]) -> Self:
"""
Initialize any state needed for the benchmark.
This will be called once before the benchmark is run.
"""
pass

@classmethod
def new_for_regression(cls: type[Self]) -> Self | None:
"""
Initialize any state needed for regression testing.
This will be called once before regression tests are run.

If regression testing is not needed, this can return None,
and regression testing will not be run.

This exists as a separate method from new so that it can
use small data sizes more conducive to storing regression targets in git.
"""
return None

@classmethod
def run_benchmark(cls, iters=10, warmup=1) -> BenchmarkResult:
if not torch.cuda.is_available():
raise RuntimeError("CUDA is not available, cannot run benchmark.")
null_timer = NullTimer()
benchmark = cls.new()
for _ in range(warmup):
benchmark.run_instance(null_timer)
timer = CUDATimer()
with benchmark_memory() as bm:
for _ in range(iters):
with timer:
benchmark.run_instance(timer)
return BenchmarkResult(
timer=timer.result,
memory=bm.result,
)

@classmethod
def run_regression(cls) -> TensorDict | None:
benchmark = cls.new_for_regression()
if benchmark is None:
return None
null_timer = NullTimer()
return benchmark.run_instance(null_timer)

@abc.abstractmethod
def run_instance(self: Self, timer: Timer) -> TensorDict:
"""
Run the benchmark. This will be called multiple times,
and should return a TensorDict of results.

This must not mutate any state on self, since the same instance may be
used across multiple iterations.
"""
pass


_BENCHMARKS: dict[str, type[BenchmarkABC]] = {}


def register_benchmark(name: str) -> Callable[[type[BenchmarkABC]], type[BenchmarkABC]]:
def _register(fn: type[BenchmarkABC]) -> type[BenchmarkABC]:
if name in _BENCHMARKS:
raise ValueError(f"Benchmark with name '{name}' is already registered.")
_BENCHMARKS[name] = fn
return fn

return _register


def get_benchmarks() -> dict[str, type[BenchmarkABC]]:
return _BENCHMARKS.copy()
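Taken together, a benchmark is defined by subclassing BenchmarkABC, registered by name, and executed on a GPU with run_benchmark, which wraps the timed iterations in a CUDATimer and the memory-benchmarking context. A minimal usage sketch: MatmulBenchmark and the output filename are hypothetical, the imported names come from the code above, and a CUDA device is assumed.

import torch

from fme.core.benchmark.benchmark import (
    BenchmarkABC,
    BenchmarkResult,
    register_benchmark,
)
from fme.core.benchmark.timer import Timer
from fme.core.typing_ import TensorDict


@register_benchmark("matmul_example")
class MatmulBenchmark(BenchmarkABC):
    @classmethod
    def new(cls) -> "MatmulBenchmark":
        # No state is needed for this toy benchmark.
        return cls()

    def run_instance(self, timer: Timer) -> TensorDict:
        # Each call builds its own inputs so no state on self is mutated.
        x = torch.randn(1024, 1024, device="cuda")
        return {"out": x @ x}


result: BenchmarkResult = MatmulBenchmark.run_benchmark(iters=10, warmup=1)
result.to_png("matmul_example.png", label="matmul_example")

# Regression-style comparison against a previously stored result, e.g. one
# serialized earlier with result.asdict():
# target = BenchmarkResult.from_dict(stored_dict)
# result.assert_close(target, rtol=0.02)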