From a2e16397b1314888d321bc29502d0660eb30ca81 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Fri, 27 Feb 2026 09:49:05 +0000 Subject: [PATCH] Add challenge 74: N-body Gravitational Force (Medium) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Teaches the all-pairs O(N²) parallel computation pattern: each output depends on all N inputs, requiring shared-memory tiling to avoid the global-memory bandwidth bottleneck. Force uses the softened gravity formula with ε = 1e-3, tested on sizes up to N = 8,192. Co-Authored-By: Claude Sonnet 4.6 --- .../medium/74_n_body_force/challenge.html | 58 ++++++++ .../medium/74_n_body_force/challenge.py | 131 ++++++++++++++++++ .../medium/74_n_body_force/starter/starter.cu | 4 + .../74_n_body_force/starter/starter.cute.py | 8 ++ .../74_n_body_force/starter/starter.jax.py | 9 ++ .../74_n_body_force/starter/starter.mojo | 7 + .../starter/starter.pytorch.py | 6 + .../74_n_body_force/starter/starter.triton.py | 8 ++ 8 files changed, 231 insertions(+) create mode 100644 challenges/medium/74_n_body_force/challenge.html create mode 100644 challenges/medium/74_n_body_force/challenge.py create mode 100644 challenges/medium/74_n_body_force/starter/starter.cu create mode 100644 challenges/medium/74_n_body_force/starter/starter.cute.py create mode 100644 challenges/medium/74_n_body_force/starter/starter.jax.py create mode 100644 challenges/medium/74_n_body_force/starter/starter.mojo create mode 100644 challenges/medium/74_n_body_force/starter/starter.pytorch.py create mode 100644 challenges/medium/74_n_body_force/starter/starter.triton.py diff --git a/challenges/medium/74_n_body_force/challenge.html b/challenges/medium/74_n_body_force/challenge.html new file mode 100644 index 00000000..144e40f5 --- /dev/null +++ b/challenges/medium/74_n_body_force/challenge.html @@ -0,0 +1,58 @@ +

+ Given N bodies in 3D space, each with a position vector + (x, y, z) and a scalar mass, compute the gravitational force acting on each body + due to all other bodies. The softened gravitational force on body i is: +

+

+ \[ + \mathbf{F}_i = \sum_{j \neq i} m_j + \frac{\mathbf{r}_j - \mathbf{r}_i} + {\left(|\mathbf{r}_j - \mathbf{r}_i|^2 + \varepsilon^2\right)^{3/2}} + \] +

+

+ where \(\varepsilon = 10^{-3}\) is a softening factor that prevents singularities when two + bodies occupy the same position. Positions and forces are stored in row-major order: + array[i * 3 + k] is the k-th coordinate of body i. +

+ +

Implementation Requirements

+ + +

Example

+

+ Input (N = 2):
+ positions: + \[ + \begin{bmatrix} + 0.0 & 0.0 & 0.0 \\ + 0.0 & 3.0 & 4.0 + \end{bmatrix} + \] + masses: + \[ + \begin{bmatrix} + 2.0 & 1.0 + \end{bmatrix} + \] + Output:
+ forces: + \[ + \begin{bmatrix} + 0.000 & 0.024 & 0.032 \\ + 0.000 & -0.048 & -0.064 + \end{bmatrix} + \] +

+ +

Constraints

+ diff --git a/challenges/medium/74_n_body_force/challenge.py b/challenges/medium/74_n_body_force/challenge.py new file mode 100644 index 00000000..b4557028 --- /dev/null +++ b/challenges/medium/74_n_body_force/challenge.py @@ -0,0 +1,131 @@ +import ctypes +from typing import Any, Dict, List + +import torch +from core.challenge_base import ChallengeBase + +_EPS = 1e-3 + + +class Challenge(ChallengeBase): + def __init__(self): + super().__init__( + name="N-body Gravitational Force", + atol=1e-2, + rtol=1e-2, + num_gpus=1, + access_tier="free", + ) + + def reference_impl( + self, + positions: torch.Tensor, + masses: torch.Tensor, + forces: torch.Tensor, + N: int, + ): + assert positions.shape == ( + N, + 3, + ), f"Expected positions.shape=({N}, 3), got {positions.shape}" + assert masses.shape == (N,), f"Expected masses.shape=({N},), got {masses.shape}" + assert forces.shape == (N, 3), f"Expected forces.shape=({N}, 3), got {forces.shape}" + assert positions.dtype == torch.float32 + assert masses.dtype == torch.float32 + assert forces.dtype == torch.float32 + assert positions.device.type == "cuda" + + CHUNK = 1024 + result = torch.zeros((N, 3), device=positions.device, dtype=positions.dtype) + for start in range(0, N, CHUNK): + end = min(start + CHUNK, N) + # r[i, k] = positions[start+k] - positions[i]: vector from particle i to source k + # positions[start:end].unsqueeze(0): [1, C, 3] (C = end-start) + # positions.unsqueeze(1): [N, 1, 3] + # broadcast result: [N, C, 3] + r = positions[start:end].unsqueeze(0) - positions.unsqueeze(1) # [N, C, 3] + dist_sq = (r * r).sum(dim=2) # [N, C] + denom = (dist_sq + _EPS * _EPS) ** 1.5 # [N, C] + mass_chunk = masses[start:end] # [C] + result += (mass_chunk.view(1, -1, 1) * r / denom.unsqueeze(2)).sum(dim=1) + forces.copy_(result) + + def get_solve_signature(self) -> Dict[str, tuple]: + return { + "positions": (ctypes.POINTER(ctypes.c_float), "in"), + "masses": (ctypes.POINTER(ctypes.c_float), "in"), + "forces": (ctypes.POINTER(ctypes.c_float), "out"), + "N": (ctypes.c_int, "in"), + } + + def generate_example_test(self) -> Dict[str, Any]: + dtype = torch.float32 + positions = torch.tensor([[0.0, 0.0, 0.0], [0.0, 3.0, 4.0]], device="cuda", dtype=dtype) + masses = torch.tensor([2.0, 1.0], device="cuda", dtype=dtype) + forces = torch.zeros((2, 3), device="cuda", dtype=dtype) + return {"positions": positions, "masses": masses, "forces": forces, "N": 2} + + def _make_test(self, N: int, pos_range: float = 5.0) -> Dict[str, Any]: + dtype = torch.float32 + positions = torch.empty((N, 3), device="cuda", dtype=dtype).uniform_(-pos_range, pos_range) + masses = torch.empty(N, device="cuda", dtype=dtype).uniform_(0.5, 2.0) + forces = torch.zeros((N, 3), device="cuda", dtype=dtype) + return {"positions": positions, "masses": masses, "forces": forces, "N": N} + + def generate_functional_test(self) -> List[Dict[str, Any]]: + dtype = torch.float32 + tests = [] + + # N=1: single particle — no other bodies, forces must be zero + tests.append( + { + "positions": torch.tensor([[1.0, 2.0, 3.0]], device="cuda", dtype=dtype), + "masses": torch.tensor([1.5], device="cuda", dtype=dtype), + "forces": torch.zeros((1, 3), device="cuda", dtype=dtype), + "N": 1, + } + ) + + # N=2: one particle at a negative position + tests.append( + { + "positions": torch.tensor( + [[-3.0, 0.0, 0.0], [0.0, 0.0, 0.0]], device="cuda", dtype=dtype + ), + "masses": torch.tensor([1.0, 2.0], device="cuda", dtype=dtype), + "forces": torch.zeros((2, 3), device="cuda", dtype=dtype), + "N": 2, + } + ) + + # N=4: all particles co-located at the origin — zero displacement gives zero forces + tests.append( + { + "positions": torch.zeros((4, 3), device="cuda", dtype=dtype), + "masses": torch.tensor([1.0, 2.0, 3.0, 4.0], device="cuda", dtype=dtype), + "forces": torch.zeros((4, 3), device="cuda", dtype=dtype), + "N": 4, + } + ) + + # Power-of-2 sizes + tests.append(self._make_test(16)) + tests.append(self._make_test(256)) + + # Non-power-of-2 sizes + tests.append(self._make_test(30)) + tests.append(self._make_test(100)) + tests.append(self._make_test(255)) + + # Realistic size + tests.append(self._make_test(1024)) + + return tests + + def generate_performance_test(self) -> Dict[str, Any]: + N = 8192 + dtype = torch.float32 + positions = torch.empty((N, 3), device="cuda", dtype=dtype).uniform_(-10.0, 10.0) + masses = torch.empty(N, device="cuda", dtype=dtype).uniform_(0.1, 10.0) + forces = torch.zeros((N, 3), device="cuda", dtype=dtype) + return {"positions": positions, "masses": masses, "forces": forces, "N": N} diff --git a/challenges/medium/74_n_body_force/starter/starter.cu b/challenges/medium/74_n_body_force/starter/starter.cu new file mode 100644 index 00000000..c75b13fc --- /dev/null +++ b/challenges/medium/74_n_body_force/starter/starter.cu @@ -0,0 +1,4 @@ +#include + +// positions, masses, forces are device pointers +extern "C" void solve(const float* positions, const float* masses, float* forces, int N) {} diff --git a/challenges/medium/74_n_body_force/starter/starter.cute.py b/challenges/medium/74_n_body_force/starter/starter.cute.py new file mode 100644 index 00000000..096aa529 --- /dev/null +++ b/challenges/medium/74_n_body_force/starter/starter.cute.py @@ -0,0 +1,8 @@ +import cutlass +import cutlass.cute as cute + + +# positions, masses, forces are tensors on the GPU +@cute.jit +def solve(positions: cute.Tensor, masses: cute.Tensor, forces: cute.Tensor, N: cute.Uint32): + pass diff --git a/challenges/medium/74_n_body_force/starter/starter.jax.py b/challenges/medium/74_n_body_force/starter/starter.jax.py new file mode 100644 index 00000000..73bdd177 --- /dev/null +++ b/challenges/medium/74_n_body_force/starter/starter.jax.py @@ -0,0 +1,9 @@ +import jax +import jax.numpy as jnp + + +# positions, masses are tensors on GPU +@jax.jit +def solve(positions: jax.Array, masses: jax.Array, N: int) -> jax.Array: + # return output tensor directly + pass diff --git a/challenges/medium/74_n_body_force/starter/starter.mojo b/challenges/medium/74_n_body_force/starter/starter.mojo new file mode 100644 index 00000000..f5d974c7 --- /dev/null +++ b/challenges/medium/74_n_body_force/starter/starter.mojo @@ -0,0 +1,7 @@ +from gpu.host import DeviceContext +from memory import UnsafePointer + +# positions, masses, forces are device pointers +@export +def solve(positions: UnsafePointer[Float32], masses: UnsafePointer[Float32], forces: UnsafePointer[Float32], N: Int32): + pass diff --git a/challenges/medium/74_n_body_force/starter/starter.pytorch.py b/challenges/medium/74_n_body_force/starter/starter.pytorch.py new file mode 100644 index 00000000..1998585f --- /dev/null +++ b/challenges/medium/74_n_body_force/starter/starter.pytorch.py @@ -0,0 +1,6 @@ +import torch + + +# positions, masses, forces are tensors on the GPU +def solve(positions: torch.Tensor, masses: torch.Tensor, forces: torch.Tensor, N: int): + pass diff --git a/challenges/medium/74_n_body_force/starter/starter.triton.py b/challenges/medium/74_n_body_force/starter/starter.triton.py new file mode 100644 index 00000000..0ba5c02f --- /dev/null +++ b/challenges/medium/74_n_body_force/starter/starter.triton.py @@ -0,0 +1,8 @@ +import torch +import triton +import triton.language as tl + + +# positions, masses, forces are tensors on the GPU +def solve(positions: torch.Tensor, masses: torch.Tensor, forces: torch.Tensor, N: int): + pass