Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions challenges/medium/74_n_body_force/challenge.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
<p>
Given <code>N</code> bodies in 3D space, each with a position vector
<code>(x, y, z)</code> and a scalar mass, compute the gravitational force acting on each body
due to all other bodies. The softened gravitational force on body <code>i</code> is:
</p>
<p>
\[
\mathbf{F}_i = \sum_{j \neq i} m_j
\frac{\mathbf{r}_j - \mathbf{r}_i}
{\left(|\mathbf{r}_j - \mathbf{r}_i|^2 + \varepsilon^2\right)^{3/2}}
\]
</p>
<p>
where \(\varepsilon = 10^{-3}\) is a softening factor that prevents singularities when two
bodies occupy the same position. Positions and forces are stored in row-major order:
<code>array[i * 3 + k]</code> is the <em>k</em>-th coordinate of body <code>i</code>.
</p>

<h2>Implementation Requirements</h2>
<ul>
<li>Use only native GPU features (external libraries are not permitted)</li>
<li>The <code>solve</code> function signature must remain unchanged</li>
<li>The computed force for each body must be written to <code>forces</code></li>
</ul>

<h2>Example</h2>
<p>
Input (<code>N</code> = 2):<br>
positions:
\[
\begin{bmatrix}
0.0 & 0.0 & 0.0 \\
0.0 & 3.0 & 4.0
\end{bmatrix}
\]
masses:
\[
\begin{bmatrix}
2.0 & 1.0
\end{bmatrix}
\]
Output:<br>
forces:
\[
\begin{bmatrix}
0.000 & 0.024 & 0.032 \\
0.000 & -0.048 & -0.064
\end{bmatrix}
\]
</p>

<h2>Constraints</h2>
<ul>
<li>1 &le; <code>N</code> &le; 65,536</li>
<li>Positions and forces are <code>float32</code> arrays of shape <code>N</code> &times; 3 in row-major order</li>
<li>0.0 &lt; <code>masses[i]</code> &le; 100.0</li>
<li>Performance is measured with <code>N</code> = 8,192</li>
</ul>
131 changes: 131 additions & 0 deletions challenges/medium/74_n_body_force/challenge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import ctypes
from typing import Any, Dict, List

import torch
from core.challenge_base import ChallengeBase

_EPS = 1e-3


class Challenge(ChallengeBase):
def __init__(self):
super().__init__(
name="N-body Gravitational Force",
atol=1e-2,
rtol=1e-2,
num_gpus=1,
access_tier="free",
)

def reference_impl(
self,
positions: torch.Tensor,
masses: torch.Tensor,
forces: torch.Tensor,
N: int,
):
assert positions.shape == (
N,
3,
), f"Expected positions.shape=({N}, 3), got {positions.shape}"
assert masses.shape == (N,), f"Expected masses.shape=({N},), got {masses.shape}"
assert forces.shape == (N, 3), f"Expected forces.shape=({N}, 3), got {forces.shape}"
assert positions.dtype == torch.float32
assert masses.dtype == torch.float32
assert forces.dtype == torch.float32
assert positions.device.type == "cuda"

CHUNK = 1024
result = torch.zeros((N, 3), device=positions.device, dtype=positions.dtype)
for start in range(0, N, CHUNK):
end = min(start + CHUNK, N)
# r[i, k] = positions[start+k] - positions[i]: vector from particle i to source k
# positions[start:end].unsqueeze(0): [1, C, 3] (C = end-start)
# positions.unsqueeze(1): [N, 1, 3]
# broadcast result: [N, C, 3]
r = positions[start:end].unsqueeze(0) - positions.unsqueeze(1) # [N, C, 3]
dist_sq = (r * r).sum(dim=2) # [N, C]
denom = (dist_sq + _EPS * _EPS) ** 1.5 # [N, C]
mass_chunk = masses[start:end] # [C]
result += (mass_chunk.view(1, -1, 1) * r / denom.unsqueeze(2)).sum(dim=1)
forces.copy_(result)

def get_solve_signature(self) -> Dict[str, tuple]:
return {
"positions": (ctypes.POINTER(ctypes.c_float), "in"),
"masses": (ctypes.POINTER(ctypes.c_float), "in"),
"forces": (ctypes.POINTER(ctypes.c_float), "out"),
"N": (ctypes.c_int, "in"),
}

def generate_example_test(self) -> Dict[str, Any]:
dtype = torch.float32
positions = torch.tensor([[0.0, 0.0, 0.0], [0.0, 3.0, 4.0]], device="cuda", dtype=dtype)
masses = torch.tensor([2.0, 1.0], device="cuda", dtype=dtype)
forces = torch.zeros((2, 3), device="cuda", dtype=dtype)
return {"positions": positions, "masses": masses, "forces": forces, "N": 2}

def _make_test(self, N: int, pos_range: float = 5.0) -> Dict[str, Any]:
dtype = torch.float32
positions = torch.empty((N, 3), device="cuda", dtype=dtype).uniform_(-pos_range, pos_range)
masses = torch.empty(N, device="cuda", dtype=dtype).uniform_(0.5, 2.0)
forces = torch.zeros((N, 3), device="cuda", dtype=dtype)
return {"positions": positions, "masses": masses, "forces": forces, "N": N}

def generate_functional_test(self) -> List[Dict[str, Any]]:
dtype = torch.float32
tests = []

# N=1: single particle — no other bodies, forces must be zero
tests.append(
{
"positions": torch.tensor([[1.0, 2.0, 3.0]], device="cuda", dtype=dtype),
"masses": torch.tensor([1.5], device="cuda", dtype=dtype),
"forces": torch.zeros((1, 3), device="cuda", dtype=dtype),
"N": 1,
}
)

# N=2: one particle at a negative position
tests.append(
{
"positions": torch.tensor(
[[-3.0, 0.0, 0.0], [0.0, 0.0, 0.0]], device="cuda", dtype=dtype
),
"masses": torch.tensor([1.0, 2.0], device="cuda", dtype=dtype),
"forces": torch.zeros((2, 3), device="cuda", dtype=dtype),
"N": 2,
}
)

# N=4: all particles co-located at the origin — zero displacement gives zero forces
tests.append(
{
"positions": torch.zeros((4, 3), device="cuda", dtype=dtype),
"masses": torch.tensor([1.0, 2.0, 3.0, 4.0], device="cuda", dtype=dtype),
"forces": torch.zeros((4, 3), device="cuda", dtype=dtype),
"N": 4,
}
)

# Power-of-2 sizes
tests.append(self._make_test(16))
tests.append(self._make_test(256))

# Non-power-of-2 sizes
tests.append(self._make_test(30))
tests.append(self._make_test(100))
tests.append(self._make_test(255))

# Realistic size
tests.append(self._make_test(1024))

return tests

def generate_performance_test(self) -> Dict[str, Any]:
N = 8192
dtype = torch.float32
positions = torch.empty((N, 3), device="cuda", dtype=dtype).uniform_(-10.0, 10.0)
masses = torch.empty(N, device="cuda", dtype=dtype).uniform_(0.1, 10.0)
forces = torch.zeros((N, 3), device="cuda", dtype=dtype)
return {"positions": positions, "masses": masses, "forces": forces, "N": N}
4 changes: 4 additions & 0 deletions challenges/medium/74_n_body_force/starter/starter.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#include <cuda_runtime.h>

// positions, masses, forces are device pointers
extern "C" void solve(const float* positions, const float* masses, float* forces, int N) {}
8 changes: 8 additions & 0 deletions challenges/medium/74_n_body_force/starter/starter.cute.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import cutlass
import cutlass.cute as cute


# positions, masses, forces are tensors on the GPU
@cute.jit
def solve(positions: cute.Tensor, masses: cute.Tensor, forces: cute.Tensor, N: cute.Uint32):
pass
9 changes: 9 additions & 0 deletions challenges/medium/74_n_body_force/starter/starter.jax.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import jax
import jax.numpy as jnp


# positions, masses are tensors on GPU
@jax.jit
def solve(positions: jax.Array, masses: jax.Array, N: int) -> jax.Array:
# return output tensor directly
pass
7 changes: 7 additions & 0 deletions challenges/medium/74_n_body_force/starter/starter.mojo
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from gpu.host import DeviceContext
from memory import UnsafePointer

# positions, masses, forces are device pointers
@export
def solve(positions: UnsafePointer[Float32], masses: UnsafePointer[Float32], forces: UnsafePointer[Float32], N: Int32):
pass
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import torch


# positions, masses, forces are tensors on the GPU
def solve(positions: torch.Tensor, masses: torch.Tensor, forces: torch.Tensor, N: int):
pass
8 changes: 8 additions & 0 deletions challenges/medium/74_n_body_force/starter/starter.triton.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import torch
import triton
import triton.language as tl


# positions, masses, forces are tensors on the GPU
def solve(positions: torch.Tensor, masses: torch.Tensor, forces: torch.Tensor, N: int):
pass