diff --git a/challenges/medium/74_n_body_force/challenge.html b/challenges/medium/74_n_body_force/challenge.html
new file mode 100644
index 00000000..144e40f5
--- /dev/null
+++ b/challenges/medium/74_n_body_force/challenge.html
@@ -0,0 +1,58 @@
+
+ Given N bodies in 3D space, each with a position vector
+ (x, y, z) and a scalar mass, compute the gravitational force acting on each body
+ due to all other bodies. The softened gravitational force on body i is:
+
+
+ \[
+ \mathbf{F}_i = \sum_{j \neq i} m_j
+ \frac{\mathbf{r}_j - \mathbf{r}_i}
+ {\left(|\mathbf{r}_j - \mathbf{r}_i|^2 + \varepsilon^2\right)^{3/2}}
+ \]
+
+
+ where \(\varepsilon = 10^{-3}\) is a softening factor that prevents singularities when two
+ bodies occupy the same position. Positions and forces are stored in row-major order:
+ array[i * 3 + k] is the k-th coordinate of body i.
+
+
+Implementation Requirements
+
+ - Use only native GPU features (external libraries are not permitted)
+ - The
solve function signature must remain unchanged
+ - The computed force for each body must be written to
forces
+
+
+Example
+
+ Input (N = 2):
+ positions:
+ \[
+ \begin{bmatrix}
+ 0.0 & 0.0 & 0.0 \\
+ 0.0 & 3.0 & 4.0
+ \end{bmatrix}
+ \]
+ masses:
+ \[
+ \begin{bmatrix}
+ 2.0 & 1.0
+ \end{bmatrix}
+ \]
+ Output:
+ forces:
+ \[
+ \begin{bmatrix}
+ 0.000 & 0.024 & 0.032 \\
+ 0.000 & -0.048 & -0.064
+ \end{bmatrix}
+ \]
+
+
+Constraints
+
+ - 1 ≤
N ≤ 65,536
+ - Positions and forces are
float32 arrays of shape N × 3 in row-major order
+ - 0.0 <
masses[i] ≤ 100.0
+ - Performance is measured with
N = 8,192
+
diff --git a/challenges/medium/74_n_body_force/challenge.py b/challenges/medium/74_n_body_force/challenge.py
new file mode 100644
index 00000000..b4557028
--- /dev/null
+++ b/challenges/medium/74_n_body_force/challenge.py
@@ -0,0 +1,131 @@
+import ctypes
+from typing import Any, Dict, List
+
+import torch
+from core.challenge_base import ChallengeBase
+
+_EPS = 1e-3
+
+
+class Challenge(ChallengeBase):
+ def __init__(self):
+ super().__init__(
+ name="N-body Gravitational Force",
+ atol=1e-2,
+ rtol=1e-2,
+ num_gpus=1,
+ access_tier="free",
+ )
+
+ def reference_impl(
+ self,
+ positions: torch.Tensor,
+ masses: torch.Tensor,
+ forces: torch.Tensor,
+ N: int,
+ ):
+ assert positions.shape == (
+ N,
+ 3,
+ ), f"Expected positions.shape=({N}, 3), got {positions.shape}"
+ assert masses.shape == (N,), f"Expected masses.shape=({N},), got {masses.shape}"
+ assert forces.shape == (N, 3), f"Expected forces.shape=({N}, 3), got {forces.shape}"
+ assert positions.dtype == torch.float32
+ assert masses.dtype == torch.float32
+ assert forces.dtype == torch.float32
+ assert positions.device.type == "cuda"
+
+ CHUNK = 1024
+ result = torch.zeros((N, 3), device=positions.device, dtype=positions.dtype)
+ for start in range(0, N, CHUNK):
+ end = min(start + CHUNK, N)
+ # r[i, k] = positions[start+k] - positions[i]: vector from particle i to source k
+ # positions[start:end].unsqueeze(0): [1, C, 3] (C = end-start)
+ # positions.unsqueeze(1): [N, 1, 3]
+ # broadcast result: [N, C, 3]
+ r = positions[start:end].unsqueeze(0) - positions.unsqueeze(1) # [N, C, 3]
+ dist_sq = (r * r).sum(dim=2) # [N, C]
+ denom = (dist_sq + _EPS * _EPS) ** 1.5 # [N, C]
+ mass_chunk = masses[start:end] # [C]
+ result += (mass_chunk.view(1, -1, 1) * r / denom.unsqueeze(2)).sum(dim=1)
+ forces.copy_(result)
+
+ def get_solve_signature(self) -> Dict[str, tuple]:
+ return {
+ "positions": (ctypes.POINTER(ctypes.c_float), "in"),
+ "masses": (ctypes.POINTER(ctypes.c_float), "in"),
+ "forces": (ctypes.POINTER(ctypes.c_float), "out"),
+ "N": (ctypes.c_int, "in"),
+ }
+
+ def generate_example_test(self) -> Dict[str, Any]:
+ dtype = torch.float32
+ positions = torch.tensor([[0.0, 0.0, 0.0], [0.0, 3.0, 4.0]], device="cuda", dtype=dtype)
+ masses = torch.tensor([2.0, 1.0], device="cuda", dtype=dtype)
+ forces = torch.zeros((2, 3), device="cuda", dtype=dtype)
+ return {"positions": positions, "masses": masses, "forces": forces, "N": 2}
+
+ def _make_test(self, N: int, pos_range: float = 5.0) -> Dict[str, Any]:
+ dtype = torch.float32
+ positions = torch.empty((N, 3), device="cuda", dtype=dtype).uniform_(-pos_range, pos_range)
+ masses = torch.empty(N, device="cuda", dtype=dtype).uniform_(0.5, 2.0)
+ forces = torch.zeros((N, 3), device="cuda", dtype=dtype)
+ return {"positions": positions, "masses": masses, "forces": forces, "N": N}
+
+ def generate_functional_test(self) -> List[Dict[str, Any]]:
+ dtype = torch.float32
+ tests = []
+
+ # N=1: single particle — no other bodies, forces must be zero
+ tests.append(
+ {
+ "positions": torch.tensor([[1.0, 2.0, 3.0]], device="cuda", dtype=dtype),
+ "masses": torch.tensor([1.5], device="cuda", dtype=dtype),
+ "forces": torch.zeros((1, 3), device="cuda", dtype=dtype),
+ "N": 1,
+ }
+ )
+
+ # N=2: one particle at a negative position
+ tests.append(
+ {
+ "positions": torch.tensor(
+ [[-3.0, 0.0, 0.0], [0.0, 0.0, 0.0]], device="cuda", dtype=dtype
+ ),
+ "masses": torch.tensor([1.0, 2.0], device="cuda", dtype=dtype),
+ "forces": torch.zeros((2, 3), device="cuda", dtype=dtype),
+ "N": 2,
+ }
+ )
+
+ # N=4: all particles co-located at the origin — zero displacement gives zero forces
+ tests.append(
+ {
+ "positions": torch.zeros((4, 3), device="cuda", dtype=dtype),
+ "masses": torch.tensor([1.0, 2.0, 3.0, 4.0], device="cuda", dtype=dtype),
+ "forces": torch.zeros((4, 3), device="cuda", dtype=dtype),
+ "N": 4,
+ }
+ )
+
+ # Power-of-2 sizes
+ tests.append(self._make_test(16))
+ tests.append(self._make_test(256))
+
+ # Non-power-of-2 sizes
+ tests.append(self._make_test(30))
+ tests.append(self._make_test(100))
+ tests.append(self._make_test(255))
+
+ # Realistic size
+ tests.append(self._make_test(1024))
+
+ return tests
+
+ def generate_performance_test(self) -> Dict[str, Any]:
+ N = 8192
+ dtype = torch.float32
+ positions = torch.empty((N, 3), device="cuda", dtype=dtype).uniform_(-10.0, 10.0)
+ masses = torch.empty(N, device="cuda", dtype=dtype).uniform_(0.1, 10.0)
+ forces = torch.zeros((N, 3), device="cuda", dtype=dtype)
+ return {"positions": positions, "masses": masses, "forces": forces, "N": N}
diff --git a/challenges/medium/74_n_body_force/starter/starter.cu b/challenges/medium/74_n_body_force/starter/starter.cu
new file mode 100644
index 00000000..c75b13fc
--- /dev/null
+++ b/challenges/medium/74_n_body_force/starter/starter.cu
@@ -0,0 +1,4 @@
+#include
+
+// positions, masses, forces are device pointers
+extern "C" void solve(const float* positions, const float* masses, float* forces, int N) {}
diff --git a/challenges/medium/74_n_body_force/starter/starter.cute.py b/challenges/medium/74_n_body_force/starter/starter.cute.py
new file mode 100644
index 00000000..096aa529
--- /dev/null
+++ b/challenges/medium/74_n_body_force/starter/starter.cute.py
@@ -0,0 +1,8 @@
+import cutlass
+import cutlass.cute as cute
+
+
+# positions, masses, forces are tensors on the GPU
+@cute.jit
+def solve(positions: cute.Tensor, masses: cute.Tensor, forces: cute.Tensor, N: cute.Uint32):
+ pass
diff --git a/challenges/medium/74_n_body_force/starter/starter.jax.py b/challenges/medium/74_n_body_force/starter/starter.jax.py
new file mode 100644
index 00000000..73bdd177
--- /dev/null
+++ b/challenges/medium/74_n_body_force/starter/starter.jax.py
@@ -0,0 +1,9 @@
+import jax
+import jax.numpy as jnp
+
+
+# positions, masses are tensors on GPU
+@jax.jit
+def solve(positions: jax.Array, masses: jax.Array, N: int) -> jax.Array:
+ # return output tensor directly
+ pass
diff --git a/challenges/medium/74_n_body_force/starter/starter.mojo b/challenges/medium/74_n_body_force/starter/starter.mojo
new file mode 100644
index 00000000..f5d974c7
--- /dev/null
+++ b/challenges/medium/74_n_body_force/starter/starter.mojo
@@ -0,0 +1,7 @@
+from gpu.host import DeviceContext
+from memory import UnsafePointer
+
+# positions, masses, forces are device pointers
+@export
+def solve(positions: UnsafePointer[Float32], masses: UnsafePointer[Float32], forces: UnsafePointer[Float32], N: Int32):
+ pass
diff --git a/challenges/medium/74_n_body_force/starter/starter.pytorch.py b/challenges/medium/74_n_body_force/starter/starter.pytorch.py
new file mode 100644
index 00000000..1998585f
--- /dev/null
+++ b/challenges/medium/74_n_body_force/starter/starter.pytorch.py
@@ -0,0 +1,6 @@
+import torch
+
+
+# positions, masses, forces are tensors on the GPU
+def solve(positions: torch.Tensor, masses: torch.Tensor, forces: torch.Tensor, N: int):
+ pass
diff --git a/challenges/medium/74_n_body_force/starter/starter.triton.py b/challenges/medium/74_n_body_force/starter/starter.triton.py
new file mode 100644
index 00000000..0ba5c02f
--- /dev/null
+++ b/challenges/medium/74_n_body_force/starter/starter.triton.py
@@ -0,0 +1,8 @@
+import torch
+import triton
+import triton.language as tl
+
+
+# positions, masses, forces are tensors on the GPU
+def solve(positions: torch.Tensor, masses: torch.Tensor, forces: torch.Tensor, N: int):
+ pass