From c697e81ab5a11eb3903a79a45875f6d21795cc0f Mon Sep 17 00:00:00 2001 From: Sunny <99381187+sunnyc0206@users.noreply.github.com> Date: Wed, 22 Apr 2026 20:48:32 +0530 Subject: [PATCH 1/2] Update spectral radius logging in README --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index afc5517..08ac50f 100644 --- a/README.md +++ b/README.md @@ -91,8 +91,9 @@ out = model.generate(ids, max_new_tokens=8, n_loops=8) print(f"[{attn_type.upper()}] Generated shape: {out.shape}") A = model.recurrent.injection.get_A() +rho = torch.linalg.eigvals(A).abs().max().item() print( - f"[{attn_type.upper()}] Spectral radius ρ(A) max: {A.max().item():.4f} (must be < 1)" + f"[{attn_type.upper()}] Spectral radius ρ(A): {rho:.4f} (must be < 1)" ) ``` From 7eb3a5bc9a96467922ba7b96b7b401fae25a2cc4 Mon Sep 17 00:00:00 2001 From: Sunny <99381187+sunnyc0206@users.noreply.github.com> Date: Thu, 23 Apr 2026 10:15:34 +0530 Subject: [PATCH 2/2] Add assertion to check for NaN loss --- examples/moda_example.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/moda_example.py b/examples/moda_example.py index bffc92e..c9a8ff9 100644 --- a/examples/moda_example.py +++ b/examples/moda_example.py @@ -39,6 +39,7 @@ labels = torch.randint(0, cfg.vocab_size, (B, T), device=device) logits, loss = model(input_ids, labels) + assert not torch.isnan(loss), "Loss is NaN!" assert logits.shape == (B, T, cfg.vocab_size) print(f"Logits shape : {logits.shape}") print(f"Loss (LM + balance): {loss.item():.4f}")