
Commit 1033d3a

update adamax optimizer question to new format
1 parent 5f21241 commit 1033d3a

File tree

8 files changed (+108, -122 lines)


Problems/X_adamax_optimizer/solution.py

Lines changed: 0 additions & 122 deletions
This file was deleted.
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
Implement the Adamax optimizer update step function. Your function should take the current parameter value, the gradient, the first moment estimate, the infinity norm estimate, and the current timestep as inputs, and return the updated parameter value together with the updated estimates. The function should handle both scalar and array inputs and apply bias correction to the first moment estimate.
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
{
    "input": "parameter = 1.0, grad = 0.1, m = 0.0, u = 0.0, t = 1",
    "output": "(0.998, 0.01, 0.1)",
    "reasoning": "The Adamax optimizer updates the parameter, the first moment estimate (m), and the infinity norm estimate (u), applying bias correction to the first moment before the parameter step. With parameter=1.0, grad=0.1, m=0.0, u=0.0, and t=1, the updated m is 0.01, the updated u is 0.1, and the updated parameter is 0.998."
}
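To make the reasoning above easy to verify, here is a short, self-contained arithmetic check of the example. It assumes the default hyperparameters used by the solution added in this commit (learning_rate=0.002, beta1=0.9, beta2=0.999, epsilon=1e-8) and is illustrative only.

# Step-by-step check of the worked example (illustrative only).
parameter, grad, m, u, t = 1.0, 0.1, 0.0, 0.0, 1
lr, beta1, beta2, eps = 0.002, 0.9, 0.999, 1e-8

m = beta1 * m + (1 - beta1) * grad       # 0.9*0.0 + 0.1*0.1   = 0.01
u = max(beta2 * u, abs(grad))            # max(0.999*0.0, 0.1) = 0.1
m_hat = m / (1 - beta1 ** t)             # 0.01 / (1 - 0.9)    = 0.1
parameter -= lr * m_hat / (u + eps)      # 1.0 - 0.002*0.1/0.1 ≈ 0.998

print(round(parameter, 5), round(m, 5), round(u, 5))  # 0.998 0.01 0.1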
Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
{
    "id": "X",
    "title": "Adamax Optimizer",
    "difficulty": "easy",
    "category": "Deep Learning",
    "video": "",
    "likes": "0",
    "dislikes": "0",
    "contributor": [
        {
            "profile_link": "https://github.com/mavleo96",
            "name": "Vijayabharathi Murugan"
        }
    ],
    "tinygrad_difficulty": null,
    "pytorch_difficulty": null
}
Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
import numpy as np

def adamax_optimizer(parameter, grad, m, u, t, learning_rate=0.002, beta1=0.9, beta2=0.999, epsilon=1e-8):
    """
    Update parameters using the Adamax optimizer.

    Adamax is a variant of Adam based on the infinity norm: it tracks an
    exponentially weighted infinity norm (the maximum of past absolute gradients)
    instead of an exponential moving average of squared gradients.

    Args:
        parameter: Current parameter value
        grad: Current gradient
        m: First moment estimate
        u: Infinity norm estimate
        t: Current timestep
        learning_rate: Learning rate (default=0.002)
        beta1: First moment decay rate (default=0.9)
        beta2: Infinity norm decay rate (default=0.999)
        epsilon: Small constant for numerical stability (default=1e-8)

    Returns:
        tuple: (updated_parameter, updated_m, updated_u)
    """
    assert learning_rate > 0, "Learning rate must be positive"
    assert 0 <= beta1 < 1, "Beta1 must be between 0 and 1"
    assert 0 <= beta2 < 1, "Beta2 must be between 0 and 1"
    assert epsilon > 0, "Epsilon must be positive"
    assert np.all(np.asarray(m) >= 0), "m must be non-negative"
    assert np.all(np.asarray(u) >= 0), "u must be non-negative"

    # Update biased first moment estimate
    m = beta1 * m + (1 - beta1) * grad

    # Update the exponentially weighted infinity norm
    u = np.maximum(beta2 * u, np.abs(grad))

    # Compute bias-corrected first moment estimate (u needs no bias correction)
    m_hat = m / (1 - beta1**t)

    # Update the parameter
    update = learning_rate * m_hat / (u + epsilon)
    parameter = parameter - update

    return np.round(parameter, 5), np.round(m, 5), np.round(u, 5)
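As a quick sanity check, the first two test cases added later in this commit can be reproduced by calling the solution directly. The sketch below assumes adamax_optimizer from the solution file above is in scope and relies on the defaults matching the hyperparameters passed in the tests (learning_rate=0.002, beta1=0.9, beta2=0.999, epsilon=1e-8).

import numpy as np

# Assumes adamax_optimizer from the solution above is in scope.
# Scalar inputs (first test case): expected (0.98178, 0.91, 0.999).
print(adamax_optimizer(1.0, 0.1, 1.0, 1.0, 1))

# Array inputs are handled element-wise (second test case): expected
# (array([0.98178, 1.98158]), array([0.91, 0.92]), array([0.999, 0.999])).
print(adamax_optimizer(np.array([1.0, 2.0]), np.array([0.1, 0.2]),
                       np.array([1.0, 1.0]), np.array([1.0, 1.0]), 1))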
Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
import numpy as np

def adamax_optimizer(parameter, grad, m, u, t, learning_rate=0.002, beta1=0.9, beta2=0.999, epsilon=1e-8):
    """
    Update parameters using the Adamax optimizer.

    Adamax is a variant of Adam based on the infinity norm: it tracks an
    exponentially weighted infinity norm (the maximum of past absolute gradients)
    instead of an exponential moving average of squared gradients.

    Args:
        parameter: Current parameter value
        grad: Current gradient
        m: First moment estimate
        u: Infinity norm estimate
        t: Current timestep
        learning_rate: Learning rate (default=0.002)
        beta1: First moment decay rate (default=0.9)
        beta2: Infinity norm decay rate (default=0.999)
        epsilon: Small constant for numerical stability (default=1e-8)

    Returns:
        tuple: (updated_parameter, updated_m, updated_u)
    """
    # Your code here
    return np.round(parameter, 5), np.round(m, 5), np.round(u, 5)
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
[
    {
        "test": "print(adamax_optimizer(1., 0.1, 1., 1., 1, 0.002, 0.9, 0.999, 1e-8))",
        "expected_output": "(0.98178, 0.91, 0.999)"
    },
    {
        "test": "print(adamax_optimizer(np.array([1., 2.]), np.array([0.1, 0.2]), np.array([1., 1.]), np.array([1., 1.]), 1, 0.002, 0.9, 0.999, 1e-8))",
        "expected_output": "(array([0.98178, 1.98158]), array([0.91, 0.92]), array([0.999, 0.999]))"
    },
    {
        "test": "print(adamax_optimizer(np.array([1., 2.]), np.array([0.0, 0.0]), np.array([0.1, 0.1]), np.array([0., 0.]), 1, 0.002, 0.9, 0.999, 1e-8))",
        "expected_output": "(array([-179999., -179998.]), array([0.09, 0.09]), array([0., 0.]))"
    },
    {
        "test": "print(adamax_optimizer(1., 0.1, 1., 1., 1, 0.002, 0., 0., 1e-8))",
        "expected_output": "(0.998, 0.1, 0.1)"
    }
]
