
Commit 6e18f91

update nesterov optimizer to new format
1 parent f363442 commit 6e18f91

File tree

8 files changed: +94 −99 lines


Problems/X_nag_optimizer/solution.py

Lines changed: 0 additions & 98 deletions
This file was deleted.
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+Implement the Nesterov Accelerated Gradient (NAG) optimizer update step function. Your function should take the current parameter value, gradient function, and velocity as inputs, and return the updated parameter value and new velocity. The function should use the "look-ahead" approach, where momentum is applied before computing the gradient, and should handle both scalar and array inputs.
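
Written out, the look-ahead rule this description asks for is (a sketch; here μ is the momentum coefficient and η the learning rate, which the solution added later in this commit defaults to 0.9 and 0.01):

$$
\theta_{\text{look-ahead}} = \theta - \mu v, \qquad
v_{\text{new}} = \mu v + \eta \,\nabla f(\theta_{\text{look-ahead}}), \qquad
\theta_{\text{new}} = \theta - v_{\text{new}}
$$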
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+{
+    "input": "parameter = 1.0, grad_fn = lambda x: x, velocity = 0.1",
+    "output": "(0.9009, 0.0991)",
+    "reasoning": "The Nesterov Accelerated Gradient optimizer computes updated values for the parameter and velocity using a look-ahead approach. With input values parameter=1.0, grad_fn=lambda x: x, and velocity=0.1, the updated parameter becomes 0.9009 and the updated velocity becomes 0.0991."
+}
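
To see where 0.9009 and 0.0991 come from, here is a quick trace; the hyperparameters learning_rate=0.01 and momentum=0.9 are an assumption, matching the defaults used by the solution in this commit:

```python
# Trace of the example: parameter=1.0, grad_fn = lambda x: x (identity), velocity=0.1
parameter, velocity = 1.0, 0.1
learning_rate, momentum = 0.01, 0.9   # assumed defaults, matching the solution in this commit

look_ahead = parameter - momentum * velocity            # 1.0 - 0.9 * 0.1 = 0.91
grad = look_ahead                                        # identity gradient: 0.91
velocity = momentum * velocity + learning_rate * grad    # 0.09 + 0.0091 = 0.0991
parameter = parameter - velocity                         # 1.0 - 0.0991 = 0.9009
print(round(parameter, 5), round(velocity, 5))           # 0.9009 0.0991
```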

Problems/X_nag_optimizer/learn.md renamed to questions/x_nesterov-accelerated-gradient-optimizer/learn.md

Lines changed: 1 addition & 1 deletion
@@ -54,7 +54,7 @@ def grad_func(parameter):
     pass
 
 parameter = 1.0
-velocity = 0.0
+velocity = 0.1
 
 new_param, new_velocity = nag_optimizer(parameter, grad_func, velocity)
 ```
Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+{
+    "id": "X",
+    "title": "Nesterov Accelerated Gradient Optimizer",
+    "difficulty": "easy",
+    "category": "Deep Learning",
+    "video": "",
+    "likes": "0",
+    "dislikes": "0",
+    "contributor": [
+        {
+            "profile_link": "https://github.com/mavleo96",
+            "name": "Vijayabharathi Murugan"
+        }
+    ],
+    "tinygrad_difficulty": null,
+    "pytorch_difficulty": null
+}
Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
+import numpy as np
+
+def nag_optimizer(parameter, grad_fn, velocity, learning_rate=0.01, momentum=0.9):
+    """
+    Update parameters using the Nesterov Accelerated Gradient optimizer.
+    Uses a "look-ahead" approach to improve convergence by applying momentum before computing the gradient.
+
+    Args:
+        parameter: Current parameter value
+        grad_fn: Function that computes the gradient at a given position
+        velocity: Current velocity (momentum term)
+        learning_rate: Learning rate (default=0.01)
+        momentum: Momentum coefficient (default=0.9)
+
+    Returns:
+        tuple: (updated_parameter, updated_velocity)
+    """
+    assert 0 <= momentum < 1, "Momentum must be between 0 and 1"
+    assert learning_rate > 0, "Learning rate must be positive"
+
+    # Compute look-ahead position
+    look_ahead = parameter - momentum * velocity
+
+    # Compute gradient at look-ahead position
+    grad = grad_fn(look_ahead)
+
+    # Update velocity using momentum and gradient
+    velocity = momentum * velocity + learning_rate * grad
+
+    # Update parameters using the new velocity
+    parameter = parameter - velocity
+
+    return np.round(parameter, 5), np.round(velocity, 5)
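
For reference, calling the solution above (assuming nag_optimizer is in scope) with the example values and with an array input looks roughly like this; the expected outputs are taken from the example entry earlier in this diff and from the test cases later in it:

```python
import numpy as np

# Scalar input, as in the example: parameter=1.0, identity gradient, velocity=0.1
print(nag_optimizer(1.0, lambda x: x, 0.1))
# expected, per the example above: (0.9009, 0.0991)

# Array input, mirroring one of the test cases in this commit
def gradient_function(x):
    # toy gradient used by the tests: x minus its index vector
    return x - np.arange(len(x))

print(nag_optimizer(np.array([1.0, 2.0]), gradient_function, np.array([0.5, 1.0]), 0.01, 0.9))
# expected, per the tests below: (array([0.5445, 1.099]), array([0.4555, 0.901]))
```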
Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
+import numpy as np
+
+def nag_optimizer(parameter, grad_fn, velocity, learning_rate=0.01, momentum=0.9):
+    """
+    Update parameters using the Nesterov Accelerated Gradient optimizer.
+    Uses a "look-ahead" approach to improve convergence by applying momentum before computing the gradient.
+
+    Args:
+        parameter: Current parameter value
+        grad_fn: Function that computes the gradient at a given position
+        velocity: Current velocity (momentum term)
+        learning_rate: Learning rate (default=0.01)
+        momentum: Momentum coefficient (default=0.9)
+
+    Returns:
+        tuple: (updated_parameter, updated_velocity)
+    """
+    # Your code here
+    return np.round(parameter, 5), np.round(velocity, 5)
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+[
+    {
+        "test": "import numpy as np\ndef gradient_function(x):\n    if isinstance(x, np.ndarray):\n        n = len(x)\n        return x - np.arange(n)\n    else:\n        return x - 0\nprint(nag_optimizer(1., gradient_function, 0.5, 0.01, 0.9))",
+        "expected_output": "(0.5445, 0.4555)"
+    },
+    {
+        "test": "import numpy as np\ndef gradient_function(x):\n    if isinstance(x, np.ndarray):\n        n = len(x)\n        return x - np.arange(n)\n    else:\n        return x - 0\nprint(nag_optimizer(np.array([1.0, 2.0]), gradient_function, np.array([0.5, 1.0]), 0.01, 0.9))",
+        "expected_output": "(array([0.5445, 1.099]), array([0.4555, 0.901]))"
+    },
+    {
+        "test": "import numpy as np\ndef gradient_function(x):\n    if isinstance(x, np.ndarray):\n        n = len(x)\n        return x - np.arange(n)\n    else:\n        return x - 0\nprint(nag_optimizer(np.array([1.0, 2.0]), gradient_function, np.array([0.5, 1.0]), 0.01, 0.0))",
+        "expected_output": "(array([0.99, 1.99]), array([0.01, 0.01]))"
+    },
+    {
+        "test": "import numpy as np\ndef gradient_function(x):\n    if isinstance(x, np.ndarray):\n        n = len(x)\n        return x - np.arange(n)\n    else:\n        return x - 0\nprint(nag_optimizer(0.9, gradient_function, 1, 0.01, 0.9))",
+        "expected_output": "(0.0, 0.9)"
+    }
+]
