
Commit 1033d3a

update adamax optimizer question to new format
1 parent 5f21241 commit 1033d3a

File tree

8 files changed (+108, -122 lines)


Problems/X_adamax_optimizer/solution.py

Lines changed: 0 additions & 122 deletions
This file was deleted.
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
Implement the Adamax optimizer update step function. Your function should take the current parameter value, the gradient, the first moment estimate, the infinity norm estimate, and the current timestep as inputs, and return the updated parameter value together with the updated estimates. The function should handle both scalar and array inputs and apply bias correction to the first moment estimate.
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
{
    "input": "parameter = 1.0, grad = 0.1, m = 0.0, u = 0.0, t = 1",
    "output": "(0.998, 0.01, 0.1)",
    "reasoning": "The Adamax optimizer updates the parameter, the first moment estimate (m), and the infinity norm estimate (u), applying bias correction to the first moment before the parameter step. With parameter=1.0, grad=0.1, m=0.0, u=0.0, and t=1, the updated m is 0.01, the updated u is 0.1, and the updated parameter is 0.998."
}
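To make the reasoning above easy to verify, here is a short, self-contained arithmetic check of the example. It assumes the default hyperparameters used by the solution added in this commit (learning_rate=0.002, beta1=0.9, beta2=0.999, epsilon=1e-8) and is illustrative only.

# Step-by-step check of the worked example (illustrative only).
parameter, grad, m, u, t = 1.0, 0.1, 0.0, 0.0, 1
lr, beta1, beta2, eps = 0.002, 0.9, 0.999, 1e-8

m = beta1 * m + (1 - beta1) * grad       # 0.9*0.0 + 0.1*0.1   = 0.01
u = max(beta2 * u, abs(grad))            # max(0.999*0.0, 0.1) = 0.1
m_hat = m / (1 - beta1 ** t)             # 0.01 / (1 - 0.9)    = 0.1
parameter -= lr * m_hat / (u + eps)      # 1.0 - 0.002*0.1/0.1 ≈ 0.998

print(round(parameter, 5), round(m, 5), round(u, 5))  # 0.998 0.01 0.1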
Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
{
    "id": "X",
    "title": "Adamax Optimizer",
    "difficulty": "easy",
    "category": "Deep Learning",
    "video": "",
    "likes": "0",
    "dislikes": "0",
    "contributor": [
        {
            "profile_link": "https://github.com/mavleo96",
            "name": "Vijayabharathi Murugan"
        }
    ],
    "tinygrad_difficulty": null,
    "pytorch_difficulty": null
}
Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
import numpy as np

def adamax_optimizer(parameter, grad, m, u, t, learning_rate=0.002, beta1=0.9, beta2=0.999, epsilon=1e-8):
    """
    Update parameters using the Adamax optimizer.

    Adamax is a variant of Adam based on the infinity norm: it tracks an
    exponentially weighted infinity norm (the maximum of past absolute gradients)
    instead of an exponential moving average of squared gradients.

    Args:
        parameter: Current parameter value
        grad: Current gradient
        m: First moment estimate
        u: Infinity norm estimate
        t: Current timestep
        learning_rate: Learning rate (default=0.002)
        beta1: First moment decay rate (default=0.9)
        beta2: Infinity norm decay rate (default=0.999)
        epsilon: Small constant for numerical stability (default=1e-8)

    Returns:
        tuple: (updated_parameter, updated_m, updated_u)
    """
    assert learning_rate > 0, "Learning rate must be positive"
    assert 0 <= beta1 < 1, "Beta1 must be between 0 and 1"
    assert 0 <= beta2 < 1, "Beta2 must be between 0 and 1"
    assert epsilon > 0, "Epsilon must be positive"
    assert np.all(np.asarray(m) >= 0), "m must be non-negative"
    assert np.all(np.asarray(u) >= 0), "u must be non-negative"

    # Update biased first moment estimate
    m = beta1 * m + (1 - beta1) * grad

    # Update the exponentially weighted infinity norm
    u = np.maximum(beta2 * u, np.abs(grad))

    # Compute bias-corrected first moment estimate (u needs no bias correction)
    m_hat = m / (1 - beta1**t)

    # Update the parameter
    update = learning_rate * m_hat / (u + epsilon)
    parameter = parameter - update

    return np.round(parameter, 5), np.round(m, 5), np.round(u, 5)
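As a quick sanity check, the first two test cases added later in this commit can be reproduced by calling the solution directly. The sketch below assumes adamax_optimizer from the solution file above is in scope and relies on the defaults matching the hyperparameters passed in the tests (learning_rate=0.002, beta1=0.9, beta2=0.999, epsilon=1e-8).

import numpy as np

# Assumes adamax_optimizer from the solution above is in scope.
# Scalar inputs (first test case): expected (0.98178, 0.91, 0.999).
print(adamax_optimizer(1.0, 0.1, 1.0, 1.0, 1))

# Array inputs are handled element-wise (second test case): expected
# (array([0.98178, 1.98158]), array([0.91, 0.92]), array([0.999, 0.999])).
print(adamax_optimizer(np.array([1.0, 2.0]), np.array([0.1, 0.2]),
                       np.array([1.0, 1.0]), np.array([1.0, 1.0]), 1))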
Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
import numpy as np

def adamax_optimizer(parameter, grad, m, u, t, learning_rate=0.002, beta1=0.9, beta2=0.999, epsilon=1e-8):
    """
    Update parameters using the Adamax optimizer.

    Adamax is a variant of Adam based on the infinity norm: it tracks an
    exponentially weighted infinity norm (the maximum of past absolute gradients)
    instead of an exponential moving average of squared gradients.

    Args:
        parameter: Current parameter value
        grad: Current gradient
        m: First moment estimate
        u: Infinity norm estimate
        t: Current timestep
        learning_rate: Learning rate (default=0.002)
        beta1: First moment decay rate (default=0.9)
        beta2: Infinity norm decay rate (default=0.999)
        epsilon: Small constant for numerical stability (default=1e-8)

    Returns:
        tuple: (updated_parameter, updated_m, updated_u)
    """
    # Your code here
    return np.round(parameter, 5), np.round(m, 5), np.round(u, 5)
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
[
    {
        "test": "print(adamax_optimizer(1., 0.1, 1., 1., 1, 0.002, 0.9, 0.999, 1e-8))",
        "expected_output": "(0.98178, 0.91, 0.999)"
    },
    {
        "test": "print(adamax_optimizer(np.array([1., 2.]), np.array([0.1, 0.2]), np.array([1., 1.]), np.array([1., 1.]), 1, 0.002, 0.9, 0.999, 1e-8))",
        "expected_output": "(array([0.98178, 1.98158]), array([0.91, 0.92]), array([0.999, 0.999]))"
    },
    {
        "test": "print(adamax_optimizer(np.array([1., 2.]), np.array([0.0, 0.0]), np.array([0.1, 0.1]), np.array([0., 0.]), 1, 0.002, 0.9, 0.999, 1e-8))",
        "expected_output": "(array([-179999., -179998.]), array([0.09, 0.09]), array([0., 0.]))"
    },
    {
        "test": "print(adamax_optimizer(1., 0.1, 1., 1., 1, 0.002, 0., 0., 1e-8))",
        "expected_output": "(0.998, 0.1, 0.1)"
    }
]
