added @ 161

moe18 · moe18 · commit 7368fabf7462 · 2025-07-10T10:41:02.000-04:00
diff --git a/questions/161_exponential-weighted-average-of-rewards/description.md b/questions/161_exponential-weighted-average-of-rewards/description.md
@@ -0,0 +1,5 @@
+Given an initial value $Q_1$, a list of $k$ observed rewards $R_1, R_2, \ldots, R_k$, and a step size $\alpha$, implement a function to compute the exponentially weighted average as:
+
+$$(1-\alpha)^k Q_1 + \sum_{i=1}^k \alpha (1-\alpha)^{k-i} R_i$$
+
+This weighting gives more importance to recent rewards, while the influence of the initial estimate $Q_1$ decays over time. Do **not** use running/incremental updates; instead, compute directly from the formula. (This is called the *exponential recency-weighted average*.)
diff --git a/questions/161_exponential-weighted-average-of-rewards/example.json b/questions/161_exponential-weighted-average-of-rewards/example.json
@@ -0,0 +1,5 @@
+{
+  "input": "Q1 = 2.0\nrewards = [5.0, 9.0]\nalpha = 0.3\nresult = exp_weighted_average(Q1, rewards, alpha)\nprint(round(result, 4))",
+  "output": "4.73",
+  "reasoning": "Here, k=2, so the result is: (1-0.3)^2*2.0 + 0.3*(1-0.3)^1*5.0 + 0.3*(1-0.3)^0*9.0 = 0.49*2.0 + 0.21*5.0 + 0.3*9.0 = 0.98 + 1.05 + 2.7 = 4.73"
+}
diff --git a/questions/161_exponential-weighted-average-of-rewards/learn.md b/questions/161_exponential-weighted-average-of-rewards/learn.md
@@ -0,0 +1,3 @@
+### Exponential Recency-Weighted Average
+
+When the environment is nonstationary, it is better to give more weight to recent rewards. The formula $$(1-\alpha)^k Q_1 + \sum_{i=1}^k \alpha (1-\alpha)^{k-i} R_i$$ computes the value estimate as a weighted average (the weights sum to 1) in which the influence of older rewards and of the initial estimate $Q_1$ decays exponentially. The parameter $\alpha$ controls how quickly old information is forgotten: a higher $\alpha$ gives more weight to new rewards.
diff --git a/questions/161_exponential-weighted-average-of-rewards/meta.json b/questions/161_exponential-weighted-average-of-rewards/meta.json
@@ -0,0 +1,15 @@
+{
+  "id": "161",
+  "title": "Exponential Weighted Average of Rewards",
+  "difficulty": "medium",
+  "category": "Reinforcement Learning",
+  "video": "",
+  "likes": "0",
+  "dislikes": "0",
+  "contributor": [
+    {
+      "profile_link": "https://github.com/moe18",
+      "name": "Moe Chabot"
+    }
+  ]
+}
diff --git a/questions/161_exponential-weighted-average-of-rewards/solution.py b/questions/161_exponential-weighted-average-of-rewards/solution.py
@@ -0,0 +1,6 @@
+def exp_weighted_average(Q1, rewards, alpha):
+    k = len(rewards)
+    value = (1 - alpha) ** k * Q1
+    for i, Ri in enumerate(rewards):
+        value += alpha * (1 - alpha) ** (k - i - 1) * Ri
+    return value
diff --git a/questions/161_exponential-weighted-average-of-rewards/starter_code.py b/questions/161_exponential-weighted-average-of-rewards/starter_code.py
@@ -0,0 +1,9 @@
+def exp_weighted_average(Q1, rewards, alpha):
+    """
+    Q1: float, initial estimate
+    rewards: list or array of rewards, R_1 to R_k
+    alpha: float, step size (0 < alpha <= 1)
+    Returns: float, exponentially weighted average after k rewards
+    """
+    # Your code here
+    pass
diff --git a/questions/161_exponential-weighted-average-of-rewards/tests.json b/questions/161_exponential-weighted-average-of-rewards/tests.json
@@ -0,0 +1,10 @@
+[
+  {
+    "test": "Q1 = 10.0\nrewards = [4.0, 7.0, 13.0]\nalpha = 0.5\nprint(round(exp_weighted_average(Q1, rewards, alpha), 4))",
+    "expected_output": "10.0"
+  },
+  {
+    "test": "Q1 = 0.0\nrewards = [1.0, 1.0, 1.0, 1.0]\nalpha = 0.1\nprint(round(exp_weighted_average(Q1, rewards, alpha), 4))",
+    "expected_output": "0.3439"
+  }
+]
diff --git a/utils/convert_single_question.py b/utils/convert_single_question.py
@@ -27,8 +27,41 @@
 from typing import Any, Dict
 
 # ── 1️⃣  EDIT YOUR QUESTION HERE ────────────────────────────────────────────
-QUESTION_DICT: Dict[str, Any] = {
-  "id":'158',
+QUESTION_DICT: Dict[str, Any] ={
+    "id": "161",
+  "video": "",
+  "likes": "0",
+  "dislikes": "0",
+  "contributor": [
+    {
+      "profile_link": "https://github.com/moe18",
+      "name": "Moe Chabot"
+    }
+  ],
+   
+    "title": "Exponential Weighted Average of Rewards",
+    "description": "Given an initial value $Q_1$, a list of $k$ observed rewards $R_1, R_2, \\ldots, R_k$, and a step size $\\alpha$, implement a function to compute the exponentially weighted average as:\n\n$$(1-\\alpha)^k Q_1 + \\sum_{i=1}^k \\alpha (1-\\alpha)^{k-i} R_i$$\n\nThis weighting gives more importance to recent rewards, while the influence of the initial estimate $Q_1$ decays over time. Do **not** use running/incremental updates; instead, compute directly from the formula. (This is called the *exponential recency-weighted average*.)",
+    "category": "Reinforcement Learning",
+    "difficulty": "medium",
+    "starter_code": "def exp_weighted_average(Q1, rewards, alpha):\n    \"\"\"\n    Q1: float, initial estimate\n    rewards: list or array of rewards, R_1 to R_k\n    alpha: float, step size (0 < alpha <= 1)\n    Returns: float, exponentially weighted average after k rewards\n    \"\"\"\n    # Your code here\n    pass\n",
+    "solution": "def exp_weighted_average(Q1, rewards, alpha):\n    k = len(rewards)\n    value = (1 - alpha) ** k * Q1\n    for i, Ri in enumerate(rewards):\n        value += alpha * (1 - alpha) ** (k - i - 1) * Ri\n    return value",
+    "test_cases": [
+      {
+        "test": "Q1 = 10.0\nrewards = [4.0, 7.0, 13.0]\nalpha = 0.5\nprint(round(exp_weighted_average(Q1, rewards, alpha), 4))",
+        "expected_output": "10.0"
+      },
+      {
+        "test": "Q1 = 0.0\nrewards = [1.0, 1.0, 1.0, 1.0]\nalpha = 0.1\nprint(round(exp_weighted_average(Q1, rewards, alpha), 4))",
+        "expected_output": "0.3439"
+      }
+    ],
+    "example": {
+      "input": "Q1 = 2.0\nrewards = [5.0, 9.0]\nalpha = 0.3\nresult = exp_weighted_average(Q1, rewards, alpha)\nprint(round(result, 4))",
+      "output": "4.73",
+      "reasoning": "Here, k=2, so the result is: (1-0.3)^2*2.0 + 0.3*(1-0.3)^1*5.0 + 0.3*(1-0.3)^0*9.0 = 0.49*2.0 + 0.21*5.0 + 0.3*9.0 = 0.98 + 1.05 + 2.7 = 4.73"
+    },
+    "learn_section": "### Exponential Recency-Weighted Average\n\nWhen the environment is nonstationary, it is better to give more weight to recent rewards. The formula $$(1-\\alpha)^k Q_1 + \\sum_{i=1}^k \\alpha (1-\\alpha)^{k-i} R_i$$ computes the value estimate as a weighted average (the weights sum to 1) in which the influence of older rewards and of the initial estimate $Q_1$ decays exponentially. The parameter $\\alpha$ controls how quickly old information is forgotten: a higher $\\alpha$ gives more weight to new rewards."
+  }
 
 
 

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+### Exponential Recency-Weighted Average`
	`2`	`+`
	`3`	`+When the environment is nonstationary, it is better to give more weight to recent rewards. The formula $$(1-\alpha)^k Q_1 + \sum_{i=1}^k \alpha (1-\alpha)^{k-i} R_i$$ computes the expected value by exponentially decaying the influence of old rewards and the initial estimate. The parameter $\alpha$ controls how quickly old information is forgotten: higher $\alpha$ gives more weight to new rewards.`