From 55aa0a97b1f8279d0f408f6e67634ada22572f3e Mon Sep 17 00:00:00 2001 From: komaksym Date: Mon, 29 Sep 2025 11:22:43 +0200 Subject: [PATCH 01/10] init new question --- .../180_gradient_clipping/description.md | 3 ++ questions/180_gradient_clipping/example.json | 5 ++ questions/180_gradient_clipping/learn.md | 47 +++++++++++++++++++ questions/180_gradient_clipping/meta.json | 12 +++++ .../180_gradient_clipping/pytorch/solution.py | 2 + .../pytorch/starter_code.py | 2 + .../180_gradient_clipping/pytorch/tests.json | 6 +++ questions/180_gradient_clipping/solution.py | 3 ++ .../180_gradient_clipping/starter_code.py | 4 ++ questions/180_gradient_clipping/tests.json | 6 +++ .../tinygrad/solution.py | 2 + .../tinygrad/starter_code.py | 2 + .../180_gradient_clipping/tinygrad/tests.json | 6 +++ 13 files changed, 100 insertions(+) create mode 100644 questions/180_gradient_clipping/description.md create mode 100644 questions/180_gradient_clipping/example.json create mode 100644 questions/180_gradient_clipping/learn.md create mode 100644 questions/180_gradient_clipping/meta.json create mode 100644 questions/180_gradient_clipping/pytorch/solution.py create mode 100644 questions/180_gradient_clipping/pytorch/starter_code.py create mode 100644 questions/180_gradient_clipping/pytorch/tests.json create mode 100644 questions/180_gradient_clipping/solution.py create mode 100644 questions/180_gradient_clipping/starter_code.py create mode 100644 questions/180_gradient_clipping/tests.json create mode 100644 questions/180_gradient_clipping/tinygrad/solution.py create mode 100644 questions/180_gradient_clipping/tinygrad/starter_code.py create mode 100644 questions/180_gradient_clipping/tinygrad/tests.json diff --git a/questions/180_gradient_clipping/description.md b/questions/180_gradient_clipping/description.md new file mode 100644 index 00000000..684dbcd6 --- /dev/null +++ b/questions/180_gradient_clipping/description.md @@ -0,0 +1,3 @@ +## Problem + +Write a concise problem description here. diff --git a/questions/180_gradient_clipping/example.json b/questions/180_gradient_clipping/example.json new file mode 100644 index 00000000..4e7fdd99 --- /dev/null +++ b/questions/180_gradient_clipping/example.json @@ -0,0 +1,5 @@ +{ + "input": "...", + "output": "...", + "reasoning": "Explain why the output follows from the input." +} diff --git a/questions/180_gradient_clipping/learn.md b/questions/180_gradient_clipping/learn.md new file mode 100644 index 00000000..31c0cec5 --- /dev/null +++ b/questions/180_gradient_clipping/learn.md @@ -0,0 +1,47 @@ +## Solution Explanation + +Add intuition, math, and step-by-step reasoning here. + +### Writing Mathematical Expressions with LaTeX + +This editor supports LaTeX for rendering mathematical equations and expressions. Here's how you can use it: + +1. **Inline Math**: + - Wrap your expression with single `$` symbols. + - Example: `$E = mc^2$` → Renders as: ( $E = mc^2$ ) + +2. **Block Math**: + - Wrap your expression with double `$$` symbols. + - Example: + ``` + $$ + \int_a^b f(x) \, dx + $$ + ``` + Renders as: + $$ + \int_a^b f(x) \, dx + $$ + +3. **Math Functions**: + - Use standard LaTeX functions like `\frac`, `\sqrt`, `\sum`, etc. + - Examples: + - `$\frac{a}{b}$` → ( $\frac{a}{b}$ ) + - `$\sqrt{x}$` → ( $\sqrt{x}$ ) + +4. **Greek Letters and Symbols**: + - Use commands like `\alpha`, `\beta`, etc., for Greek letters. + - Example: `$\alpha + \beta = \gamma$` → ( $\alpha + \beta = \gamma$ ) + +5. 
**Subscripts and Superscripts**: + - Use `_{}` for subscripts and `^{}` for superscripts. + - Examples: + - `$x_i$` → ( $x_i$ ) + - `$x^2$` → ( $x^2$ ) + +6. **Combined Examples**: + - `$\sum_{i=1}^n i^2 = \frac{n(n+1)(2n+1)}{6}$` + Renders as: + $\sum_{i=1}^n i^2 = \frac{n(n+1)(2n+1)}{6}$ + +Feel free to write your own mathematical expressions, and they will be rendered beautifully in the preview! diff --git a/questions/180_gradient_clipping/meta.json b/questions/180_gradient_clipping/meta.json new file mode 100644 index 00000000..9db2e26a --- /dev/null +++ b/questions/180_gradient_clipping/meta.json @@ -0,0 +1,12 @@ +{ + "id": "XXX", + "title": "TITLE GOES HERE", + "difficulty": "medium", + "category": "Machine Learning", + "video": "", + "likes": "0", + "dislikes": "0", + "contributor": [], + "tinygrad_difficulty": "", + "pytorch_difficulty": "" +} diff --git a/questions/180_gradient_clipping/pytorch/solution.py b/questions/180_gradient_clipping/pytorch/solution.py new file mode 100644 index 00000000..9b74bcbd --- /dev/null +++ b/questions/180_gradient_clipping/pytorch/solution.py @@ -0,0 +1,2 @@ +def your_function(...): + ... diff --git a/questions/180_gradient_clipping/pytorch/starter_code.py b/questions/180_gradient_clipping/pytorch/starter_code.py new file mode 100644 index 00000000..d3e5beb5 --- /dev/null +++ b/questions/180_gradient_clipping/pytorch/starter_code.py @@ -0,0 +1,2 @@ +def your_function(...): + pass diff --git a/questions/180_gradient_clipping/pytorch/tests.json b/questions/180_gradient_clipping/pytorch/tests.json new file mode 100644 index 00000000..e4e4b180 --- /dev/null +++ b/questions/180_gradient_clipping/pytorch/tests.json @@ -0,0 +1,6 @@ +[ + { + "test": "print(your_function(...))", + "expected_output": "..." + } +] diff --git a/questions/180_gradient_clipping/solution.py b/questions/180_gradient_clipping/solution.py new file mode 100644 index 00000000..b1ff1c5b --- /dev/null +++ b/questions/180_gradient_clipping/solution.py @@ -0,0 +1,3 @@ +def your_function(...): + # reference implementation + ... diff --git a/questions/180_gradient_clipping/starter_code.py b/questions/180_gradient_clipping/starter_code.py new file mode 100644 index 00000000..564b3118 --- /dev/null +++ b/questions/180_gradient_clipping/starter_code.py @@ -0,0 +1,4 @@ +# Implement your function below. + +def your_function(...): + pass diff --git a/questions/180_gradient_clipping/tests.json b/questions/180_gradient_clipping/tests.json new file mode 100644 index 00000000..e4e4b180 --- /dev/null +++ b/questions/180_gradient_clipping/tests.json @@ -0,0 +1,6 @@ +[ + { + "test": "print(your_function(...))", + "expected_output": "..." + } +] diff --git a/questions/180_gradient_clipping/tinygrad/solution.py b/questions/180_gradient_clipping/tinygrad/solution.py new file mode 100644 index 00000000..9b74bcbd --- /dev/null +++ b/questions/180_gradient_clipping/tinygrad/solution.py @@ -0,0 +1,2 @@ +def your_function(...): + ... 
diff --git a/questions/180_gradient_clipping/tinygrad/starter_code.py b/questions/180_gradient_clipping/tinygrad/starter_code.py new file mode 100644 index 00000000..d3e5beb5 --- /dev/null +++ b/questions/180_gradient_clipping/tinygrad/starter_code.py @@ -0,0 +1,2 @@ +def your_function(...): + pass diff --git a/questions/180_gradient_clipping/tinygrad/tests.json b/questions/180_gradient_clipping/tinygrad/tests.json new file mode 100644 index 00000000..e4e4b180 --- /dev/null +++ b/questions/180_gradient_clipping/tinygrad/tests.json @@ -0,0 +1,6 @@ +[ + { + "test": "print(your_function(...))", + "expected_output": "..." + } +] From ad8e57807cbb5af0e1180ceb2588905c27a0cd1b Mon Sep 17 00:00:00 2001 From: komaksym Date: Mon, 29 Sep 2025 11:32:00 +0200 Subject: [PATCH 02/10] add description --- questions/180_gradient_clipping/description.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/questions/180_gradient_clipping/description.md b/questions/180_gradient_clipping/description.md index 684dbcd6..1f6aa67f 100644 --- a/questions/180_gradient_clipping/description.md +++ b/questions/180_gradient_clipping/description.md @@ -1,3 +1,3 @@ ## Problem -Write a concise problem description here. +Write a Python function `clip_gradients` that takes a numpy array of gradients and a float `max_norm`, and returns a new numpy array where the gradients are clipped so that their L2 norm does not exceed `max_norm`. If the L2 norm of the input gradients is less than or equal to `max_norm`, return the gradients unchanged. If it exceeds `max_norm`, scale all gradients so that their L2 norm equals `max_norm`. Only use standard Python and numpy. The returned array should be of type float and have the same shape as the input. From 16bb095cc2f91ca44357c762bf22b85bce513905 Mon Sep 17 00:00:00 2001 From: komaksym Date: Mon, 29 Sep 2025 11:34:10 +0200 Subject: [PATCH 03/10] add metadata --- questions/180_gradient_clipping/meta.json | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/questions/180_gradient_clipping/meta.json b/questions/180_gradient_clipping/meta.json index 9db2e26a..33c1eb4c 100644 --- a/questions/180_gradient_clipping/meta.json +++ b/questions/180_gradient_clipping/meta.json @@ -1,12 +1,10 @@ { - "id": "XXX", - "title": "TITLE GOES HERE", - "difficulty": "medium", + "id": "180", + "title": "Gradient Clipping (L2 Norm)", + "difficulty": "easy", "category": "Machine Learning", "video": "", "likes": "0", "dislikes": "0", - "contributor": [], - "tinygrad_difficulty": "", - "pytorch_difficulty": "" + "contributor": ["https://github.com/komaksym"] } From 78461e1bc86369dfa7cd4314d9d4a1057e05ba70 Mon Sep 17 00:00:00 2001 From: komaksym Date: Mon, 29 Sep 2025 11:36:32 +0200 Subject: [PATCH 04/10] add learn about problem section --- questions/180_gradient_clipping/learn.md | 88 +++++++++++------------- 1 file changed, 41 insertions(+), 47 deletions(-) diff --git a/questions/180_gradient_clipping/learn.md b/questions/180_gradient_clipping/learn.md index 31c0cec5..2afb4fe7 100644 --- a/questions/180_gradient_clipping/learn.md +++ b/questions/180_gradient_clipping/learn.md @@ -1,47 +1,41 @@ -## Solution Explanation - -Add intuition, math, and step-by-step reasoning here. - -### Writing Mathematical Expressions with LaTeX - -This editor supports LaTeX for rendering mathematical equations and expressions. Here's how you can use it: - -1. **Inline Math**: - - Wrap your expression with single `$` symbols. - - Example: `$E = mc^2$` → Renders as: ( $E = mc^2$ ) - -2. 
**Block Math**:
-   - Wrap your expression with double `$$` symbols.
-   - Example:
-     ```
-     $$
-     \int_a^b f(x) \, dx
-     $$
-     ```
-     Renders as:
-     $$
-     \int_a^b f(x) \, dx
-     $$
-
-3. **Math Functions**:
-   - Use standard LaTeX functions like `\frac`, `\sqrt`, `\sum`, etc.
-   - Examples:
-     - `$\frac{a}{b}$` → ( $\frac{a}{b}$ )
-     - `$\sqrt{x}$` → ( $\sqrt{x}$ )
-
-4. **Greek Letters and Symbols**:
-   - Use commands like `\alpha`, `\beta`, etc., for Greek letters.
-   - Example: `$\alpha + \beta = \gamma$` → ( $\alpha + \beta = \gamma$ )
-
-5. **Subscripts and Superscripts**:
-   - Use `_{}` for subscripts and `^{}` for superscripts.
-   - Examples:
-     - `$x_i$` → ( $x_i$ )
-     - `$x^2$` → ( $x^2$ )
-
-6. **Combined Examples**:
-   - `$\sum_{i=1}^n i^2 = \frac{n(n+1)(2n+1)}{6}$`
-     Renders as:
-     $\sum_{i=1}^n i^2 = \frac{n(n+1)(2n+1)}{6}$
-
-Feel free to write your own mathematical expressions, and they will be rendered beautifully in the preview!
+# **Gradient Clipping**
+
+## **1. Definition**
+Gradient clipping is a technique used in machine learning to prevent the gradients from becoming too large during training, which can destabilize the learning process. It is especially important in training deep neural networks, where gradients can sometimes explode to very large values (the "exploding gradients" problem).
+
+**Gradient clipping** works by scaling the gradients if their norm exceeds a specified threshold (`max_norm`). The most common form is L2-norm clipping, where the entire gradient vector is rescaled so that its L2 norm is at most `max_norm`.
+
+## **2. Why Use Gradient Clipping?**
+* **Stabilizes Training:** Prevents the optimizer from making excessively large updates, which can cause the loss to diverge or become NaN.
+* **Enables Deeper Networks:** Makes it feasible to train deeper or recurrent neural networks, where exploding gradients are more likely.
+* **Improves Convergence:** Helps the model converge more reliably by keeping updates within a reasonable range.
+
+## **3. Gradient Clipping Mechanism**
+Given a gradient vector $g$ and a maximum norm $M$ (`max_norm`), the clipped gradient $g'$ is computed as:
+
+$$
+g' = \begin{cases}
+g & \text{if } \|g\|_2 \leq M \\
+g \cdot \dfrac{M}{\|g\|_2} & \text{otherwise}
+\end{cases}
+$$
+
+Where:
+* $g$: The original gradient vector (numpy array)
+* $M$: The maximum allowed L2 norm (`max_norm`)
+* $\|g\|_2$: The L2 norm of $g$
+* $g'$: The clipped gradient vector
+
+**Example:**
+If $g = [6, 8]$ and $M = 5$:
+* $\|g\|_2 = \sqrt{6^2 + 8^2} = 10$
+* Since $10 > 5$, we scale $g$ by $5/10 = 0.5$, so $g' = [3, 4]$
+
+## **4. Applications of Gradient Clipping**
+Gradient clipping is widely used in training:
+* **Recurrent Neural Networks (RNNs):** To prevent exploding gradients in long sequences.
+* **Deep Neural Networks:** For stable training of very deep architectures.
+* **Reinforcement Learning:** Where gradients can be highly variable.
+* **Any optimization problem** where gradient explosion is a risk.
+
+Gradient clipping is a simple yet powerful tool to ensure stable and effective training in modern machine learning workflows.
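
As a sanity check on the worked example in learn.md — a minimal sketch, not part of the patch series; it just verifies the $[6, 8] \to [3, 4]$ arithmetic with plain numpy:

```python
import numpy as np

# Worked example from learn.md: g = [6, 8], max_norm = 5
g = np.array([6.0, 8.0])
M = 5.0

norm = np.linalg.norm(g)                 # sqrt(6^2 + 8^2) = 10.0
clipped = g * (M / norm) if norm > M else g

print(norm)                              # 10.0
print(clipped)                           # [3. 4.]
print(np.linalg.norm(clipped))           # 5.0 -- L2 norm now equals max_norm
```

The reference implementation added in the next patch applies the same rule.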
From 38af96abb0a6115b5c258b3b379906efe44a9466 Mon Sep 17 00:00:00 2001 From: komaksym Date: Mon, 29 Sep 2025 11:37:12 +0200 Subject: [PATCH 05/10] add solution --- questions/180_gradient_clipping/solution.py | 23 ++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/questions/180_gradient_clipping/solution.py b/questions/180_gradient_clipping/solution.py index b1ff1c5b..2bb651cd 100644 --- a/questions/180_gradient_clipping/solution.py +++ b/questions/180_gradient_clipping/solution.py @@ -1,3 +1,20 @@ -def your_function(...): - # reference implementation - ... +import numpy as np + +def clip_gradients(gradients, max_norm): + """ + Clips the gradients so that their L2 norm does not exceed max_norm. + If the L2 norm is less than or equal to max_norm, returns the gradients unchanged. + Otherwise, scales the gradients so that their L2 norm equals max_norm. + + Args: + gradients (np.ndarray): The input gradients (any shape). + max_norm (float): The maximum allowed L2 norm. + + Returns: + np.ndarray: The clipped gradients, same shape as input. + """ + norm = np.linalg.norm(gradients) + if norm <= max_norm or norm == 0: + return gradients.astype(float) + else: + return (gradients * (max_norm / norm)).astype(float) From 1764d171d0bddd291ebad8978cbdefbc0f563bd8 Mon Sep 17 00:00:00 2001 From: komaksym Date: Mon, 29 Sep 2025 11:40:18 +0200 Subject: [PATCH 06/10] add starter code --- questions/180_gradient_clipping/starter_code.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/questions/180_gradient_clipping/starter_code.py b/questions/180_gradient_clipping/starter_code.py index 564b3118..a9d03cd2 100644 --- a/questions/180_gradient_clipping/starter_code.py +++ b/questions/180_gradient_clipping/starter_code.py @@ -1,4 +1,17 @@ +import numpy as np + # Implement your function below. +def clip_gradients(gradients, max_norm): + """ + Clips the gradients so that their L2 norm does not exceed max_norm. + If the L2 norm is less than or equal to max_norm, returns the gradients unchanged. + Otherwise, scales the gradients so that their L2 norm equals max_norm. + + Args: + gradients (np.ndarray): The input gradients (any shape). + max_norm (float): The maximum allowed L2 norm. -def your_function(...): + Returns: + np.ndarray: The clipped gradients, same shape as input. + """ pass From 06bf9c84577c398c818c688a38515b687c3a7597 Mon Sep 17 00:00:00 2001 From: komaksym Date: Mon, 29 Sep 2025 11:40:38 +0200 Subject: [PATCH 07/10] add example --- questions/180_gradient_clipping/example.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/questions/180_gradient_clipping/example.json b/questions/180_gradient_clipping/example.json index 4e7fdd99..88cdd989 100644 --- a/questions/180_gradient_clipping/example.json +++ b/questions/180_gradient_clipping/example.json @@ -1,5 +1,5 @@ { - "input": "...", - "output": "...", - "reasoning": "Explain why the output follows from the input." + "input": "import numpy as np\ngradients = np.array([3.0, 4.0])\nmax_norm = 5.0\nclipped = clip_gradients(gradients, max_norm)\nprint(clipped)", + "output": "[3. 4.]", + "reasoning": "The L2 norm of [3.0, 4.0] is 5.0, which is equal to max_norm, so the gradients are returned unchanged." 
} From 3eaf7c3aa8a1b2735bd91e236f096efd5584e4ef Mon Sep 17 00:00:00 2001 From: komaksym Date: Mon, 29 Sep 2025 11:41:40 +0200 Subject: [PATCH 08/10] add tests --- questions/180_gradient_clipping/tests.json | 28 ++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/questions/180_gradient_clipping/tests.json b/questions/180_gradient_clipping/tests.json index e4e4b180..8efd3dc8 100644 --- a/questions/180_gradient_clipping/tests.json +++ b/questions/180_gradient_clipping/tests.json @@ -1,6 +1,30 @@ [ { - "test": "print(your_function(...))", - "expected_output": "..." + "test": "import numpy as np\ngradients = np.array([3.0, 4.0])\nmax_norm = 5.0\nprint(clip_gradients(gradients, max_norm))", + "expected_output": "[3. 4.]" + }, + { + "test": "import numpy as np\ngradients = np.array([6.0, 8.0])\nmax_norm = 5.0\nprint(clip_gradients(gradients, max_norm))", + "expected_output": "[3. 4.]" + }, + { + "test": "import numpy as np\ngradients = np.array([0.0, 0.0])\nmax_norm = 1.0\nprint(clip_gradients(gradients, max_norm))", + "expected_output": "[0. 0.]" + }, + { + "test": "import numpy as np\ngradients = np.array([1.0, 2.0, 2.0])\nmax_norm = 3.0\nprint(clip_gradients(gradients, max_norm))", + "expected_output": "[1. 2. 2.]" + }, + { + "test": "import numpy as np\ngradients = np.array([10.0, 0.0])\nmax_norm = 5.0\nprint(clip_gradients(gradients, max_norm))", + "expected_output": "[5. 0.]" + }, + { + "test": "import numpy as np\ngradients = np.array([-3.0, -4.0])\nmax_norm = 5.0\nprint(clip_gradients(gradients, max_norm))", + "expected_output": "[-3. -4.]" + }, + { + "test": "import numpy as np\ngradients = np.array([-6.0, -8.0])\nmax_norm = 5.0\nprint(clip_gradients(gradients, max_norm))", + "expected_output": "[-3. -4.]" } ] From 2a1b29b27383d378f79435856546ffddb88a4552 Mon Sep 17 00:00:00 2001 From: komaksym Date: Mon, 29 Sep 2025 11:42:20 +0200 Subject: [PATCH 09/10] add minor formatting --- questions/180_gradient_clipping/solution.py | 1 + questions/180_gradient_clipping/starter_code.py | 1 + 2 files changed, 2 insertions(+) diff --git a/questions/180_gradient_clipping/solution.py b/questions/180_gradient_clipping/solution.py index 2bb651cd..9a588833 100644 --- a/questions/180_gradient_clipping/solution.py +++ b/questions/180_gradient_clipping/solution.py @@ -1,5 +1,6 @@ import numpy as np + def clip_gradients(gradients, max_norm): """ Clips the gradients so that their L2 norm does not exceed max_norm. diff --git a/questions/180_gradient_clipping/starter_code.py b/questions/180_gradient_clipping/starter_code.py index a9d03cd2..8da6391a 100644 --- a/questions/180_gradient_clipping/starter_code.py +++ b/questions/180_gradient_clipping/starter_code.py @@ -1,5 +1,6 @@ import numpy as np + # Implement your function below. def clip_gradients(gradients, max_norm): """ From c73bfe9b8a68bea57c90e310af590c0640608d5e Mon Sep 17 00:00:00 2001 From: komaksym Date: Fri, 17 Oct 2025 13:29:17 +0200 Subject: [PATCH 10/10] fix contributor --- questions/180_gradient_clipping/meta.json | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/questions/180_gradient_clipping/meta.json b/questions/180_gradient_clipping/meta.json index 33c1eb4c..f4ca3b27 100644 --- a/questions/180_gradient_clipping/meta.json +++ b/questions/180_gradient_clipping/meta.json @@ -6,5 +6,10 @@ "video": "", "likes": "0", "dislikes": "0", - "contributor": ["https://github.com/komaksym"] + "contributor": [ + { + "profile_link": "https://github.com/komaksym", + "name": "komaksym" + } + ] }
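
The `pytorch/` and `tinygrad/` solution files in this series are still the scaffold templates. For reference, a minimal sketch of how the same L2-norm rule might look on a PyTorch tensor — the signature below is an assumption for illustration, not something these patches define:

```python
import torch

def clip_gradients(gradients: torch.Tensor, max_norm: float) -> torch.Tensor:
    # Hypothetical PyTorch counterpart of the NumPy reference solution.
    grads = gradients.float()
    norm = torch.linalg.norm(grads)      # L2 norm of the flattened tensor
    if norm <= max_norm:
        return grads
    return grads * (max_norm / norm)
```

In real training loops, the built-in `torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)` applies the same rule in place across all parameter gradients.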