
Commit f0ede5a

Merge pull request #471 from Open-Deep-ML/add-q-140
Add-q-140
2 parents ecfcb09 + 4364717 commit f0ede5a

File tree

9 files changed, +347 -0 lines changed

json_to_files.py

Whitespace-only changes.

questions/140_bernoulli-naive-bayes-classifier/description.md

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
Write a Python class to implement the Bernoulli Naive Bayes classifier for binary (0/1) feature data. Your class should have two methods: `forward(self, X, y)` to train on the input data (X: 2D NumPy array of binary features, y: 1D NumPy array of class labels) and `predict(self, X)` to output predicted labels for a 2D test matrix X. Use Laplace smoothing (parameter: smoothing=1.0). Return predictions as a NumPy array. Only use NumPy. Predictions must be binary (0 or 1) and you must handle cases where the training data contains only one class. All log/likelihood calculations should use log probabilities for numerical stability.

questions/140_bernoulli-naive-bayes-classifier/example.json

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
{
  "input": "X = np.array([[1, 0, 1], [1, 1, 0], [0, 0, 1], [0, 1, 0], [1, 1, 1]]); y = np.array([1, 1, 0, 0, 1])\nmodel = NaiveBayes(smoothing=1.0)\nmodel.forward(X, y)\nprint(model.predict(np.array([[1, 0, 1]])))",
  "output": "[1]",
  "reasoning": "The model learns class priors and feature probabilities with Laplace smoothing. For [1, 0, 1], the posterior for class 1 is higher, so the model predicts 1."
}

questions/140_bernoulli-naive-bayes-classifier/learn.md

Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
# **Naive Bayes Classifier**

## **1. Definition**

Naive Bayes is a **probabilistic machine learning algorithm** used for **classification tasks**. It is based on **Bayes' Theorem**, which describes the probability of an event based on prior knowledge of related events.

The algorithm:
- assumes that **features are conditionally independent** given the class label (the "naive" assumption), as formalized below;
- calculates the posterior probability for each class and assigns the sample to the class with the **highest posterior**.
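
Formally, the independence assumption factorizes the joint likelihood into per-feature terms:

$$
P(X | C) = \prod_{i=1}^{n} P(x_i | C)
$$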

---

## **2. Bayes' Theorem**

Bayes' Theorem is given by:

$$
P(C | X) = \frac{P(X | C) \times P(C)}{P(X)}
$$

Where:
- $P(C | X)$ → **Posterior** probability: the probability of class $C$ given the feature vector $X$
- $P(X | C)$ → **Likelihood**: the probability of the data $X$ given the class
- $P(C)$ → **Prior** probability: the initial probability of class $C$ before observing any data
- $P(X)$ → **Evidence**: the total probability of the data across all classes (acts as a normalizing constant)

Since $P(X)$ is the same for all classes during comparison, it can be ignored, simplifying the formula to:

$$
P(C | X) \propto P(X | C) \times P(C)
$$
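
In practice, multiplying many per-feature probabilities underflows floating-point precision, which is why the task statement requires log probabilities: taking logarithms turns the product into a sum, and the predicted class is

$$
\hat{C} = \arg\max_{C} \left[ \log P(C) + \sum_{i=1}^{n} \log P(x_i | C) \right]
$$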

---

## **3. Bernoulli Naive Bayes**
- Used for **binary data** (features take only 0 or 1 values).
- The likelihood is given by:

$$
P(X | C) = \prod_{i=1}^{n} P(x_i | C)^{x_i} \cdot (1 - P(x_i | C))^{1 - x_i}
$$
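
The per-feature probabilities are estimated from training counts. With Laplace smoothing (the `smoothing` parameter, written $\alpha$ here), the estimate used by the solution below is

$$
P(x_i = 1 | C) = \frac{N_{i,C} + \alpha}{N_C + 2\alpha}
$$

where $N_{i,C}$ counts the class-$C$ training samples with $x_i = 1$ and $N_C$ is the number of class-$C$ samples. The $2\alpha$ in the denominator accounts for the two possible feature values and keeps every estimate strictly between 0 and 1, so all logarithms stay finite.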

---

## **4. Applications of Naive Bayes**

- **Text Classification:** Spam detection, sentiment analysis, and news categorization.
- **Document Categorization:** Sorting documents by topic.
- **Fraud Detection:** Identifying fraudulent transactions or behaviors.
- **Recommender Systems:** Classifying users into preference groups.

---

questions/140_bernoulli-naive-bayes-classifier/meta.json

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
{
  "id": "140",
  "title": "Bernoulli Naive Bayes Classifier",
  "difficulty": "medium",
  "category": "Machine Learning",
  "video": "",
  "likes": "0",
  "dislikes": "0",
  "contributor": [
    {
      "profile_link": "https://github.com/Coder1010ayush",
      "name": "Coder1010ayush"
    }
  ]
}

questions/140_bernoulli-naive-bayes-classifier/solution.py

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
import numpy as np

class NaiveBayes():
    def __init__(self, smoothing=1.0):
        self.smoothing = smoothing
        self.classes = None
        self.priors = None        # log prior per class
        self.likelihoods = None   # per class: (log P(x_i=1|C), log P(x_i=0|C))

    def forward(self, X, y):
        # Class priors: log(count(C) / N). np.unique also covers the
        # single-class case, since it simply returns one class.
        self.classes, class_counts = np.unique(y, return_counts=True)
        self.priors = {cls: np.log(class_counts[i] / len(y)) for i, cls in enumerate(self.classes)}
        self.likelihoods = {}
        for cls in self.classes:
            X_cls = X[y == cls]
            # Laplace-smoothed estimate of P(x_i = 1 | C); the 2 * smoothing
            # in the denominator reflects the two possible feature values.
            prob = (np.sum(X_cls, axis=0) + self.smoothing) / (X_cls.shape[0] + 2 * self.smoothing)
            self.likelihoods[cls] = (np.log(prob), np.log(1 - prob))

    def _compute_posterior(self, sample):
        posteriors = {}
        for cls in self.classes:
            posterior = self.priors[cls]
            prob_1, prob_0 = self.likelihoods[cls]
            # Bernoulli log-likelihood: use log P(x_i=1|C) where x_i is 1,
            # log P(x_i=0|C) where x_i is 0, then sum over features.
            likelihood = np.sum(sample * prob_1 + (1 - sample) * prob_0)
            posterior += likelihood
            posteriors[cls] = posterior
        # Return the class with the highest log posterior.
        return max(posteriors, key=posteriors.get)

    def predict(self, X):
        return np.array([self._compute_posterior(sample) for sample in X])
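
As a quick sanity check of the solution above (mirroring example.json), the following run should print `[1]`:

```python
import numpy as np

# Training data and query from example.json
X = np.array([[1, 0, 1], [1, 1, 0], [0, 0, 1], [0, 1, 0], [1, 1, 1]])
y = np.array([1, 1, 0, 0, 1])

model = NaiveBayes(smoothing=1.0)
model.forward(X, y)
print(model.predict(np.array([[1, 0, 1]])))  # -> [1]
```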

questions/140_bernoulli-naive-bayes-classifier/starter_code.py

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
import numpy as np

class NaiveBayes():
    def __init__(self, smoothing=1.0):
        # Initialize smoothing
        pass

    def forward(self, X, y):
        # Fit model to binary features X and labels y
        pass

    def predict(self, X):
        # Predict class labels for test set X
        pass

questions/140_bernoulli-naive-bayes-classifier/tests.json

Lines changed: 22 additions & 0 deletions
@@ -0,0 +1,22 @@
[
  {
    "test": "import numpy as np\nmodel = NaiveBayes(smoothing=1.0)\nX = np.array([[1, 0, 1], [1, 1, 0], [0, 0, 1], [0, 1, 0], [1, 1, 1]])\ny = np.array([1, 1, 0, 0, 1])\nmodel.forward(X, y)\nprint(model.predict(np.array([[1, 0, 1]])))",
    "expected_output": "[1]"
  },
  {
    "test": "import numpy as np\nmodel = NaiveBayes(smoothing=1.0)\nX = np.array([[0], [1], [0], [1]])\ny = np.array([0, 1, 0, 1])\nmodel.forward(X, y)\nprint(model.predict(np.array([[0], [1]])))",
    "expected_output": "[0 1]"
  },
  {
    "test": "import numpy as np\nmodel = NaiveBayes(smoothing=1.0)\nX = np.array([[0, 0], [1, 0], [0, 1]])\ny = np.array([0, 1, 0])\nmodel.forward(X, y)\nprint(model.predict(np.array([[1, 1]])))",
    "expected_output": "[0]"
  },
  {
    "test": "import numpy as np\nnp.random.seed(42)\nmodel = NaiveBayes(smoothing=1.0)\nX = np.random.randint(0, 2, (100, 5))\ny = np.random.choice([0, 1], size=100)\nmodel.forward(X, y)\nX_test = np.random.randint(0, 2, (10, 5))\npred = model.predict(X_test)\nprint(pred.shape)",
    "expected_output": "(10,)"
  },
  {
    "test": "import numpy as np\nmodel = NaiveBayes(smoothing=1.0)\nX = np.random.randint(0, 2, (10, 3))\ny = np.zeros(10)\nmodel.forward(X, y)\nX_test = np.random.randint(0, 2, (3, 3))\nprint(model.predict(X_test))",
    "expected_output": "[0. 0. 0.]"
  }
]

utils/convert_single_question.py

Lines changed: 209 additions & 0 deletions
@@ -0,0 +1,209 @@
#!/usr/bin/env python
"""
convert_single_question.py
──────────────────────────
Paste ONE question dict into QUESTION_DICT and run:

    python utils/convert_single_question.py

The script splits it into:

    questions/<id>_<slug>/
    ├─ meta.json
    ├─ description.md
    ├─ learn.md
    ├─ starter_code.py
    ├─ solution.py
    ├─ example.json
    ├─ tests.json
    ├─ tinygrad/   (optional)
    └─ pytorch/    (optional)
"""

import base64
import json
import pathlib
import re
from typing import Any, Dict

# ── 1️⃣ EDIT YOUR QUESTION HERE ────────────────────────────────────────────
QUESTION_DICT: Dict[str, Any] = {
    "id": "140",
    "description": "Write a Python class to implement the Bernoulli Naive Bayes classifier for binary (0/1) feature data. Your class should have two methods: `forward(self, X, y)` to train on the input data (X: 2D NumPy array of binary features, y: 1D NumPy array of class labels) and `predict(self, X)` to output predicted labels for a 2D test matrix X. Use Laplace smoothing (parameter: smoothing=1.0). Return predictions as a NumPy array. Only use NumPy. Predictions must be binary (0 or 1) and you must handle cases where the training data contains only one class. All log/likelihood calculations should use log probabilities for numerical stability.",
    "test_cases": [
        {
            "test": "import numpy as np\nmodel = NaiveBayes(smoothing=1.0)\nX = np.array([[1, 0, 1], [1, 1, 0], [0, 0, 1], [0, 1, 0], [1, 1, 1]])\ny = np.array([1, 1, 0, 0, 1])\nmodel.forward(X, y)\nprint(model.predict(np.array([[1, 0, 1]])))",
            "expected_output": "[1]"
        },
        {
            "test": "import numpy as np\nmodel = NaiveBayes(smoothing=1.0)\nX = np.array([[0], [1], [0], [1]])\ny = np.array([0, 1, 0, 1])\nmodel.forward(X, y)\nprint(model.predict(np.array([[0], [1]])))",
            "expected_output": "[0 1]"
        },
        {
            "test": "import numpy as np\nmodel = NaiveBayes(smoothing=1.0)\nX = np.array([[0, 0], [1, 0], [0, 1]])\ny = np.array([0, 1, 0])\nmodel.forward(X, y)\nprint(model.predict(np.array([[1, 1]])))",
            "expected_output": "[0]"
        },
        {
            "test": "import numpy as np\nnp.random.seed(42)\nmodel = NaiveBayes(smoothing=1.0)\nX = np.random.randint(0, 2, (100, 5))\ny = np.random.choice([0, 1], size=100)\nmodel.forward(X, y)\nX_test = np.random.randint(0, 2, (10, 5))\npred = model.predict(X_test)\nprint(pred.shape)",
            "expected_output": "(10,)"
        },
        {
            "test": "import numpy as np\nmodel = NaiveBayes(smoothing=1.0)\nX = np.random.randint(0, 2, (10, 3))\ny = np.zeros(10)\nmodel.forward(X, y)\nX_test = np.random.randint(0, 2, (3, 3))\nprint(model.predict(X_test))",
            "expected_output": "[0. 0. 0.]"
        }
    ],
    "solution": "import numpy as np\n\nclass NaiveBayes():\n    def __init__(self, smoothing=1.0):\n        self.smoothing = smoothing\n        self.classes = None\n        self.priors = None\n        self.likelihoods = None\n\n    def forward(self, X, y):\n        self.classes, class_counts = np.unique(y, return_counts=True)\n        self.priors = {cls: np.log(class_counts[i] / len(y)) for i, cls in enumerate(self.classes)}\n        self.likelihoods = {}\n        for cls in self.classes:\n            X_cls = X[y == cls]\n            prob = (np.sum(X_cls, axis=0) + self.smoothing) / (X_cls.shape[0] + 2 * self.smoothing)\n            self.likelihoods[cls] = (np.log(prob), np.log(1 - prob))\n\n    def _compute_posterior(self, sample):\n        posteriors = {}\n        for cls in self.classes:\n            posterior = self.priors[cls]\n            prob_1, prob_0 = self.likelihoods[cls]\n            likelihood = np.sum(sample * prob_1 + (1 - sample) * prob_0)\n            posterior += likelihood\n            posteriors[cls] = posterior\n        return max(posteriors, key=posteriors.get)\n\n    def predict(self, X):\n        return np.array([self._compute_posterior(sample) for sample in X])",
    "example": {
        "input": "X = np.array([[1, 0, 1], [1, 1, 0], [0, 0, 1], [0, 1, 0], [1, 1, 1]]); y = np.array([1, 1, 0, 0, 1])\nmodel = NaiveBayes(smoothing=1.0)\nmodel.forward(X, y)\nprint(model.predict(np.array([[1, 0, 1]])))",
        "output": "[1]",
        "reasoning": "The model learns class priors and feature probabilities with Laplace smoothing. For [1, 0, 1], the posterior for class 1 is higher, so the model predicts 1."
    },
    "category": "Machine Learning",
    "starter_code": "import numpy as np\n\nclass NaiveBayes():\n    def __init__(self, smoothing=1.0):\n        # Initialize smoothing\n        pass\n\n    def forward(self, X, y):\n        # Fit model to binary features X and labels y\n        pass\n\n    def predict(self, X):\n        # Predict class labels for test set X\n        pass",
    "title": "Bernoulli Naive Bayes Classifier",
    "learn_section": r"""# **Naive Bayes Classifier**

## **1. Definition**

Naive Bayes is a **probabilistic machine learning algorithm** used for **classification tasks**. It is based on **Bayes' Theorem**, which describes the probability of an event based on prior knowledge of related events.

The algorithm:
- assumes that **features are conditionally independent** given the class label (the "naive" assumption);
- calculates the posterior probability for each class and assigns the sample to the class with the **highest posterior**.

---

## **2. Bayes' Theorem**

Bayes' Theorem is given by:

$$
P(C | X) = \frac{P(X | C) \times P(C)}{P(X)}
$$

Where:
- $P(C | X)$ → **Posterior** probability: the probability of class $C$ given the feature vector $X$
- $P(X | C)$ → **Likelihood**: the probability of the data $X$ given the class
- $P(C)$ → **Prior** probability: the initial probability of class $C$ before observing any data
- $P(X)$ → **Evidence**: the total probability of the data across all classes (acts as a normalizing constant)

Since $P(X)$ is the same for all classes during comparison, it can be ignored, simplifying the formula to:

$$
P(C | X) \propto P(X | C) \times P(C)
$$
---

## **3. Bernoulli Naive Bayes**
- Used for **binary data** (features take only 0 or 1 values).
- The likelihood is given by:

$$
P(X | C) = \prod_{i=1}^{n} P(x_i | C)^{x_i} \cdot (1 - P(x_i | C))^{1 - x_i}
$$

---

## **4. Applications of Naive Bayes**

- **Text Classification:** Spam detection, sentiment analysis, and news categorization.
- **Document Categorization:** Sorting documents by topic.
- **Fraud Detection:** Identifying fraudulent transactions or behaviors.
- **Recommender Systems:** Classifying users into preference groups.

---""",
    "contributor": [
        {
            "profile_link": "https://github.com/moe18",
            "name": "Moe Chabot"
        }
    ],
    "likes": "0",
    "dislikes": "0",
    "difficulty": "medium",
    "video": ""
}

# ────────────────────────────────────────────────────────────────────────────


# ---------- helpers ---------------------------------------------------------
def slugify(text: str) -> str:
    text = re.sub(r"[^0-9A-Za-z]+", "-", text.lower())
    return re.sub(r"-{2,}", "-", text).strip("-")[:50]


def maybe_b64(s: str) -> str:
    try:
        if len(s) % 4 == 0 and re.fullmatch(r"[0-9A-Za-z+/=\n\r]+", s):
            return base64.b64decode(s).decode("utf-8")
    except Exception:
        pass
    return s


def write_text(path: pathlib.Path, content: str) -> None:
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(content.rstrip("\n") + "\n", encoding="utf-8")


def write_json(path: pathlib.Path, obj: Any) -> None:
    write_text(path, json.dumps(obj, indent=2, ensure_ascii=False))


# ---------- converter -------------------------------------------------------
def convert_one(q: Dict[str, Any]) -> None:
    folder = pathlib.Path("questions") / f"{q['id']}_{slugify(q['title'])}"
    folder.mkdir(parents=True, exist_ok=True)

    # meta.json
    meta = {
        "id": q["id"],
        "title": q["title"],
        "difficulty": q["difficulty"],
        "category": q["category"],
        "video": q.get("video", ""),
        "likes": q.get("likes", "0"),
        "dislikes": q.get("dislikes", "0"),
        "contributor": q.get("contributor", []),
    }
    for opt in ("tinygrad_difficulty", "pytorch_difficulty", "marimo_link"):
        if opt in q:
            meta[opt] = q[opt]
    write_json(folder / "meta.json", meta)

    # core files
    write_text(folder / "description.md", q["description"])
    write_text(folder / "learn.md", q["learn_section"])
    write_text(folder / "starter_code.py", q["starter_code"])
    write_text(folder / "solution.py", q["solution"])
    write_json(folder / "example.json", q["example"])
    write_json(folder / "tests.json", q["test_cases"])

    # optional language-specific extras
    for lang in ("tinygrad", "pytorch"):
        sc, so, tc = (f"{lang}_starter_code", f"{lang}_solution", f"{lang}_test_cases")
        if any(k in q for k in (sc, so, tc)):
            sub = folder / lang
            if sc in q:
                write_text(sub / "starter_code.py", maybe_b64(q[sc]))
            if so in q:
                write_text(sub / "solution.py", maybe_b64(q[so]))
            if tc in q:
                write_json(sub / "tests.json", q[tc])

    # success message (relative if possible)
    try:
        rel = folder.relative_to(pathlib.Path.cwd())
    except ValueError:
        rel = folder
    print(f"✓ Created {rel}")


# ---------- main ------------------------------------------------------------
def main() -> None:
    convert_one(QUESTION_DICT)


if __name__ == "__main__":
    main()
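
For illustration, a minimal sanity check of the helpers above (a sketch, not part of the commit): `slugify` builds the folder suffix from the title, which is how this question lands in `questions/140_bernoulli-naive-bayes-classifier/`, while `maybe_b64` decodes base64-looking strings and passes everything else through unchanged.

```python
# Hypothetical checks, not part of the commit.
print(slugify("Bernoulli Naive Bayes Classifier"))
# -> bernoulli-naive-bayes-classifier

print(maybe_b64("aGVsbG8="))    # valid base64 -> "hello"
print(maybe_b64("plain text"))  # spaces fail the pattern -> returned unchanged
```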
