|
28 | 28 |
|
29 | 29 | # ── 1️⃣ EDIT YOUR QUESTION HERE ──────────────────────────────────────────── |
30 | 30 | QUESTION_DICT: Dict[str, Any] = { |
31 | | - "id": "140", |
32 | | - "title": "Bernoulli Naive Bayes Classifier", |
33 | | - "difficulty": "medium", |
34 | | - "category": "Machine Learning", |
35 | | - "description": "Write a Python class …", |
36 | | - "learn_section": "# Learn section …", |
37 | | - "starter_code": "import numpy as np\n\nclass NaiveBayes():\n pass", |
38 | | - "solution": "import numpy as np\n\nclass NaiveBayes():\n pass", |
| 31 | + "id": "140",
| 32 | + "description": "Write a Python class to implement the Bernoulli Naive Bayes classifier for binary (0/1) feature data. Your class should have two methods: `forward(self, X, y)` to train on the input data (X: 2D NumPy array of binary features, y: 1D NumPy array of class labels) and `predict(self, X)` to output predicted labels for a 2D test matrix X. Use Laplace smoothing, exposed as a constructor parameter `smoothing` (default 1.0). Return predictions as a NumPy array. Only use NumPy. Predictions must be binary (0 or 1), and you must handle the case where the training data contains only one class. Compute all likelihoods with log probabilities for numerical stability.",
| 33 | + "test_cases": [ |
| 34 | + { |
| 35 | + "test": "import numpy as np\nmodel = NaiveBayes(smoothing=1.0)\nX = np.array([[1, 0, 1], [1, 1, 0], [0, 0, 1], [0, 1, 0], [1, 1, 1]])\ny = np.array([1, 1, 0, 0, 1])\nmodel.forward(X, y)\nprint(model.predict(np.array([[1, 0, 1]])))", |
| 36 | + "expected_output": "[1]" |
| 37 | + }, |
| 38 | + { |
| 39 | + "test": "import numpy as np\nmodel = NaiveBayes(smoothing=1.0)\nX = np.array([[0], [1], [0], [1]])\ny = np.array([0, 1, 0, 1])\nmodel.forward(X, y)\nprint(model.predict(np.array([[0], [1]])))", |
| 40 | + "expected_output": "[0 1]" |
| 41 | + }, |
| 42 | + { |
| 43 | + "test": "import numpy as np\nmodel = NaiveBayes(smoothing=1.0)\nX = np.array([[0, 0], [1, 0], [0, 1]])\ny = np.array([0, 1, 0])\nmodel.forward(X, y)\nprint(model.predict(np.array([[1, 1]])))", |
| 44 | + "expected_output": "[0]" |
| 45 | + }, |
| 46 | + { |
| 47 | + "test": "import numpy as np\nnp.random.seed(42)\nmodel = NaiveBayes(smoothing=1.0)\nX = np.random.randint(0, 2, (100, 5))\ny = np.random.choice([0, 1], size=100)\nmodel.forward(X, y)\nX_test = np.random.randint(0, 2, (10, 5))\npred = model.predict(X_test)\nprint(pred.shape)", |
| 48 | + "expected_output": "(10,)" |
| 49 | + }, |
| 50 | + { |
| 51 | + "test": "import numpy as np\nnp.random.seed(42)\nmodel = NaiveBayes(smoothing=1.0)\nX = np.random.randint(0, 2, (10, 3))\ny = np.zeros(10, dtype=int)\nmodel.forward(X, y)\nX_test = np.random.randint(0, 2, (3, 3))\nprint(model.predict(X_test))",
| 52 | + "expected_output": "[0 0 0]"
| 53 | + } |
| 54 | + ], |
| 55 | + "solution": "import numpy as np\n\nclass NaiveBayes():\n    def __init__(self, smoothing=1.0):\n        self.smoothing = smoothing\n        self.classes = None\n        self.priors = None\n        self.likelihoods = None\n\n    def forward(self, X, y):\n        # Log prior for each class from its relative frequency\n        self.classes, class_counts = np.unique(y, return_counts=True)\n        self.priors = {cls: np.log(class_counts[i] / len(y)) for i, cls in enumerate(self.classes)}\n        # Per-feature Bernoulli parameters with Laplace smoothing, stored as log probabilities\n        self.likelihoods = {}\n        for cls in self.classes:\n            X_cls = X[y == cls]\n            prob = (np.sum(X_cls, axis=0) + self.smoothing) / (X_cls.shape[0] + 2 * self.smoothing)\n            self.likelihoods[cls] = (np.log(prob), np.log(1 - prob))\n\n    def _compute_posterior(self, sample):\n        # Unnormalized log posterior for every class; return the argmax\n        posteriors = {}\n        for cls in self.classes:\n            posterior = self.priors[cls]\n            prob_1, prob_0 = self.likelihoods[cls]\n            likelihood = np.sum(sample * prob_1 + (1 - sample) * prob_0)\n            posterior += likelihood\n            posteriors[cls] = posterior\n        return max(posteriors, key=posteriors.get)\n\n    def predict(self, X):\n        return np.array([self._compute_posterior(sample) for sample in X])",
39 | 56 | "example": { |
40 | | - "input": "…", |
| 57 | + "input": "X = np.array([[1, 0, 1], [1, 1, 0], [0, 0, 1], [0, 1, 0], [1, 1, 1]]); y = np.array([1, 1, 0, 0, 1])\nmodel = NaiveBayes(smoothing=1.0)\nmodel.forward(X, y)\nprint(model.predict(np.array([[1, 0, 1]])))", |
41 | 58 | "output": "[1]", |
42 | | - "reasoning": "…" |
| 59 | + "reasoning": "The model learns class priors and per-feature Bernoulli probabilities with Laplace smoothing. For [1, 0, 1], the unnormalized posterior for class 1 (0.6 * 0.8 * 0.4 * 0.6 ≈ 0.115) exceeds that for class 0 (0.4 * 0.25 * 0.5 * 0.5 = 0.025), so the model predicts 1."
43 | 60 | }, |
44 | | - "test_cases": [ |
45 | | - {"test": "print(1+1)", "expected_output": "2"} |
| 61 | + "category": "Machine Learning", |
| 62 | + "starter_code": "import numpy as np\n\nclass NaiveBayes():\n    def __init__(self, smoothing=1.0):\n        # Initialize smoothing\n        pass\n\n    def forward(self, X, y):\n        # Fit model to binary features X and labels y\n        pass\n\n    def predict(self, X):\n        # Predict class labels for test set X\n        pass",
| 63 | + "title": "Bernoulli Naive Bayes Classifier", |
| 64 | + "learn_section": r"""# **Naive Bayes Classifier**
| 65 | +
|
| 66 | +## **1. Definition** |
| 67 | +
|
| 68 | +Naive Bayes is a **probabilistic machine learning algorithm** used for **classification tasks**. It is based on **Bayes' Theorem**, which describes the probability of an event based on prior knowledge of related events. |
| 69 | +
|
| 70 | +The algorithm assumes that: |
| 71 | +- **Features are conditionally independent** given the class label (the "naive" assumption). |
| 72 | +- It calculates the posterior probability for each class and assigns the class with the **highest posterior** to the sample. |
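| | +
| | +Concretely, this independence assumption lets the class-conditional likelihood factor across the $n$ features:
| | +
| | +$$
| | +P(X | C) = \prod_{i=1}^{n} P(x_i | C)
| | +$$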
| 73 | +
|
| 74 | +--- |
| 75 | +
|
| 76 | +## **2. Bayes' Theorem** |
| 77 | +
|
| 78 | +Bayes' Theorem is given by: |
| 79 | +
|
| 80 | +$$ |
| 81 | +P(C | X) = \frac{P(X | C) \times P(C)}{P(X)} |
| 82 | +$$ |
| 83 | +
|
| 84 | +Where: |
| 85 | +- $P(C | X)$ → **Posterior** probability: the probability of class $C$ given the feature vector $X$
| 86 | +- $P(X | C)$ → **Likelihood**: the probability of the data $X$ given the class |
| 87 | +- $P(C)$ → **Prior** probability: the initial probability of class $C$ before observing any data |
| 88 | +- $P(X)$ → **Evidence**: the total probability of the data across all classes (acts as a normalizing constant)
| 89 | +
|
| 90 | +Since $P(X)$ is the same for all classes during comparison, it can be ignored, simplifying the formula to: |
| 91 | +
|
| 92 | +$$ |
| 93 | +P(C | X) \propto P(X | C) \times P(C) |
| 94 | +$$ |
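| | +
| | +Because multiplying many small probabilities quickly underflows, implementations compare classes using log probabilities, so the predicted class is
| | +
| | +$$
| | +\hat{C} = \arg\max_{C} \left[ \log P(C) + \sum_{i=1}^{n} \log P(x_i | C) \right]
| | +$$
| | +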
| 95 | +--- |
| 96 | +
|
| 97 | +## **3. Bernoulli Naive Bayes**
| 98 | +- Used for **binary data** (features take only 0 or 1 values). |
| 99 | +- The likelihood is given by: |
| 100 | +
|
| 101 | +$$ |
| 102 | +P(X | C) = \prod_{i=1}^{n} P(x_i | C)^{x_i} \cdot (1 - P(x_i | C))^{1 - x_i} |
| 103 | +$$ |
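| | +
| | +where $P(x_i | C)$ is estimated from class counts with Laplace smoothing. As a rough NumPy sketch (illustrative only, not the reference solution; the array names are made up), estimating the smoothed parameters for one class and scoring a sample in log space looks like:
| | +
| | +```python
| | +import numpy as np
| | +
| | +X1 = np.array([[1, 0, 1], [1, 1, 0], [1, 1, 1]])  # training rows of class 1
| | +alpha = 1.0                                        # Laplace smoothing
| | +
| | +# P(x_i = 1 | C = 1), smoothed over the two possible feature values
| | +p1 = (X1.sum(axis=0) + alpha) / (X1.shape[0] + 2 * alpha)
| | +
| | +sample = np.array([1, 0, 1])
| | +# log P(sample | C = 1) under the Bernoulli model
| | +log_lik = np.sum(sample * np.log(p1) + (1 - sample) * np.log(1 - p1))
| | +```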
| 104 | +
|
| 105 | +--- |
| 106 | +
|
| 107 | +## **4. Applications of Naive Bayes** |
| 108 | +
|
| 109 | +- **Text Classification:** Spam detection, sentiment analysis, and news categorization. |
| 110 | +- **Document Categorization:** Sorting documents by topic. |
| 111 | +- **Fraud Detection:** Identifying fraudulent transactions or behaviors. |
| 112 | +- **Recommender Systems:** Classifying users into preference groups. |
| 113 | +
|
| 114 | +--- """, |
| 115 | + "contributor": [ |
| 116 | + { |
| 117 | + "profile_link": "https://github.com/moe18", |
| 118 | + "name": "Moe Chabot" |
| 119 | + } |
46 | 120 | ], |
47 | | - "video": "", |
48 | 121 | "likes": "0", |
49 | 122 | "dislikes": "0", |
50 | | - "contributor": [ |
51 | | - {"name": "Moe Chabot", "profile_link": "https://github.com/moe18"} |
52 | | - ] |
53 | | - # Optional extras: |
54 | | - # "marimo_link": "https://…", |
55 | | - # "tinygrad_difficulty": "medium", |
56 | | - # "tinygrad_starter_code": "BASE64…", |
57 | | - # "tinygrad_solution": "BASE64…", |
58 | | - # "tinygrad_test_cases": [], |
59 | | - # "pytorch_difficulty": "medium", |
60 | | - # "pytorch_starter_code": "BASE64…", |
61 | | - # "pytorch_solution": "BASE64…", |
62 | | - # "pytorch_test_cases": [] |
| 123 | + "difficulty": "medium", |
| 124 | + "video": ""
63 | 125 | } |
| 126 | + |
64 | 127 | # ──────────────────────────────────────────────────────────────────────────── |
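| | +
| | +# Optional local sanity check: a minimal sketch, assuming the grading harness
| | +# simply exec()s the "solution" string and each "test" string in one namespace
| | +# and compares the printed output with "expected_output".
| | +if __name__ == "__main__":
| | +    import contextlib
| | +    import io
| | +
| | +    for case in QUESTION_DICT["test_cases"]:
| | +        ns = {}
| | +        exec(QUESTION_DICT["solution"], ns)
| | +        buf = io.StringIO()
| | +        with contextlib.redirect_stdout(buf):
| | +            exec(case["test"], ns)
| | +        assert buf.getvalue().strip() == case["expected_output"], case["test"]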
65 | 128 |
|
66 | 129 |
|
|