Commits
24 commits
- f954413 #53 categorical nb (abhinavmalhotra01, Feb 12, 2023)
- 030c1df Update CMakeLists.txt (abhinavmalhotra01, Feb 12, 2023)
- bc9b3f6 clang format changes (abhinavmalhotra01, Feb 12, 2023)
- 0162b34 Update docs/methods/neighbors/categorical_nb/categorical_nb.md (abhinavmalhotra01, Feb 12, 2023)
- 9ed78a7 Update src/slowmokit/methods/neighbors/categorical_nb/categorical_nb.cpp (abhinavmalhotra01, Feb 12, 2023)
- 69ba1a3 Update src/slowmokit/methods/neighbors/categorical_nb/categorical_nb.cpp (abhinavmalhotra01, Feb 12, 2023)
- c8c8da3 Update src/slowmokit/methods/neighbors/categorical_nb/categorical_nb.cpp (abhinavmalhotra01, Feb 12, 2023)
- e5ac202 Update src/slowmokit/methods/neighbors/categorical_nb/categorical_nb.cpp (abhinavmalhotra01, Feb 12, 2023)
- a02a433 some changes (abhinavmalhotra01, Feb 12, 2023)
- 67aec95 Merge branch 'categorical_naive_bayes' of https://github.com/abhinavm… (abhinavmalhotra01, Feb 12, 2023)
- 39f867c clang changes (abhinavmalhotra01, Feb 12, 2023)
- 53bfca4 nb (abhinavmalhotra01, Feb 12, 2023)
- 4e8fa93 cl (abhinavmalhotra01, Feb 12, 2023)
- 32e096a md (abhinavmalhotra01, Feb 12, 2023)
- 4efac62 m (abhinavmalhotra01, Feb 12, 2023)
- fcc3623 o (abhinavmalhotra01, Feb 12, 2023)
- 9f30155 Update categorical_nb.md (abhinavmalhotra01, Feb 12, 2023)
- 462a229 Update categorical_nb.md (abhinavmalhotra01, Feb 12, 2023)
- 8ac9880 Update categorical_nb.hpp (abhinavmalhotra01, Feb 12, 2023)
- 5822850 Merge remote-tracking branch 'upstream/main' into categorical_naive_b… (abhinavmalhotra01, Feb 14, 2023)
- f78c85f changes (abhinavmalhotra01, Feb 15, 2023)
- d5c782a changes (abhinavmalhotra01, Feb 15, 2023)
- 1587a44 clang (abhinavmalhotra01, Feb 15, 2023)
- 6f54c87 m (abhinavmalhotra01, Feb 15, 2023)
4 changes: 3 additions & 1 deletion CMakeLists.txt
@@ -62,4 +62,6 @@ add_library(slowmokit
src/slowmokit/methods/metrics/f1score.hpp
src/slowmokit/methods/metrics/f1score.cpp
src/slowmokit/methods/metrics/mean_squared_error.hpp
src/slowmokit/methods/metrics/mean_squared_error.cpp)
src/slowmokit/methods/metrics/mean_squared_error.cpp
src/slowmokit/methods/neighbors/categorical_nb/categorical_nb.cpp
src/slowmokit/methods/neighbors/categorical_nb/categorical_nb.hpp)
43 changes: 43 additions & 0 deletions docs/methods/neighbors/categorical_nb/categorical_nb.md
@@ -0,0 +1,43 @@
# Categorical Naive Bayes

Categorical Naive Bayes model.

Categorical Naive Bayes computes the likelihood of a feature value given a class as the number of times that value occurs within the class, divided by the total number of occurrences of that class.

It computes the prior probability of a class as the number of occurrences of that class divided by the size of the training set.

Finally, prior * likelihood gives the (unnormalized) posterior probability of each class, and the class with the highest posterior is predicted.


## Parameters

| Name     | Definition                                                          | Type                |
|----------|---------------------------------------------------------------------|---------------------|
| `xTrain` | The training set containing the features                            | `vector<vector<T>>` |
| `yTrain` | The set containing the class corresponding to each `xTrain` instance | `vector<string>`    |
| `xTest`  | The sample whose class will be predicted                            | `vector<T>`         |

## Methods

| Name | Definition | Return value |
|-----------------------------------------------------------|-----------------------------------------------|--------------|
| `fit(vector<vector<T>> xTrain, vector<string> yTrain)`    | fit the class instance with the training data | `void`       |
| `predict(vector<T> xTest)`                                | predict the label for the xTest feature vector | `string`     |

## Example

```cpp
std::vector <std::vector<std::string>> xTrain = {{"fifa", "yes", "no", "no"},
{"fifa", "no", "yes", "no"},
{"fifa", "no", "no", "yes"},
{"cc", "no", "no", "yes"},
{"fifa", "yes", "yes", "yes"},
{"cc", "yes", "yes", "yes"},
{"cc", "no", "no", "yes"},
{"cc", "yes", "no", "no"}};
std::vector <std::string> yTrain = {"m", "m", "m", "m", "f", "f", "f", "f"};
std::vector <std::string> xTest = {"fifa", "no", "yes", "yes"};
CategoricalNB<std::string> classifier;
classifier.fit(xTrain, yTrain);
std::cout << classifier.predict(xTest);
```
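Working the sample through by hand (likelihood = count of the value within the class divided by the class count, prior = class count divided by training-set size):

```latex
P(m \mid x) \propto \tfrac{4}{8}\cdot\tfrac{3}{4}\cdot\tfrac{3}{4}\cdot\tfrac{1}{4}\cdot\tfrac{2}{4} = \tfrac{9}{256} \approx 0.035
P(f \mid x) \propto \tfrac{4}{8}\cdot\tfrac{1}{4}\cdot\tfrac{1}{4}\cdot\tfrac{2}{4}\cdot\tfrac{3}{4} = \tfrac{3}{256} \approx 0.012
```

so class `m` has the higher posterior and is printed.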
20 changes: 20 additions & 0 deletions examples/neighbors/categorical_nb.cpp
@@ -0,0 +1,20 @@
//#include "../../src/slowmokit/methods/neighbors/categorical_nb/categorical_nb.hpp"
//#include "../../src/slowmokit/core.hpp"
//
//signed main() {
//
// std::vector <std::vector<std::string>> xTrain = {{"fifa", "yes", "no", "no"},
// {"fifa", "no", "yes", "no"},
// {"fifa", "no", "no", "yes"},
// {"cc", "no", "no", "yes"},
// {"fifa", "yes", "yes", "yes"},
// {"cc", "yes", "yes", "yes"},
// {"cc", "no", "no", "yes"},
// {"cc", "yes", "no", "no"}};
// std::vector <std::string> yTrain = {"m", "m", "m", "m", "f", "f", "f", "f"};
// std::vector <std::string> xTest = {"fifa", "no", "yes", "yes"};
// CategoricalNB<std::string> classifier;
// classifier.fit(xTrain, yTrain);
// std::cout << classifier.predict(xTest);
//
//}
107 changes: 107 additions & 0 deletions src/slowmokit/methods/neighbors/categorical_nb/categorical_nb.cpp
@@ -0,0 +1,107 @@
/**
 * @file methods/neighbors/categorical_nb/categorical_nb.cpp
 *
 * Implementation of the Categorical Naive Bayes algorithm
 */

#include "categorical_nb.hpp"
template<class T>
void CategoricalNB<T>::fit(std::vector<std::vector<T>> xTrain,
std::vector<std::string> yTrain)
{
// posterior = (prior * likelihood) / evidence
// since the evidence is the same for every class, we can ignore it

if (xTrain.size() == 0 || yTrain.size() == 0)
{
throw "Make sure that you have at least one train example";
}
if (xTrain.size() != yTrain.size())
{
throw "Number of instances and target values must be equal";
}
featureSize = xTrain[0].size();
std::map<std::string, int>
occurences; // to store the occurrences of each label in the training set
// and then use it in finding the priors

for (auto category : yTrain)
{
occurences[category]++; // incrementing the occurrence count of each label
}
for (auto current : occurences)
{
priors[current.first] =
double(current.second) /
yTrain.size(); // calculating the prior of each label by dividing its
// occurrence count by the size of the training set
}

std::map<std::string, std::map<std::pair<T, int>, int>>
counts; // to store the count of each cell corresponding to its label,
// its row and its category

for (int i = 0; i < (int) (xTrain.size());
i++) // iterating over the training data grid
{
std::vector<T> current = xTrain[i]; // current row
int j = 0; // to keep track of the current column for each cell
for (auto curr : current) // iterating over current row
{
counts[yTrain[i]]
[{curr, j}]++; // incrementing the count based on its label, its
// category and its column (denoting its feature)
j++;
}
}
for (auto current :
counts) // iterating over the counts map to calculate likelihoods
{
for (auto e :
current.second) // iterating over the map corresponding to each label
// to find the likelihood of each entry
{
likelihoods[current.first][{e.first.first, e.first.second}] =
((double(e.second)) / (occurences[current.first]));
// likelihood[label][current feature]=occ in current/total no of occ
}
}
}

template<class T>
std::string CategoricalNB<T>::predict(
    std::vector<T> xTest) // predicting the label on the basis of the training set
{
if (((int) (xTest.size())) != featureSize)
{
throw "The number of features in the training and testing set must be the same";
}
std::map<std::string, double> probs;
for (auto curr : priors) // since posterior = prior * likelihood, we
// calculate it for each label and output the
// label with the highest probability
{
probs[curr.first] = curr.second;
int j = 0;
for (auto feature : xTest)
{
probs[curr.first] *=
likelihoods[curr.first][{feature, j}]; // multiplying in the likelihood of
// each feature of the test sample
j++;
}
}
double maxProb = 0;
std::string out;
for (auto prob : probs)
{
if (prob.second >
maxProb) // finding the highest probability among all options
{
maxProb = prob.second;
out = prob.first;
}
}
return out;
}
37 changes: 37 additions & 0 deletions src/slowmokit/methods/neighbors/categorical_nb/categorical_nb.hpp
@@ -0,0 +1,37 @@
/**
 * @file methods/neighbors/categorical_nb/categorical_nb.hpp
 *
 * Header file declaring the Categorical Naive Bayes class
 */

#ifndef SLOWMOKIT_CATEGORICAL_NB_HPP
#define SLOWMOKIT_CATEGORICAL_NB_HPP

#include "../../../core.hpp"

template<class T> class CategoricalNB
{

public:
/**
* @brief Fitting the training set into instance of class
* @param xTrain all training 2-d feature x values
* @param yTrain all training 1-d string y values
* @return void
*/
void fit(std::vector<std::vector<T>> xTrain, std::vector<std::string> yTrain);

/**
* @brief Predicting the class for xTest on the basis of training set
* @param xTest all testing feature x values
* @return string denoting the class label of xTest
*/
std::string predict(std::vector<T> xTest);

private:
std::map<std::string, double> priors;
std::map<std::string, std::map<std::pair<T, int>, double>> likelihoods;
int featureSize;
};

#endif // SLOWMOKIT_CATEGORICAL_NB_HPP