diff --git a/examples/methods/neighbors/multinomial_nb.cpp b/examples/methods/neighbors/multinomial_nb.cpp
new file mode 100644
index 0000000..ea448c5
--- /dev/null
+++ b/examples/methods/neighbors/multinomial_nb.cpp
@@ -0,0 +1,23 @@
+// #include "../../src/slowmokit/methods/neighbors/multinomial_nb.hpp"
+// #include "../../src/slowmokit/core.hpp"
+
+// signed main(){
+//   std::vector<std::vector<int>> xTrain{
+//       {0,0,0},
+//       {1,0,0},
+//       {1,0,1},
+//       {1,0,0},
+//       {0,1,1},
+//       {1,0,0},
+//       {1,1,0},
+//       {0,1,1},
+//       {1,0,1},
+//       {1,1,1}
+//   };
+//   std::vector<int> yTrain{2,2,2,2,1,2,2,1,2,0};
+//   std::vector<int> xTest{0,0,0};
+//   std::vector<int> classes{0,1,2};
+//   MultinomialNB<int> multinomial;
+//   std::cout << multinomial.fit_predict(xTrain, yTrain, classes, xTest) << std::endl;
+//   return 0;
+// }
diff --git a/src/slowmokit/methods/neighbors/multinomial_nb/multinomial_nb.cpp b/src/slowmokit/methods/neighbors/multinomial_nb/multinomial_nb.cpp
new file mode 100644
--- /dev/null
+++ b/src/slowmokit/methods/neighbors/multinomial_nb/multinomial_nb.cpp
+#include "multinomial_nb.hpp"
+
+template<class T>
+double MultinomialNB<T>::priorProb(std::vector<int> yTrain, int label)
+{ // Prior probability P(y)
+  int sum = 0;
+  for (int i = 0; i < yTrain.size(); i++)
+  {
+    if (yTrain[i] == label)
+    {
+      sum += 1;
+    }
+  }
+  return sum / double(yTrain.size());
+}
+
+template<class T>
+double MultinomialNB<T>::conditionalProb(std::vector<std::vector<int>> xTrain,
+                                         std::vector<int> yTrain, int featureCol,
+                                         int featureVal, int label)
+{ // Conditional probability P(x = f1 | y = class)
+  int denominator = 0, numerator = 0;
+  std::set<int> varies_values;
+  for (int i = 0; i < yTrain.size(); i++)
+  {
+    if (yTrain[i] == label)
+    { // Check whether this sample's label matches the class we are looking for
+      denominator++; // Count of all samples belonging to that class
+      varies_values.insert(xTrain[i][featureCol]);
+      if (xTrain[i][featureCol] == featureVal)
+      {
+        numerator++; // Increases only when the feature holds the same value as
+                     // the corresponding value of the testing feature
+      }
+    }
+  }
+  // Laplace (add-one) smoothing: numerator + 1 so that a feature value unseen
+  // in training but present in testing does not zero out the probability;
+  // denominator + number of distinct values seen for this feature.
+  return (numerator + 1) / double(denominator + varies_values.size());
+}
+
+template<class T>
+int MultinomialNB<T>::fit_predict(std::vector<std::vector<int>> xTrain,
+                                  std::vector<int> yTrain, std::vector<int> classes,
+                                  std::vector<int> xTest)
+{
+  int n_features = xTrain[0].size();
+
+  std::vector<double> postProbs;
+
+  std::vector<int>::iterator label; // iterator over the different output labels
+  for (label = classes.begin(); label != classes.end(); label++)
+  { // iterate over all labels (P(y = y1), P(y = y2), ...)
+    double likelihood = 1.0;
+    for (int i = 0; i < n_features; i++)
+    {
+      double cond = conditionalProb(xTrain, yTrain, i, xTest[i], *label);
+      likelihood += log10(cond); // log10 of the computed conditional
+                                 // probability => log10(P(x = xi | y = label))
+    }
+
+    double prior = priorProb(yTrain, *label);
+    double post = prior * likelihood;
+    postProbs.push_back(post);
+  }
+  double sumpropProbs = 0.0;
+  int max = 0;
+  for (int i = 0; i < postProbs.size(); i++)
+  { // Check the computed values for any negative entry; if one exists the
+    // values are compared directly, otherwise each value is normalised by the sum
+    if (postProbs[i] < 0)
+    {
+      sumpropProbs = 1;
+      break;
+    }
+    sumpropProbs += postProbs[i];
+  }
+  for (int i = 0; i < postProbs.size(); i++)
+  {
+    postProbs[i] /= sumpropProbs;
+    if (postProbs[i] > postProbs[max])
+    {
+      max = i;
+    }
+  }
+  return max;
+}
\ No newline at end of file
diff --git a/src/slowmokit/methods/neighbors/multinomial_nb/multinomial_nb.hpp b/src/slowmokit/methods/neighbors/multinomial_nb/multinomial_nb.hpp
new file mode 100644
index 0000000..04df347
--- /dev/null
+++ b/src/slowmokit/methods/neighbors/multinomial_nb/multinomial_nb.hpp
@@ -0,0 +1,51 @@
+/**
+ * @file methods/neighbors/multinomial_nb/multinomial_nb.hpp
+ *
+ * The header file including the Multinomial Naive Bayes algorithm
+ */
+
+#ifndef SLOWMOKIT_MULTINOMIAL_NB_HPP
+#define SLOWMOKIT_MULTINOMIAL_NB_HPP
+
+#include "../../../core.hpp"
+
+template<class T> class MultinomialNB
+{
+ private:
+  /**
+   * @brief Prior probability of Multinomial Naive Bayes
+   *
+   * @param yTrain all y training values
+   * @param label the output class whose prior probability we are computing
+   * @return double prior probability
+   */
+  double priorProb(std::vector<int> yTrain, int label);
+  /**
+   * @brief Conditional probability
+   *
+   * @param xTrain all x training int values
+   * @param yTrain all y training int values
+   * @param featureCol column for which we are computing the conditional probability
+   * @param featureVal value in that column
+   * @param label y value
+   * @return double conditional probability
+   */
+  double conditionalProb(std::vector<std::vector<int>> xTrain,
+                         std::vector<int> yTrain, int featureCol, int featureVal,
+                         int label);
+
+ public:
+  /**
+   * @brief fit_predict function
+   *
+   * @param xTrain all x training values
+   * @param yTrain all y training values (int)
+   * @param classes classes of y
+   * @param xTest testing values (int)
+   * @return int predicted output value
+   */
+  int fit_predict(std::vector<std::vector<int>> xTrain, std::vector<int> yTrain,
+                  std::vector<int> classes, std::vector<int> xTest);
+};
+
+#endif
\ No newline at end of file
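Reviewer note (not part of the diff): conditionalProb applies add-one (Laplace) smoothing, and fit_predict sums log10 of the per-feature conditional probabilities but then multiplies that log-sum (started at 1.0) by the raw prior. A more common formulation keeps the whole posterior in log space, scoring each class as log P(y) + sum_i log P(x_i | y) and taking the argmax directly, which also removes the need for the sign-dependent normalisation pass. The standalone sketch below illustrates that variant on the same toy data as the example file; the helper name smoothedConditional and the overall structure are illustrative assumptions, not part of the slowmokit API.

// Illustrative sketch only: log-space posterior for a categorical/multinomial NB.
#include <cmath>
#include <iostream>
#include <limits>
#include <set>
#include <vector>

// Laplace-smoothed P(x[featureCol] = featureVal | y = label), mirroring conditionalProb above.
double smoothedConditional(const std::vector<std::vector<int>> &xTrain,
                           const std::vector<int> &yTrain, int featureCol,
                           int featureVal, int label)
{
  int numerator = 0, denominator = 0;
  std::set<int> distinctValues; // distinct values this feature takes within the class
  for (std::size_t i = 0; i < yTrain.size(); i++)
  {
    if (yTrain[i] == label)
    {
      denominator++;
      distinctValues.insert(xTrain[i][featureCol]);
      if (xTrain[i][featureCol] == featureVal)
        numerator++;
    }
  }
  return (numerator + 1) / double(denominator + distinctValues.size());
}

int main()
{
  std::vector<std::vector<int>> xTrain{{0,0,0},{1,0,0},{1,0,1},{1,0,0},{0,1,1},
                                       {1,0,0},{1,1,0},{0,1,1},{1,0,1},{1,1,1}};
  std::vector<int> yTrain{2,2,2,2,1,2,2,1,2,0};
  std::vector<int> xTest{0,0,0};
  std::vector<int> classes{0,1,2};

  int best = classes[0];
  double bestScore = -std::numeric_limits<double>::infinity();
  for (int label : classes)
  {
    // log10 P(y = label): relative frequency of the class in yTrain
    int count = 0;
    for (int y : yTrain)
      if (y == label)
        count++;
    double score = std::log10(count / double(yTrain.size()));
    // add log10 P(x_i = xTest[i] | y = label) for every feature
    for (int i = 0; i < (int) xTest.size(); i++)
      score += std::log10(smoothedConditional(xTrain, yTrain, i, xTest[i], label));
    if (score > bestScore)
    {
      bestScore = score;
      best = label; // keep the class with the highest log-posterior
    }
  }
  std::cout << "predicted class: " << best << "\n";
  return 0;
}

Because the argmax of the log-posterior is taken directly, no normalisation of the per-class scores is required in this formulation.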