diff --git a/docs/methods/preprocessing/csv_reader.md b/docs/methods/preprocessing/csv_reader.md new file mode 100644 index 0000000..05b5fa0 --- /dev/null +++ b/docs/methods/preprocessing/csv_reader.md @@ -0,0 +1,24 @@ +# Csv Reader + +It reads the provided CSV file and determines, for each column, whether its data type is double or string. + +After the data types are computed, the whole file is converted to a vector of string vectors, with the column data types stored in the first row of that vector. The values can then be converted to their data type at runtime whenever required. + +## Parameters + +| Name | Definition | Type | +|--------------|--------------------------------------------|--------------| +| file | Csv file which needs to be read | `ifstream` | + + +## Methods + +| Name | Definition | Return value | +|----------------------------------------|-----------------------------------------------|---------------| +| `readCsv(string name,bool index)` | Reads the CSV file and determines whether each column is double or string | `vector<vector<string>>` | + +## Example + +``` +std::vector<std::vector<std::string>> reader = readCsv("../../lol.csv",1); +``` \ No newline at end of file diff --git a/examples/methods/neighbors/multinomial_nb.cpp b/examples/methods/neighbors/multinomial_nb.cpp new file mode 100644 index 0000000..ea448c5 --- /dev/null +++ b/examples/methods/neighbors/multinomial_nb.cpp @@ -0,0 +1,23 @@ +// #include "../../src/slowmokit/methods/neighbors/multinomial_nb.hpp" +// #include "../../src/slowmokit/core.hpp" + +// signed main(){ +// std::vector> xTrain{ +// {0,0,0}, +// {1,0,0}, +// {1,0,1}, +// {1,0,0}, +// {0,1,1}, +// {1,0,0}, +// {1,1,0}, +// {0,1,1}, +// {1,0,1}, +// {1,1,1} +// }; +// std::vector yTrain{2,2,2,2,1,2,2,1,2,0}; +// std::vector xTest{0,0,0}; +// std::vector classes{0,1,2}; +// MultinomialNB multinomial; +// std::cout<>reader = readCsv("../../lol.csv",1); +// return 0; +// } \ No newline at end of file diff --git a/src/slowmokit.hpp b/src/slowmokit.hpp index 6b8ac82..0c72ec2 100644 --- a/src/slowmokit.hpp +++ 
b/src/slowmokit.hpp @@ -20,5 +20,6 @@ #include "slowmokit/methods/neighbors/bernoulli_nb.hpp" #include "slowmokit/methods/neighbors/gaussian_nb.hpp" #include "slowmokit/methods/neighbors/knn.hpp" +#include "slowmokit/methods/neighbors/multinomial_nb.hpp" #endif // SLOWMOKIT_HPP diff --git a/src/slowmokit/methods/neighbors/multinomial_nb.hpp b/src/slowmokit/methods/neighbors/multinomial_nb.hpp new file mode 100644 index 0000000..6c11eb3 --- /dev/null +++ b/src/slowmokit/methods/neighbors/multinomial_nb.hpp @@ -0,0 +1,13 @@ +/** + * @file methods/neighbors/multinomial_nb.hpp + * + * Easy include for Multinomial Nb algorithm + */ + + +#ifndef SLOWMOKIT_MULTINOMIAL_NB_HPP +#define SLOWMOKIT_MULTINOMIAL_NB_HPP + +#include "multinomial_nb/multinomial_nb.hpp" + +#endif // SLOWMOKIT_MULTINOMIAL_NB_HPP \ No newline at end of file diff --git a/src/slowmokit/methods/neighbors/multinomial_nb/multinomial_nb.cpp b/src/slowmokit/methods/neighbors/multinomial_nb/multinomial_nb.cpp new file mode 100644 index 0000000..29f04bf --- /dev/null +++ b/src/slowmokit/methods/neighbors/multinomial_nb/multinomial_nb.cpp @@ -0,0 +1,98 @@ +/** + * @file methods/neighbors/multinomial_nb/multinomial_nb.cpp + * + * Implementation of the Multinomial Naive Bayes main program + */ +#include "multinomial_nb.hpp" + +template +double MultinomialNB::priorProb(std::vector yTrain, int label) +{ // Prior-Probability P(y) + int sum = 0; + for (int i = 0; i < yTrain.size(); i++) + { + if (yTrain[i] == label) + { + sum += 1; + } + } + return sum / double(yTrain.size()); +} + +template +double MultinomialNB::conditionalProb(std::vector> xTrain, + std::vector yTrain, int featureCol, + int featureVal, int label) +{ // Conditional-Probabilty P(x=f1 / y=class) + int denominator = 0, numerator = 0; + std::set varies_values; + for (int i = 0; i < yTrain.size(); i++) + { + if (yTrain[i] == label) + { // Check if label of feature is same as output looking for + denominator++; // All values with output_class we are 
looking for + varies_values.insert(xTrain[i][featureCol]); + if (xTrain[i][featureCol] == featureVal) + { + numerator++; // numerator increase only when that feature + // consists of value same as value in testing + // feature + } + } + } + return (numerator + 1) / + double(denominator + + varies_values + .size()); // as it is multinomial nb so to avoid overfitting + // numerator+1 -> because if word do not exist in training but exist in + // testing denominator + number_of_different_values_in_feature +} + +template +int MultinomialNB::fit_predict(std::vector> xTrain, + std::vector yTrain, std::vector classes, + std::vector xTest) +{ + int n_features = xTrain[0].size(); + + std::vector postProbs; + + std::vector::iterator label; // different ouput labels + for (label = classes.begin(); label != classes.end(); label++) + { // iterating over all labels (P(y=y1,y2,y3....)) + double likelihood = 1.0; + for (int i = 0; i < n_features; i++) + { + double cond = conditionalProb(xTrain, yTrain, i, xTest[i], *label); + likelihood += log10(cond); // Log10 of computed conditionl + // probabilty => Log10(P(x=x1 / y=label)) + } + + double prior = priorProb(yTrain, *label); + double post = prior * likelihood; + postProbs.push_back(post); + } + double sumpropProbs = 0.0; + int max = 0; + for (int i = 0; i < postProbs.size(); i++) + { // Iterating over all labels computed values to check if any negative + // value + // exists, if so then direclty values are compared else we will compute + // value/sum + if (postProbs[i] < 0) + { + sumpropProbs = 1; + break; + } + sumpropProbs += postProbs[i]; + } + for (int i = 0; i < postProbs.size(); i++) + { + postProbs[i] /= sumpropProbs; + if (postProbs[i] > postProbs[max]) + { + max = i; + } + } + return max; +}; \ No newline at end of file diff --git a/src/slowmokit/methods/neighbors/multinomial_nb/multinomial_nb.hpp b/src/slowmokit/methods/neighbors/multinomial_nb/multinomial_nb.hpp new file mode 100644 index 0000000..b69d9d0 --- /dev/null 
/**
 * @file methods/neighbors/multinomial_nb/multinomial_nb.hpp
 *
 * The header file including the Multinomial Naive Bayes algorithm
 */

// The guard name must differ from SLOWMOKIT_MULTINOMIAL_NB_HPP, which the
// wrapper header methods/neighbors/multinomial_nb.hpp defines right before
// including this file; sharing the name would skip the class below entirely.
#ifndef SLOWMOKIT_MULTINOMIAL_NB_MULTINOMIAL_NB_HPP
#define SLOWMOKIT_MULTINOMIAL_NB_MULTINOMIAL_NB_HPP

template<class T>
class MultinomialNB
{
 private:
  /**
   * @brief Prior probability of multinomial naive bayes
   *
   * @param yTrain All y training values
   * @param label output we are looking in prior probability
   * @return double prior Probability
   */
  double priorProb(std::vector<int> yTrain, int label);
  /**
   * @brief Conditional Probability
   *
   * @param xTrain all x training values
   * @param yTrain all y training int values
   * @param featureCol column for which we are computing conditional prob
   * @param featureVal value in that column
   * @param label y value
   * @return double conditional probability
   */
  double conditionalProb(std::vector<std::vector<T>> xTrain,
                         std::vector<int> yTrain, int featureCol,
                         int featureVal, int label);

 public:
  /**
   * @brief fit_predict function
   *
   * @param xTrain all x training values
   * @param yTrain all y training values int
   * @param classes classes of y
   * @param xTest testing values
   * @return int output predicted value
   */
  int fit_predict(std::vector<std::vector<T>> xTrain, std::vector<int> yTrain,
                  std::vector<int> classes, std::vector<T> xTest);
};

#endif // SLOWMOKIT_MULTINOMIAL_NB_MULTINOMIAL_NB_HPP

/**
 * @file methods/preprocessing/csv_reader.hpp
 *
 * Easy include To Return the csv read data
 */

#ifndef SLOWMOKIT_CSV_READER_HPP
#define SLOWMOKIT_CSV_READER_HPP

// T is not used by the reader; it defaults to std::string so that
// readCsv(name, index) can be called without explicit template arguments.
template<class T = std::string>
std::vector<std::vector<std::string>> readCsv(std::string name, bool index);

#endif // SLOWMOKIT_CSV_READER_HPP

/**
 * @file methods/preprocessing/csv_reader.cpp
 *
 * Implementation of Csv reader
 */

namespace csv_reader_detail
{
/**
 * @brief True when `field` is a plain decimal number: an optional leading
 * '+' or '-', at least one digit and at most one '.'. Exponents are not
 * recognised.
 */
inline bool looksNumeric(const std::string &field)
{
  std::string::size_type pos = 0;
  if (pos < field.size() && (field[pos] == '+' || field[pos] == '-'))
  {
    ++pos;
  }
  int digits = 0, dots = 0;
  for (; pos < field.size(); ++pos)
  {
    const char ch = field[pos];
    if (ch >= '0' && ch <= '9')
    {
      ++digits;
    }
    else if (ch == '.')
    {
      ++dots;
    }
    else
    {
      return false;
    }
  }
  return digits > 0 && dots <= 1;
}

/**
 * @brief Splits one line on ',' (no quoting support).
 *
 * A single trailing '\r' (CRLF files) is dropped. The text after the last
 * comma is kept as a field when it is non-empty, so "a,b" and "a,b," both
 * yield {"a", "b"}; an empty line yields no fields.
 */
inline std::vector<std::string> splitLine(const std::string &line)
{
  std::string::size_type length = line.size();
  if (length > 0 && line[length - 1] == '\r')
  {
    --length;
  }

  std::vector<std::string> fields;
  std::string current;
  for (std::string::size_type i = 0; i < length; ++i)
  {
    if (line[i] == ',')
    {
      fields.push_back(current);
      current.clear();
    }
    else
    {
      current += line[i];
    }
  }
  if (!current.empty())
  {
    fields.push_back(current); // last field of a line without trailing comma
  }
  return fields;
}
} // namespace csv_reader_detail

/**
 * @brief Reads a comma separated file into rows of strings
 *
 * Row 0 of the result describes the column types: "1" when the column looks
 * like a double, "0" otherwise. The types are taken from a single sample
 * line: the second line of the file when `index` is true (the first line is
 * then treated as a header), the first line otherwise. Every line of the
 * file, the header included, follows as one row of field strings.
 *
 * @param name path of the csv file
 * @param index true when the first line is a header that must not be used
 *              for type detection
 * @return vector of rows; when the file cannot be opened (or has no sample
 *         line) the result holds only the empty type row
 */
template<class T>
std::vector<std::vector<std::string>> readCsv(std::string name, bool index)
{
  std::vector<std::vector<std::string>> csvFile(1); // slot 0: column types

  std::ifstream file(name);
  const int sampleLine = index ? 1 : 0;
  int lineNumber = 0;
  std::string line;
  while (std::getline(file, line))
  {
    const std::vector<std::string> fields = csv_reader_detail::splitLine(line);
    if (lineNumber == sampleLine)
    {
      for (const std::string &field : fields)
      {
        csvFile[0].push_back(csv_reader_detail::looksNumeric(field) ? "1"
                                                                    : "0");
      }
    }
    csvFile.push_back(fields);
    ++lineNumber;
  }

  return csvFile;
}