Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions docs/methods/preprocessing/csv_reader.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Csv Reader

Reads the provided CSV file and determines, for each column, whether its values should be treated as `double` or `string`.

Once the column types have been determined, the whole file is returned as a vector of string rows whose first row holds the detected data type of each column. The string values can then be converted to the appropriate data type at runtime whenever required.

## Parameters

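| Name | Definition | Type |
|--------------|--------------------------------------------|--------------|
| name | Path of the CSV file which needs to be read | `string` |
| index | Whether the first line is a header line that is skipped when detecting column types | `bool` |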


## Methods

| Name | Definition | Return value |
|----------------------------------------|-----------------------------------------------|---------------|
| `readCsv(string name, bool index)` | Reads the CSV file and flags each column as double or string | `vector<vector<string>>` |

## Example

```
std::vector<std::vector<std::string>>reader = readCsv("../../lol.csv",1);
```
23 changes: 23 additions & 0 deletions examples/methods/neighbors/multinomial_nb.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// #include "../../src/slowmokit/methods/neighbors/multinomial_nb.hpp"
// #include "../../src/slowmokit/core.hpp"

// signed main(){
// std::vector<std::vector<int>> xTrain{
// {0,0,0},
// {1,0,0},
// {1,0,1},
// {1,0,0},
// {0,1,1},
// {1,0,0},
// {1,1,0},
// {0,1,1},
// {1,0,1},
// {1,1,1}
// };
// std::vector<int> yTrain{2,2,2,2,1,2,2,1,2,0};
// std::vector<int> xTest{0,0,0};
// std::vector<int> classes{0,1,2};
// MultinomialNB<int> multinomial;
// std::cout<<multinomial.fit_predict(xTrain,yTrain,classes,xTest);
// return 0;
// }
6 changes: 6 additions & 0 deletions examples/methods/preprocessing/csv_reader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// #include "../../src/slowmokit/methods/preprocessing/csv_reader.hpp"

// signed main(){
// std::vector<std::vector<std::string>>reader = readCsv("../../lol.csv",1);
// return 0;
// }
1 change: 1 addition & 0 deletions src/slowmokit.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,6 @@
#include "slowmokit/methods/neighbors/bernoulli_nb.hpp"
#include "slowmokit/methods/neighbors/gaussian_nb.hpp"
#include "slowmokit/methods/neighbors/knn.hpp"
#include "slowmokit/methods/neighbors/multinomial_nb.hpp"

#endif // SLOWMOKIT_HPP
13 changes: 13 additions & 0 deletions src/slowmokit/methods/neighbors/multinomial_nb.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/**
* @file methods/neighbors/multinomial_nb.hpp
*
* Easy include for Multinomial Nb algorithm
*/


#ifndef SLOWMOKIT_MULTINOMIAL_NB_HPP
#define SLOWMOKIT_MULTINOMIAL_NB_HPP

#include "multinomial_nb/multinomial_nb.hpp"

#endif // SLOWMOKIT_MULTINOMIAL_NB_HPP
98 changes: 98 additions & 0 deletions src/slowmokit/methods/neighbors/multinomial_nb/multinomial_nb.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/**
* @file methods/neighbors/multinomial_nb/multinomial_nb.cpp
*
* Implementation of the Multinomial Naive Bayes main program
*/
#include "multinomial_nb.hpp"

/**
 * Prior probability P(y = label): the share of training targets that are
 * equal to `label`.
 *
 * The count of matching entries is divided by the total number of entries in
 * `yTrain` using floating-point division.
 */
template <class T>
double MultinomialNB<T>::priorProb(std::vector<T> yTrain, int label)
{
  int matches = 0;
  for (const T &observed : yTrain)
  {
    if (observed == label)
    {
      ++matches;
    }
  }

  const double total = double(yTrain.size());
  return matches / total;
}

/**
 * Smoothed conditional probability P(x[featureCol] = featureVal | y = label).
 *
 * Add-one (Laplace) smoothing is applied:
 *
 *   (matches + 1) / (rows with this label + distinct values of the feature)
 *
 * where `matches` is the number of rows carrying `label` whose feature equals
 * `featureVal`. The distinct-value count is taken over the whole column, not
 * only over rows of the requested class, so that the estimates for one class
 * sum to 1 and stay comparable between classes. Distinct values are tracked as
 * `T`, so non-integer feature values are not collapsed by truncation.
 *
 * Only rows that exist in both `xTrain` and `yTrain` are inspected.
 *
 * @param xTrain training feature rows
 * @param yTrain training targets, one per row of xTrain
 * @param featureCol index of the feature column being evaluated
 * @param featureVal value of that feature in the sample being classified
 * @param label class the probability is conditioned on
 * @return double smoothed conditional probability
 */
template <class T>
double MultinomialNB<T>::conditionalProb(std::vector<std::vector<T>> xTrain,
                                         std::vector<T> yTrain, int featureCol,
                                         int featureVal, int label)
{
  int classCount = 0; // rows whose target equals `label`
  int matchCount = 0; // ... of which the feature also equals `featureVal`
  std::set<T> distinctValues; // every value the feature takes in the data

  const std::size_t rows =
      xTrain.size() < yTrain.size() ? xTrain.size() : yTrain.size();
  for (std::size_t row = 0; row < rows; ++row)
  {
    const T &value = xTrain[row][featureCol];
    distinctValues.insert(value);

    if (yTrain[row] == label)
    {
      ++classCount;
      if (value == featureVal)
      {
        ++matchCount;
      }
    }
  }

  // The +1 keeps a value that was never observed with this class from
  // producing a zero probability; the denominator grows by the number of
  // distinct feature values to keep the distribution normalised.
  return (matchCount + 1) / double(classCount + distinctValues.size());
}

/**
 * Scores every candidate class for one test sample and returns the position
 * of the best one.
 *
 * For each entry of `classes` the score is computed in log space:
 *
 *   log10(P(y = class)) + sum over features of log10(P(x_i = xTest[i] | class))
 *
 * Working with a sum of logarithms avoids the underflow of multiplying many
 * small probabilities, and adding log10(prior) keeps the ranking identical to
 * that of prior * likelihood. A class that never occurs in `yTrain` has a
 * prior of 0 and therefore a score of -infinity.
 *
 * Only features present in both the first training row and `xTest` are used.
 *
 * @param xTrain training feature rows
 * @param yTrain training targets, one per row of xTrain
 * @param classes candidate class labels
 * @param xTest feature values of the sample to classify
 * @return int index into `classes` of the highest-scoring class; the first
 *         such index on ties, and 0 when `classes` is empty
 */
template <class T>
int MultinomialNB<T>::fit_predict(std::vector<std::vector<T>> xTrain,
                                  std::vector<T> yTrain, std::vector<T> classes,
                                  std::vector<T> xTest)
{
  // Never index past the end of either the training rows or the test sample.
  std::size_t featureCount = xTrain.empty() ? 0 : xTrain[0].size();
  if (xTest.size() < featureCount)
  {
    featureCount = xTest.size();
  }

  int best = 0;
  double bestScore = 0.0;
  for (std::size_t c = 0; c < classes.size(); ++c)
  {
    // log10 of the prior; the per-feature log-likelihoods are added below.
    double score = log10(priorProb(yTrain, classes[c]));
    for (std::size_t f = 0; f < featureCount; ++f)
    {
      score += log10(conditionalProb(xTrain, yTrain, static_cast<int>(f),
                                     xTest[f], classes[c]));
    }

    // Strict comparison keeps the earliest class when scores are equal.
    if (c == 0 || score > bestScore)
    {
      best = static_cast<int>(c);
      bestScore = score;
    }
  }
  return best;
}
52 changes: 52 additions & 0 deletions src/slowmokit/methods/neighbors/multinomial_nb/multinomial_nb.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/**
* @file methods/neighbors/multinomial_nb/multinomial_nb.hpp
*
* The header file including the Multinomial Naive Bayes algorithm
*/

// The guard name must differ from the one used by the easy-include header
// methods/neighbors/multinomial_nb.hpp (SLOWMOKIT_MULTINOMIAL_NB_HPP);
// sharing it would make that header skip this whole file.
#ifndef SLOWMOKIT_MULTINOMIAL_NB_MULTINOMIAL_NB_HPP
#define SLOWMOKIT_MULTINOMIAL_NB_MULTINOMIAL_NB_HPP

#include "../../../core.hpp"

/**
 * @brief Naive Bayes classifier over discrete feature values.
 *
 * NOTE(review): the member functions are defined in multinomial_nb.cpp. As
 * members of a class template they have to be visible wherever the class is
 * instantiated - confirm how the build makes them available.
 *
 * @tparam T type of the feature values and class labels
 */
template <class T>
class MultinomialNB
{
 private:
  /**
   * @brief Prior probability of a class
   *
   * @param yTrain all y training values
   * @param label class whose prior probability is wanted
   * @return double fraction of entries in yTrain equal to label
   */
  double priorProb(std::vector<T> yTrain, int label);

  /**
   * @brief Conditional probability of a feature value given a class, with
   *        add-one smoothing
   *
   * @param xTrain all x training values
   * @param yTrain all y training values
   * @param featureCol column for which the conditional probability is computed
   * @param featureVal value of that column in the sample being classified
   * @param label class the probability is conditioned on
   * @return double conditional probability
   */
  double conditionalProb(std::vector<std::vector<T>> xTrain,
                         std::vector<T> yTrain, int featureCol,
                         int featureVal, int label);

 public:
  /**
   * @brief Fits on the training data and classifies a single test sample
   *
   * @param xTrain all x training values
   * @param yTrain all y training values
   * @param classes candidate classes of y
   * @param xTest feature values of the sample to classify
   * @return int index into classes of the predicted class
   */
  int fit_predict(std::vector<std::vector<T>> xTrain, std::vector<T> yTrain,
                  std::vector<T> classes, std::vector<T> xTest);
};

#endif // SLOWMOKIT_MULTINOMIAL_NB_MULTINOMIAL_NB_HPP
83 changes: 83 additions & 0 deletions src/slowmokit/methods/preprocessing/csv_reader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/**
* @file methods/preprocessing/csv_reader.cpp
*
* Implementation of Csv reader
*/

#include "csv_reader.hpp"

/**
 * @brief Reads a comma-separated file into rows of string fields, preceded by
 *        a row of per-column type flags.
 *
 * The first row of the result holds one flag per column of the sampled line:
 * "1" when the field looks like a decimal number (optional sign, digits, at
 * most one '.', at least one digit) and "0" otherwise. The sampled line is
 * the second line of the file when `index` is true (the first line is taken
 * to be a header) and the first line otherwise.
 *
 * Every line of the file, including a header line, follows as one row of
 * fields split on ','. The field after the last comma is kept, a trailing
 * '\r' from CRLF line endings is removed, and an empty line yields an empty
 * row. Quoted fields are not interpreted.
 *
 * If the file cannot be opened or has no line to sample, the result consists
 * of a single empty flag row followed by whatever lines were read.
 *
 * @tparam T unused by the implementation
 * @param name path of the CSV file
 * @param index true when the first line is a header to skip for type detection
 * @return flag row followed by one row of string fields per line of the file
 */
template <class T>
std::vector<std::vector<std::string>> readCsv(std::string name, bool index)
{
  // Splits one line into its comma-separated fields.
  const auto splitLine = [](std::string line) {
    std::vector<std::string> fields;
    if (!line.empty() && line.back() == '\r')
    {
      line.pop_back(); // tolerate CRLF line endings
    }
    if (line.empty())
    {
      return fields;
    }

    std::string current;
    for (const char ch : line)
    {
      if (ch == ',')
      {
        fields.push_back(current);
        current.clear();
      }
      else
      {
        current += ch;
      }
    }
    fields.push_back(current); // the last field has no trailing comma
    return fields;
  };

  // True for an optionally signed decimal number with at least one digit.
  const auto looksNumeric = [](const std::string &field) {
    std::size_t pos = 0;
    if (!field.empty() && (field[0] == '+' || field[0] == '-'))
    {
      pos = 1;
    }

    bool sawDigit = false;
    bool sawDot = false;
    for (; pos < field.size(); ++pos)
    {
      const char ch = field[pos];
      if (ch >= '0' && ch <= '9')
      {
        sawDigit = true;
      }
      else if (ch == '.' && !sawDot)
      {
        sawDot = true;
      }
      else
      {
        return false;
      }
    }
    return sawDigit;
  };

  // Read the file once; the lines serve both type detection and the output.
  std::ifstream file(name);
  std::vector<std::string> lines;
  std::string line;
  while (std::getline(file, line))
  {
    lines.push_back(line);
  }

  const std::size_t sampleRow = index ? 1 : 0;
  std::vector<std::string> typeFlags;
  if (sampleRow < lines.size())
  {
    for (const std::string &field : splitLine(lines[sampleRow]))
    {
      typeFlags.push_back(looksNumeric(field) ? "1" : "0");
    }
  }

  // Storing all values as strings with their data type flags on top of them.
  std::vector<std::vector<std::string>> csvFile;
  csvFile.reserve(lines.size() + 1);
  csvFile.push_back(typeFlags);
  for (const std::string &row : lines)
  {
    csvFile.push_back(splitLine(row));
  }

  return csvFile;
}
15 changes: 15 additions & 0 deletions src/slowmokit/methods/preprocessing/csv_reader.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/**
* @file methods/preprocessing/csv_reader.hpp
*
* Easy include To Return the csv read data
*/

#ifndef SLOWMOKIT_CSV_READER_HPP
#define SLOWMOKIT_CSV_READER_HPP

#include "../../core.hpp"

/**
 * @brief Reads a CSV file into rows of string fields, preceded by a row of
 *        per-column type flags.
 *
 * The first row of the result holds one flag per detected column: "1" when
 * the sampled field looks numeric and "0" otherwise. The remaining rows hold
 * the lines of the file split on ','.
 *
 * @tparam T not used by the signature; defaulted so the function can be
 *         called without explicit template arguments, e.g. readCsv(path, true)
 * @param name path of the CSV file
 * @param index true when the first line is a header that must be skipped
 *        while detecting the column types
 * @return flag row followed by the rows of the file as strings
 */
template <class T = void>
std::vector<std::vector<std::string>> readCsv(std::string name, bool index);

#endif // SLOWMOKIT_CSV_READER_HPP