From 06c3650b1aaa0a6fe9b96476b77dc115e40c7291 Mon Sep 17 00:00:00 2001 From: SARTHAK Date: Mon, 6 Feb 2023 23:15:21 +0530 Subject: [PATCH 1/8] Errors of decision tree removed --- .../neighbors/decision_tree/decision_tree.cpp | 36 +++++++++---------- .../neighbors/decision_tree/decision_tree.hpp | 2 +- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/slowmokit/methods/neighbors/decision_tree/decision_tree.cpp b/src/slowmokit/methods/neighbors/decision_tree/decision_tree.cpp index 38a388c..38efb97 100644 --- a/src/slowmokit/methods/neighbors/decision_tree/decision_tree.cpp +++ b/src/slowmokit/methods/neighbors/decision_tree/decision_tree.cpp @@ -7,7 +7,7 @@ #include "decision_tree.hpp" template - double entropy(std::vector col){ + double DecisionTree::entropy(std::vector col){ std::set unique; for(int i=0;i } template - std::vector>> divideData(std::vector> xData,int fkey,int fval){ + std::vector>> DecisionTree::divideData(std::vector> xData,int fkey,int fval){ std::vector> xLeft; std::vector> xRight; for(int i=0;i } template - double infoGain(std::vector> xData,int fkey,int fval){ + double DecisionTree::infoGain(std::vector> xData,int fkey,int fval){ // Splitting data std::vector> left,right; std::vector>> temp = divideData(xData,fkey,fval); @@ -68,30 +68,30 @@ template for(int i=0;i DecisionTree::DecisionTree(int maxD,int minSamplesL,int maxF){ - maxDepth = maxD; - minSamplesLeaf = minSamplesL; - max Features = maxF; + this->maxDepth = maxD; + this->minSamplesLeaf = minSamplesL; + this->maxFeatures = maxF; } template - void train(std::vector> xData){ + void DecisionTree::train(std::vector> xData){ int max = xData[0].size()-2; int range = max+1; features.clear(); - while(features.size()!=max Features){ + while(features.size()!=maxFeatures){ features.insert(rand()%range); } - std::vector infoGains(max Features); - std::vector mean(max Features); + std::vector infoGains(maxFeatures); + std::vector mean(maxFeatures); int cnt=0; for(int i=0;i for(int 
j=0;jinfoGains[max]){ max=i; } @@ -148,10 +148,10 @@ template return; } - left = new DecisionTree(maxDepth,minSamplesLeaf,max Features); + left = new DecisionTree(maxDepth,minSamplesLeaf,maxFeatures); left->depth = depth+1; left->train(dataLeft); - right = new DecisionTree(maxDepth,minSamplesLeaf,max Features); + right = new DecisionTree(maxDepth,minSamplesLeaf,maxFeatures); right->depth = depth+1; right->train(dataRight); @@ -170,7 +170,7 @@ template } template - int predict(std::vector test){ + int DecisionTree::predict(std::vector test){ if(test[fkey]>fval){ if(right==NULL){ return target; diff --git a/src/slowmokit/methods/neighbors/decision_tree/decision_tree.hpp b/src/slowmokit/methods/neighbors/decision_tree/decision_tree.hpp index eb293b0..1816503 100644 --- a/src/slowmokit/methods/neighbors/decision_tree/decision_tree.hpp +++ b/src/slowmokit/methods/neighbors/decision_tree/decision_tree.hpp @@ -20,7 +20,7 @@ class DecisionTree{ int depth=0; int minSamplesLeaf=1; int target=-1; - int max Features; + int maxFeatures; std::set features; double entropy(std::vector col); std::vector>> divideData(std::vector> x_data,int fkey,int fval); From 49b04ebf4061c0c2060786bee64c6f1baa840ce0 Mon Sep 17 00:00:00 2001 From: SARTHAK Date: Tue, 7 Feb 2023 09:11:29 +0530 Subject: [PATCH 2/8] Changes in dt --- .../neighbors/decision_tree/decision_tree.hpp | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/slowmokit/methods/neighbors/decision_tree/decision_tree.hpp b/src/slowmokit/methods/neighbors/decision_tree/decision_tree.hpp index 1816503..359cdd5 100644 --- a/src/slowmokit/methods/neighbors/decision_tree/decision_tree.hpp +++ b/src/slowmokit/methods/neighbors/decision_tree/decision_tree.hpp @@ -22,13 +22,53 @@ class DecisionTree{ int target=-1; int maxFeatures; std::set features; + /** + * @brief calculate entropy + * + * @param col + * @return double entropy value + */ double entropy(std::vector col); + /** + * @brief divides the data according to 
decided feature + * + * @param x_data training data + * @param fkey column on basis of which data is to be divided + * @param fval value on basis of which data is to be divided + * @return std::vector>> + */ std::vector>> divideData(std::vector> x_data,int fkey,int fval); + /** + * @brief information gain + * + * @param x_data training data + * @param fkey column to check highest info gain + * @param fval value to check highest info gain + * @return double value of information gain + */ double infoGain(std::vector> x_data,int fkey,int fval); public: + /** + * @brief Construct a new Decision Tree object + * + * @param max_d maximum depth + * @param minSamplesL minimum samples to be in leaf node by default 1 + * @param max_f number of features to be considered for splitting at each node + */ DecisionTree(int max_d,int minSamplesL,int max_f); + /** + * @brief prepare decision tree + * + * @param x_data all training values + */ void train(std::vector> x_data); + /** + * @brief predict the testing values + * + * @param test testing values + * @return int class to which test value belongs + */ int predict(std::vector test); }; From 5ced617c952423a414bca2303a0ff91f4e8903fe Mon Sep 17 00:00:00 2001 From: SARTHAK Date: Tue, 7 Feb 2023 17:10:32 +0530 Subject: [PATCH 3/8] changes --- .../methods/neighbors/decision_tree/decision_tree.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/slowmokit/methods/neighbors/decision_tree/decision_tree.cpp b/src/slowmokit/methods/neighbors/decision_tree/decision_tree.cpp index 38efb97..2245d08 100644 --- a/src/slowmokit/methods/neighbors/decision_tree/decision_tree.cpp +++ b/src/slowmokit/methods/neighbors/decision_tree/decision_tree.cpp @@ -46,7 +46,7 @@ template } template - double DecisionTree::infoGain(std::vector> xData,int fkey,int fval){ + double DecisionTree::infoGain(std::vector> xData,int fkey,int fval){ // fkey refers to the feature/column index; fval refers to the mean value/splitting value // Splitting data std::vector>
left,right; std::vector>> temp = divideData(xData,fkey,fval); @@ -76,7 +76,7 @@ template DecisionTree::DecisionTree(int maxD,int minSamplesL,int maxF){ this->maxDepth = maxD; this->minSamplesLeaf = minSamplesL; - this->maxFeatures = maxF; + this->maxFeatures = maxF; // maxFeatures is the number of features to be considered to do splitting at each node } template @@ -84,10 +84,10 @@ template int max = xData[0].size()-2; int range = max+1; - features.clear(); + features.clear(); // clearing features set to select features to be considered while(features.size()!=maxFeatures){ - features.insert(rand()%range); + features.insert(rand()%range); // Inserting features } std::vector infoGains(maxFeatures); From 64e89626db7a7f644c0b2aa93e55faf44155730e Mon Sep 17 00:00:00 2001 From: SARTHAK Date: Tue, 14 Feb 2023 22:47:18 +0530 Subject: [PATCH 4/8] cmake texts --- CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c458659..36e1b43 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,7 +27,6 @@ add_subdirectory(src/slowmokit) add_library(slowmokit src/slowmokit.hpp -<<<<<<< HEAD # base files src/slowmokit/base.hpp @@ -88,6 +87,4 @@ add_library(slowmokit src/slowmokit/methods/neighbors/decision_tree.hpp src/slowmokit/methods/neighbors/decision_tree/decision_tree.hpp src/slowmokit/methods/neighbors/decision_tree/decision_tree.cpp) -======= src/slowmokit.cpp) ->>>>>>> a4af99b7c4f0a059aa30cc5d8d5fc22572e9db64 From 7c63ffbd682c62f350f6ab9045e09b2ad358648c Mon Sep 17 00:00:00 2001 From: SARTHAK Date: Tue, 14 Feb 2023 23:01:31 +0530 Subject: [PATCH 5/8] cmake texts --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 36e1b43..8a97849 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,7 @@ add_subdirectory(src/slowmokit) add_library(slowmokit src/slowmokit.hpp + src/slowmokit.cpp # base files src/slowmokit/base.hpp From f49b5b47deedfea038352f53b2949fc6d25c0b09 Mon 
Sep 17 00:00:00 2001 From: SARTHAK Date: Wed, 15 Feb 2023 22:40:37 +0530 Subject: [PATCH 6/8] cmake errors last line --- CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8a97849..b818f1f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,5 +87,4 @@ add_library(slowmokit src/slowmokit/methods/metrics/silhouette_score.hpp src/slowmokit/methods/neighbors/decision_tree.hpp src/slowmokit/methods/neighbors/decision_tree/decision_tree.hpp - src/slowmokit/methods/neighbors/decision_tree/decision_tree.cpp) - src/slowmokit.cpp) + src/slowmokit/methods/neighbors/decision_tree/decision_tree.cpp) \ No newline at end of file From 398432f0aaf0ace07a08b75e2bca7181768f49e3 Mon Sep 17 00:00:00 2001 From: SARTHAK Date: Wed, 15 Feb 2023 22:50:39 +0530 Subject: [PATCH 7/8] matrix.cpp missing in ducks. Done manually --- src/slowmokit/ducks/matrix/matrix.cpp | 180 ++++++++++++++++++++++++++ 1 file changed, 180 insertions(+) create mode 100644 src/slowmokit/ducks/matrix/matrix.cpp diff --git a/src/slowmokit/ducks/matrix/matrix.cpp b/src/slowmokit/ducks/matrix/matrix.cpp new file mode 100644 index 0000000..a3311c1 --- /dev/null +++ b/src/slowmokit/ducks/matrix/matrix.cpp @@ -0,0 +1,180 @@ +/** + * @file ducks/matrix/matrix.cpp + * + * Implementation of the matrix main program + */ + +#include "matrix.hpp" + +template Matrix::Matrix(int n, int m) : n(n), m(m) +{ + if (n <= 0 or m <= 0) + throw std::out_of_range("\nCannot have non-positive dimension."); + + mat.resize(n, std::vector(m, T(0))); +} + +template Matrix::Matrix(const std::vector> in) +{ + if (std::size(in) <= 0 or std::size(in[0]) <= 0) + throw std::out_of_range("\nCannot have non-positive dimension."); + + n = std::size(in); + m = std::size(in[0]); + mat.resize(n, std::vector(m)); + + for (int i = 0; i < n; i++) + { + if (std::size(in[i]) != m) + throw std::invalid_argument("\nAll rows must have same dimension"); + + for (int j = 0; j < m; j++) + 
this->mat[i][j] = in[i][j]; + } +} + +template Matrix &Matrix::operator*=(const T &scalar) +{ + for (int i = 0; i < n; i++) + { + for (int j = 0; j < m; j++) + mat[i][j] *= scalar; + } + + return *this; +} + +template Matrix &Matrix::operator*=(const Matrix &rhs) +{ + auto [n2, m2] = rhs.getShape(); + + if (n2 <= 0 or m2 <= 0) + throw std::out_of_range("\nCannot have non-positive dimension."); + + if (m != n2) + throw std::invalid_argument("\nColumn dimension of matrix-1 must be equal " + "to row dimension of matrix-2"); + + auto lhs = this->mat; + std::vector res(n, std::vector(m2, T(0))); + + for (int i = 0; i < n; i++) + { + for (int j = 0; j < m2; j++) + { + for (int k = 0; k < m; k++) + res[i][j] += lhs[i][k] * rhs[k][j]; + } + } + + this->mat = res; + updateShape(); + + return *this; +} + +template Matrix &Matrix::operator+=(const Matrix &rhs) +{ + auto [n2, m2] = rhs.getShape(); + + if (n2 <= 0 or m2 <= 0) + throw std::out_of_range("\nCannot have non-positive dimension."); + + if (n != n2 or m != m2) + throw std::invalid_argument( + "\nBoth Dimension of matrix-1 must be equal to that of matrix-2"); + + for (int i = 0; i < n; i++) + { + for (int j = 0; j < m; j++) + this->mat[i][j] += rhs[i][j]; + } + + return *this; +} + +template Matrix &Matrix::operator-=(const Matrix &rhs) +{ + auto [n2, m2] = rhs.getShape(); + + if (n2 <= 0 or m2 <= 0) + throw std::out_of_range("\nCannot have non-positive dimension."); + + if (n != n2 or m != m2) + throw std::invalid_argument( + "\nBoth Dimension of matrix-1 must be equal to that of matrix-2"); + + for (int i = 0; i < n; i++) + { + for (int j = 0; j < m; j++) + this->mat[i][j] -= rhs[i][j]; + } + + return *this; +} + +template std::array Matrix::getShape() const +{ + return {this->n, this->m}; +} + +template T &Matrix::operator()(int i, int j) +{ + if (i >= n or i < 0) + throw std::out_of_range("\ni should be between 0 and " + + std::to_string(n - 1) + " inclusive"); + if (j >= m or j < 0) + throw std::out_of_range("\nj 
should be between 0 and " + + std::to_string(m - 1) + " inclusive"); + + return mat[i][j]; +} + +template const std::vector &Matrix::operator[](int i) const +{ + if (i >= n or i < 0) + throw std::out_of_range("\ni should be between 0 and " + + std::to_string(n - 1) + " inclusive"); + + return this->mat[i]; +} + +template +std::ostream &operator<<(std::ostream &os, const Matrix &matrix) +{ + int n = std::size(matrix); + int m = std::size(matrix[0]); + + for (int i = 0; i < n; i++) + { + for (int j = 0; j < m; j++) + { + if (j > 0) + os << " "; + os << matrix[i][j]; + } + + if (i != n - 1) + os << "\n"; + } + + return os; +} + +template Matrix operator*(Matrix lhs, const Matrix &rhs) +{ + lhs *= rhs; + return lhs; +} + +template Matrix operator+(Matrix lhs, const Matrix &rhs) +{ + lhs += rhs; + return lhs; +} + +template Matrix operator-(Matrix lhs, const Matrix &rhs) +{ + lhs -= rhs; + return lhs; +} \ No newline at end of file From 2dd60d54516e40d4033f39f9e600ef02912f6aab Mon Sep 17 00:00:00 2001 From: SARTHAK Date: Fri, 17 Feb 2023 19:16:23 +0530 Subject: [PATCH 8/8] cmake till slowmokit.cpp --- CMakeLists.txt | 62 +------------------------------------------------- 1 file changed, 1 insertion(+), 61 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8bba666..0151956 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,64 +25,4 @@ add_subdirectory(src/slowmokit) add_library(slowmokit src/slowmokit.hpp - src/slowmokit.cpp - - # base files - src/slowmokit/base.hpp - src/slowmokit/core.hpp - src/slowmokit/prereqs.hpp - - # ducks - src/slowmokit/ducks/ducks.hpp - src/slowmokit/ducks/io/io.hpp - src/slowmokit/ducks/io/io.cpp - src/slowmokit/ducks/matrix/matrix.hpp - src/slowmokit/ducks/matrix/matrix.cpp - - # methods - src/slowmokit/models/model.hpp - src/slowmokit/methods/neighbors/knn/knn.cpp - src/slowmokit/methods/neighbors/knn/knn.hpp - src/slowmokit/methods/linear_model/linear_regression.hpp - 
src/slowmokit/methods/linear_model/linear_regression/linear_regression.hpp - src/slowmokit/methods/linear_model/linear_regression/linear_regression.cpp - src/slowmokit/methods/cluster/kMeans/kMeans.cpp - src/slowmokit/methods/cluster/kMeans/kMeans.hpp - src/slowmokit/methods/cluster/kMeans.hpp - src/slowmokit/methods/metrics/accuracy.hpp - src/slowmokit/methods/metrics/accuracy.cpp - src/slowmokit/methods/preprocessing/normalization.hpp - src/slowmokit/methods/preprocessing/normalization.cpp - src/slowmokit/methods/preprocessing/standardization.hpp - src/slowmokit/methods/preprocessing/standardization.cpp - src/slowmokit/methods/neighbors/bernoulli_nb.hpp - src/slowmokit/methods/neighbors/bernoulli_nb/bernoulli_nb.hpp - src/slowmokit/methods/neighbors/bernoulli_nb/bernoulli_nb.cpp - src/slowmokit/methods/linear_model/logistic_regression.hpp - src/slowmokit/methods/linear_model/logistic_regression/logistic_regression.hpp - src/slowmokit/methods/linear_model/logistic_regression/logistic_regression.cpp - src/slowmokit/methods/preprocessing/label_encoder.cpp - src/slowmokit/methods/preprocessing/label_encoder.hpp - src/slowmokit/methods/metrics/classification_report.hpp - src/slowmokit/methods/metrics/classification_report.cpp - src/slowmokit/methods/neighbors/gaussian_nb.hpp - src/slowmokit/methods/neighbors/gaussian_nb/gaussian_nb.cpp - src/slowmokit/methods/neighbors/gaussian_nb/gaussian_nb.hpp - src/slowmokit/methods/neighbors/knn.hpp - src/slowmokit/methods/neighbors/knn/knn.hpp - src/slowmokit/methods/neighbors/knn/knn.cpp - src/slowmokit/methods/preprocessing/one_hot_encoder.hpp - src/slowmokit/methods/preprocessing/one_hot_encoder.cpp - src/slowmokit/methods/metrics/precision.hpp - src/slowmokit/methods/metrics/precision.cpp - src/slowmokit/methods/metrics/recall.hpp - src/slowmokit/methods/metrics/recall.cpp - src/slowmokit/methods/metrics/f1score.hpp - src/slowmokit/methods/metrics/f1score.cpp - src/slowmokit/methods/metrics/mean_squared_error.hpp - 
src/slowmokit/methods/metrics/mean_squared_error.cpp - src/slowmokit/methods/metrics/silhouette_score.cpp - src/slowmokit/methods/metrics/silhouette_score.hpp - src/slowmokit/methods/neighbors/decision_tree.hpp - src/slowmokit/methods/neighbors/decision_tree/decision_tree.hpp - src/slowmokit/methods/neighbors/decision_tree/decision_tree.cpp) \ No newline at end of file + src/slowmokit.cpp) \ No newline at end of file