diff --git a/docs/methods/linear_model/lasso_regularization.md b/docs/methods/linear_model/lasso_regularization.md new file mode 100644 index 0000000..72a19ad --- /dev/null +++ b/docs/methods/linear_model/lasso_regularization.md @@ -0,0 +1,50 @@ +# Lasso Regularization + +It stands for Least Absolute Shrinkage and Selection Operator. It adds L1 as the penalty. + +L1 is the sum of the absolute values of the beta coefficients. + +## Parameters + +| Name | Definition | Defaults | Type | +| ------------- | ------------------------------------------------------------------------------------------- | -------- | ---------| +| lambda | Constant that multiplies the L1 term, controlling regularization strength | 0.01 | `double` | + + +## Attributes + +| Name | Definition | Shape | +| ------------ | --------------------------------------------------------- | ---------- | +| Coefficients | Estimated coefficients for the lasso regularization | n_features | + +## Methods + +| Name | Definition | Return value | +| ------------------------------- | ----------------------------------------------------- | ----------------- | +| `lossFunction(vector> x, vector y)` | Regularized loss function | `double` | +| `gradient(vector> x,vector y)` | Find gradient | `vector` | +| `gradientDescent(vector> x, vector y, double alpha)` | gradient optimization algorithm | `void` | +| `fit(vector> x, vector y,int epochs,double alpha)` | Fit linear model | `vector` | +| `predict(vector> x)` | Predict using the linear model | `vector` | +| `printCoefficients()` | Print the estimated coefficients | `void` | + +## Example + +```cpp +double alpha = 0.01; +std::vector> x = {{1, 2, 3}, {2, 3, 4}, {3, 4, 5}, {4, 5, 6}}; +std::vector y = {2, 3, 4, 5}; +double lambda = 0.1; +int epochs = 1000; +LassoRegularization model(alpha); +model.fit(x, y, epochs, alpha); +model.printCoefficients(); +std::vector yPred; +for (int i = 0; i < x.size(); i++) +{ + yPred.push_back(predict(x[i])); +} +for (int i = 
0; i < y.size(); i++) + std::cout << "Actual value: " << y[i] << ", Predicted value: " << yPred[i]<< std::endl; + +``` diff --git a/docs/methods/linear_model/ridge_regularization.md b/docs/methods/linear_model/ridge_regularization.md new file mode 100644 index 0000000..eadd22f --- /dev/null +++ b/docs/methods/linear_model/ridge_regularization.md @@ -0,0 +1,50 @@ +# Ridge Regularization + +It adds L2 as the penalty. + +L2 is the sum of the squared magnitudes of the beta coefficients. + +## Parameters + +| Name | Definition | Defaults | Type | +| ------------- | ------------------------------------------------------------------------------------------- | -------- | ---------| +| lambda | Constant that multiplies the L2 term, controlling regularization strength | 0.01 | `double` | + + +## Attributes + +| Name | Definition | Shape | +| ------------ | --------------------------------------------------------- | ---------- | +| Coefficients | Estimated coefficients for the ridge regularization | n_features | + +## Methods + +| Name | Definition | Return value | +| ------------------------------- | ----------------------------------------------------- | ----------------- | +| `lossFunction(vector x, double y)` | Regularized loss function | `double` | +| `gradient(vector x,double y)` | Find gradient | `vector` | +| `gradientDescent(vector> x, vector y, double alpha,int epochs)` | gradient optimization algorithm | `void` | +| `fit(vector> x, vector y,int epochs,double alpha)` | Fit linear model | `vector` | +| `predict(vector x)` | Predict using the linear model | `double` | +| `printCoefficients()` | Print the estimated coefficients | `void` | + +## Example + +```cpp +double alpha = 0.01; +std::vector> x = {{1, 2, 3}, {2, 3, 4}, {3, 4, 5}, {4, 5, 6}}; +std::vector y = {2, 3, 4, 5}; +double lambda = 0.1; +int epochs = 1000; +RidgeRegularization model(alpha); +model.fit(x, y, epochs, alpha); +model.printCoefficients(); +std::vector yPred; +for (int i = 0; i 
< x.size(); i++) +{ + yPred.push_back(predict(x[i])); +} +for (int i = 0; i < y.size(); i++) + std::cout << "Actual value: " << y[i] << ", Predicted value: " << yPred[i] << std::endl; + +``` diff --git a/docs/methods/metrics/silhouette_score.md b/docs/methods/metrics/silhouette_score.md deleted file mode 100644 index 27d8b78..0000000 --- a/docs/methods/metrics/silhouette_score.md +++ /dev/null @@ -1,33 +0,0 @@ -# Silhouette Score - -The silhouette score is calculated as the average of all points si, where si is the difference of minimum of inter cluster distance to average of intra cluster distance divided by maximum of both. -The silhouette Score reflects how good the clusters are. - -## Parameters - -| Name | Definition | Type | -| ------------- | ------------------------------------------------------------------------------------------- | ----------------| -| numClusters | Takes a number of clusters | `int` | -| typeDist | You require euclidean/manhattan distance to compute silhoutte Score | `string` | -| x | Values of various points | `vector>` | -| y | Cluster number to which each x value belongs to | `vector` | - - -## Methods - -| Name | Definition | Return value | -| -----------------------------------------------| ----------------------------------------------------- | ----------------- | -| `silhouetteScore(vector>x,vector y,int numClusters,string typeDist)`|To find the silhoutte score| `double` | - -## Example - -```cpp - -std::vector> x{{1,2,3},{1.21,2.32,3.24},{5.56,5.45,5.23},{5.35,5.00,5.78}}; -std::vector y{0,0,1,1}; -int numClusters=2; -std::string s = "euclidean"; -double score = silhoutteScore(x,y,numClusters,s); -std::cout< int main() { diff --git a/examples/linear_model/lasso_regression.cpp b/examples/linear_model/lasso_regression.cpp new file mode 100644 index 0000000..5048129 --- /dev/null +++ b/examples/linear_model/lasso_regression.cpp @@ -0,0 +1,22 @@ +// #include "../src/slowmokit/methods/linear_model/lasso_regression.hpp" + +// int 
main() +// { +// std::vector> x = { +// {1.0, 2.0, 3.0}, {2.0, 3.0, 4.0}, {3.0, 4.0, 5.0}}; +// std::vector y = {1.0, 2.0, 3.0}; +// double alpha = 0.01; +// double lambda = 0.1; +// int epochs = 100; + +// LassoRegularization model(alpha); +// model.fit(x, y, epochs, alpha); +// model.printCoefficients(); + +// std::vector yPred = model.predict(x); +// for (int i = 0; i < y.size(); i++) +// std::cout << "Actual value: " << y[i] << ", Predicted value: " << yPred[i] +// << std::endl; + +// return 0; +// } \ No newline at end of file diff --git a/examples/linear_model/linear_regression.cpp b/examples/linear_model/linear_regression.cpp new file mode 100644 index 0000000..95e8807 --- /dev/null +++ b/examples/linear_model/linear_regression.cpp @@ -0,0 +1,18 @@ +#include "../../src/slowmokit/methods/linear_model/linear_regression.hpp" +#include "../../src/slowmokit/core.hpp" + +int main() +{ + LinearRegression model; + std::vector> x = {{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}}; + std::vector y = {2, 3, 4, 5, 6}; + + model.fit(x, y); + model.printCoefficients(); + + std::vector yPred = model.predict(x); + for (int i = 0; i < y.size(); i++) + cout << "Actual value: " << y[i] << ", Predicted value: " << yPred[i] << endl; + + return 0; +} diff --git a/examples/methods/linear_model/logistic_regression.cpp b/examples/linear_model/logistic_regression.cpp similarity index 85% rename from examples/methods/linear_model/logistic_regression.cpp rename to examples/linear_model/logistic_regression.cpp index ada7f4e..67792c5 100644 --- a/examples/methods/linear_model/logistic_regression.cpp +++ b/examples/linear_model/logistic_regression.cpp @@ -1,4 +1,5 @@ -// #include "../../../src/slowmokit/methods/linear_model/logistic_regression.hpp" +// #include "../../src/slowmokit/methods/linear_model/logistic_regression.hpp" +// #include "../../src/slowmokit/core.hpp" // int main() // { diff --git a/examples/linear_model/ridge_regression.cpp b/examples/linear_model/ridge_regression.cpp new file 
mode 100644 index 0000000..c4afe1e --- /dev/null +++ b/examples/linear_model/ridge_regression.cpp @@ -0,0 +1,25 @@ +// #include "../src/slowmokit/methods/linear_model/ridge_regression.hpp" + +// int main() +// { +// double alpha = 0.01; +// std::vector> x = { +// {1, 2, 3}, {2, 3, 4}, {3, 4, 5}, {4, 5, 6}}; +// std::vector y = {2, 3, 4, 5}; + +// double lambda = 0.1; +// int epochs = 1000; +// RidgeRegularization model(alpha); +// model.fit(x, y, epochs, alpha); +// model.printCoefficients(); +// std::vector yPred; +// for (int i = 0; i < x.size(); i++) +// { +// yPred.push_back(predict(x[i])); +// } +// for (int i = 0; i < y.size(); i++) +// std::cout << "Actual value: " << y[i] << ", Predicted value: " << yPred[i] +// << std::endl; + +// return 0; +// } diff --git a/examples/matrix.cpp b/examples/matrix.cpp index 677311c..e5a3707 100644 --- a/examples/matrix.cpp +++ b/examples/matrix.cpp @@ -3,9 +3,7 @@ int main() { int n = 3, m = 3; - Matrix mat(2, 2); - // std::cout << mat << " 2"; return 0; -} \ No newline at end of file +} diff --git a/examples/methods/linear_model/linear_regression.cpp b/examples/methods/linear_model/linear_regression.cpp deleted file mode 100644 index dc58d05..0000000 --- a/examples/methods/linear_model/linear_regression.cpp +++ /dev/null @@ -1,17 +0,0 @@ -// #include "../../../src/slowmokit/methods/linear_model/linear_regression.hpp" - -// int main() -// { -// LinearRegression model; -// std::vector> x = {{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}}; -// std::vector y = {2, 3, 4, 5, 6}; - -// model.fit(x, y); -// model.printCoefficients(); - -// std::vector yPred = model.predict(x); -// for (int i = 0; i < y.size(); i++) -// cout << "Actual value: " << y[i] << ", Predicted value: " << yPred[i] << endl; - -// return 0; -// } diff --git a/examples/methods/metrics/silhouette_score.cpp b/examples/methods/metrics/silhouette_score.cpp deleted file mode 100644 index 29120a8..0000000 --- a/examples/methods/metrics/silhouette_score.cpp +++ /dev/null @@ 
-1,17 +0,0 @@ -// #include "../../src/slowmokit/methods/cluster/silhouette_score.hpp" -// #include "../../../core.hpp" - -// signed main(){ -// std::vector> x{ -// {1,2,3}, -// {1.21,2.32,3.24}, -// {5.56,5.45,5.23}, -// {5.35,5.00,5.78} -// }; -// std::vector y{0,0,1,1}; -// int numClusters=2; -// std::string s = "euclidean"; -// double score = silhouetteScore(x,y,numClusters,s); - // std::cout< pred = {1, 0, 1, 1, 0, 1}; diff --git a/examples/methods/metrics/classification_report_eg.cpp b/examples/metrics/classification_report_eg.cpp similarity index 78% rename from examples/methods/metrics/classification_report_eg.cpp rename to examples/metrics/classification_report_eg.cpp index 5b68a3c..605da7a 100644 --- a/examples/methods/metrics/classification_report_eg.cpp +++ b/examples/metrics/classification_report_eg.cpp @@ -1,4 +1,4 @@ -// #include "../../src/slowmokit/methods/metrics/classification_report.hpp" +// #include "../src/slowmokit/methods/metrics/classification_report.hpp" // int main() // { diff --git a/examples/methods/metrics/f1score_eg.cpp b/examples/metrics/f1score_eg.cpp similarity index 100% rename from examples/methods/metrics/f1score_eg.cpp rename to examples/metrics/f1score_eg.cpp diff --git a/examples/methods/metrics/mean_squared_error_eg.cpp b/examples/metrics/mean_squared_error_eg.cpp similarity index 78% rename from examples/methods/metrics/mean_squared_error_eg.cpp rename to examples/metrics/mean_squared_error_eg.cpp index a427daf..3fe8489 100644 --- a/examples/methods/metrics/mean_squared_error_eg.cpp +++ b/examples/metrics/mean_squared_error_eg.cpp @@ -1,4 +1,4 @@ -// #include "../../src/slowmokit/methods/metrics/mean_squared_error.hpp" +// #include "../src/slowmokit/methods/metrics/mean_squared_error.hpp" // int main() { // std::vector actual = {1.0, 2.0, 3.0}; // std::vector pred = {0.5, 1.5, 2.5}; diff --git a/examples/methods/metrics/precision_eg.cpp b/examples/metrics/precision_eg.cpp similarity index 86% rename from 
examples/methods/metrics/precision_eg.cpp rename to examples/metrics/precision_eg.cpp index 33b354e..f41f005 100644 --- a/examples/methods/metrics/precision_eg.cpp +++ b/examples/metrics/precision_eg.cpp @@ -1,4 +1,4 @@ -// #include "../../src/slowmokit/methods/metrics/precision.hpp" +// #include "../src/slowmokit/methods/metrics/precision.hpp" // int main() // { // std::vector pred = {0, 1, 2, 1, 0, 2, 1, 0, 1, 2}; diff --git a/examples/methods/metrics/recall_eg.cpp b/examples/metrics/recall_eg.cpp similarity index 86% rename from examples/methods/metrics/recall_eg.cpp rename to examples/metrics/recall_eg.cpp index ee0c911..cb196ac 100644 --- a/examples/methods/metrics/recall_eg.cpp +++ b/examples/metrics/recall_eg.cpp @@ -1,4 +1,4 @@ -// #include "../../src/slowmokit/methods/metrics/recall.hpp" +// #include "../src/slowmokit/methods/metrics/recall.hpp" // int main() // { diff --git a/examples/methods/neighbors/bernoulli_nb.cpp b/examples/neighbors/bernoulli_nb.cpp similarity index 81% rename from examples/methods/neighbors/bernoulli_nb.cpp rename to examples/neighbors/bernoulli_nb.cpp index bb17dfb..8d707d2 100644 --- a/examples/methods/neighbors/bernoulli_nb.cpp +++ b/examples/neighbors/bernoulli_nb.cpp @@ -1,4 +1,5 @@ -// #include "../../../src/slowmokit/methods/neighbors/bernoulli_nb.hpp" +// #include "../../src/slowmokit/methods/neighbors/bernoulli_nb.hpp" +// #include "../../src/slowmokit/core.hpp" // signed main(){ // std::vector> xTrain{ diff --git a/examples/methods/neighbors/gaussian_nb.cpp b/examples/neighbors/gaussian_nb.cpp similarity index 82% rename from examples/methods/neighbors/gaussian_nb.cpp rename to examples/neighbors/gaussian_nb.cpp index 79ed8a3..ead14c3 100644 --- a/examples/methods/neighbors/gaussian_nb.cpp +++ b/examples/neighbors/gaussian_nb.cpp @@ -1,4 +1,5 @@ -// #include "../../../src/slowmokit/methods/neighbors/gaussian_nb.hpp" +// #include "../../src/slowmokit/methods/neighbors/gaussian_nb.hpp" +// #include 
"../../src/slowmokit/core.hpp" // signed main(){ // std::vector> x_train{ diff --git a/examples/methods/neighbors/knn.cpp b/examples/neighbors/knn.cpp similarity index 78% rename from examples/methods/neighbors/knn.cpp rename to examples/neighbors/knn.cpp index 0d39c76..5eb1a00 100644 --- a/examples/methods/neighbors/knn.cpp +++ b/examples/neighbors/knn.cpp @@ -1,4 +1,5 @@ -// #include "../../../src/slowmokit/methods/neighbors/knn.hpp" +// #include "../../src/slowmokit/methods/neighbors/knn.hpp" +// #include "../../src/slowmokit/core.hpp" // signed main(){ // std::vector> x{ diff --git a/examples/methods/preprocessing/label_encoder.cpp b/examples/preprocessing/label_encoder.cpp similarity index 79% rename from examples/methods/preprocessing/label_encoder.cpp rename to examples/preprocessing/label_encoder.cpp index 5e8ce5f..0a54461 100644 --- a/examples/methods/preprocessing/label_encoder.cpp +++ b/examples/preprocessing/label_encoder.cpp @@ -1,4 +1,4 @@ -//#include "../../src/slowmokit/methods/preprocessing/label_encoder.hpp" +//#include "src/slowmokit/methods/preprocessing/label_encoder.hpp" //int main() { // std::vector data = {"luffy","zoro","sanji","luffy","law","zoro"}; diff --git a/examples/methods/preprocessing/normalization_eg.cpp b/examples/preprocessing/normalization_eg.cpp similarity index 73% rename from examples/methods/preprocessing/normalization_eg.cpp rename to examples/preprocessing/normalization_eg.cpp index 255ad2b..fb28f4e 100644 --- a/examples/methods/preprocessing/normalization_eg.cpp +++ b/examples/preprocessing/normalization_eg.cpp @@ -1,4 +1,4 @@ -// #include "../../src/slowmokit/methods/preprocessing/normalization.hpp" +// #include "../src/slowmokit/methods/preprocessing/normalization.hpp" // int main(){ // std::vector values={1,2,3,4,5}; // normalize(values); diff --git a/examples/methods/preprocessing/one_hot_encoder_eg.cpp b/examples/preprocessing/one_hot_encoder_eg.cpp similarity index 84% rename from 
examples/methods/preprocessing/one_hot_encoder_eg.cpp rename to examples/preprocessing/one_hot_encoder_eg.cpp index b11846d..9fe6529 100644 --- a/examples/methods/preprocessing/one_hot_encoder_eg.cpp +++ b/examples/preprocessing/one_hot_encoder_eg.cpp @@ -1,4 +1,4 @@ -//#include "../../src/slowmokit/methods/preprocessing/one_hot_encoder.hpp" +//#include "src/slowmokit/methods/preprocessing/one_hot_encoder.hpp" //int main() { // std::vector data = {"apples", "banana", "mango", "pear", "mango","apples","pear"}; diff --git a/examples/methods/preprocessing/standardization_eg.cpp b/examples/preprocessing/standardization_eg.cpp similarity index 73% rename from examples/methods/preprocessing/standardization_eg.cpp rename to examples/preprocessing/standardization_eg.cpp index a51a622..fb9c5fc 100644 --- a/examples/methods/preprocessing/standardization_eg.cpp +++ b/examples/preprocessing/standardization_eg.cpp @@ -1,4 +1,4 @@ -// #include "../../src/slowmokit/methods/preprocessing/standardization.hpp" +// #include "../src/slowmokit/methods/preprocessing/standardization.hpp" // int main(){ // std::vector values={1,2,3,4,5}; diff --git a/src/slowmokit.cpp b/src/slowmokit.cpp deleted file mode 100644 index 0d55b17..0000000 --- a/src/slowmokit.cpp +++ /dev/null @@ -1,7 +0,0 @@ -/** - * @file slowmokit.cpp - * - * File important to read the cpp files inside the src/ - * Just a necessary include, no edits should be made to this file - * - */ \ No newline at end of file diff --git a/src/slowmokit/CMakeLists.txt b/src/slowmokit/CMakeLists.txt deleted file mode 100644 index e57d108..0000000 --- a/src/slowmokit/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -# Recurse into methods/ to get the definitions of any bindings. 
-#add_subdirectory(methods) - -# At install time, we simply install the src/ directory to include/ -install(FILES - "${CMAKE_CURRENT_SOURCE_DIR}/../slowmokit.hpp" - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") - -install(FILES - "${CMAKE_CURRENT_SOURCE_DIR}/base.hpp" - "${CMAKE_CURRENT_SOURCE_DIR}/core.hpp" - "${CMAKE_CURRENT_SOURCE_DIR}/prereqs.hpp" - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/slowmokit/") - -install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/methods" DESTINATION - "${CMAKE_INSTALL_INCLUDEDIR}/slowmokit") \ No newline at end of file diff --git a/src/slowmokit/core.hpp b/src/slowmokit/core.hpp index c72eec4..0684181 100644 --- a/src/slowmokit/core.hpp +++ b/src/slowmokit/core.hpp @@ -10,4 +10,7 @@ // pre-requisites of the library #include "prereqs.hpp" +// standard model class +#include "models/model.hpp" + #endif // SLOWMOKIT_CORE_HPP diff --git a/src/slowmokit/ducks/matrix/matrix_main.cpp b/src/slowmokit/ducks/matrix/matrix.cpp similarity index 98% rename from src/slowmokit/ducks/matrix/matrix_main.cpp rename to src/slowmokit/ducks/matrix/matrix.cpp index bc71834..0e63bc2 100644 --- a/src/slowmokit/ducks/matrix/matrix_main.cpp +++ b/src/slowmokit/ducks/matrix/matrix.cpp @@ -1,7 +1,7 @@ /** - * @file ducks/matrix/matrix_main.cpp + * @file ducks/matrix/matrix.cpp * - * Implementation of the main methods of Matrix + * Implementation of the matrix main program */ #include "matrix.hpp" diff --git a/src/slowmokit/ducks/matrix/matrix.hpp b/src/slowmokit/ducks/matrix/matrix.hpp index d58f85d..6a6343c 100644 --- a/src/slowmokit/ducks/matrix/matrix.hpp +++ b/src/slowmokit/ducks/matrix/matrix.hpp @@ -91,6 +91,16 @@ template class Matrix std::array getShape() const; + /** + * @brief Function for taking dot product of current matrix with another + * matrix + * @param rhs: Take dot product of current Matrix with `rhs` + * @returns: A New Matrix after taking dot product with matrix `rhs` + * @throws: std::invalid_argument incase of incompatible dimensions + */ + Matrix 
&dot(const Matrix &); + + /** * @brief overloading += operator for adding another matrix to existing matrix * @param1: Matrix `rhs` which is to be added @@ -149,13 +159,14 @@ template class Matrix * @throw: whatever operator *= throws */ Matrix subtract(const Matrix); - - /** - * @brief overloading << for easy printing of Matrix - */ - friend std::ostream &operator<< (std::ostream &, const Matrix &); }; + +/** + * @brief overloading << for easy printing of Matrix + */ +template std::ostream &operator<<(std::ostream &, const Matrix &); + /** * @brief Free Function to multiply a matrix to a number or another matrix * @param lhs: A number or a Matrix @@ -187,4 +198,4 @@ template Matrix operator-(Matrix, const G &); template Matrix operator-(Matrix, const Matrix &); -#endif // SLOWMOKIT_IO_HPP \ No newline at end of file +#endif // SLOWMOKIT_IO_HPP diff --git a/src/slowmokit/ducks/matrix/matrix_free.cpp b/src/slowmokit/ducks/matrix/matrix_free.cpp deleted file mode 100644 index cfc57fe..0000000 --- a/src/slowmokit/ducks/matrix/matrix_free.cpp +++ /dev/null @@ -1,85 +0,0 @@ -/** - * @file ducks/matrix/matrix_free.cpp - * - * Implementation of the free functions associated with matrix - */ - -#include "matrix.hpp" - -template Matrix operator*(Matrix lhs, const Matrix &rhs) -{ - lhs *= rhs; - return lhs; -} - -template Matrix operator+(Matrix lhs, const Matrix &rhs) -{ - lhs += rhs; - return lhs; -} - -template Matrix operator-(Matrix lhs, const Matrix &rhs) -{ - lhs -= rhs; - return lhs; -} - -template Matrix operator+(G num, const Matrix &matrix) -{ - Matrix res = matrix; - res += num; - return res; -} - -template Matrix operator-(G num, const Matrix &matrix) -{ - Matrix res = matrix; - res -= num; - return res; -} - -template Matrix operator*(G num, const Matrix &matrix) -{ - Matrix res = matrix; - res *= num; - return res; -} - -template Matrix operator+(Matrix matrix, const G &num) -{ - matrix += num; - return matrix; -} - -template Matrix operator-(Matrix matrix, 
const G &num) -{ - matrix -= num; - return matrix; -} - -template Matrix operator*(Matrix matrix, const G &num) -{ - matrix *= num; - return matrix; -} - -template Matrix Matrix::matmul(const Matrix rhs) -{ - Matrix res = *this; - res *= rhs; - return res; -} - -template Matrix Matrix::add(const Matrix rhs) -{ - Matrix res = *this; - res += rhs; - return res; -} - -template Matrix Matrix::subtract(const Matrix rhs) -{ - Matrix res = *this; - res -= rhs; - return res; -} \ No newline at end of file diff --git a/src/slowmokit/methods/cluster/kMeans.hpp b/src/slowmokit/methods/cluster/kMeans.hpp index 82cdd94..a2b006e 100644 --- a/src/slowmokit/methods/cluster/kMeans.hpp +++ b/src/slowmokit/methods/cluster/kMeans.hpp @@ -4,9 +4,9 @@ * Easy include for kMeans algorithm */ -#ifndef SLOWMOKIT_KMEANS_HPP_MAIN -#define SLOWMOKIT_KMEANS_HPP_MAIN +#ifndef SLOWMOKIT_KMEANS_HPP +#define SLOWMOKIT_KMEANS_HPP #include "kMeans/kMeans.hpp" -#endif // SLOWMOKIT_KMEANS_HPP_MAIN +#endif // SLOWMOKIT_KMEANS_HPP diff --git a/src/slowmokit/methods/cluster/kMeans/kMeans.cpp b/src/slowmokit/methods/cluster/kMeans/kMeans.cpp index 1418414..b637584 100644 --- a/src/slowmokit/methods/cluster/kMeans/kMeans.cpp +++ b/src/slowmokit/methods/cluster/kMeans/kMeans.cpp @@ -3,7 +3,6 @@ * * Implementation of the K-Means main program */ - #include "kMeans.hpp" template void kMeans::fit(std::vector> X) diff --git a/src/slowmokit/methods/cluster/kMeans/kMeans.hpp b/src/slowmokit/methods/cluster/kMeans/kMeans.hpp index 29db2b0..575582c 100644 --- a/src/slowmokit/methods/cluster/kMeans/kMeans.hpp +++ b/src/slowmokit/methods/cluster/kMeans/kMeans.hpp @@ -4,14 +4,13 @@ * The header file including the kMeans */ -#ifndef SLOWMOKIT_KMEANS_HPP_1 -#define SLOWMOKIT_KMEANS_HPP_1 +#ifndef SLOWMOKIT_KMEANS_HPP +#define SLOWMOKIT_KMEANS_HPP #include "../../../core.hpp" template class kMeans { - const int default_epoch = 40; const int k; const int epoch; std::vector clusters; @@ -85,17 +84,17 @@ template class kMeans 
public: kMeans(int k, int epoch) : k(k), epoch(epoch) { - if (k <= 1) + if (k <= 0) { - throw std::invalid_argument("k should be greater than 1"); + throw "k should be a positive integer."; } } - kMeans(int k) : kMeans(k, default_epoch) {} + kMeans(int k) : kMeans(k, 40) {} kMeans(int k, std::vector> initial_centroids, int epoch) - : kMeans(k, epoch) + : kMeans(k, 40) { this->centroids = initial_centroids; } @@ -105,12 +104,12 @@ template class kMeans std::vector predict(std::vector>); /** - * @Returns which cluster point-i belongs to + * Returns which cluster point-i belongs to */ std::vector labels() const; /** - * @Returns the final centroid for each cluster. + * Returns the final centroid for each cluster. */ std::vector> getCentroid() const; }; diff --git a/src/slowmokit/methods/linear_model/linear_regression/linear_regression.hpp b/src/slowmokit/methods/linear_model/linear_regression/linear_regression.hpp index 9149aad..21d7e68 100644 --- a/src/slowmokit/methods/linear_model/linear_regression/linear_regression.hpp +++ b/src/slowmokit/methods/linear_model/linear_regression/linear_regression.hpp @@ -4,8 +4,8 @@ * The header file including the linear regression algorithm */ -#ifndef SLOWMOKIT_LINEAR_REGRESSION_HPP_1 -#define SLOWMOKIT_LINEAR_REGRESSION_HPP_1 +#ifndef SLOWMOKIT_LINEAR_REGRESSION_HPP +#define SLOWMOKIT_LINEAR_REGRESSION_HPP #include "../../../core.hpp" diff --git a/src/slowmokit/methods/metrics/metrics.cpp b/src/slowmokit/methods/metrics/metrics.cpp new file mode 100644 index 0000000..9b43db0 --- /dev/null +++ b/src/slowmokit/methods/metrics/metrics.cpp @@ -0,0 +1,289 @@ +/** + * @file methods/metrics/metrics.cpp + * + * Implementation of all the functions of class Metrics + */ + +#include "metrics.hpp" + + +template +double Metrics::accuracy(const std::vector &pred, + const std::vector &trueLabels) +{ + if (pred.size() != trueLabels.size()) + { + throw std ::invalid_argument("pred and true_labels must have same size"); + } + int correct = 
0; + int total = pred.size(); + for (int i = 0; i < total; i++) + { + if (pred[i] == trueLabels[i]) + { + correct++; + } + } + return (double) correct / total; +} + +template +double Metrics::meanSquaredError(const std::vector &actual, + const std::vector &pred) +{ + if (actual.size() != pred.size()) + { + throw std::invalid_argument( + "Actual and Predicted vectors must have same size"); + } + + double sum = 0.0; + for (int i = 0; i < actual.size(); i++) + { + sum += (actual[i] - pred[i]) * (actual[i] - pred[i]); + } + return sum / actual.size(); +} + +template +std::map Metrics::precision(const std::vector &pred, + const std::vector &actual) +{ + if (pred.size() != actual.size()) + { + throw std::invalid_argument( + "Predicted and actual vectors must have same size"); + } + int n = actual.size(); + std::set s; + for (int i = 0; i < n; i++) + { + s.insert(actual[i]); + } + int numClasses = s.size(); + std::map precisionMap; + std::map truePosMap, falsePosMap; + + for (int i = 0; i < n; i++) + { + if (pred[i] == actual[i]) + { + truePosMap[actual[i]]++; + } + else + { + falsePosMap[pred[i]]++; + } + } + + for (int i = 0; i < numClasses; i++) + { + if (truePosMap[i] > 0 || falsePosMap[i] > 0) + { + precisionMap[i] = + (double) (truePosMap[i] / (double) (truePosMap[i] + falsePosMap[i])); + } + else + { + precisionMap[i] = 1.0; + } + + double x = precisionMap[i]; + float value = (int) (x * 100 + .5); + precisionMap[i] = (float) value / 100; + } + + return precisionMap; +} + + +template +std::map Metrics::recall(const std::vector &pred, + const std::vector &actual) +{ + if (pred.size() != actual.size()) + { + throw std::invalid_argument( + "Predicted and actual vectors must have same size"); + } + int n = actual.size(); + std::set s; + for (int i = 0; i < n; i++) + { + s.insert(actual[i]); + } + int numClasses = s.size(); + std::map recallMap; + std::map truePosMap, falseNegMap; + for (int i = 0; i < n; i++) + { + if (pred[i] == actual[i]) + { + 
truePosMap[actual[i]]++; + } + else + { + falseNegMap[actual[i]]++; + } + } + + for (int i = 0; i < numClasses; i++) + { + if (truePosMap[i] > 0 || falseNegMap[i] > 0) + { + recallMap[i] = + (double) (truePosMap[i] / (double) (truePosMap[i] + falseNegMap[i])); + } + else + { + recallMap[i] = 1.0; + } + + double x = recallMap[i]; + float value = (int) (x * 100 + .5); + recallMap[i] = (float) value / 100; + } + + + return recallMap; +} + +template +std::map Metrics::f1Score(const std::vector &pred, + const std::vector &actual) +{ + // 2 * Precision * Recall / (Precision + Recall) + std::map precisionMap, recallMap; + precisionMap = precision(pred, actual); + recallMap = recall(pred, actual); + std::map f1ScoreMap; + for (int i = 0; i < precisionMap.size(); i++) + { + T classNumber = i; + if (precisionMap[classNumber] == 0 || recallMap[classNumber] == 0) + { + f1ScoreMap[classNumber] = 0; + } + else + { + f1ScoreMap[classNumber] = (2 * (double) precisionMap[classNumber] * + (double) recallMap[classNumber]) / + ((double) precisionMap[classNumber] + + (double) recallMap[classNumber]); + + double x = f1ScoreMap[classNumber]; + float value = (int) (x * 100 + .5); + f1ScoreMap[classNumber] = (float) value / 100; + } + } + return f1ScoreMap; +} + +template +double Metrics::silhouetteScore(const std::vector> x, + const std::vector y, int numClusters, + std::string typeDist) +{ + if (x.size() != y.size()) + { + throw std::invalid_argument("Size of x and y values are not same"); + return -1; + } + + if (numClusters < 2 or x.size() - 1 < numClusters) + { + throw std::invalid_argument("Invalid arguments sizes of x or numClusters"); + return -1; + } + + std::transform(typeDist.begin(), typeDist.end(), typeDist.begin(), + [](unsigned char c) { return std::tolower(c); }); + + std::vector> distances(x.size(), + std::vector(x.size())); + for (int i = 0; i < x.size(); i++) + { // iterating over each x[i] + for (int j = 0; j < x.size(); j++) + { // iterating over each x[i] + if (i == 
j) + { + distances[i][j] = 0.0; // initialize distance=0 if computing + // distance between same values + } + else + { + if (typeDist == "euclidean") + { // euclidean distance + for (int k = 0; k < x[0].size(); k++) + { + distances[i][j] += + (x[i][k] - x[j][k]) * + (x[i][k] - x[j][k]); // x[i][k]-> i=point,k=1-d value of point + } + const double HALF = 0.5; + distances[i][j] = pow(distances[i][j], HALF); + } + else if (typeDist == "manhattan") + { + for (int k = 0; k < x[0].size(); k++) + { + distances[i][j] += abs(x[i][k] - x[j][k]); + } + } + } + } + } + + std::vector intraClusters( + x.size(), 0.0); // Computing intraclusters distances of each point + std::vector crossClusters( + x.size()); // Minimum Distance of each point to other clusters + for (int i = 0; i < x.size(); i++) + { + int sumNum = 0; + std::vector interClusters( + numClusters, + 0.0); // values of point to each cluster points + std::vector sumsOfParticular(numClusters, 0); + for (int j = 0; j < x.size(); j++) + { + if (y[j] == y[i]) + { + intraClusters[i] += distances[i][j]; // Sum of distance of point to each + // other point in same cluster + sumNum++; + } + else + { + interClusters[y[j]] += distances[i][j]; // Sum of distance of point to + // points in different clusters + sumsOfParticular[y[j]]++; // computes points in that cluster + } + } + intraClusters[i] /= sumNum; // Mean of sum values of distances b/w + // points of same cluster + double minimumOfall = std::numeric_limits::max(); + ; + for (int j = 0; j < numClusters; j++) + { + if (j != y[i]) + { + interClusters[j] /= sumsOfParticular[j]; // Mean of values of + // interclusters distances + if (interClusters[j] < minimumOfall) + { // computing minimum value of means of intercluster distances + minimumOfall = interClusters[j]; + } + } + } + crossClusters[i] = minimumOfall; + } + double si = 0.0; + for (int i = 0; i < x.size(); i++) + { + si += ((crossClusters[i] - intraClusters[i]) / + std::max(intraClusters[i], + crossClusters[i])); // 
s = b[i]-a[i] / max(b[i],a[i]) + } + return si / double(x.size()); +} \ No newline at end of file diff --git a/src/slowmokit/methods/metrics/metrics.hpp b/src/slowmokit/methods/metrics/metrics.hpp new file mode 100644 index 0000000..d1008de --- /dev/null +++ b/src/slowmokit/methods/metrics/metrics.hpp @@ -0,0 +1,99 @@ +/** + * @file methods/metrics/metrics.hpp + * + * Declares the Metrics class template and its static metric functions + */ + +#ifndef SLOWMOKIT_METRICS_HPP +#define SLOWMOKIT_METRICS_HPP +#include "../../core.hpp" + +/** + * Groups the evaluation metrics declared below as static member functions: + * accuracy, meanSquaredError, precision, recall, f1Score, silhouetteScore. + * NOTE(review): this comment previously described a classification report + * (params predictedValue / trueValue); no such function is declared in this + * class -- confirm that text was a copy-paste leftover. + */ + +template class Metrics +{ + public: + /** + * Takes predicted and actual values + * @param pred -> predicted values + * @param trueLabels -> true values + * @returns accuracy score + * @throws invalid_argument exception when size of the two vectors is not + * equal + */ + + static double accuracy(const std::vector &, const std::vector &); + + /** + * Takes predicted and actual values + * @param pred -> predicted values + * @param actual -> true values + * @returns mean squared error + * @throws exception invalid_argument in case size of the two vectors is not + * equal + */ + + static double meanSquaredError(const std::vector &, + const std::vector &); + + + /** + * Takes predicted and actual values + * @param pred -> predicted values + * @param actual -> actual values + * @throws exception invalid_argument in case size of the two vectors is not + * equal + * @returns map of precision values + */ + + + static std::map precision(const std::vector &, + const std::vector &); + + + /** + * Takes predicted and actual values + * @param pred -> predicted values + * @param actual -> actual values + * @throws exception invalid_argument in case size of the two vectors is not + * equal + * @returns
map of recall values + */ + + static std::map recall(const std::vector &, + const std::vector &); + + + /** + * Takes predicted and actual values + * @param pred -> predicted values + * @param trueLabels -> true values + * @returns map of f1 score values + * @throws exception invalid_argument in case size of the two vectors is not + * equal + */ + + + static std::map f1Score(const std::vector &, + const std::vector &); + + + /** + * @brief Calculates Silhouette Score + * @param vector> x values + * @param vector y values + * @param int number of clusters + * @param string distance type + * @return double Silhouette Score + */ + static double silhouetteScore(const std::vector>, + const std::vector, int, std::string); +}; + +#endif // SLOWMOKIT_METRICS_HPP diff --git a/src/slowmokit/methods/metrics/silhouette_score.cpp b/src/slowmokit/methods/metrics/silhouette_score.cpp index 17ed013..513bbce 100644 --- a/src/slowmokit/methods/metrics/silhouette_score.cpp +++ b/src/slowmokit/methods/metrics/silhouette_score.cpp @@ -32,8 +32,8 @@ double silhouetteScore(std::vector> x, std::vector y, { // iterating over each x[i] if (i == j) { - distances[i][j] = 0.0; // initialize distance=0 if computing distance - // between same values + distances[i][j] = 0.0; // initialize distance=0 if computing + // distance between same values } else { @@ -45,7 +45,8 @@ double silhouetteScore(std::vector> x, std::vector y, (x[i][k] - x[j][k]) * (x[i][k] - x[j][k]); // x[i][k]-> i=point,k=1-d value of point } - distances[i][j] = pow(distances[i][j], 0.5); + const double HALF = 0.5; + distances[i][j] = pow(distances[i][j], HALF); } else if (typeDist == "manhattan") { @@ -84,16 +85,16 @@ double silhouetteScore(std::vector> x, std::vector y, sumsOfParticular[y[j]]++; // computes points in that cluster } } - intraClusters[i] /= - sumNum; // Mean of sum values of distances b/w points of same cluster + intraClusters[i] /= sumNum; // Mean of sum values of distances b/w + // points of same cluster double
minimumOfall = std::numeric_limits::max(); ; for (int j = 0; j < numClusters; j++) { if (j != y[i]) { - interClusters[j] /= - sumsOfParticular[j]; // Mean of values of interclusters distances + interClusters[j] /= sumsOfParticular[j]; // Mean of values of + // interclusters distances if (interClusters[j] < minimumOfall) { // computing minimum value of means of intercluster distances minimumOfall = interClusters[j]; diff --git a/src/slowmokit/methods/neighbors/knn/knn.hpp b/src/slowmokit/methods/neighbors/knn/knn.hpp index c8b2807..bbbc9a3 100644 --- a/src/slowmokit/methods/neighbors/knn/knn.hpp +++ b/src/slowmokit/methods/neighbors/knn/knn.hpp @@ -6,6 +6,8 @@ #ifndef SLOWMOKIT_KNN_HPP #define SLOWMOKIT_KNN_HPP +#include "../../../models/model.hpp" + template class KNN { private: