From 99f7d218713266045e74972ea46218c52f828e5f Mon Sep 17 00:00:00 2001 From: Yashita Bansal Date: Sat, 18 Feb 2023 13:08:47 +0530 Subject: [PATCH 1/6] Cleanup metrics directory #110 --- src/slowmokit/methods/metrics/metrics.cpp | 176 ++++++++++++++++++++++ src/slowmokit/methods/metrics/metrics.hpp | 85 +++++++++++ 2 files changed, 261 insertions(+) create mode 100644 src/slowmokit/methods/metrics/metrics.cpp create mode 100644 src/slowmokit/methods/metrics/metrics.hpp diff --git a/src/slowmokit/methods/metrics/metrics.cpp b/src/slowmokit/methods/metrics/metrics.cpp new file mode 100644 index 0000000..c4382f9 --- /dev/null +++ b/src/slowmokit/methods/metrics/metrics.cpp @@ -0,0 +1,176 @@ +/** + * @file methods/metrics/metrics.hpp + * + * Easy include to execute all the functions of class metrics + */ + +#include "metrics.hpp" + + +template +double Metrics::accuracy(const std::vector &pred, const std::vector &trueLabels) +{ + if (pred.size() != trueLabels.size()) + { + throw std ::invalid_argument("pred and true_labels must have same size"); + } + int correct = 0; + int total = pred.size(); + for (int i = 0; i < total; i++) + { + if (pred[i] == trueLabels[i]) + { + correct++; + } + } + return (double) correct / total; +} + +template +double Metrics::meanSquaredError(const std::vector &actual, const std::vector &pred) +{ + if (actual.size() != pred.size()) + { + throw std::invalid_argument( + "Actual and Predicted vectors must have same size"); + } + + double sum = 0.0; + for (int i = 0; i < actual.size(); i++) + { + sum += (actual[i] - pred[i]) * (actual[i] - pred[i]); + } + return sum / actual.size(); +} + +template +std::map Metrics::precision(const std::vector &pred, const std::vector &actual) +{ + if (pred.size() != actual.size()) + { + throw std::invalid_argument( + "Predicted and actual vectors must have same size"); + } + int n = actual.size(); + std::set s; + for (int i = 0; i < n; i++) + { + s.insert(actual[i]); + } + int numClasses = s.size(); + std::map precisionMap; + std::map truePosMap, falsePosMap; + + for (int i = 0; i < n; i++) + { + if (pred[i] == actual[i]) + { + truePosMap[actual[i]]++; + } + else + { + falsePosMap[pred[i]]++; + } + } + + for (int i = 0; i < numClasses; i++) + { + if (truePosMap[i] > 0 || falsePosMap[i] > 0) + { + precisionMap[i] = + (double) (truePosMap[i] / (double) (truePosMap[i] + falsePosMap[i])); + } + else + { + precisionMap[i] = 1.0; + } + + double x = precisionMap[i]; + float value = (int) (x * 100 + .5); + precisionMap[i] = (float) value / 100; + } + + return precisionMap; +} + + + +template +std::map Metrics::recall(const std::vector &pred, const std::vector &actual) +{ + if (pred.size() != actual.size()) + { + throw std::invalid_argument( + "Predicted and actual vectors must have same size"); + } + int n = actual.size(); + std::set s; + for (int i = 0; i < n; i++) + { + s.insert(actual[i]); + } + int numClasses = s.size(); + std::map recallMap; + std::map truePosMap, falseNegMap; + for (int i = 0; i < n; i++) + { + if (pred[i] == actual[i]) + { + truePosMap[actual[i]]++; + } + else + { + falseNegMap[actual[i]]++; + } + } + + for (int i = 0; i < numClasses; i++) + { + if (truePosMap[i] > 0 || falseNegMap[i] > 0) + { + recallMap[i] = + (double) (truePosMap[i] / (double) (truePosMap[i] + falseNegMap[i])); + } + else + { + recallMap[i] = 1.0; + } + + double x = recallMap[i]; + float value = (int) (x * 100 + .5); + recallMap[i] = (float) value / 100; + } + + + return recallMap; +} + +template +std::map Metrics::f1Score(const std::vector &pred, 
const std::vector &actual) +{ + // 2 * Precision * Recall / (Precision + Recall) + std::map precisionMap, recallMap; + precisionMap = precision(pred, actual); + recallMap = recall(pred, actual); + std::map f1ScoreMap; + for (int i = 0; i < precisionMap.size(); i++) + { + T classNumber = i; + if (precisionMap[classNumber] == 0 || recallMap[classNumber] == 0) + { + f1ScoreMap[classNumber] = 0; + } + else + { + f1ScoreMap[classNumber] = (2 * (double) precisionMap[classNumber] * + (double) recallMap[classNumber]) / + ((double) precisionMap[classNumber] + + (double) recallMap[classNumber]); + + double x = f1ScoreMap[classNumber]; + float value = (int) (x * 100 + .5); + f1ScoreMap[classNumber] = (float) value / 100; + } + } + return f1ScoreMap; +} \ No newline at end of file diff --git a/src/slowmokit/methods/metrics/metrics.hpp b/src/slowmokit/methods/metrics/metrics.hpp new file mode 100644 index 0000000..edd3809 --- /dev/null +++ b/src/slowmokit/methods/metrics/metrics.hpp @@ -0,0 +1,85 @@ +/** + * @file methods/metrics/metrics.hpp + * + * Easy include to execute all the functions of class metrics + */ + +#ifndef SLOWMOKIT_METRICS_HPP +#define SLOWMOKIT_METRICS_HPP +#include "../../core.hpp" + +/** + * Takes predicted and actual values of classes + * @param predictedValue -> predicted values + * @param trueValue -> true values + * @returns the classification report + * @throws invalid_argument exception when size of the two vectors is not equal + */ + +template class Metrics +{ + public: + /** + * Takes predicted and actual values + * @param pred -> predicted values + * @param trueLabels -> true values + * @returns accuracy score + * @throws invalid_argument exception when size of the two vectors is not equal + */ + + static double accuracy(const std::vector &, const std::vector &); + + /** + * Takes predicted and actual values + * @param pred -> predicted values + * @param actual -> true values + * @returns mean squared error + * @throws exception invalid_argument in case size of the two vectors is not + * equal + */ + + static double meanSquaredError(const std::vector &, + const std::vector &); + + + /** + * Takes predicted and actual values + * @param pred -> predicted values + * @param actual -> actual values + * @throws exception invalid_argument in case size of the two vectors is not + * equal + * @returns map of precision values + */ + + + static std::map precision(const std::vector &, + const std::vector &); + + + /** + * Takes predicted and actual values + * @param pred -> predicted values + * @param actual -> actual values + * @throws exception invalid_argument in case size of the two vectors is not + * equal + * @returns map of recall values + */ + + static std::map recall(const std::vector &,const std::vector &); + + + /** + * Takes predicted and actual values + * @param pred -> predicted values + * @param trueLabels -> true values + * @returns f1score score + * @throws exception invalid_argument in case size of the two vectors is not + * equal + */ + + + static std::map f1Score(const std::vector &, + const std::vector &); +}; + +#endif // SLOWMOKIT_METRICS_HPP From dbe5ac110ffa2a602c909b19b1cc6c7f36783f34 Mon Sep 17 00:00:00 2001 From: Yashita Bansal Date: Sat, 18 Feb 2023 13:18:29 +0530 Subject: [PATCH 2/6] Cleanup Metrics Directory#110 --- src/slowmokit/methods/metrics/metrics.cpp | 16 ++++++++++------ src/slowmokit/methods/metrics/metrics.hpp | 12 +++++++----- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/slowmokit/methods/metrics/metrics.cpp 
b/src/slowmokit/methods/metrics/metrics.cpp index c4382f9..b8c8472 100644 --- a/src/slowmokit/methods/metrics/metrics.cpp +++ b/src/slowmokit/methods/metrics/metrics.cpp @@ -8,7 +8,8 @@ template -double Metrics::accuracy(const std::vector &pred, const std::vector &trueLabels) +double Metrics::accuracy(const std::vector &pred, + const std::vector &trueLabels) { if (pred.size() != trueLabels.size()) { @@ -27,7 +28,8 @@ double Metrics::accuracy(const std::vector &pred, const std::vector &tr } template -double Metrics::meanSquaredError(const std::vector &actual, const std::vector &pred) +double Metrics::meanSquaredError(const std::vector &actual, + const std::vector &pred) { if (actual.size() != pred.size()) { @@ -44,7 +46,8 @@ double Metrics::meanSquaredError(const std::vector &actual, const std::vec } template -std::map Metrics::precision(const std::vector &pred, const std::vector &actual) +std::map Metrics::precision(const std::vector &pred, + const std::vector &actual) { if (pred.size() != actual.size()) { @@ -94,9 +97,9 @@ std::map Metrics::precision(const std::vector &pred, const std: } - template -std::map Metrics::recall(const std::vector &pred, const std::vector &actual) +std::map Metrics::recall(const std::vector &pred, + const std::vector &actual) { if (pred.size() != actual.size()) { @@ -146,7 +149,8 @@ std::map Metrics::recall(const std::vector &pred, const std::ve } template -std::map Metrics::f1Score(const std::vector &pred, const std::vector &actual) +std::map Metrics::f1Score(const std::vector &pred, + const std::vector &actual) { // 2 * Precision * Recall / (Precision + Recall) std::map precisionMap, recallMap; diff --git a/src/slowmokit/methods/metrics/metrics.hpp b/src/slowmokit/methods/metrics/metrics.hpp index edd3809..3730a29 100644 --- a/src/slowmokit/methods/metrics/metrics.hpp +++ b/src/slowmokit/methods/metrics/metrics.hpp @@ -24,7 +24,8 @@ template class Metrics * @param pred -> predicted values * @param trueLabels -> true values * @returns accuracy score - * @throws invalid_argument exception when size of the two vectors is not equal + * @throws invalid_argument exception when size of the two vectors is not + * equal */ static double accuracy(const std::vector &, const std::vector &); @@ -51,7 +52,7 @@ template class Metrics * @returns map of precision values */ - + static std::map precision(const std::vector &, const std::vector &); @@ -64,8 +65,9 @@ template class Metrics * equal * @returns map of recall values */ - - static std::map recall(const std::vector &,const std::vector &); + + static std::map recall(const std::vector &, + const std::vector &); /** @@ -77,7 +79,7 @@ template class Metrics * equal */ - + static std::map f1Score(const std::vector &, const std::vector &); }; From c9839b80d4a5d446826f1a65ef36f102ca524e0c Mon Sep 17 00:00:00 2001 From: Yashita Bansal Date: Sat, 18 Feb 2023 13:25:39 +0530 Subject: [PATCH 3/6] Metrics branch upto date with main --- .../linear_model/lasso_regularization.md | 50 ++++++++ .../linear_model/ridge_regularization.md | 50 ++++++++ docs/methods/metrics/silhouette_score.md | 33 ----- examples/{methods => }/cluster/kMeans.cpp | 3 +- examples/linear_model/lasso_regression.cpp | 22 ++++ examples/linear_model/linear_regression.cpp | 18 +++ .../linear_model/logistic_regression.cpp | 3 +- examples/linear_model/ridge_regression.cpp | 25 ++++ examples/matrix.cpp | 4 +- .../linear_model/linear_regression.cpp | 17 --- examples/methods/metrics/silhouette_score.cpp | 17 --- .../{methods => }/metrics/accuracy_eg.cpp | 2 +- 
.../metrics/classification_report_eg.cpp | 2 +- examples/{methods => }/metrics/f1score_eg.cpp | 0 .../metrics/mean_squared_error_eg.cpp | 2 +- .../{methods => }/metrics/precision_eg.cpp | 2 +- examples/{methods => }/metrics/recall_eg.cpp | 2 +- .../{methods => }/neighbors/bernoulli_nb.cpp | 3 +- .../{methods => }/neighbors/gaussian_nb.cpp | 3 +- examples/{methods => }/neighbors/knn.cpp | 3 +- .../preprocessing/label_encoder.cpp | 2 +- .../preprocessing/normalization_eg.cpp | 2 +- .../preprocessing/one_hot_encoder_eg.cpp | 2 +- .../preprocessing/standardization_eg.cpp | 2 +- src/slowmokit.cpp | 7 - src/slowmokit/CMakeLists.txt | 16 --- src/slowmokit/core.hpp | 3 + .../matrix/{matrix_main.cpp => matrix.cpp} | 4 +- src/slowmokit/ducks/matrix/matrix.hpp | 23 +++- src/slowmokit/ducks/matrix/matrix_free.cpp | 85 ------------- src/slowmokit/methods/cluster/kMeans.hpp | 6 +- .../methods/cluster/kMeans/kMeans.cpp | 1 - .../methods/cluster/kMeans/kMeans.hpp | 17 ++- .../linear_model/lasso_regularization.cpp | 120 ++++++++++++++++++ .../linear_model/lasso_regularization.hpp | 63 +++++++++ .../linear_regression/linear_regression.hpp | 4 +- .../linear_model/ridge_regularization.cpp | 102 +++++++++++++++ .../linear_model/ridge_regularization.hpp | 66 ++++++++++ src/slowmokit/methods/metrics/metrics.cpp | 16 +-- src/slowmokit/methods/metrics/metrics.hpp | 12 +- .../methods/metrics/silhouette_score.cpp | 113 ----------------- .../methods/metrics/silhouette_score.hpp | 24 ---- src/slowmokit/methods/neighbors/knn/knn.hpp | 2 + 43 files changed, 584 insertions(+), 369 deletions(-) create mode 100644 docs/methods/linear_model/lasso_regularization.md create mode 100644 docs/methods/linear_model/ridge_regularization.md delete mode 100644 docs/methods/metrics/silhouette_score.md rename examples/{methods => }/cluster/kMeans.cpp (94%) create mode 100644 examples/linear_model/lasso_regression.cpp create mode 100644 examples/linear_model/linear_regression.cpp rename examples/{methods => }/linear_model/logistic_regression.cpp (85%) create mode 100644 examples/linear_model/ridge_regression.cpp delete mode 100644 examples/methods/linear_model/linear_regression.cpp delete mode 100644 examples/methods/metrics/silhouette_score.cpp rename examples/{methods => }/metrics/accuracy_eg.cpp (79%) rename examples/{methods => }/metrics/classification_report_eg.cpp (78%) rename examples/{methods => }/metrics/f1score_eg.cpp (100%) rename examples/{methods => }/metrics/mean_squared_error_eg.cpp (78%) rename examples/{methods => }/metrics/precision_eg.cpp (86%) rename examples/{methods => }/metrics/recall_eg.cpp (86%) rename examples/{methods => }/neighbors/bernoulli_nb.cpp (81%) rename examples/{methods => }/neighbors/gaussian_nb.cpp (82%) rename examples/{methods => }/neighbors/knn.cpp (78%) rename examples/{methods => }/preprocessing/label_encoder.cpp (79%) rename examples/{methods => }/preprocessing/normalization_eg.cpp (73%) rename examples/{methods => }/preprocessing/one_hot_encoder_eg.cpp (84%) rename examples/{methods => }/preprocessing/standardization_eg.cpp (73%) delete mode 100644 src/slowmokit.cpp delete mode 100644 src/slowmokit/CMakeLists.txt rename src/slowmokit/ducks/matrix/{matrix_main.cpp => matrix.cpp} (98%) delete mode 100644 src/slowmokit/ducks/matrix/matrix_free.cpp create mode 100644 src/slowmokit/methods/linear_model/lasso_regularization.cpp create mode 100644 src/slowmokit/methods/linear_model/lasso_regularization.hpp create mode 100644 src/slowmokit/methods/linear_model/ridge_regularization.cpp create mode 
100644 src/slowmokit/methods/linear_model/ridge_regularization.hpp
 delete mode 100644 src/slowmokit/methods/metrics/silhouette_score.cpp
 delete mode 100644 src/slowmokit/methods/metrics/silhouette_score.hpp
diff --git a/docs/methods/linear_model/lasso_regularization.md b/docs/methods/linear_model/lasso_regularization.md
new file mode 100644
index 0000000..72a19ad
--- /dev/null
+++ b/docs/methods/linear_model/lasso_regularization.md
@@ -0,0 +1,50 @@
+# Lasso Regularization
+
+It stands for Least Absolute Shrinkage and Selection Operator. It adds the L1 penalty.
+
+L1 is the sum of the absolute values of the beta coefficients.
+
+## Parameters
+
+| Name   | Definition                                                                 | Defaults | Type     |
+| ------ | -------------------------------------------------------------------------- | -------- | -------- |
+| lambda | Constant that multiplies the L1 term, controlling regularization strength  | 0.01     | `double` |
+
+
+## Attributes
+
+| Name         | Definition                                          | Shape      |
+| ------------ | ---------------------------------------------------- | ---------- |
+| Coefficients | Estimated coefficients for the lasso regularization  | n_features |
+
+## Methods
+
+| Name                                                               | Definition                       | Return value |
+| ------------------------------------------------------------------ | -------------------------------- | ------------ |
+| `lossFunction(vector<vector<T>> x, vector<T> y)`                    | Regularized loss function        | `double`     |
+| `gradient(vector<vector<T>> x, vector<T> y)`                        | Find gradient                    | `vector<T>`  |
+| `gradientDescent(vector<vector<T>> x, vector<T> y, double alpha)`   | Gradient optimization algorithm  | `void`       |
+| `fit(vector<vector<T>> x, vector<T> y, int epochs, double alpha)`   | Fit linear model                 | `vector<T>`  |
+| `predict(vector<vector<T>> x)`                                      | Predict using the linear model   | `vector<T>`  |
+| `printCoefficients()`                                               | Print the fitted coefficients    | `void`       |
+
+## Example
+
+```cpp
+double alpha = 0.01;
+std::vector<std::vector<double>> x = {{1, 2, 3}, {2, 3, 4}, {3, 4, 5}, {4, 5, 6}};
+std::vector<double> y = {2, 3, 4, 5};
+double lambda = 0.1;
+int epochs = 1000;
+LassoRegularization<double> model(lambda);
+model.fit(x, y, epochs, alpha);
+model.printCoefficients();
+std::vector<double> yPred = model.predict(x);
+for (int i = 0; i < y.size(); i++)
+  std::cout << "Actual value: " << y[i] << ", Predicted value: " << yPred[i] << std::endl;
+
+```
diff --git a/docs/methods/linear_model/ridge_regularization.md b/docs/methods/linear_model/ridge_regularization.md
new file mode 100644
index 0000000..eadd22f
--- /dev/null
+++ b/docs/methods/linear_model/ridge_regularization.md
@@ -0,0 +1,50 @@
+# Ridge Regularization
+
+It adds L2 as the penalty.
+
+L2 is the sum of the squares of the magnitudes of the beta coefficients.
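+
+As a sketch in standard notation (not taken verbatim from the implementation, whose exact scaling may differ), the two penalized objectives described in this document and the lasso document above are:
+
+```latex
+\text{Lasso:}\quad \min_{\beta}\; \frac{1}{2n}\sum_{i=1}^{n}\bigl(y_i-\hat{y}_i\bigr)^2 \;+\; \lambda\sum_{j}\lvert\beta_j\rvert
+\qquad
+\text{Ridge:}\quad \min_{\beta}\; \frac{1}{2n}\sum_{i=1}^{n}\bigl(y_i-\hat{y}_i\bigr)^2 \;+\; \lambda\sum_{j}\beta_j^{2}
+```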
+
+## Parameters
+
+| Name   | Definition                                                                 | Defaults | Type     |
+| ------ | -------------------------------------------------------------------------- | -------- | -------- |
+| lambda | Constant that multiplies the L2 term, controlling regularization strength  | 0.01     | `double` |
+
+
+## Attributes
+
+| Name         | Definition                                          | Shape      |
+| ------------ | ---------------------------------------------------- | ---------- |
+| Coefficients | Estimated coefficients for the ridge regularization  | n_features |
+
+## Methods
+
+| Name                                                                           | Definition                       | Return value     |
+| -------------------------------------------------------------------------------- | -------------------------------- | ---------------- |
+| `lossFunction(vector<T> x, double y)`                                             | Regularized loss function        | `double`         |
+| `gradient(vector<T> x, double y)`                                                 | Find gradient                    | `vector<double>` |
+| `gradientDescent(vector<vector<T>> x, vector<T> y, double alpha, int epochs)`     | Gradient optimization algorithm  | `void`           |
+| `fit(vector<vector<T>> x, vector<T> y, int epochs, double alpha)`                 | Fit linear model                 | `vector<T>`      |
+| `predict(vector<T> x)`                                                            | Predict using the linear model   | `double`         |
+| `printCoefficients()`                                                             | Print the fitted coefficients    | `void`           |
+
+## Example
+
+```cpp
+double alpha = 0.01;
+std::vector<std::vector<double>> x = {{1, 2, 3}, {2, 3, 4}, {3, 4, 5}, {4, 5, 6}};
+std::vector<double> y = {2, 3, 4, 5};
+double lambda = 0.1;
+int epochs = 1000;
+RidgeRegularization<double> model(lambda);
+model.fit(x, y, epochs, alpha);
+model.printCoefficients();
+std::vector<double> yPred;
+for (int i = 0; i < x.size(); i++)
+{
+  yPred.push_back(model.predict(x[i]));
+}
+for (int i = 0; i < y.size(); i++)
+  std::cout << "Actual value: " << y[i] << ", Predicted value: " << yPred[i] << std::endl;
+
+```
diff --git a/docs/methods/metrics/silhouette_score.md b/docs/methods/metrics/silhouette_score.md
deleted file mode 100644
index 27d8b78..0000000
--- a/docs/methods/metrics/silhouette_score.md
+++ /dev/null
@@ -1,33 +0,0 @@
-# Silhouette Score
-
-The silhouette score is calculated as the average of all points si, where si is the difference of minimum of inter cluster distance to average of intra cluster distance divided by maximum of both.
-The silhouette Score reflects how good the clusters are.
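(For reference, the verbal description in the file being removed corresponds to the usual per-point silhouette; this is a sketch in standard notation, not text taken from that file:

```latex
s(i) = \frac{b(i) - a(i)}{\max\{a(i),\, b(i)\}}, \qquad \text{score} = \frac{1}{N}\sum_{i=1}^{N} s(i)
```

where a(i) is the mean intra-cluster distance of point i and b(i) is the smallest mean distance from i to the points of any other cluster.)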
- -## Parameters - -| Name | Definition | Type | -| ------------- | ------------------------------------------------------------------------------------------- | ----------------| -| numClusters | Takes a number of clusters | `int` | -| typeDist | You require euclidean/manhattan distance to compute silhoutte Score | `string` | -| x | Values of various points | `vector>` | -| y | Cluster number to which each x value belongs to | `vector` | - - -## Methods - -| Name | Definition | Return value | -| -----------------------------------------------| ----------------------------------------------------- | ----------------- | -| `silhouetteScore(vector>x,vector y,int numClusters,string typeDist)`|To find the silhoutte score| `double` | - -## Example - -```cpp - -std::vector> x{{1,2,3},{1.21,2.32,3.24},{5.56,5.45,5.23},{5.35,5.00,5.78}}; -std::vector y{0,0,1,1}; -int numClusters=2; -std::string s = "euclidean"; -double score = silhoutteScore(x,y,numClusters,s); -std::cout< int main() { diff --git a/examples/linear_model/lasso_regression.cpp b/examples/linear_model/lasso_regression.cpp new file mode 100644 index 0000000..5048129 --- /dev/null +++ b/examples/linear_model/lasso_regression.cpp @@ -0,0 +1,22 @@ +// #include "../src/slowmokit/methods/linear_model/lasso_regression.hpp" + +// int main() +// { +// std::vector> x = { +// {1.0, 2.0, 3.0}, {2.0, 3.0, 4.0}, {3.0, 4.0, 5.0}}; +// std::vector y = {1.0, 2.0, 3.0}; +// double alpha = 0.01; +// double lambda = 0.1; +// int epochs = 100; + +// LassoRegularization model(alpha); +// model.fit(x, y, epochs, alpha); +// model.printCoefficients(); + +// std::vector yPred = model.predict(x); +// for (int i = 0; i < y.size(); i++) +// std::cout << "Actual value: " << y[i] << ", Predicted value: " << yPred[i] +// << std::endl; + +// return 0; +// } \ No newline at end of file diff --git a/examples/linear_model/linear_regression.cpp b/examples/linear_model/linear_regression.cpp new file mode 100644 index 0000000..95e8807 --- /dev/null +++ b/examples/linear_model/linear_regression.cpp @@ -0,0 +1,18 @@ +#include "../../src/slowmokit/methods/linear_model/linear_regression.hpp" +#include "../../src/slowmokit/core.hpp" + +int main() +{ + LinearRegression model; + std::vector> x = {{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}}; + std::vector y = {2, 3, 4, 5, 6}; + + model.fit(x, y); + model.printCoefficients(); + + std::vector yPred = model.predict(x); + for (int i = 0; i < y.size(); i++) + cout << "Actual value: " << y[i] << ", Predicted value: " << yPred[i] << endl; + + return 0; +} diff --git a/examples/methods/linear_model/logistic_regression.cpp b/examples/linear_model/logistic_regression.cpp similarity index 85% rename from examples/methods/linear_model/logistic_regression.cpp rename to examples/linear_model/logistic_regression.cpp index ada7f4e..67792c5 100644 --- a/examples/methods/linear_model/logistic_regression.cpp +++ b/examples/linear_model/logistic_regression.cpp @@ -1,4 +1,5 @@ -// #include "../../../src/slowmokit/methods/linear_model/logistic_regression.hpp" +// #include "../../src/slowmokit/methods/linear_model/logistic_regression.hpp" +// #include "../../src/slowmokit/core.hpp" // int main() // { diff --git a/examples/linear_model/ridge_regression.cpp b/examples/linear_model/ridge_regression.cpp new file mode 100644 index 0000000..c4afe1e --- /dev/null +++ b/examples/linear_model/ridge_regression.cpp @@ -0,0 +1,25 @@ +// #include "../src/slowmokit/methods/linear_model/ridge_regression.hpp" + +// int main() +// { +// double alpha = 0.01; +// 
std::vector> x = { +// {1, 2, 3}, {2, 3, 4}, {3, 4, 5}, {4, 5, 6}}; +// std::vector y = {2, 3, 4, 5}; + +// double lambda = 0.1; +// int epochs = 1000; +// RidgeRegularization model(alpha); +// model.fit(x, y, epochs, alpha); +// model.printCoefficients(); +// std::vector yPred; +// for (int i = 0; i < x.size(); i++) +// { +// yPred.push_back(predict(x[i])); +// } +// for (int i = 0; i < y.size(); i++) +// std::cout << "Actual value: " << y[i] << ", Predicted value: " << yPred[i] +// << std::endl; + +// return 0; +// } diff --git a/examples/matrix.cpp b/examples/matrix.cpp index 677311c..e5a3707 100644 --- a/examples/matrix.cpp +++ b/examples/matrix.cpp @@ -3,9 +3,7 @@ int main() { int n = 3, m = 3; - Matrix mat(2, 2); - // std::cout << mat << " 2"; return 0; -} \ No newline at end of file +} diff --git a/examples/methods/linear_model/linear_regression.cpp b/examples/methods/linear_model/linear_regression.cpp deleted file mode 100644 index dc58d05..0000000 --- a/examples/methods/linear_model/linear_regression.cpp +++ /dev/null @@ -1,17 +0,0 @@ -// #include "../../../src/slowmokit/methods/linear_model/linear_regression.hpp" - -// int main() -// { -// LinearRegression model; -// std::vector> x = {{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}}; -// std::vector y = {2, 3, 4, 5, 6}; - -// model.fit(x, y); -// model.printCoefficients(); - -// std::vector yPred = model.predict(x); -// for (int i = 0; i < y.size(); i++) -// cout << "Actual value: " << y[i] << ", Predicted value: " << yPred[i] << endl; - -// return 0; -// } diff --git a/examples/methods/metrics/silhouette_score.cpp b/examples/methods/metrics/silhouette_score.cpp deleted file mode 100644 index 29120a8..0000000 --- a/examples/methods/metrics/silhouette_score.cpp +++ /dev/null @@ -1,17 +0,0 @@ -// #include "../../src/slowmokit/methods/cluster/silhouette_score.hpp" -// #include "../../../core.hpp" - -// signed main(){ -// std::vector> x{ -// {1,2,3}, -// {1.21,2.32,3.24}, -// {5.56,5.45,5.23}, -// {5.35,5.00,5.78} -// }; -// std::vector y{0,0,1,1}; -// int numClusters=2; -// std::string s = "euclidean"; -// double score = silhouetteScore(x,y,numClusters,s); - // std::cout< pred = {1, 0, 1, 1, 0, 1}; diff --git a/examples/methods/metrics/classification_report_eg.cpp b/examples/metrics/classification_report_eg.cpp similarity index 78% rename from examples/methods/metrics/classification_report_eg.cpp rename to examples/metrics/classification_report_eg.cpp index 5b68a3c..605da7a 100644 --- a/examples/methods/metrics/classification_report_eg.cpp +++ b/examples/metrics/classification_report_eg.cpp @@ -1,4 +1,4 @@ -// #include "../../src/slowmokit/methods/metrics/classification_report.hpp" +// #include "../src/slowmokit/methods/metrics/classification_report.hpp" // int main() // { diff --git a/examples/methods/metrics/f1score_eg.cpp b/examples/metrics/f1score_eg.cpp similarity index 100% rename from examples/methods/metrics/f1score_eg.cpp rename to examples/metrics/f1score_eg.cpp diff --git a/examples/methods/metrics/mean_squared_error_eg.cpp b/examples/metrics/mean_squared_error_eg.cpp similarity index 78% rename from examples/methods/metrics/mean_squared_error_eg.cpp rename to examples/metrics/mean_squared_error_eg.cpp index a427daf..3fe8489 100644 --- a/examples/methods/metrics/mean_squared_error_eg.cpp +++ b/examples/metrics/mean_squared_error_eg.cpp @@ -1,4 +1,4 @@ -// #include "../../src/slowmokit/methods/metrics/mean_squared_error.hpp" +// #include "../src/slowmokit/methods/metrics/mean_squared_error.hpp" // int main() { // std::vector actual 
= {1.0, 2.0, 3.0}; // std::vector pred = {0.5, 1.5, 2.5}; diff --git a/examples/methods/metrics/precision_eg.cpp b/examples/metrics/precision_eg.cpp similarity index 86% rename from examples/methods/metrics/precision_eg.cpp rename to examples/metrics/precision_eg.cpp index 33b354e..f41f005 100644 --- a/examples/methods/metrics/precision_eg.cpp +++ b/examples/metrics/precision_eg.cpp @@ -1,4 +1,4 @@ -// #include "../../src/slowmokit/methods/metrics/precision.hpp" +// #include "../src/slowmokit/methods/metrics/precision.hpp" // int main() // { // std::vector pred = {0, 1, 2, 1, 0, 2, 1, 0, 1, 2}; diff --git a/examples/methods/metrics/recall_eg.cpp b/examples/metrics/recall_eg.cpp similarity index 86% rename from examples/methods/metrics/recall_eg.cpp rename to examples/metrics/recall_eg.cpp index ee0c911..cb196ac 100644 --- a/examples/methods/metrics/recall_eg.cpp +++ b/examples/metrics/recall_eg.cpp @@ -1,4 +1,4 @@ -// #include "../../src/slowmokit/methods/metrics/recall.hpp" +// #include "../src/slowmokit/methods/metrics/recall.hpp" // int main() // { diff --git a/examples/methods/neighbors/bernoulli_nb.cpp b/examples/neighbors/bernoulli_nb.cpp similarity index 81% rename from examples/methods/neighbors/bernoulli_nb.cpp rename to examples/neighbors/bernoulli_nb.cpp index bb17dfb..8d707d2 100644 --- a/examples/methods/neighbors/bernoulli_nb.cpp +++ b/examples/neighbors/bernoulli_nb.cpp @@ -1,4 +1,5 @@ -// #include "../../../src/slowmokit/methods/neighbors/bernoulli_nb.hpp" +// #include "../../src/slowmokit/methods/neighbors/bernoulli_nb.hpp" +// #include "../../src/slowmokit/core.hpp" // signed main(){ // std::vector> xTrain{ diff --git a/examples/methods/neighbors/gaussian_nb.cpp b/examples/neighbors/gaussian_nb.cpp similarity index 82% rename from examples/methods/neighbors/gaussian_nb.cpp rename to examples/neighbors/gaussian_nb.cpp index 79ed8a3..ead14c3 100644 --- a/examples/methods/neighbors/gaussian_nb.cpp +++ b/examples/neighbors/gaussian_nb.cpp @@ -1,4 +1,5 @@ -// #include "../../../src/slowmokit/methods/neighbors/gaussian_nb.hpp" +// #include "../../src/slowmokit/methods/neighbors/gaussian_nb.hpp" +// #include "../../src/slowmokit/core.hpp" // signed main(){ // std::vector> x_train{ diff --git a/examples/methods/neighbors/knn.cpp b/examples/neighbors/knn.cpp similarity index 78% rename from examples/methods/neighbors/knn.cpp rename to examples/neighbors/knn.cpp index 0d39c76..5eb1a00 100644 --- a/examples/methods/neighbors/knn.cpp +++ b/examples/neighbors/knn.cpp @@ -1,4 +1,5 @@ -// #include "../../../src/slowmokit/methods/neighbors/knn.hpp" +// #include "../../src/slowmokit/methods/neighbors/knn.hpp" +// #include "../../src/slowmokit/core.hpp" // signed main(){ // std::vector> x{ diff --git a/examples/methods/preprocessing/label_encoder.cpp b/examples/preprocessing/label_encoder.cpp similarity index 79% rename from examples/methods/preprocessing/label_encoder.cpp rename to examples/preprocessing/label_encoder.cpp index 5e8ce5f..0a54461 100644 --- a/examples/methods/preprocessing/label_encoder.cpp +++ b/examples/preprocessing/label_encoder.cpp @@ -1,4 +1,4 @@ -//#include "../../src/slowmokit/methods/preprocessing/label_encoder.hpp" +//#include "src/slowmokit/methods/preprocessing/label_encoder.hpp" //int main() { // std::vector data = {"luffy","zoro","sanji","luffy","law","zoro"}; diff --git a/examples/methods/preprocessing/normalization_eg.cpp b/examples/preprocessing/normalization_eg.cpp similarity index 73% rename from examples/methods/preprocessing/normalization_eg.cpp rename 
to examples/preprocessing/normalization_eg.cpp index 255ad2b..fb28f4e 100644 --- a/examples/methods/preprocessing/normalization_eg.cpp +++ b/examples/preprocessing/normalization_eg.cpp @@ -1,4 +1,4 @@ -// #include "../../src/slowmokit/methods/preprocessing/normalization.hpp" +// #include "../src/slowmokit/methods/preprocessing/normalization.hpp" // int main(){ // std::vector values={1,2,3,4,5}; // normalize(values); diff --git a/examples/methods/preprocessing/one_hot_encoder_eg.cpp b/examples/preprocessing/one_hot_encoder_eg.cpp similarity index 84% rename from examples/methods/preprocessing/one_hot_encoder_eg.cpp rename to examples/preprocessing/one_hot_encoder_eg.cpp index b11846d..9fe6529 100644 --- a/examples/methods/preprocessing/one_hot_encoder_eg.cpp +++ b/examples/preprocessing/one_hot_encoder_eg.cpp @@ -1,4 +1,4 @@ -//#include "../../src/slowmokit/methods/preprocessing/one_hot_encoder.hpp" +//#include "src/slowmokit/methods/preprocessing/one_hot_encoder.hpp" //int main() { // std::vector data = {"apples", "banana", "mango", "pear", "mango","apples","pear"}; diff --git a/examples/methods/preprocessing/standardization_eg.cpp b/examples/preprocessing/standardization_eg.cpp similarity index 73% rename from examples/methods/preprocessing/standardization_eg.cpp rename to examples/preprocessing/standardization_eg.cpp index a51a622..fb9c5fc 100644 --- a/examples/methods/preprocessing/standardization_eg.cpp +++ b/examples/preprocessing/standardization_eg.cpp @@ -1,4 +1,4 @@ -// #include "../../src/slowmokit/methods/preprocessing/standardization.hpp" +// #include "../src/slowmokit/methods/preprocessing/standardization.hpp" // int main(){ // std::vector values={1,2,3,4,5}; diff --git a/src/slowmokit.cpp b/src/slowmokit.cpp deleted file mode 100644 index 0d55b17..0000000 --- a/src/slowmokit.cpp +++ /dev/null @@ -1,7 +0,0 @@ -/** - * @file slowmokit.cpp - * - * File important to read the cpp files inside the src/ - * Just a necessary include, no edits should be made to this file - * - */ \ No newline at end of file diff --git a/src/slowmokit/CMakeLists.txt b/src/slowmokit/CMakeLists.txt deleted file mode 100644 index e57d108..0000000 --- a/src/slowmokit/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -# Recurse into methods/ to get the definitions of any bindings. 
-#add_subdirectory(methods) - -# At install time, we simply install the src/ directory to include/ -install(FILES - "${CMAKE_CURRENT_SOURCE_DIR}/../slowmokit.hpp" - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") - -install(FILES - "${CMAKE_CURRENT_SOURCE_DIR}/base.hpp" - "${CMAKE_CURRENT_SOURCE_DIR}/core.hpp" - "${CMAKE_CURRENT_SOURCE_DIR}/prereqs.hpp" - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/slowmokit/") - -install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/methods" DESTINATION - "${CMAKE_INSTALL_INCLUDEDIR}/slowmokit") \ No newline at end of file diff --git a/src/slowmokit/core.hpp b/src/slowmokit/core.hpp index c72eec4..0684181 100644 --- a/src/slowmokit/core.hpp +++ b/src/slowmokit/core.hpp @@ -10,4 +10,7 @@ // pre-requisites of the library #include "prereqs.hpp" +// standard model class +#include "models/model.hpp" + #endif // SLOWMOKIT_CORE_HPP diff --git a/src/slowmokit/ducks/matrix/matrix_main.cpp b/src/slowmokit/ducks/matrix/matrix.cpp similarity index 98% rename from src/slowmokit/ducks/matrix/matrix_main.cpp rename to src/slowmokit/ducks/matrix/matrix.cpp index bc71834..0e63bc2 100644 --- a/src/slowmokit/ducks/matrix/matrix_main.cpp +++ b/src/slowmokit/ducks/matrix/matrix.cpp @@ -1,7 +1,7 @@ /** - * @file ducks/matrix/matrix_main.cpp + * @file ducks/matrix/matrix.cpp * - * Implementation of the main methods of Matrix + * Implementation of the matrix main program */ #include "matrix.hpp" diff --git a/src/slowmokit/ducks/matrix/matrix.hpp b/src/slowmokit/ducks/matrix/matrix.hpp index d58f85d..6a6343c 100644 --- a/src/slowmokit/ducks/matrix/matrix.hpp +++ b/src/slowmokit/ducks/matrix/matrix.hpp @@ -91,6 +91,16 @@ template class Matrix std::array getShape() const; + /** + * @brief Function for taking dot product of current matrix with another + * matrix + * @param rhs: Take dot product of current Matrix with `rhs` + * @returns: A New Matrix after taking dot product with matrix `rhs` + * @throws: std::invalid_argument incase of incompatible dimensions + */ + Matrix &dot(const Matrix &); + + /** * @brief overloading += operator for adding another matrix to existing matrix * @param1: Matrix `rhs` which is to be added @@ -149,13 +159,14 @@ template class Matrix * @throw: whatever operator *= throws */ Matrix subtract(const Matrix); - - /** - * @brief overloading << for easy printing of Matrix - */ - friend std::ostream &operator<< (std::ostream &, const Matrix &); }; + +/** + * @brief overloading << for easy printing of Matrix + */ +template std::ostream &operator<<(std::ostream &, const Matrix &); + /** * @brief Free Function to multiply a matrix to a number or another matrix * @param lhs: A number or a Matrix @@ -187,4 +198,4 @@ template Matrix operator-(Matrix, const G &); template Matrix operator-(Matrix, const Matrix &); -#endif // SLOWMOKIT_IO_HPP \ No newline at end of file +#endif // SLOWMOKIT_IO_HPP diff --git a/src/slowmokit/ducks/matrix/matrix_free.cpp b/src/slowmokit/ducks/matrix/matrix_free.cpp deleted file mode 100644 index cfc57fe..0000000 --- a/src/slowmokit/ducks/matrix/matrix_free.cpp +++ /dev/null @@ -1,85 +0,0 @@ -/** - * @file ducks/matrix/matrix_free.cpp - * - * Implementation of the free functions associated with matrix - */ - -#include "matrix.hpp" - -template Matrix operator*(Matrix lhs, const Matrix &rhs) -{ - lhs *= rhs; - return lhs; -} - -template Matrix operator+(Matrix lhs, const Matrix &rhs) -{ - lhs += rhs; - return lhs; -} - -template Matrix operator-(Matrix lhs, const Matrix &rhs) -{ - lhs -= rhs; - return lhs; -} - -template Matrix operator+(G num, const 
Matrix &matrix) -{ - Matrix res = matrix; - res += num; - return res; -} - -template Matrix operator-(G num, const Matrix &matrix) -{ - Matrix res = matrix; - res -= num; - return res; -} - -template Matrix operator*(G num, const Matrix &matrix) -{ - Matrix res = matrix; - res *= num; - return res; -} - -template Matrix operator+(Matrix matrix, const G &num) -{ - matrix += num; - return matrix; -} - -template Matrix operator-(Matrix matrix, const G &num) -{ - matrix -= num; - return matrix; -} - -template Matrix operator*(Matrix matrix, const G &num) -{ - matrix *= num; - return matrix; -} - -template Matrix Matrix::matmul(const Matrix rhs) -{ - Matrix res = *this; - res *= rhs; - return res; -} - -template Matrix Matrix::add(const Matrix rhs) -{ - Matrix res = *this; - res += rhs; - return res; -} - -template Matrix Matrix::subtract(const Matrix rhs) -{ - Matrix res = *this; - res -= rhs; - return res; -} \ No newline at end of file diff --git a/src/slowmokit/methods/cluster/kMeans.hpp b/src/slowmokit/methods/cluster/kMeans.hpp index 82cdd94..a2b006e 100644 --- a/src/slowmokit/methods/cluster/kMeans.hpp +++ b/src/slowmokit/methods/cluster/kMeans.hpp @@ -4,9 +4,9 @@ * Easy include for kMeans algorithm */ -#ifndef SLOWMOKIT_KMEANS_HPP_MAIN -#define SLOWMOKIT_KMEANS_HPP_MAIN +#ifndef SLOWMOKIT_KMEANS_HPP +#define SLOWMOKIT_KMEANS_HPP #include "kMeans/kMeans.hpp" -#endif // SLOWMOKIT_KMEANS_HPP_MAIN +#endif // SLOWMOKIT_KMEANS_HPP diff --git a/src/slowmokit/methods/cluster/kMeans/kMeans.cpp b/src/slowmokit/methods/cluster/kMeans/kMeans.cpp index 1418414..b637584 100644 --- a/src/slowmokit/methods/cluster/kMeans/kMeans.cpp +++ b/src/slowmokit/methods/cluster/kMeans/kMeans.cpp @@ -3,7 +3,6 @@ * * Implementation of the K-Means main program */ - #include "kMeans.hpp" template void kMeans::fit(std::vector> X) diff --git a/src/slowmokit/methods/cluster/kMeans/kMeans.hpp b/src/slowmokit/methods/cluster/kMeans/kMeans.hpp index 29db2b0..575582c 100644 --- a/src/slowmokit/methods/cluster/kMeans/kMeans.hpp +++ b/src/slowmokit/methods/cluster/kMeans/kMeans.hpp @@ -4,14 +4,13 @@ * The header file including the kMeans */ -#ifndef SLOWMOKIT_KMEANS_HPP_1 -#define SLOWMOKIT_KMEANS_HPP_1 +#ifndef SLOWMOKIT_KMEANS_HPP +#define SLOWMOKIT_KMEANS_HPP #include "../../../core.hpp" template class kMeans { - const int default_epoch = 40; const int k; const int epoch; std::vector clusters; @@ -85,17 +84,17 @@ template class kMeans public: kMeans(int k, int epoch) : k(k), epoch(epoch) { - if (k <= 1) + if (k <= 0) { - throw std::invalid_argument("k should be greater than 1"); + throw "k should be a positive integer."; } } - kMeans(int k) : kMeans(k, default_epoch) {} + kMeans(int k) : kMeans(k, 40) {} kMeans(int k, std::vector> initial_centroids, int epoch) - : kMeans(k, epoch) + : kMeans(k, 40) { this->centroids = initial_centroids; } @@ -105,12 +104,12 @@ template class kMeans std::vector predict(std::vector>); /** - * @Returns which cluster point-i belongs to + * Returns which cluster point-i belongs to */ std::vector labels() const; /** - * @Returns the final centroid for each cluster. + * Returns the final centroid for each cluster. 
*/ std::vector> getCentroid() const; }; diff --git a/src/slowmokit/methods/linear_model/lasso_regularization.cpp b/src/slowmokit/methods/linear_model/lasso_regularization.cpp new file mode 100644 index 0000000..fa0d28c --- /dev/null +++ b/src/slowmokit/methods/linear_model/lasso_regularization.cpp @@ -0,0 +1,120 @@ +/** + * @file methods/linear_model/lasso_regularization.cpp + * + * Implementation of the Lasso Regularization main program + */ + +#include "lasso_regularization.hpp" + +template +LassoRegularization::LassoRegularization(double lambda) : lambda(lambda) +{ +} + +template +double LassoRegularization::lossFunction(std::vector> &x, + std::vector &y) +{ + int trainingSampleSize = x.size(); + double loss = 0.0; + for (int i = 0; i < trainingSampleSize; i++) + { + double yPred = 0.0; + for (int j = 0; j < coefficients.size(); j++) + { + yPred += coefficients[j] * coefficients[i][j]; + } + loss += pow(y[i] - yPred, 2); + } + loss /= 2 * trainingSampleSize; + double regularization = 0.0; + for (int i = 0; i < coefficients.size(); i++) + { + regularization += fabs(coefficients[i]); + } + regularization *= lambda; + return loss + regularization; +} +template +std::vector +LassoRegularization::gradient(std::vector> &x, + std::vector &y) +{ + int trainingSampleSize = x.size(); + int m = coefficients.size(); + std::vector grad(m); + for (int j = 0; j < m; j++) + { + double sum = 0.0; + for (int i = 0; i < trainingSampleSize; i++) + { + sum += (coefficients[j] * x[i][j] - y[i]) * x[i][j]; + } + grad[j] = sum / trainingSampleSize; + if (coefficients[j] > 0) + { + grad[j] += lambda; + } + else if (coefficients[j] < 0) + { + grad[j] -= lambda; + } + } + return grad; +} +template +void LassoRegularization::gradientDescent(std::vector> &x, + std::vector &y, double alpha) +{ + std::vector grad = gradient(x, y); + int m = coefficients.size(); + for (int j = 0; j < m; j++) + { + coefficients[j] -= alpha * grad[j]; + } +} +template +std::vector LassoRegularization::fit(std::vector> &x, + std::vector &y, int epochs, + double alpha) +{ + if (x.size() == 0 || y.size() == 0) + throw "Make sure that you have atleast one train example"; + if (x.size() != y.size()) + throw "Number of features and target must be equal"; + int trainExampleSize = x.size(); + int featureSize = x[0].size(); + if (featureSize == 0) + throw "Feature size should be at least 1"; + coefficients.clear(); + coefficients.resize(featureSize + 1); + int n = x.size(); + int m = x[0].size(); + std::vector coefficients(m, 0.0); + for (int i = 0; i < epochs; i++) + { + gradientDescent(x, y, alpha); + } + return coefficients; +} + +template +std::vector LassoRegularization::predict(std::vector> x) +{ + std::vector yPred; + int trainExampleSize = x.size(), featureSize = x[0].size(); + for (int example = 0; example < trainExampleSize; example++) + { + T currentY = coefficients[0]; + for (int feature = 0; feature < featureSize; feature++) + currentY += coefficients[feature + 1] * x[example][feature]; + yPred.push_back(currentY); + } + return yPred; +} + +template void LassoRegularization::printCoefficients() +{ + for (int i = 0; i < coefficients.size(); i++) + std::cout << "Θ" << i << ": " << coefficients[i] << std::endl; +} diff --git a/src/slowmokit/methods/linear_model/lasso_regularization.hpp b/src/slowmokit/methods/linear_model/lasso_regularization.hpp new file mode 100644 index 0000000..d630b03 --- /dev/null +++ b/src/slowmokit/methods/linear_model/lasso_regularization.hpp @@ -0,0 +1,63 @@ +/** + * @file 
methods/linear_model/lasso_regularization.hpp + * + * The header file including the lasso regularization model + */ + +#ifndef SLOWMOKIT_LASSO_REGULARIZATION_HPP +#define SLOWMOKIT_LASSO_REGULARIZATION_HPP + +#include "../../core.hpp" + +template class LassoRegularization +{ + private: + std::vector coefficients; + double lambda = 0.01; + + public: + LassoRegularization(double = 0.01); + + + /** + * @brief calculates the lasso regularization term + * @param x training x values + * @param y training output values + * @return regularized loss function + */ + double lossFunction(std::vector> &, std::vector &); + + + /** + * @brief calculates the gradient of the loss function + * @param x training x values + * @param y training output values + * @return std::vector + */ + std::vector gradient(std::vector> &, std::vector &); + + + /** + * @brief implements the gradient descent optimization algorithm to find the + * optimal coefficients for the lasso regression model + * @param x training x values + * @param y training output values + * @param alpha + */ + void gradientDescent(std::vector> &, std::vector &, double); + + /** + * @param x training x values + * @param y training output values + * @param epochs + * @returns std::vector + */ + std::vector fit(std::vector> &, std::vector &, int, + double); + + std::vector predict(std::vector>); + + void printCoefficients(); +}; + +#endif // SLOWMOKIT_LASSO_REGULARIZATION_HPP diff --git a/src/slowmokit/methods/linear_model/linear_regression/linear_regression.hpp b/src/slowmokit/methods/linear_model/linear_regression/linear_regression.hpp index 9149aad..21d7e68 100644 --- a/src/slowmokit/methods/linear_model/linear_regression/linear_regression.hpp +++ b/src/slowmokit/methods/linear_model/linear_regression/linear_regression.hpp @@ -4,8 +4,8 @@ * The header file including the linear regression algorithm */ -#ifndef SLOWMOKIT_LINEAR_REGRESSION_HPP_1 -#define SLOWMOKIT_LINEAR_REGRESSION_HPP_1 +#ifndef SLOWMOKIT_LINEAR_REGRESSION_HPP +#define SLOWMOKIT_LINEAR_REGRESSION_HPP #include "../../../core.hpp" diff --git a/src/slowmokit/methods/linear_model/ridge_regularization.cpp b/src/slowmokit/methods/linear_model/ridge_regularization.cpp new file mode 100644 index 0000000..36155df --- /dev/null +++ b/src/slowmokit/methods/linear_model/ridge_regularization.cpp @@ -0,0 +1,102 @@ +/** + * @file methods/linear_model/ridge_regularization.cpp + * + * Implementation of the Ridge Regularization main program + */ + +#include "ridge_regularization.hpp" + +template +RidgeRegularization::RidgeRegularization(double lambda) : lambda(lambda) +{ +} + +template +double RidgeRegularization::lossFunction(std::vector &x, double y) +{ + double yPred = predict(x, coefficients); + double error = yPred - y; + double loss = error * error; + for (int i = 1; i < coefficients.size(); ++i) + { + loss += lambda * coefficients[i] * coefficients[i]; + } + return loss; +} +template +std::vector RidgeRegularization::gradient(std::vector &x, + double y) +{ + std::vector gradient(coefficients.size()); + double yPred = predict(x); + double error = yPred - y; + gradient[0] = 2 * error; + for (int i = 0; i < x.size(); ++i) + { + gradient[i + 1] = 2 * error * x[i] + 2 * lambda * coefficients[i + 1]; + } + return gradient; +} +template +void RidgeRegularization::gradientDescent(std::vector> &x, + std::vector &y, double alpha, + int epochs) +{ + int m = y.size(); + for (int epoch = 0; epoch < epochs; ++epoch) + { + std::vector grad(coefficients.size()); + for (int i = 0; i < m; ++i) + { + std::vector 
exampleGrad = gradient(x[i], y[i]); + for (int j = 0; j < coefficients.size(); ++j) + { + grad[j] += exampleGrad[j]; + } + } + for (int j = 0; j < coefficients.size(); ++j) + { + coefficients[j] = coefficients[j] - alpha * grad[j] / m; + } + } +} +template +std::vector RidgeRegularization::fit(std::vector> &x, + std::vector &y, int epochs, + double alpha) +{ + if (x.size() == 0 || y.size() == 0) + throw "Make sure that you have atleast one train example"; + if (x.size() != y.size()) + throw "Number of features and target must be equal"; + int trainExampleSize = x.size(); + int featureSize = x[0].size(); + if (featureSize == 0) + throw "Feature size should be at least 1"; + coefficients.clear(); + coefficients.resize(featureSize + 1); + int n = x.size(); + int m = x[0].size(); + std::vector coefficients(m, 0.0); + for (int i = 0; i < epochs; i++) + { + gradientDescent(x, y, alpha, epochs); + } + return coefficients; +} + +template double RidgeRegularization::predict(std::vector &x) +{ + double yPred = coefficients[0]; + for (int i = 0; i < x.size(); ++i) + { + yPred += coefficients[i + 1] * x[i]; + } + return yPred; +} + +template void RidgeRegularization::printCoefficients() +{ + for (int i = 0; i < coefficients.size(); i++) + std::cout << "Θ" << i << ": " << coefficients[i] << std::endl; +} diff --git a/src/slowmokit/methods/linear_model/ridge_regularization.hpp b/src/slowmokit/methods/linear_model/ridge_regularization.hpp new file mode 100644 index 0000000..0e1da6c --- /dev/null +++ b/src/slowmokit/methods/linear_model/ridge_regularization.hpp @@ -0,0 +1,66 @@ +/** + * @file methods/linear_model/ridge_regularization.hpp + * + * The header file including the ridge regularization model + */ + +#ifndef SLOWMOKIT_RIDGE_REGULARIZATION_HPP +#define SLOWMOKIT_RIDGE_REGULARIZATION_HPP + +#include "../../core.hpp" + +template class RidgeRegularization +{ + private: + std::vector coefficients; + double lambda = 0.01; + + public: + RidgeRegularization(double = 0.01); + + + /** + * @brief calculates the ridge regularization term + * @param x training x values for a single feature + * @param y training output value + * @return regularized loss function + */ + double lossFunction(std::vector &, double); + + + /** + * @brief calculates the gradient of the loss function + * @param x training x values for a single feature + * @param y training output value + * @return std::vector + */ + std::vector gradient(std::vector &, double); + + + /** + * @brief implements the gradient descent optimization algorithm to find the + * optimal coefficients for the ridge regression model + * @param x training x values + * @param y training output values + * @param alpha + * @param epochs + */ + void gradientDescent(std::vector> &, std::vector &, double, + int); + + /** + * @param x training x values + * @param y training output values + * @param epochs + * @param alpha + * @returns std::vector + */ + std::vector fit(std::vector> &, std::vector &, int, + double); + + double predict(std::vector &); + + void printCoefficients(); +}; + +#endif // SLOWMOKIT_RIDGE_REGULARIZATION_HPP diff --git a/src/slowmokit/methods/metrics/metrics.cpp b/src/slowmokit/methods/metrics/metrics.cpp index b8c8472..c4382f9 100644 --- a/src/slowmokit/methods/metrics/metrics.cpp +++ b/src/slowmokit/methods/metrics/metrics.cpp @@ -8,8 +8,7 @@ template -double Metrics::accuracy(const std::vector &pred, - const std::vector &trueLabels) +double Metrics::accuracy(const std::vector &pred, const std::vector &trueLabels) { if (pred.size() != 
trueLabels.size()) { @@ -28,8 +27,7 @@ double Metrics::accuracy(const std::vector &pred, } template -double Metrics::meanSquaredError(const std::vector &actual, - const std::vector &pred) +double Metrics::meanSquaredError(const std::vector &actual, const std::vector &pred) { if (actual.size() != pred.size()) { @@ -46,8 +44,7 @@ double Metrics::meanSquaredError(const std::vector &actual, } template -std::map Metrics::precision(const std::vector &pred, - const std::vector &actual) +std::map Metrics::precision(const std::vector &pred, const std::vector &actual) { if (pred.size() != actual.size()) { @@ -97,9 +94,9 @@ std::map Metrics::precision(const std::vector &pred, } + template -std::map Metrics::recall(const std::vector &pred, - const std::vector &actual) +std::map Metrics::recall(const std::vector &pred, const std::vector &actual) { if (pred.size() != actual.size()) { @@ -149,8 +146,7 @@ std::map Metrics::recall(const std::vector &pred, } template -std::map Metrics::f1Score(const std::vector &pred, - const std::vector &actual) +std::map Metrics::f1Score(const std::vector &pred, const std::vector &actual) { // 2 * Precision * Recall / (Precision + Recall) std::map precisionMap, recallMap; diff --git a/src/slowmokit/methods/metrics/metrics.hpp b/src/slowmokit/methods/metrics/metrics.hpp index 3730a29..edd3809 100644 --- a/src/slowmokit/methods/metrics/metrics.hpp +++ b/src/slowmokit/methods/metrics/metrics.hpp @@ -24,8 +24,7 @@ template class Metrics * @param pred -> predicted values * @param trueLabels -> true values * @returns accuracy score - * @throws invalid_argument exception when size of the two vectors is not - * equal + * @throws invalid_argument exception when size of the two vectors is not equal */ static double accuracy(const std::vector &, const std::vector &); @@ -52,7 +51,7 @@ template class Metrics * @returns map of precision values */ - + static std::map precision(const std::vector &, const std::vector &); @@ -65,9 +64,8 @@ template class Metrics * equal * @returns map of recall values */ - - static std::map recall(const std::vector &, - const std::vector &); + + static std::map recall(const std::vector &,const std::vector &); /** @@ -79,7 +77,7 @@ template class Metrics * equal */ - + static std::map f1Score(const std::vector &, const std::vector &); }; diff --git a/src/slowmokit/methods/metrics/silhouette_score.cpp b/src/slowmokit/methods/metrics/silhouette_score.cpp deleted file mode 100644 index 17ed013..0000000 --- a/src/slowmokit/methods/metrics/silhouette_score.cpp +++ /dev/null @@ -1,113 +0,0 @@ -/** - * @file methods/metrics/silhouette_score.cpp - * - * Implementation of the silhouette Score main program - */ - -#include "silhouette_score.hpp" -template -double silhouetteScore(std::vector> x, std::vector y, - int numClusters, std::string typeDist) -{ - if (x.size() != y.size()) - { - throw std::invalid_argument("Size of x and y values are not same"); - return -1; - } - - if (numClusters < 2 or x.size() - 1 < numClusters) - { - throw std::invalid_argument("Invalid arguments sizes of x or numClusters"); - return -1; - } - - std::transform(typeDist.begin(), typeDist.end(), typeDist.begin(), - [](unsigned char c) { return std::tolower(c); }); - - std::vector> distances(x.size(), - std::vector(x.size())); - for (int i = 0; i < x.size(); i++) - { // iterating over each x[i] - for (int j = 0; j < x.size(); j++) - { // iterating over each x[i] - if (i == j) - { - distances[i][j] = 0.0; // initialize distance=0 if computing distance - // between same values - } - else - { 
- if (typeDist == "euclidean") - { // euclidean distance - for (int k = 0; k < x[0].size(); k++) - { - distances[i][j] += - (x[i][k] - x[j][k]) * - (x[i][k] - x[j][k]); // x[i][k]-> i=point,k=1-d value of point - } - distances[i][j] = pow(distances[i][j], 0.5); - } - else if (typeDist == "manhattan") - { - for (int k = 0; k < x[0].size(); k++) - { - distances[i][j] += abs(x[i][k] - x[j][k]); - } - } - } - } - } - - std::vector intraClusters( - x.size(), 0.0); // Computing intraclusters distances of each point - std::vector crossClusters( - x.size()); // Minimum Distance of each point to other clusters - for (int i = 0; i < x.size(); i++) - { - int sumNum = 0; - std::vector interClusters( - numClusters, - 0.0); // values of point to each cluster points - std::vector sumsOfParticular(numClusters, 0); - for (int j = 0; j < x.size(); j++) - { - if (y[j] == y[i]) - { - intraClusters[i] += distances[i][j]; // Sum of distance of point to each - // other point in same cluster - sumNum++; - } - else - { - interClusters[y[j]] += distances[i][j]; // Sum of distance of point to - // points in different clusters - sumsOfParticular[y[j]]++; // computes points in that cluster - } - } - intraClusters[i] /= - sumNum; // Mean of sum values of distances b/w points of same cluster - double minimumOfall = std::numeric_limits::max(); - ; - for (int j = 0; j < numClusters; j++) - { - if (j != y[i]) - { - interClusters[j] /= - sumsOfParticular[j]; // Mean of values of interclusters distances - if (interClusters[j] < minimumOfall) - { // computing minimum value of means of intercluster distances - minimumOfall = interClusters[j]; - } - } - } - crossClusters[i] = minimumOfall; - } - double si = 0.0; - for (int i = 0; i < x.size(); i++) - { - si += ((crossClusters[i] - intraClusters[i]) / - std::max(intraClusters[i], - crossClusters[i])); // s = b[i]-a[i] / max(b[i],a[i]) - } - return si / double(x.size()); -}; \ No newline at end of file diff --git a/src/slowmokit/methods/metrics/silhouette_score.hpp b/src/slowmokit/methods/metrics/silhouette_score.hpp deleted file mode 100644 index 94fa3d6..0000000 --- a/src/slowmokit/methods/metrics/silhouette_score.hpp +++ /dev/null @@ -1,24 +0,0 @@ -/** - * @file methods/metrics/silhoutte_score.hpp - * - * Easy include to find Silhoutte score - */ - -#ifndef SLOWMOKIT_SILHOUETTE_SCORE_HPP -#define SLOWMOKIT_SILHOUETTE_SCORE_HPP -#include "../../core.hpp" - -template -/** - * @brief Calculates Silhouette Score - * - * @param vector> x values - * @param vector y values - * @param int number of clusters - * @param string distance type - * @return double Silhouette Score - */ -double silhouetteScore(std::vector>, std::vector, int, - std::string); - -#endif // SLOWMOKIT_SILHOUETTE_SCORE_HPP \ No newline at end of file diff --git a/src/slowmokit/methods/neighbors/knn/knn.hpp b/src/slowmokit/methods/neighbors/knn/knn.hpp index c8b2807..bbbc9a3 100644 --- a/src/slowmokit/methods/neighbors/knn/knn.hpp +++ b/src/slowmokit/methods/neighbors/knn/knn.hpp @@ -6,6 +6,8 @@ #ifndef SLOWMOKIT_KNN_HPP #define SLOWMOKIT_KNN_HPP +#include "../../../models/model.hpp" + template class KNN { private: From 86469d65d4e114d4600479a9ec5592cf610d2b33 Mon Sep 17 00:00:00 2001 From: Yashita Bansal Date: Sat, 18 Feb 2023 13:32:07 +0530 Subject: [PATCH 4/6] Formatted --- src/slowmokit/methods/metrics/metrics.cpp | 16 ++++++++++------ src/slowmokit/methods/metrics/metrics.hpp | 12 +++++++----- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/slowmokit/methods/metrics/metrics.cpp 
b/src/slowmokit/methods/metrics/metrics.cpp index c4382f9..b8c8472 100644 --- a/src/slowmokit/methods/metrics/metrics.cpp +++ b/src/slowmokit/methods/metrics/metrics.cpp @@ -8,7 +8,8 @@ template -double Metrics::accuracy(const std::vector &pred, const std::vector &trueLabels) +double Metrics::accuracy(const std::vector &pred, + const std::vector &trueLabels) { if (pred.size() != trueLabels.size()) { @@ -27,7 +28,8 @@ double Metrics::accuracy(const std::vector &pred, const std::vector &tr } template -double Metrics::meanSquaredError(const std::vector &actual, const std::vector &pred) +double Metrics::meanSquaredError(const std::vector &actual, + const std::vector &pred) { if (actual.size() != pred.size()) { @@ -44,7 +46,8 @@ double Metrics::meanSquaredError(const std::vector &actual, const std::vec } template -std::map Metrics::precision(const std::vector &pred, const std::vector &actual) +std::map Metrics::precision(const std::vector &pred, + const std::vector &actual) { if (pred.size() != actual.size()) { @@ -94,9 +97,9 @@ std::map Metrics::precision(const std::vector &pred, const std: } - template -std::map Metrics::recall(const std::vector &pred, const std::vector &actual) +std::map Metrics::recall(const std::vector &pred, + const std::vector &actual) { if (pred.size() != actual.size()) { @@ -146,7 +149,8 @@ std::map Metrics::recall(const std::vector &pred, const std::ve } template -std::map Metrics::f1Score(const std::vector &pred, const std::vector &actual) +std::map Metrics::f1Score(const std::vector &pred, + const std::vector &actual) { // 2 * Precision * Recall / (Precision + Recall) std::map precisionMap, recallMap; diff --git a/src/slowmokit/methods/metrics/metrics.hpp b/src/slowmokit/methods/metrics/metrics.hpp index edd3809..3730a29 100644 --- a/src/slowmokit/methods/metrics/metrics.hpp +++ b/src/slowmokit/methods/metrics/metrics.hpp @@ -24,7 +24,8 @@ template class Metrics * @param pred -> predicted values * @param trueLabels -> true values * @returns accuracy score - * @throws invalid_argument exception when size of the two vectors is not equal + * @throws invalid_argument exception when size of the two vectors is not + * equal */ static double accuracy(const std::vector &, const std::vector &); @@ -51,7 +52,7 @@ template class Metrics * @returns map of precision values */ - + static std::map precision(const std::vector &, const std::vector &); @@ -64,8 +65,9 @@ template class Metrics * equal * @returns map of recall values */ - - static std::map recall(const std::vector &,const std::vector &); + + static std::map recall(const std::vector &, + const std::vector &); /** @@ -77,7 +79,7 @@ template class Metrics * equal */ - + static std::map f1Score(const std::vector &, const std::vector &); }; From 1740bf856902ebe9d33113442682023f3e81b004 Mon Sep 17 00:00:00 2001 From: Yashita Bansal Date: Sat, 18 Feb 2023 13:46:23 +0530 Subject: [PATCH 5/6] Removed regularisation to separate out PRs --- .../linear_model/lasso_regularization.cpp | 120 ------------------ .../linear_model/lasso_regularization.hpp | 63 --------- .../linear_model/ridge_regularization.cpp | 102 --------------- .../linear_model/ridge_regularization.hpp | 66 ---------- 4 files changed, 351 deletions(-) delete mode 100644 src/slowmokit/methods/linear_model/lasso_regularization.cpp delete mode 100644 src/slowmokit/methods/linear_model/lasso_regularization.hpp delete mode 100644 src/slowmokit/methods/linear_model/ridge_regularization.cpp delete mode 100644 src/slowmokit/methods/linear_model/ridge_regularization.hpp diff 
--git a/src/slowmokit/methods/linear_model/lasso_regularization.cpp b/src/slowmokit/methods/linear_model/lasso_regularization.cpp deleted file mode 100644 index fa0d28c..0000000 --- a/src/slowmokit/methods/linear_model/lasso_regularization.cpp +++ /dev/null @@ -1,120 +0,0 @@ -/** - * @file methods/linear_model/lasso_regularization.cpp - * - * Implementation of the Lasso Regularization main program - */ - -#include "lasso_regularization.hpp" - -template -LassoRegularization::LassoRegularization(double lambda) : lambda(lambda) -{ -} - -template -double LassoRegularization::lossFunction(std::vector> &x, - std::vector &y) -{ - int trainingSampleSize = x.size(); - double loss = 0.0; - for (int i = 0; i < trainingSampleSize; i++) - { - double yPred = 0.0; - for (int j = 0; j < coefficients.size(); j++) - { - yPred += coefficients[j] * coefficients[i][j]; - } - loss += pow(y[i] - yPred, 2); - } - loss /= 2 * trainingSampleSize; - double regularization = 0.0; - for (int i = 0; i < coefficients.size(); i++) - { - regularization += fabs(coefficients[i]); - } - regularization *= lambda; - return loss + regularization; -} -template -std::vector -LassoRegularization::gradient(std::vector> &x, - std::vector &y) -{ - int trainingSampleSize = x.size(); - int m = coefficients.size(); - std::vector grad(m); - for (int j = 0; j < m; j++) - { - double sum = 0.0; - for (int i = 0; i < trainingSampleSize; i++) - { - sum += (coefficients[j] * x[i][j] - y[i]) * x[i][j]; - } - grad[j] = sum / trainingSampleSize; - if (coefficients[j] > 0) - { - grad[j] += lambda; - } - else if (coefficients[j] < 0) - { - grad[j] -= lambda; - } - } - return grad; -} -template -void LassoRegularization::gradientDescent(std::vector> &x, - std::vector &y, double alpha) -{ - std::vector grad = gradient(x, y); - int m = coefficients.size(); - for (int j = 0; j < m; j++) - { - coefficients[j] -= alpha * grad[j]; - } -} -template -std::vector LassoRegularization::fit(std::vector> &x, - std::vector &y, int epochs, - double alpha) -{ - if (x.size() == 0 || y.size() == 0) - throw "Make sure that you have atleast one train example"; - if (x.size() != y.size()) - throw "Number of features and target must be equal"; - int trainExampleSize = x.size(); - int featureSize = x[0].size(); - if (featureSize == 0) - throw "Feature size should be at least 1"; - coefficients.clear(); - coefficients.resize(featureSize + 1); - int n = x.size(); - int m = x[0].size(); - std::vector coefficients(m, 0.0); - for (int i = 0; i < epochs; i++) - { - gradientDescent(x, y, alpha); - } - return coefficients; -} - -template -std::vector LassoRegularization::predict(std::vector> x) -{ - std::vector yPred; - int trainExampleSize = x.size(), featureSize = x[0].size(); - for (int example = 0; example < trainExampleSize; example++) - { - T currentY = coefficients[0]; - for (int feature = 0; feature < featureSize; feature++) - currentY += coefficients[feature + 1] * x[example][feature]; - yPred.push_back(currentY); - } - return yPred; -} - -template void LassoRegularization::printCoefficients() -{ - for (int i = 0; i < coefficients.size(); i++) - std::cout << "Θ" << i << ": " << coefficients[i] << std::endl; -} diff --git a/src/slowmokit/methods/linear_model/lasso_regularization.hpp b/src/slowmokit/methods/linear_model/lasso_regularization.hpp deleted file mode 100644 index d630b03..0000000 --- a/src/slowmokit/methods/linear_model/lasso_regularization.hpp +++ /dev/null @@ -1,63 +0,0 @@ -/** - * @file methods/linear_model/lasso_regularization.hpp - * - * The header file 
including the lasso regularization model - */ - -#ifndef SLOWMOKIT_LASSO_REGULARIZATION_HPP -#define SLOWMOKIT_LASSO_REGULARIZATION_HPP - -#include "../../core.hpp" - -template class LassoRegularization -{ - private: - std::vector coefficients; - double lambda = 0.01; - - public: - LassoRegularization(double = 0.01); - - - /** - * @brief calculates the lasso regularization term - * @param x training x values - * @param y training output values - * @return regularized loss function - */ - double lossFunction(std::vector> &, std::vector &); - - - /** - * @brief calculates the gradient of the loss function - * @param x training x values - * @param y training output values - * @return std::vector - */ - std::vector gradient(std::vector> &, std::vector &); - - - /** - * @brief implements the gradient descent optimization algorithm to find the - * optimal coefficients for the lasso regression model - * @param x training x values - * @param y training output values - * @param alpha - */ - void gradientDescent(std::vector> &, std::vector &, double); - - /** - * @param x training x values - * @param y training output values - * @param epochs - * @returns std::vector - */ - std::vector fit(std::vector> &, std::vector &, int, - double); - - std::vector predict(std::vector>); - - void printCoefficients(); -}; - -#endif // SLOWMOKIT_LASSO_REGULARIZATION_HPP diff --git a/src/slowmokit/methods/linear_model/ridge_regularization.cpp b/src/slowmokit/methods/linear_model/ridge_regularization.cpp deleted file mode 100644 index 36155df..0000000 --- a/src/slowmokit/methods/linear_model/ridge_regularization.cpp +++ /dev/null @@ -1,102 +0,0 @@ -/** - * @file methods/linear_model/ridge_regularization.cpp - * - * Implementation of the Ridge Regularization main program - */ - -#include "ridge_regularization.hpp" - -template -RidgeRegularization::RidgeRegularization(double lambda) : lambda(lambda) -{ -} - -template -double RidgeRegularization::lossFunction(std::vector &x, double y) -{ - double yPred = predict(x, coefficients); - double error = yPred - y; - double loss = error * error; - for (int i = 1; i < coefficients.size(); ++i) - { - loss += lambda * coefficients[i] * coefficients[i]; - } - return loss; -} -template -std::vector RidgeRegularization::gradient(std::vector &x, - double y) -{ - std::vector gradient(coefficients.size()); - double yPred = predict(x); - double error = yPred - y; - gradient[0] = 2 * error; - for (int i = 0; i < x.size(); ++i) - { - gradient[i + 1] = 2 * error * x[i] + 2 * lambda * coefficients[i + 1]; - } - return gradient; -} -template -void RidgeRegularization::gradientDescent(std::vector> &x, - std::vector &y, double alpha, - int epochs) -{ - int m = y.size(); - for (int epoch = 0; epoch < epochs; ++epoch) - { - std::vector grad(coefficients.size()); - for (int i = 0; i < m; ++i) - { - std::vector exampleGrad = gradient(x[i], y[i]); - for (int j = 0; j < coefficients.size(); ++j) - { - grad[j] += exampleGrad[j]; - } - } - for (int j = 0; j < coefficients.size(); ++j) - { - coefficients[j] = coefficients[j] - alpha * grad[j] / m; - } - } -} -template -std::vector RidgeRegularization::fit(std::vector> &x, - std::vector &y, int epochs, - double alpha) -{ - if (x.size() == 0 || y.size() == 0) - throw "Make sure that you have atleast one train example"; - if (x.size() != y.size()) - throw "Number of features and target must be equal"; - int trainExampleSize = x.size(); - int featureSize = x[0].size(); - if (featureSize == 0) - throw "Feature size should be at least 1"; - 
coefficients.clear(); - coefficients.resize(featureSize + 1); - int n = x.size(); - int m = x[0].size(); - std::vector coefficients(m, 0.0); - for (int i = 0; i < epochs; i++) - { - gradientDescent(x, y, alpha, epochs); - } - return coefficients; -} - -template double RidgeRegularization::predict(std::vector &x) -{ - double yPred = coefficients[0]; - for (int i = 0; i < x.size(); ++i) - { - yPred += coefficients[i + 1] * x[i]; - } - return yPred; -} - -template void RidgeRegularization::printCoefficients() -{ - for (int i = 0; i < coefficients.size(); i++) - std::cout << "Θ" << i << ": " << coefficients[i] << std::endl; -} diff --git a/src/slowmokit/methods/linear_model/ridge_regularization.hpp b/src/slowmokit/methods/linear_model/ridge_regularization.hpp deleted file mode 100644 index 0e1da6c..0000000 --- a/src/slowmokit/methods/linear_model/ridge_regularization.hpp +++ /dev/null @@ -1,66 +0,0 @@ -/** - * @file methods/linear_model/ridge_regularization.hpp - * - * The header file including the ridge regularization model - */ - -#ifndef SLOWMOKIT_RIDGE_REGULARIZATION_HPP -#define SLOWMOKIT_RIDGE_REGULARIZATION_HPP - -#include "../../core.hpp" - -template class RidgeRegularization -{ - private: - std::vector coefficients; - double lambda = 0.01; - - public: - RidgeRegularization(double = 0.01); - - - /** - * @brief calculates the ridge regularization term - * @param x training x values for a single feature - * @param y training output value - * @return regularized loss function - */ - double lossFunction(std::vector &, double); - - - /** - * @brief calculates the gradient of the loss function - * @param x training x values for a single feature - * @param y training output value - * @return std::vector - */ - std::vector gradient(std::vector &, double); - - - /** - * @brief implements the gradient descent optimization algorithm to find the - * optimal coefficients for the ridge regression model - * @param x training x values - * @param y training output values - * @param alpha - * @param epochs - */ - void gradientDescent(std::vector> &, std::vector &, double, - int); - - /** - * @param x training x values - * @param y training output values - * @param epochs - * @param alpha - * @returns std::vector - */ - std::vector fit(std::vector> &, std::vector &, int, - double); - - double predict(std::vector &); - - void printCoefficients(); -}; - -#endif // SLOWMOKIT_RIDGE_REGULARIZATION_HPP From d492f059ccf0395be3da19d2b72b7df0ff986203 Mon Sep 17 00:00:00 2001 From: Yashita Bansal Date: Sun, 19 Feb 2023 10:43:04 +0530 Subject: [PATCH 6/6] Added silhouette_score in metrics cleanup --- src/slowmokit/methods/metrics/metrics.cpp | 109 +++++++++++++++++ src/slowmokit/methods/metrics/metrics.hpp | 12 ++ .../methods/metrics/silhouette_score.cpp | 114 ++++++++++++++++++ .../methods/metrics/silhouette_score.hpp | 24 ++++ 4 files changed, 259 insertions(+) create mode 100644 src/slowmokit/methods/metrics/silhouette_score.cpp create mode 100644 src/slowmokit/methods/metrics/silhouette_score.hpp diff --git a/src/slowmokit/methods/metrics/metrics.cpp b/src/slowmokit/methods/metrics/metrics.cpp index b8c8472..9b43db0 100644 --- a/src/slowmokit/methods/metrics/metrics.cpp +++ b/src/slowmokit/methods/metrics/metrics.cpp @@ -177,4 +177,113 @@ std::map Metrics::f1Score(const std::vector &pred, } } return f1ScoreMap; +} + +template +double Metrics::silhouetteScore(const std::vector> x, + const std::vector y, int numClusters, + std::string typeDist) +{ + if (x.size() != y.size()) + { + throw 
std::invalid_argument("Size of x and y values are not same"); + return -1; + } + + if (numClusters < 2 or x.size() - 1 < numClusters) + { + throw std::invalid_argument("Invalid arguments sizes of x or numClusters"); + return -1; + } + + std::transform(typeDist.begin(), typeDist.end(), typeDist.begin(), + [](unsigned char c) { return std::tolower(c); }); + + std::vector> distances(x.size(), + std::vector(x.size())); + for (int i = 0; i < x.size(); i++) + { // iterating over each x[i] + for (int j = 0; j < x.size(); j++) + { // iterating over each x[i] + if (i == j) + { + distances[i][j] = 0.0; // initialize distance=0 if computing + // distance between same values + } + else + { + if (typeDist == "euclidean") + { // euclidean distance + for (int k = 0; k < x[0].size(); k++) + { + distances[i][j] += + (x[i][k] - x[j][k]) * + (x[i][k] - x[j][k]); // x[i][k]-> i=point,k=1-d value of point + } + const double HALF = 0.5; + distances[i][j] = pow(distances[i][j], HALF); + } + else if (typeDist == "manhattan") + { + for (int k = 0; k < x[0].size(); k++) + { + distances[i][j] += abs(x[i][k] - x[j][k]); + } + } + } + } + } + + std::vector intraClusters( + x.size(), 0.0); // Computing intraclusters distances of each point + std::vector crossClusters( + x.size()); // Minimum Distance of each point to other clusters + for (int i = 0; i < x.size(); i++) + { + int sumNum = 0; + std::vector interClusters( + numClusters, + 0.0); // values of point to each cluster points + std::vector sumsOfParticular(numClusters, 0); + for (int j = 0; j < x.size(); j++) + { + if (y[j] == y[i]) + { + intraClusters[i] += distances[i][j]; // Sum of distance of point to each + // other point in same cluster + sumNum++; + } + else + { + interClusters[y[j]] += distances[i][j]; // Sum of distance of point to + // points in different clusters + sumsOfParticular[y[j]]++; // computes points in that cluster + } + } + intraClusters[i] /= sumNum; // Mean of sum values of distances b/w + // points of same cluster + double minimumOfall = std::numeric_limits::max(); + ; + for (int j = 0; j < numClusters; j++) + { + if (j != y[i]) + { + interClusters[j] /= sumsOfParticular[j]; // Mean of values of + // interclusters distances + if (interClusters[j] < minimumOfall) + { // computing minimum value of means of intercluster distances + minimumOfall = interClusters[j]; + } + } + } + crossClusters[i] = minimumOfall; + } + double si = 0.0; + for (int i = 0; i < x.size(); i++) + { + si += ((crossClusters[i] - intraClusters[i]) / + std::max(intraClusters[i], + crossClusters[i])); // s = b[i]-a[i] / max(b[i],a[i]) + } + return si / double(x.size()); } \ No newline at end of file diff --git a/src/slowmokit/methods/metrics/metrics.hpp b/src/slowmokit/methods/metrics/metrics.hpp index 3730a29..d1008de 100644 --- a/src/slowmokit/methods/metrics/metrics.hpp +++ b/src/slowmokit/methods/metrics/metrics.hpp @@ -82,6 +82,18 @@ template class Metrics static std::map f1Score(const std::vector &, const std::vector &); + + + /** + * @brief Calculates Silhouette Score + * @param vector> x values + * @param vector y values + * @param int number of clusters + * @param string distance type + * @return double Silhouette Score + */ + static double silhouetteScore(const std::vector>, + const std::vector, int, std::string); }; #endif // SLOWMOKIT_METRICS_HPP diff --git a/src/slowmokit/methods/metrics/silhouette_score.cpp b/src/slowmokit/methods/metrics/silhouette_score.cpp new file mode 100644 index 0000000..513bbce --- /dev/null +++ 
b/src/slowmokit/methods/metrics/silhouette_score.cpp @@ -0,0 +1,114 @@ +/** + * @file methods/metrics/silhouette_score.cpp + * + * Implementation of the silhouette Score main program + */ + +#include "silhouette_score.hpp" +template +double silhouetteScore(std::vector> x, std::vector y, + int numClusters, std::string typeDist) +{ + if (x.size() != y.size()) + { + throw std::invalid_argument("Size of x and y values are not same"); + return -1; + } + + if (numClusters < 2 or x.size() - 1 < numClusters) + { + throw std::invalid_argument("Invalid arguments sizes of x or numClusters"); + return -1; + } + + std::transform(typeDist.begin(), typeDist.end(), typeDist.begin(), + [](unsigned char c) { return std::tolower(c); }); + + std::vector> distances(x.size(), + std::vector(x.size())); + for (int i = 0; i < x.size(); i++) + { // iterating over each x[i] + for (int j = 0; j < x.size(); j++) + { // iterating over each x[i] + if (i == j) + { + distances[i][j] = 0.0; // initialize distance=0 if computing + // distance between same values + } + else + { + if (typeDist == "euclidean") + { // euclidean distance + for (int k = 0; k < x[0].size(); k++) + { + distances[i][j] += + (x[i][k] - x[j][k]) * + (x[i][k] - x[j][k]); // x[i][k]-> i=point,k=1-d value of point + } + const double HALF = 0.5; + distances[i][j] = pow(distances[i][j], HALF); + } + else if (typeDist == "manhattan") + { + for (int k = 0; k < x[0].size(); k++) + { + distances[i][j] += abs(x[i][k] - x[j][k]); + } + } + } + } + } + + std::vector intraClusters( + x.size(), 0.0); // Computing intraclusters distances of each point + std::vector crossClusters( + x.size()); // Minimum Distance of each point to other clusters + for (int i = 0; i < x.size(); i++) + { + int sumNum = 0; + std::vector interClusters( + numClusters, + 0.0); // values of point to each cluster points + std::vector sumsOfParticular(numClusters, 0); + for (int j = 0; j < x.size(); j++) + { + if (y[j] == y[i]) + { + intraClusters[i] += distances[i][j]; // Sum of distance of point to each + // other point in same cluster + sumNum++; + } + else + { + interClusters[y[j]] += distances[i][j]; // Sum of distance of point to + // points in different clusters + sumsOfParticular[y[j]]++; // computes points in that cluster + } + } + intraClusters[i] /= sumNum; // Mean of sum values of distances b/w + // points of same cluster + double minimumOfall = std::numeric_limits::max(); + ; + for (int j = 0; j < numClusters; j++) + { + if (j != y[i]) + { + interClusters[j] /= sumsOfParticular[j]; // Mean of values of + // interclusters distances + if (interClusters[j] < minimumOfall) + { // computing minimum value of means of intercluster distances + minimumOfall = interClusters[j]; + } + } + } + crossClusters[i] = minimumOfall; + } + double si = 0.0; + for (int i = 0; i < x.size(); i++) + { + si += ((crossClusters[i] - intraClusters[i]) / + std::max(intraClusters[i], + crossClusters[i])); // s = b[i]-a[i] / max(b[i],a[i]) + } + return si / double(x.size()); +}; \ No newline at end of file diff --git a/src/slowmokit/methods/metrics/silhouette_score.hpp b/src/slowmokit/methods/metrics/silhouette_score.hpp new file mode 100644 index 0000000..94fa3d6 --- /dev/null +++ b/src/slowmokit/methods/metrics/silhouette_score.hpp @@ -0,0 +1,24 @@ +/** + * @file methods/metrics/silhoutte_score.hpp + * + * Easy include to find Silhoutte score + */ + +#ifndef SLOWMOKIT_SILHOUETTE_SCORE_HPP +#define SLOWMOKIT_SILHOUETTE_SCORE_HPP +#include "../../core.hpp" + +template +/** + * @brief Calculates Silhouette Score 
+ *
+ * @param vector<vector<T>> x values
+ * @param vector<int> y values
+ * @param int number of clusters
+ * @param string distance type
+ * @return double Silhouette Score
+ */
+double silhouetteScore(std::vector<std::vector<T>>, std::vector<int>, int,
+                       std::string);
+
+#endif // SLOWMOKIT_SILHOUETTE_SCORE_HPP
\ No newline at end of file
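
Usage sketch (illustrative only, not part of the patches above). It assumes the templates instantiate as declared in metrics.hpp and silhouette_score.hpp, e.g. Metrics<int> for integer class labels and a free silhouetteScore<double> taking (points, labels, numClusters, distanceType); the include paths and the precise template arguments are assumptions, so adjust them to whatever the merged headers actually expose. Because the template definitions live in the .cpp files, the sketch also assumes they are either included or explicitly instantiated for these types. The printed silhouette value is the mean of s(i) = (b(i) - a(i)) / max(a(i), b(i)) over all points, matching the comment in silhouette_score.cpp.

#include <iostream>
#include <string>
#include <vector>

#include "slowmokit/methods/metrics/metrics.hpp"            // assumed include path
#include "slowmokit/methods/metrics/silhouette_score.hpp"   // assumed include path

int main()
{
  // Toy classification labels: 6 samples, 3 classes (0, 1, 2).
  std::vector<int> trueLabels = {0, 1, 2, 0, 1, 2};
  std::vector<int> predicted  = {0, 1, 2, 0, 2, 2};

  // Fraction of matching positions; 5/6 for this toy data.
  std::cout << "accuracy: "
            << Metrics<int>::accuracy(predicted, trueLabels) << '\n';

  // Per-class precision, recall and F1; each value is rounded to two
  // decimals by the implementation in metrics.cpp.
  auto precisionMap = Metrics<int>::precision(predicted, trueLabels);
  auto recallMap    = Metrics<int>::recall(predicted, trueLabels);
  auto f1Map        = Metrics<int>::f1Score(predicted, trueLabels);
  for (const auto &[label, p] : precisionMap)
    std::cout << "class " << label << " precision: " << p
              << " recall: " << recallMap[label]
              << " f1: " << f1Map[label] << '\n';

  // Two well-separated 2-D clusters, so the score should be close to 1.
  std::vector<std::vector<double>> points = {
      {0.0, 0.1}, {0.2, 0.0}, {0.1, 0.2},   // cluster 0
      {5.0, 5.1}, {5.2, 5.0}, {5.1, 5.2}};  // cluster 1
  std::vector<int> clusterLabels = {0, 0, 0, 1, 1, 1};

  std::cout << "silhouette (euclidean): "
            << silhouetteScore<double>(points, clusterLabels, 2, "euclidean")
            << '\n';
  return 0;
}

Passing "manhattan" as the last argument switches the pairwise distance, which is the only behavioural difference between the two typeDist branches.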