diff --git a/packages/r/R/RcppExports.R b/packages/r/R/RcppExports.R index ae74c19..6ff1c06 100644 --- a/packages/r/R/RcppExports.R +++ b/packages/r/R/RcppExports.R @@ -1,7 +1,7 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -tapkee_embed_cpp <- function(data, method, num_neighbors, target_dimension, gaussian_kernel_width, landmark_ratio, max_iteration, diffusion_map_timesteps, sne_perplexity, sne_theta, squishing_rate, spe_global_strategy, spe_num_updates, spe_tolerance, nullspace_shift, klle_shift, fa_epsilon, check_connectivity) { - .Call(`_tapkee_tapkee_embed_cpp`, data, method, num_neighbors, target_dimension, gaussian_kernel_width, landmark_ratio, max_iteration, diffusion_map_timesteps, sne_perplexity, sne_theta, squishing_rate, spe_global_strategy, spe_num_updates, spe_tolerance, nullspace_shift, klle_shift, fa_epsilon, check_connectivity) +tapkee_embed_cpp <- function(data, method, neighbors_method, eigen_method, num_neighbors, target_dimension, gaussian_kernel_width, landmark_ratio, max_iteration, diffusion_map_timesteps, sne_perplexity, sne_theta, squishing_rate, spe_global_strategy, spe_num_updates, spe_tolerance, nullspace_shift, klle_shift, fa_epsilon, check_connectivity) { + .Call(`_tapkee_tapkee_embed_cpp`, data, method, neighbors_method, eigen_method, num_neighbors, target_dimension, gaussian_kernel_width, landmark_ratio, max_iteration, diffusion_map_timesteps, sne_perplexity, sne_theta, squishing_rate, spe_global_strategy, spe_num_updates, spe_tolerance, nullspace_shift, klle_shift, fa_epsilon, check_connectivity) } diff --git a/packages/r/R/embed.R b/packages/r/R/embed.R index bc82b52..d369e57 100644 --- a/packages/r/R/embed.R +++ b/packages/r/R/embed.R @@ -11,6 +11,12 @@ #' \code{"npe"}, \code{"ltsa"}, \code{"lltsa"}, \code{"hlle"}, #' \code{"l-mds"}, \code{"l-isomap"}, \code{"spe"}, \code{"fa"}, #' \code{"random_projection"}, \code{"manifold_sculpting"}, \code{"passthru"}. +#' @param neighbors_method Character string specifying the neighbor search method: +#' \code{"brute"} (brute-force), \code{"vptree"} (vantage point tree), +#' \code{"covertree"} (cover tree, if available). Default: library default. +#' @param eigen_method Character string specifying the eigendecomposition method: +#' \code{"dense"}, \code{"randomized"}, \code{"arpack"} (if available). +#' Default: library default. #' @param num_neighbors Integer. Number of nearest neighbors for local methods. #' Default in Tapkee: 5. #' @param target_dimension Integer. Output dimensionality. @@ -58,6 +64,8 @@ tapkee_embed <- function( data, method = "lle", + neighbors_method = NULL, + eigen_method = NULL, num_neighbors = NULL, target_dimension = NULL, gaussian_kernel_width = NULL, @@ -93,6 +101,8 @@ tapkee_embed <- function( tapkee_embed_cpp( data = data, method = method, + neighbors_method = neighbors_method, + eigen_method = eigen_method, num_neighbors = num_neighbors, target_dimension = target_dimension, gaussian_kernel_width = gaussian_kernel_width, diff --git a/packages/r/man/tapkee_embed.Rd b/packages/r/man/tapkee_embed.Rd index b725b79..75fb6a6 100644 --- a/packages/r/man/tapkee_embed.Rd +++ b/packages/r/man/tapkee_embed.Rd @@ -7,6 +7,8 @@ tapkee_embed( data, method = "lle", + neighbors_method = NULL, + eigen_method = NULL, num_neighbors = NULL, target_dimension = NULL, gaussian_kernel_width = NULL, @@ -36,6 +38,14 @@ samples. Each column is one data point.} \code{"l-mds"}, \code{"l-isomap"}, \code{"spe"}, \code{"fa"}, \code{"random_projection"}, \code{"manifold_sculpting"}, \code{"passthru"}.} +\item{neighbors_method}{Character string specifying the neighbor search method: +\code{"brute"} (brute-force), \code{"vptree"} (vantage point tree), +\code{"covertree"} (cover tree, if available). Default: library default.} + +\item{eigen_method}{Character string specifying the eigendecomposition method: +\code{"dense"}, \code{"randomized"}, \code{"arpack"} (if available). +Default: library default.} + \item{num_neighbors}{Integer. Number of nearest neighbors for local methods. Default in Tapkee: 5.} diff --git a/packages/r/src/RcppExports.cpp b/packages/r/src/RcppExports.cpp index f49fbc0..dcee29a 100644 --- a/packages/r/src/RcppExports.cpp +++ b/packages/r/src/RcppExports.cpp @@ -12,13 +12,15 @@ Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // tapkee_embed_cpp -Eigen::MatrixXd tapkee_embed_cpp(const Eigen::MatrixXd& data, const std::string& method, Rcpp::Nullable num_neighbors, Rcpp::Nullable target_dimension, Rcpp::Nullable gaussian_kernel_width, Rcpp::Nullable landmark_ratio, Rcpp::Nullable max_iteration, Rcpp::Nullable diffusion_map_timesteps, Rcpp::Nullable sne_perplexity, Rcpp::Nullable sne_theta, Rcpp::Nullable squishing_rate, Rcpp::Nullable spe_global_strategy, Rcpp::Nullable spe_num_updates, Rcpp::Nullable spe_tolerance, Rcpp::Nullable nullspace_shift, Rcpp::Nullable klle_shift, Rcpp::Nullable fa_epsilon, Rcpp::Nullable check_connectivity); -RcppExport SEXP _tapkee_tapkee_embed_cpp(SEXP dataSEXP, SEXP methodSEXP, SEXP num_neighborsSEXP, SEXP target_dimensionSEXP, SEXP gaussian_kernel_widthSEXP, SEXP landmark_ratioSEXP, SEXP max_iterationSEXP, SEXP diffusion_map_timestepsSEXP, SEXP sne_perplexitySEXP, SEXP sne_thetaSEXP, SEXP squishing_rateSEXP, SEXP spe_global_strategySEXP, SEXP spe_num_updatesSEXP, SEXP spe_toleranceSEXP, SEXP nullspace_shiftSEXP, SEXP klle_shiftSEXP, SEXP fa_epsilonSEXP, SEXP check_connectivitySEXP) { +Eigen::MatrixXd tapkee_embed_cpp(const Eigen::MatrixXd& data, const std::string& method, Rcpp::Nullable neighbors_method, Rcpp::Nullable eigen_method, Rcpp::Nullable num_neighbors, Rcpp::Nullable target_dimension, Rcpp::Nullable gaussian_kernel_width, Rcpp::Nullable landmark_ratio, Rcpp::Nullable max_iteration, Rcpp::Nullable diffusion_map_timesteps, Rcpp::Nullable sne_perplexity, Rcpp::Nullable sne_theta, Rcpp::Nullable squishing_rate, Rcpp::Nullable spe_global_strategy, Rcpp::Nullable spe_num_updates, Rcpp::Nullable spe_tolerance, Rcpp::Nullable nullspace_shift, Rcpp::Nullable klle_shift, Rcpp::Nullable fa_epsilon, Rcpp::Nullable check_connectivity); +RcppExport SEXP _tapkee_tapkee_embed_cpp(SEXP dataSEXP, SEXP methodSEXP, SEXP neighbors_methodSEXP, SEXP eigen_methodSEXP, SEXP num_neighborsSEXP, SEXP target_dimensionSEXP, SEXP gaussian_kernel_widthSEXP, SEXP landmark_ratioSEXP, SEXP max_iterationSEXP, SEXP diffusion_map_timestepsSEXP, SEXP sne_perplexitySEXP, SEXP sne_thetaSEXP, SEXP squishing_rateSEXP, SEXP spe_global_strategySEXP, SEXP spe_num_updatesSEXP, SEXP spe_toleranceSEXP, SEXP nullspace_shiftSEXP, SEXP klle_shiftSEXP, SEXP fa_epsilonSEXP, SEXP check_connectivitySEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const Eigen::MatrixXd& >::type data(dataSEXP); Rcpp::traits::input_parameter< const std::string& >::type method(methodSEXP); + Rcpp::traits::input_parameter< Rcpp::Nullable >::type neighbors_method(neighbors_methodSEXP); + Rcpp::traits::input_parameter< Rcpp::Nullable >::type eigen_method(eigen_methodSEXP); Rcpp::traits::input_parameter< Rcpp::Nullable >::type num_neighbors(num_neighborsSEXP); Rcpp::traits::input_parameter< Rcpp::Nullable >::type target_dimension(target_dimensionSEXP); Rcpp::traits::input_parameter< Rcpp::Nullable >::type gaussian_kernel_width(gaussian_kernel_widthSEXP); @@ -35,13 +37,13 @@ BEGIN_RCPP Rcpp::traits::input_parameter< Rcpp::Nullable >::type klle_shift(klle_shiftSEXP); Rcpp::traits::input_parameter< Rcpp::Nullable >::type fa_epsilon(fa_epsilonSEXP); Rcpp::traits::input_parameter< Rcpp::Nullable >::type check_connectivity(check_connectivitySEXP); - rcpp_result_gen = Rcpp::wrap(tapkee_embed_cpp(data, method, num_neighbors, target_dimension, gaussian_kernel_width, landmark_ratio, max_iteration, diffusion_map_timesteps, sne_perplexity, sne_theta, squishing_rate, spe_global_strategy, spe_num_updates, spe_tolerance, nullspace_shift, klle_shift, fa_epsilon, check_connectivity)); + rcpp_result_gen = Rcpp::wrap(tapkee_embed_cpp(data, method, neighbors_method, eigen_method, num_neighbors, target_dimension, gaussian_kernel_width, landmark_ratio, max_iteration, diffusion_map_timesteps, sne_perplexity, sne_theta, squishing_rate, spe_global_strategy, spe_num_updates, spe_tolerance, nullspace_shift, klle_shift, fa_epsilon, check_connectivity)); return rcpp_result_gen; END_RCPP } static const R_CallMethodDef CallEntries[] = { - {"_tapkee_tapkee_embed_cpp", (DL_FUNC) &_tapkee_tapkee_embed_cpp, 18}, + {"_tapkee_tapkee_embed_cpp", (DL_FUNC) &_tapkee_tapkee_embed_cpp, 20}, {NULL, NULL, 0} }; diff --git a/packages/r/tests/testthat/test-embed.R b/packages/r/tests/testthat/test-embed.R index 566700b..0b7c967 100644 --- a/packages/r/tests/testthat/test-embed.R +++ b/packages/r/tests/testthat/test-embed.R @@ -53,6 +53,56 @@ test_that("Factor Analysis works", { expect_equal(dim(result), c(100, 2)) }) +test_that("neighbors_method brute works", { + set.seed(42) + X <- matrix(rnorm(300), nrow = 3, ncol = 100) + result <- tapkee_embed(X, method = "isomap", + neighbors_method = "brute", + num_neighbors = 15L, target_dimension = 2L) + expect_equal(dim(result), c(100, 2)) +}) + +test_that("neighbors_method vptree works", { + set.seed(42) + X <- matrix(rnorm(300), nrow = 3, ncol = 100) + result <- tapkee_embed(X, method = "isomap", + neighbors_method = "vptree", + num_neighbors = 15L, target_dimension = 2L) + expect_equal(dim(result), c(100, 2)) +}) + +test_that("eigen_method dense works", { + set.seed(42) + X <- matrix(rnorm(300), nrow = 3, ncol = 100) + result <- tapkee_embed(X, method = "pca", + eigen_method = "dense", + target_dimension = 2L) + expect_equal(dim(result), c(100, 2)) +}) + +test_that("eigen_method randomized works", { + set.seed(42) + X <- matrix(rnorm(300), nrow = 3, ncol = 100) + result <- tapkee_embed(X, method = "pca", + eigen_method = "randomized", + target_dimension = 2L) + expect_equal(dim(result), c(100, 2)) +}) + +test_that("unknown neighbors_method raises error", { + X <- matrix(rnorm(30), nrow = 3, ncol = 10) + expect_error(tapkee_embed(X, method = "isomap", + neighbors_method = "nonexistent"), + "Unknown neighbors method") +}) + +test_that("unknown eigen_method raises error", { + X <- matrix(rnorm(30), nrow = 3, ncol = 10) + expect_error(tapkee_embed(X, method = "pca", + eigen_method = "nonexistent"), + "Unknown eigen method") +}) + test_that("unknown method raises error", { X <- matrix(rnorm(30), nrow = 3, ncol = 10) expect_error(tapkee_embed(X, method = "nonexistent"), "Unknown method") diff --git a/src/R/tapkee_embed.cpp b/src/R/tapkee_embed.cpp index b1d25b8..6323822 100644 --- a/src/R/tapkee_embed.cpp +++ b/src/R/tapkee_embed.cpp @@ -16,6 +16,7 @@ #include #include +#include #include using stichwort::Parameter; @@ -57,10 +58,52 @@ static DimensionReductionMethod parse_method(const std::string& name) { return tapkee::PassThru; // unreachable } +static tapkee::NeighborsMethod parse_neighbors_method(const std::string& name) { + static const std::map methods = { + {"brute", tapkee::Brute}, + {"vptree", tapkee::VpTree}, +#ifdef TAPKEE_USE_LGPL_COVERTREE + {"covertree", tapkee::CoverTree}, +#endif + }; + auto it = methods.find(name); + if (it != methods.end()) { + return it->second; + } + Rcpp::stop("Unknown neighbors method: '%s'. Use one of: brute, vptree" +#ifdef TAPKEE_USE_LGPL_COVERTREE + ", covertree" +#endif + , name.c_str()); + return tapkee::Brute; // unreachable +} + +static tapkee::EigenMethod parse_eigen_method(const std::string& name) { + static const std::map methods = { + {"dense", tapkee::Dense}, + {"randomized", tapkee::Randomized}, +#ifdef TAPKEE_WITH_ARPACK + {"arpack", tapkee::Arpack}, +#endif + }; + auto it = methods.find(name); + if (it != methods.end()) { + return it->second; + } + Rcpp::stop("Unknown eigen method: '%s'. Use one of: dense, randomized" +#ifdef TAPKEE_WITH_ARPACK + ", arpack" +#endif + , name.c_str()); + return tapkee::Dense; // unreachable +} + // [[Rcpp::export]] Eigen::MatrixXd tapkee_embed_cpp( const Eigen::MatrixXd& data, const std::string& method, + Rcpp::Nullable neighbors_method, + Rcpp::Nullable eigen_method, Rcpp::Nullable num_neighbors, Rcpp::Nullable target_dimension, Rcpp::Nullable gaussian_kernel_width, @@ -83,6 +126,13 @@ Eigen::MatrixXd tapkee_embed_cpp( ParametersSet params; params.add(Parameter::create("dimension reduction method", parse_method(method))); + if (neighbors_method.isNotNull()) + params.add(Parameter::create("nearest neighbors method", + parse_neighbors_method(Rcpp::as(neighbors_method)))); + if (eigen_method.isNotNull()) + params.add(Parameter::create("eigendecomposition method", + parse_eigen_method(Rcpp::as(eigen_method)))); + if (num_neighbors.isNotNull()) params.add(Parameter::create("number of neighbors", Rcpp::as(num_neighbors))); if (target_dimension.isNotNull()) diff --git a/src/python/nanobind_extension.cpp b/src/python/nanobind_extension.cpp index 88ac966..9217a11 100644 --- a/src/python/nanobind_extension.cpp +++ b/src/python/nanobind_extension.cpp @@ -28,6 +28,8 @@ using tapkee::tapkee_internal::ParametersInitializedState; DenseMatrix embed( const DenseMatrix& data, const std::string& method, + std::optional neighbors_method, + std::optional eigen_method, std::optional num_neighbors, std::optional target_dimension, std::optional gaussian_kernel_width, @@ -50,6 +52,12 @@ DenseMatrix embed( // Required: method params.add(Parameter::create("dimension reduction method", parse_reduction_method(method))); + // Optional: neighbors and eigen methods + if (neighbors_method) + params.add(Parameter::create("nearest neighbors method", parse_multiple(NEIGHBORS_METHODS, *neighbors_method))); + if (eigen_method) + params.add(Parameter::create("eigendecomposition method", parse_multiple(EIGEN_METHODS, *eigen_method))); + // Optional parameters - only add if provided if (num_neighbors) params.add(Parameter::create("number of neighbors", *num_neighbors)); @@ -94,6 +102,8 @@ NB_MODULE(tapkee, m) { m.def("embed", &embed, nb::arg("data"), nb::arg("method") = "lle", + nb::arg("neighbors_method") = nb::none(), + nb::arg("eigen_method") = nb::none(), nb::arg("num_neighbors") = nb::none(), nb::arg("target_dimension") = nb::none(), nb::arg("gaussian_kernel_width") = nb::none(), @@ -116,6 +126,8 @@ NB_MODULE(tapkee, m) { Args: data: Input matrix (features x samples) method: Reduction method ('lle', 'isomap', 'pca', 't-sne', etc.) + neighbors_method: Neighbor search method ('brute', 'vptree', 'covertree') + eigen_method: Eigendecomposition method ('dense', 'randomized', 'arpack') num_neighbors: Number of neighbors for local methods (default: 5) target_dimension: Output dimensionality (default: 2) gaussian_kernel_width: Kernel width for Laplacian Eigenmaps, DM, LPP diff --git a/test/test_nanobind_extension.py b/test/test_nanobind_extension.py index 9d824ba..4ea55e4 100644 --- a/test/test_nanobind_extension.py +++ b/test/test_nanobind_extension.py @@ -8,8 +8,54 @@ def test_exception_unknown_method(): except: pass +def test_embed_neighbors_method_brute(): + data = np.random.randn(100, 3) + result = tapkee.embed(data, method='isomap', neighbors_method='brute', + num_neighbors=15, target_dimension=2) + assert result.shape == (3, 2) + +def test_embed_neighbors_method_vptree(): + data = np.random.randn(100, 3) + result = tapkee.embed(data, method='isomap', neighbors_method='vptree', + num_neighbors=15, target_dimension=2) + assert result.shape == (3, 2) + +def test_embed_eigen_method_dense(): + data = np.random.randn(100, 3) + result = tapkee.embed(data, method='pca', eigen_method='dense', + target_dimension=2) + assert result.shape == (3, 2) + +def test_embed_eigen_method_randomized(): + data = np.random.randn(100, 3) + result = tapkee.embed(data, method='pca', eigen_method='randomized', + target_dimension=2) + assert result.shape == (3, 2) + +def test_embed_unknown_neighbors_method(): + data = np.random.randn(100, 3) + try: + tapkee.embed(data, method='isomap', neighbors_method='nonexistent') + assert False, "Should have raised an error" + except RuntimeError: + pass + +def test_embed_unknown_eigen_method(): + data = np.random.randn(100, 3) + try: + tapkee.embed(data, method='pca', eigen_method='nonexistent') + assert False, "Should have raised an error" + except RuntimeError: + pass + if __name__=='__main__': test_exception_unknown_method() + test_embed_neighbors_method_brute() + test_embed_neighbors_method_vptree() + test_embed_eigen_method_dense() + test_embed_eigen_method_randomized() + test_embed_unknown_neighbors_method() + test_embed_unknown_eigen_method() parameters = tapkee.ParametersSet() method = tapkee.parse_reduction_method('spe') assert(method.name == 'Stochastic Proximity Embedding (SPE)')