Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions packages/r/R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

tapkee_embed_cpp <- function(data, method, num_neighbors, target_dimension, gaussian_kernel_width, landmark_ratio, max_iteration, diffusion_map_timesteps, sne_perplexity, sne_theta, squishing_rate, spe_global_strategy, spe_num_updates, spe_tolerance, nullspace_shift, klle_shift, fa_epsilon, check_connectivity) {
.Call(`_tapkee_tapkee_embed_cpp`, data, method, num_neighbors, target_dimension, gaussian_kernel_width, landmark_ratio, max_iteration, diffusion_map_timesteps, sne_perplexity, sne_theta, squishing_rate, spe_global_strategy, spe_num_updates, spe_tolerance, nullspace_shift, klle_shift, fa_epsilon, check_connectivity)
tapkee_embed_cpp <- function(data, method, neighbors_method, eigen_method, num_neighbors, target_dimension, gaussian_kernel_width, landmark_ratio, max_iteration, diffusion_map_timesteps, sne_perplexity, sne_theta, squishing_rate, spe_global_strategy, spe_num_updates, spe_tolerance, nullspace_shift, klle_shift, fa_epsilon, check_connectivity) {
.Call(`_tapkee_tapkee_embed_cpp`, data, method, neighbors_method, eigen_method, num_neighbors, target_dimension, gaussian_kernel_width, landmark_ratio, max_iteration, diffusion_map_timesteps, sne_perplexity, sne_theta, squishing_rate, spe_global_strategy, spe_num_updates, spe_tolerance, nullspace_shift, klle_shift, fa_epsilon, check_connectivity)
}

10 changes: 10 additions & 0 deletions packages/r/R/embed.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@
#' \code{"npe"}, \code{"ltsa"}, \code{"lltsa"}, \code{"hlle"},
#' \code{"l-mds"}, \code{"l-isomap"}, \code{"spe"}, \code{"fa"},
#' \code{"random_projection"}, \code{"manifold_sculpting"}, \code{"passthru"}.
#' @param neighbors_method Character string specifying the neighbor search method:
#' \code{"brute"} (brute-force), \code{"vptree"} (vantage point tree),
#' \code{"covertree"} (cover tree, if available). Default: library default.
#' @param eigen_method Character string specifying the eigendecomposition method:
#' \code{"dense"}, \code{"randomized"}, \code{"arpack"} (if available).
#' Default: library default.
#' @param num_neighbors Integer. Number of nearest neighbors for local methods.
#' Default in Tapkee: 5.
#' @param target_dimension Integer. Output dimensionality.
Expand Down Expand Up @@ -58,6 +64,8 @@
tapkee_embed <- function(
data,
method = "lle",
neighbors_method = NULL,
eigen_method = NULL,
num_neighbors = NULL,
target_dimension = NULL,
gaussian_kernel_width = NULL,
Expand Down Expand Up @@ -93,6 +101,8 @@ tapkee_embed <- function(
tapkee_embed_cpp(
data = data,
method = method,
neighbors_method = neighbors_method,
eigen_method = eigen_method,
num_neighbors = num_neighbors,
target_dimension = target_dimension,
gaussian_kernel_width = gaussian_kernel_width,
Expand Down
10 changes: 10 additions & 0 deletions packages/r/man/tapkee_embed.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 6 additions & 4 deletions packages/r/src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@ Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
#endif

// tapkee_embed_cpp
Eigen::MatrixXd tapkee_embed_cpp(const Eigen::MatrixXd& data, const std::string& method, Rcpp::Nullable<int> num_neighbors, Rcpp::Nullable<int> target_dimension, Rcpp::Nullable<double> gaussian_kernel_width, Rcpp::Nullable<double> landmark_ratio, Rcpp::Nullable<int> max_iteration, Rcpp::Nullable<int> diffusion_map_timesteps, Rcpp::Nullable<double> sne_perplexity, Rcpp::Nullable<double> sne_theta, Rcpp::Nullable<double> squishing_rate, Rcpp::Nullable<bool> spe_global_strategy, Rcpp::Nullable<int> spe_num_updates, Rcpp::Nullable<double> spe_tolerance, Rcpp::Nullable<double> nullspace_shift, Rcpp::Nullable<double> klle_shift, Rcpp::Nullable<double> fa_epsilon, Rcpp::Nullable<bool> check_connectivity);
RcppExport SEXP _tapkee_tapkee_embed_cpp(SEXP dataSEXP, SEXP methodSEXP, SEXP num_neighborsSEXP, SEXP target_dimensionSEXP, SEXP gaussian_kernel_widthSEXP, SEXP landmark_ratioSEXP, SEXP max_iterationSEXP, SEXP diffusion_map_timestepsSEXP, SEXP sne_perplexitySEXP, SEXP sne_thetaSEXP, SEXP squishing_rateSEXP, SEXP spe_global_strategySEXP, SEXP spe_num_updatesSEXP, SEXP spe_toleranceSEXP, SEXP nullspace_shiftSEXP, SEXP klle_shiftSEXP, SEXP fa_epsilonSEXP, SEXP check_connectivitySEXP) {
Eigen::MatrixXd tapkee_embed_cpp(const Eigen::MatrixXd& data, const std::string& method, Rcpp::Nullable<std::string> neighbors_method, Rcpp::Nullable<std::string> eigen_method, Rcpp::Nullable<int> num_neighbors, Rcpp::Nullable<int> target_dimension, Rcpp::Nullable<double> gaussian_kernel_width, Rcpp::Nullable<double> landmark_ratio, Rcpp::Nullable<int> max_iteration, Rcpp::Nullable<int> diffusion_map_timesteps, Rcpp::Nullable<double> sne_perplexity, Rcpp::Nullable<double> sne_theta, Rcpp::Nullable<double> squishing_rate, Rcpp::Nullable<bool> spe_global_strategy, Rcpp::Nullable<int> spe_num_updates, Rcpp::Nullable<double> spe_tolerance, Rcpp::Nullable<double> nullspace_shift, Rcpp::Nullable<double> klle_shift, Rcpp::Nullable<double> fa_epsilon, Rcpp::Nullable<bool> check_connectivity);
RcppExport SEXP _tapkee_tapkee_embed_cpp(SEXP dataSEXP, SEXP methodSEXP, SEXP neighbors_methodSEXP, SEXP eigen_methodSEXP, SEXP num_neighborsSEXP, SEXP target_dimensionSEXP, SEXP gaussian_kernel_widthSEXP, SEXP landmark_ratioSEXP, SEXP max_iterationSEXP, SEXP diffusion_map_timestepsSEXP, SEXP sne_perplexitySEXP, SEXP sne_thetaSEXP, SEXP squishing_rateSEXP, SEXP spe_global_strategySEXP, SEXP spe_num_updatesSEXP, SEXP spe_toleranceSEXP, SEXP nullspace_shiftSEXP, SEXP klle_shiftSEXP, SEXP fa_epsilonSEXP, SEXP check_connectivitySEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const Eigen::MatrixXd& >::type data(dataSEXP);
Rcpp::traits::input_parameter< const std::string& >::type method(methodSEXP);
Rcpp::traits::input_parameter< Rcpp::Nullable<std::string> >::type neighbors_method(neighbors_methodSEXP);
Rcpp::traits::input_parameter< Rcpp::Nullable<std::string> >::type eigen_method(eigen_methodSEXP);
Rcpp::traits::input_parameter< Rcpp::Nullable<int> >::type num_neighbors(num_neighborsSEXP);
Rcpp::traits::input_parameter< Rcpp::Nullable<int> >::type target_dimension(target_dimensionSEXP);
Rcpp::traits::input_parameter< Rcpp::Nullable<double> >::type gaussian_kernel_width(gaussian_kernel_widthSEXP);
Expand All @@ -35,13 +37,13 @@ BEGIN_RCPP
Rcpp::traits::input_parameter< Rcpp::Nullable<double> >::type klle_shift(klle_shiftSEXP);
Rcpp::traits::input_parameter< Rcpp::Nullable<double> >::type fa_epsilon(fa_epsilonSEXP);
Rcpp::traits::input_parameter< Rcpp::Nullable<bool> >::type check_connectivity(check_connectivitySEXP);
rcpp_result_gen = Rcpp::wrap(tapkee_embed_cpp(data, method, num_neighbors, target_dimension, gaussian_kernel_width, landmark_ratio, max_iteration, diffusion_map_timesteps, sne_perplexity, sne_theta, squishing_rate, spe_global_strategy, spe_num_updates, spe_tolerance, nullspace_shift, klle_shift, fa_epsilon, check_connectivity));
rcpp_result_gen = Rcpp::wrap(tapkee_embed_cpp(data, method, neighbors_method, eigen_method, num_neighbors, target_dimension, gaussian_kernel_width, landmark_ratio, max_iteration, diffusion_map_timesteps, sne_perplexity, sne_theta, squishing_rate, spe_global_strategy, spe_num_updates, spe_tolerance, nullspace_shift, klle_shift, fa_epsilon, check_connectivity));
return rcpp_result_gen;
END_RCPP
}

static const R_CallMethodDef CallEntries[] = {
{"_tapkee_tapkee_embed_cpp", (DL_FUNC) &_tapkee_tapkee_embed_cpp, 18},
{"_tapkee_tapkee_embed_cpp", (DL_FUNC) &_tapkee_tapkee_embed_cpp, 20},
{NULL, NULL, 0}
};

Expand Down
50 changes: 50 additions & 0 deletions packages/r/tests/testthat/test-embed.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,56 @@ test_that("Factor Analysis works", {
expect_equal(dim(result), c(100, 2))
})

test_that("neighbors_method brute works", {
set.seed(42)
X <- matrix(rnorm(300), nrow = 3, ncol = 100)
result <- tapkee_embed(X, method = "isomap",
neighbors_method = "brute",
num_neighbors = 15L, target_dimension = 2L)
expect_equal(dim(result), c(100, 2))
})

test_that("neighbors_method vptree works", {
set.seed(42)
X <- matrix(rnorm(300), nrow = 3, ncol = 100)
result <- tapkee_embed(X, method = "isomap",
neighbors_method = "vptree",
num_neighbors = 15L, target_dimension = 2L)
expect_equal(dim(result), c(100, 2))
})

test_that("eigen_method dense works", {
set.seed(42)
X <- matrix(rnorm(300), nrow = 3, ncol = 100)
result <- tapkee_embed(X, method = "pca",
eigen_method = "dense",
target_dimension = 2L)
expect_equal(dim(result), c(100, 2))
})

test_that("eigen_method randomized works", {
set.seed(42)
X <- matrix(rnorm(300), nrow = 3, ncol = 100)
result <- tapkee_embed(X, method = "pca",
eigen_method = "randomized",
target_dimension = 2L)
expect_equal(dim(result), c(100, 2))
})

test_that("unknown neighbors_method raises error", {
X <- matrix(rnorm(30), nrow = 3, ncol = 10)
expect_error(tapkee_embed(X, method = "isomap",
neighbors_method = "nonexistent"),
"Unknown neighbors method")
})

test_that("unknown eigen_method raises error", {
X <- matrix(rnorm(30), nrow = 3, ncol = 10)
expect_error(tapkee_embed(X, method = "pca",
eigen_method = "nonexistent"),
"Unknown eigen method")
})

test_that("unknown method raises error", {
X <- matrix(rnorm(30), nrow = 3, ncol = 10)
expect_error(tapkee_embed(X, method = "nonexistent"), "Unknown method")
Expand Down
50 changes: 50 additions & 0 deletions src/R/tapkee_embed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <tapkee/defines/method_names.hpp>
#include <tapkee/defines/keywords.hpp>

#include <map>
#include <string>

using stichwort::Parameter;
Expand Down Expand Up @@ -57,10 +58,52 @@ static DimensionReductionMethod parse_method(const std::string& name) {
return tapkee::PassThru; // unreachable
}

static tapkee::NeighborsMethod parse_neighbors_method(const std::string& name) {
static const std::map<std::string, tapkee::NeighborsMethod> methods = {
{"brute", tapkee::Brute},
{"vptree", tapkee::VpTree},
#ifdef TAPKEE_USE_LGPL_COVERTREE
{"covertree", tapkee::CoverTree},
#endif
};
auto it = methods.find(name);
if (it != methods.end()) {
return it->second;
}
Rcpp::stop("Unknown neighbors method: '%s'. Use one of: brute, vptree"
#ifdef TAPKEE_USE_LGPL_COVERTREE
", covertree"
#endif
, name.c_str());
return tapkee::Brute; // unreachable
}

static tapkee::EigenMethod parse_eigen_method(const std::string& name) {
static const std::map<std::string, tapkee::EigenMethod> methods = {
{"dense", tapkee::Dense},
{"randomized", tapkee::Randomized},
#ifdef TAPKEE_WITH_ARPACK
{"arpack", tapkee::Arpack},
#endif
};
auto it = methods.find(name);
if (it != methods.end()) {
return it->second;
}
Rcpp::stop("Unknown eigen method: '%s'. Use one of: dense, randomized"
#ifdef TAPKEE_WITH_ARPACK
", arpack"
#endif
, name.c_str());
return tapkee::Dense; // unreachable
}

// [[Rcpp::export]]
Eigen::MatrixXd tapkee_embed_cpp(
const Eigen::MatrixXd& data,
const std::string& method,
Rcpp::Nullable<std::string> neighbors_method,
Rcpp::Nullable<std::string> eigen_method,
Rcpp::Nullable<int> num_neighbors,
Rcpp::Nullable<int> target_dimension,
Rcpp::Nullable<double> gaussian_kernel_width,
Expand All @@ -83,6 +126,13 @@ Eigen::MatrixXd tapkee_embed_cpp(
ParametersSet params;
params.add(Parameter::create("dimension reduction method", parse_method(method)));

if (neighbors_method.isNotNull())
params.add(Parameter::create("nearest neighbors method",
parse_neighbors_method(Rcpp::as<std::string>(neighbors_method))));
if (eigen_method.isNotNull())
params.add(Parameter::create("eigendecomposition method",
parse_eigen_method(Rcpp::as<std::string>(eigen_method))));

if (num_neighbors.isNotNull())
params.add(Parameter::create("number of neighbors", Rcpp::as<int>(num_neighbors)));
if (target_dimension.isNotNull())
Expand Down
12 changes: 12 additions & 0 deletions src/python/nanobind_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ using tapkee::tapkee_internal::ParametersInitializedState;
DenseMatrix embed(
const DenseMatrix& data,
const std::string& method,
std::optional<std::string> neighbors_method,
std::optional<std::string> eigen_method,
std::optional<int> num_neighbors,
std::optional<int> target_dimension,
std::optional<double> gaussian_kernel_width,
Expand All @@ -50,6 +52,12 @@ DenseMatrix embed(
// Required: method
params.add(Parameter::create("dimension reduction method", parse_reduction_method(method)));

// Optional: neighbors and eigen methods
if (neighbors_method)
params.add(Parameter::create("nearest neighbors method", parse_multiple(NEIGHBORS_METHODS, *neighbors_method)));
if (eigen_method)
params.add(Parameter::create("eigendecomposition method", parse_multiple(EIGEN_METHODS, *eigen_method)));

// Optional parameters - only add if provided
if (num_neighbors)
params.add(Parameter::create("number of neighbors", *num_neighbors));
Expand Down Expand Up @@ -94,6 +102,8 @@ NB_MODULE(tapkee, m) {
m.def("embed", &embed,
nb::arg("data"),
nb::arg("method") = "lle",
nb::arg("neighbors_method") = nb::none(),
nb::arg("eigen_method") = nb::none(),
nb::arg("num_neighbors") = nb::none(),
nb::arg("target_dimension") = nb::none(),
nb::arg("gaussian_kernel_width") = nb::none(),
Expand All @@ -116,6 +126,8 @@ NB_MODULE(tapkee, m) {
Args:
data: Input matrix (features x samples)
method: Reduction method ('lle', 'isomap', 'pca', 't-sne', etc.)
neighbors_method: Neighbor search method ('brute', 'vptree', 'covertree')
eigen_method: Eigendecomposition method ('dense', 'randomized', 'arpack')
num_neighbors: Number of neighbors for local methods (default: 5)
target_dimension: Output dimensionality (default: 2)
gaussian_kernel_width: Kernel width for Laplacian Eigenmaps, DM, LPP
Expand Down
46 changes: 46 additions & 0 deletions test/test_nanobind_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,54 @@ def test_exception_unknown_method():
except:
pass

def test_embed_neighbors_method_brute():
data = np.random.randn(100, 3)
result = tapkee.embed(data, method='isomap', neighbors_method='brute',
num_neighbors=15, target_dimension=2)
assert result.shape == (3, 2)

def test_embed_neighbors_method_vptree():
data = np.random.randn(100, 3)
result = tapkee.embed(data, method='isomap', neighbors_method='vptree',
num_neighbors=15, target_dimension=2)
assert result.shape == (3, 2)

def test_embed_eigen_method_dense():
data = np.random.randn(100, 3)
result = tapkee.embed(data, method='pca', eigen_method='dense',
target_dimension=2)
assert result.shape == (3, 2)

def test_embed_eigen_method_randomized():
data = np.random.randn(100, 3)
result = tapkee.embed(data, method='pca', eigen_method='randomized',
target_dimension=2)
assert result.shape == (3, 2)

def test_embed_unknown_neighbors_method():
data = np.random.randn(100, 3)
try:
tapkee.embed(data, method='isomap', neighbors_method='nonexistent')
assert False, "Should have raised an error"
except RuntimeError:
pass

def test_embed_unknown_eigen_method():
data = np.random.randn(100, 3)
try:
tapkee.embed(data, method='pca', eigen_method='nonexistent')
assert False, "Should have raised an error"
except RuntimeError:
pass

if __name__=='__main__':
test_exception_unknown_method()
test_embed_neighbors_method_brute()
test_embed_neighbors_method_vptree()
test_embed_eigen_method_dense()
test_embed_eigen_method_randomized()
test_embed_unknown_neighbors_method()
test_embed_unknown_eigen_method()
parameters = tapkee.ParametersSet()
method = tapkee.parse_reduction_method('spe')
assert(method.name == 'Stochastic Proximity Embedding (SPE)')
Expand Down
Loading