From 54472222ae82818139f44a32134e19ce001e0cbd Mon Sep 17 00:00:00 2001
From: Sebastian Fischer <sebf.fischer@gmail.com>
Date: Wed, 16 Apr 2025 06:19:28 +0200
Subject: [PATCH 1/6] ...

---
 DESCRIPTION                                  |   1 +
 NAMESPACE                                    |   3 +
 NEWS.md                                      |   2 +
 R/DataBackendLazyTensors.R                   | 105 +++++++++++++++++++
 R/DataDescriptor.R                           |  14 +--
 R/LearnerTorch.R                             |   1 +
 R/lazy_tensor.R                              |  13 +++
 R/materialize.R                              |  10 +-
 R/shape.R                                    |   2 +-
 R/task_dataset.R                             |  12 ++-
 R/utils.R                                    |  12 +++
 TODO.md                                      |  22 ++++
 tests/testthat/test_DataBackendLazyTensors.R |  41 ++++++++
 tests/testthat/test_lazy_tensor.R            |   2 -
 tests/testthat/test_materialize.R            |   9 +-
 tests/testthat/test_shape.R                  |   2 +
 tests/testthat/test_utils.R                  |   1 +
 17 files changed, 230 insertions(+), 22 deletions(-)
 create mode 100644 R/DataBackendLazyTensors.R
 create mode 100644 TODO.md
 create mode 100644 tests/testthat/test_DataBackendLazyTensors.R

diff --git a/DESCRIPTION b/DESCRIPTION
index b6dd4ba0d..e41e48f24 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -91,6 +91,7 @@ Collate:
     'CallbackSetUnfreeze.R'
     'ContextTorch.R'
     'DataBackendLazy.R'
+    'DataBackendLazyTensors.R'
     'utils.R'
     'DataDescriptor.R'
     'LearnerTorch.R'
diff --git a/NAMESPACE b/NAMESPACE
index 0d0ebe087..5d6fbe5c1 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -12,6 +12,7 @@ S3method(as_lazy_tensor,DataDescriptor)
 S3method(as_lazy_tensor,dataset)
 S3method(as_lazy_tensor,numeric)
 S3method(as_lazy_tensor,torch_tensor)
+S3method(as_lazy_tensors,dataset)
 S3method(as_torch_callback,R6ClassGenerator)
 S3method(as_torch_callback,TorchCallback)
 S3method(as_torch_callback,character)
@@ -27,6 +28,7 @@ S3method(as_torch_optimizer,character)
 S3method(as_torch_optimizer,torch_optimizer_generator)
 S3method(c,lazy_tensor)
 S3method(col_info,DataBackendLazy)
+S3method(col_info,DataBackendLazyTensors)
 S3method(format,lazy_tensor)
 S3method(hash_input,TorchIngressToken)
 S3method(hash_input,lazy_tensor)
@@ -158,6 +160,7 @@ export(TorchLoss)
 export(TorchOptimizer)
 export(as_data_descriptor)
 export(as_lazy_tensor)
+export(as_lazy_tensors)
 export(as_lr_scheduler)
 export(as_torch_callback)
 export(as_torch_callbacks)
diff --git a/NEWS.md b/NEWS.md
index a046df6ca..1999928f0 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -5,6 +5,8 @@
 * feat: `TorchIngressToken` now also can take a `Selector` as argument `features`.
 * feat: Added encoders for numericals and categoricals
 * feat: Added `po("nn_fn")` for calling custom functions in a network.
+* fix: `NA` is now a valid shape for lazy tensors.
+* feat: `lazy_tensor`s of length 0 can now be materialized.
 
 # mlr3torch 0.2.1
 
diff --git a/R/DataBackendLazyTensors.R b/R/DataBackendLazyTensors.R
new file mode 100644
index 000000000..8fb94f626
--- /dev/null
+++ b/R/DataBackendLazyTensors.R
@@ -0,0 +1,105 @@
+
+#' @title Data Backend for Lazy Tensors
+#' @description
+#' Special **experimental** data backend that converts [`lazy_tensor`] columns to their R representation.
+#' However, [`LearnerTorch`] can directly operate on the lazy tensors.
+#' @export
+DataBackendLazyTensors = R6Class("DataBackendLazyTensors",
+  cloneable = FALSE,
+  inherit = DataBackendDataTable,
+  public = list(
+    #' @description
+    #' Create a new instance of this [R6][R6::R6Class] class.
+    #' @param data (`data.table`)\cr
+    #'   Data containing (among others) [`lazy_tensor`] columns.
+    #' @param primary_key (`character(1)`)\cr
+    #'   Name of the column used as primary key.
+    #' @param converter (named `list()` of `function`s)\cr
+    #'   A named list of functions that convert the lazy tensor columns to their R representation.
+    #'   The names must be the names of the columns that need conversion.
+    #' @param cache (`character()`)\cr
+    #'   Names of the columns that should be cached.
+    #'   Per default, all columns that are converted are cached.
+    initialize = function(data, primary_key, converter, cache = names(converter)) {
+      private$.converter = assert_list(converter, types = "function", any.missing = FALSE)
+      assert_subset(names(converter), colnames(data))
+      private$.cached_cols = assert_subset(cache, names(converter))
+      walk(names(private$.converter), function(nm) {
+        if (!inherits(data[[nm]], "lazy_tensor")) {
+          stopf("Column '%s' is not a lazy tensor.", nm)
+        }
+      })
+      super$initialize(data, primary_key)
+      # select the column whose name is stored in primary_key from private$.data but keep its name
+      private$.data_cache = private$.data[, primary_key, with = FALSE]
+    },
+    data = function(rows, cols) {
+      rows = assert_integerish(rows, coerce = TRUE)
+      assert_names(cols, type = "unique")
+
+      if (getOption("mlr3torch.data_loading", FALSE)) {
+        # no caching, no materialization as this is called in the training loop
+        return(super$data(rows, cols))
+      }
+      if (all(cols %in% names(private$.data_cache))) {
+        cache_hit = private$.data_cache[list(rows), cols, on = self$primary_key, with = FALSE]
+        complete = complete.cases(cache_hit)
+        cache_hit = cache_hit[complete]
+        if (nrow(cache_hit) == length(rows)) {
+          return(cache_hit)
+        }
+        combined = rbindlist(list(cache_hit, private$.load_and_cache(rows[!complete], cols)))
+        reorder = vector("integer", nrow(combined))
+        reorder[complete] = seq_len(nrow(cache_hit))
+        reorder[!complete] = nrow(cache_hit) + seq_len(nrow(combined) - nrow(cache_hit))
+        return(combined[reorder])
+      }
+
+      private$.load_and_cache(rows, cols)
+    },
+    head = function(n = 6L) {
+      if (getOption("mlr3torch.data_loading", FALSE)) {
+        return(super$head(n))
+      }
+
+      self$data(n, self$colnames)
+    },
+    missings = function(rows, cols) {
+      set_names(rep(0L, length(cols)), cols)
+    }
+  ),
+  private = list(
+    # `rows` are primary-key values that are not cached yet; cached columns are written back by key, not by position
+    .load_and_cache = function(rows, cols) {
+      # Process columns that need conversion
+      tbl = super$data(rows, cols)
+      for (nm in intersect(names(private$.converter), names(tbl))) {
+        converted = private$.converter[[nm]](materialize(tbl[[nm]], rbind = TRUE))
+        tbl[[nm]] = converted
+
+        if (nm %in% private$.cached_cols) {
+          set(private$.data_cache, i = match(rows, private$.data_cache[[self$primary_key]]), j = nm, value = converted)
+        }
+      }
+      return(tbl)
+    },
+    .data_cache = NULL,
+    .converter = NULL,
+    .cached_cols = NULL
+  )
+)
+
+#' @export
+col_info.DataBackendLazyTensors = function(x, ...) { # nolint
+  first_row = x$head(1L)
+  types = map_chr(first_row, function(x) class(x)[1L])
+  discrete = setdiff(names(types)[types %chin% c("factor", "ordered")], x$primary_key)
+  levels = insert_named(named_list(names(types)), map(first_row[discrete], levels))
+  data.table(id = names(types), type = unname(types), levels = levels, key = "id")
+}
+
+#' @export
+as_data_backend.dataset = function(x, dataset_shapes, primary_key ...) {
+
+
+}
\ No newline at end of file
diff --git a/R/DataDescriptor.R b/R/DataDescriptor.R
index 1bf3cd68d..11410b480 100644
--- a/R/DataDescriptor.R
+++ b/R/DataDescriptor.R
@@ -60,14 +60,7 @@ DataDescriptor = R6Class("DataDescriptor",
       # For simplicity we here require the first dimension of the shape to be NA so we don't have to deal with it,
       # e.g. during subsetting
 
-      if (is.null(dataset_shapes)) {
-        if (is.null(dataset$.getbatch)) {
-          stopf("dataset_shapes must be provided if dataset does not have a `.getbatch` method.")
-        }
-        dataset_shapes = infer_shapes_from_getbatch(dataset)
-      } else {
-        assert_compatible_shapes(dataset_shapes, dataset)
-      }
+      dataset_shapes = get_or_check_dataset_shapes(dataset, dataset_shapes)
 
       if (is.null(graph)) {
         # avoid name conflicts
@@ -84,8 +77,7 @@ DataDescriptor = R6Class("DataDescriptor",
         assert_true(length(graph$pipeops) >= 1L)
       }
       # no preprocessing, dataset returns only a single element (there we can infer a lot)
-      simple_case = length(graph$pipeops) == 1L && inherits(graph$pipeops[[1L]], "PipeOpNOP") &&
-        length(dataset_shapes) == 1L
+      simple_case = (length(graph$pipeops) == 1L) && inherits(graph$pipeops[[1L]], "PipeOpNOP")
 
       if (is.null(input_map) && nrow(graph$input) == 1L && length(dataset_shapes) == 1L) {
         input_map = names(dataset_shapes)
@@ -100,7 +92,7 @@ DataDescriptor = R6Class("DataDescriptor",
         assert_choice(pointer[[2]], graph$pipeops[[pointer[[1]]]]$output$name)
       }
       if (is.null(pointer_shape) && simple_case) {
-        pointer_shape = dataset_shapes[[1L]]
+        pointer_shape = dataset_shapes[[input_map]]
       } else {
         assert_shape(pointer_shape, null_ok = TRUE)
       }
diff --git a/R/LearnerTorch.R b/R/LearnerTorch.R
index 865ac733e..7829c91f3 100644
--- a/R/LearnerTorch.R
+++ b/R/LearnerTorch.R
@@ -93,6 +93,7 @@
 #'   * `.index` are the indices of the batch (`integer()` or a `torch_int()`).
 #'
 #'   Moreover, one needs to pay attention respect the row ids of the provided task.
+#'   It is strongly recommended to use the [`task_dataset`] class to create the dataset.
 #'
 #' It is also possible to overwrite the private `.dataloader()` method.
 #' This must respect the dataloader parameters from the [`ParamSet`][paradox::ParamSet].
diff --git a/R/lazy_tensor.R b/R/lazy_tensor.R
index d050f8545..079cf436d 100644
--- a/R/lazy_tensor.R
+++ b/R/lazy_tensor.R
@@ -197,6 +197,19 @@ as_lazy_tensor.torch_tensor = function(x, ...) { # nolint
   as_lazy_tensor(ds, dataset_shapes = list(x = c(NA, dim(x)[-1])))
 }
 
+#' @export
+as_lazy_tensors = function(x, ...) {
+  UseMethod("as_lazy_tensors")
+}
+
+#' @export
+as_lazy_tensors.dataset = function(x, dataset_shapes = NULL, ...) {
+  dataset_shapes = get_or_check_dataset_shapes(x, dataset_shapes)
+  set_names(map_dtc(names(dataset_shapes), function(shape) {
+    as_lazy_tensor(x, dataset_shapes = dataset_shapes, input_map = shape)
+  }), names(dataset_shapes))
+}
+
 #' Assert Lazy Tensor
 #'
 #' Asserts whether something is a lazy tensor.
diff --git a/R/materialize.R b/R/materialize.R
index 849024ad4..185baa5b2 100644
--- a/R/materialize.R
+++ b/R/materialize.R
@@ -44,6 +44,13 @@
 materialize = function(x, device = "cpu", rbind = FALSE, ...) {
   assert_choice(device, mlr_reflections$torch$devices)
   assert_flag(rbind)
+  if (length(x) == 0L) {
+    if (rbind) {
+      return(torch_empty(0L))
+    } else {
+      return(list())
+    }
+  }
   UseMethod("materialize")
 }
 
@@ -154,9 +161,6 @@ get_output = function(input, graph, varying_shapes, rbind, device) {
 #' @return [`lazy_tensor()`]
 #' @keywords internal
 materialize_internal = function(x, device = "cpu", cache = NULL, rbind) {
-  if (!length(x)) {
-    stopf("Cannot materialize lazy tensor of length 0.")
-  }
   do_caching = !is.null(cache)
   ids = map_int(x, 1)
 
diff --git a/R/shape.R b/R/shape.R
index d1fdda83d..7970c37ec 100644
--- a/R/shape.R
+++ b/R/shape.R
@@ -30,7 +30,7 @@ test_shape = function(shape, null_ok = FALSE, unknown_batch = NULL, len = NULL)
   if (is.null(shape) && null_ok) {
     return(TRUE)
   }
-  ok = test_integerish(shape, min.len = 2L, all.missing = FALSE, any.missing = TRUE, len = len)
+  ok = test_integerish(shape, min.len = 1L, any.missing = TRUE, len = len)
 
   if (!ok) {
     return(FALSE)
diff --git a/R/task_dataset.R b/R/task_dataset.R
index af52519ca..07d373465 100644
--- a/R/task_dataset.R
+++ b/R/task_dataset.R
@@ -81,13 +81,21 @@ task_dataset = dataset("task_dataset",
   .getbatch = function(index) {
     cache = if (self$cache_lazy_tensors) new.env()
 
-    datapool = self$task$data(rows = self$task$row_ids[index], cols = self$all_features)
+    datapool = withr::with_options(list(mlr3torch.data_loading = TRUE), {
+      self$task$data(rows = self$task$row_ids[index], cols = self$all_features)
+    })
+
     x = lapply(self$feature_ingress_tokens, function(it) {
       it$batchgetter(datapool[, it$features, with = FALSE], cache = cache)
     })
 
     y = if (!is.null(self$target_batchgetter)) {
-      self$target_batchgetter(datapool[, self$task$target_names, with = FALSE])
+      target = datapool[, self$task$target_names, with = FALSE]
+      if (!inherits(target[[1L]], "lazy_tensor")) {
+        self$target_batchgetter(target)
+      } else {
+        materialize(target[[1L]], rbind = TRUE)
+      }
     }
     out = list(x = x, .index = torch_tensor(index, dtype = torch_long()))
     if (!is.null(y)) out$y = y
diff --git a/R/utils.R b/R/utils.R
index 74ae570e3..684d8993b 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -307,3 +307,15 @@ infer_shapes = function(shapes_in, param_vals, output_names, fn, rowwise, id) {
 
   set_names(list(sout), output_names)
 }
+
+get_or_check_dataset_shapes = function(dataset, dataset_shapes) {
+  if (is.null(dataset_shapes)) {
+    if (is.null(dataset$.getbatch)) {
+      stopf("dataset_shapes must be provided if dataset does not have a `.getbatch` method.")
+    }
+    dataset_shapes = infer_shapes_from_getbatch(dataset)
+  } else {
+    assert_compatible_shapes(dataset_shapes, dataset)
+  }
+  dataset_shapes
+}
\ No newline at end of file
diff --git a/TODO.md b/TODO.md
new file mode 100644
index 000000000..cb92919fe
--- /dev/null
+++ b/TODO.md
@@ -0,0 +1,22 @@
+* Add `as_lazy_tensors()`
+* Make it easier to se
+* Fix the bug that the shapes are reported as unknown below and make the code easier.
+  ```r
+  ds = dataset("test",
+    initialize = function() {
+      self$x = torch_randn(100, 10)
+      self$y = torch_randn(100, 1)
+    },
+    .getitem = function(i) {
+      list(x = self$x[i, ], y = self$y[i])
+    },
+    .length = function() {
+      nrow(self$x)
+    }
+  )()
+  x_lt = as_lazy_tensor(ds, list(x = c(NA, 10), y = c(NA, 1)), input_map = "x")
+  y_lt = as_lazy_tensor(ds, list(x = c(NA, 10), y = c(NA, 1)), input_map = "y")
+
+  tbl = data.table(x = x_lt, y = y_lt)
+  ```
+* Add checks on usage of `DataBackendLazyTensors` in `task_dataset`
\ No newline at end of file
diff --git a/tests/testthat/test_DataBackendLazyTensors.R b/tests/testthat/test_DataBackendLazyTensors.R
new file mode 100644
index 000000000..614ee5fd4
--- /dev/null
+++ b/tests/testthat/test_DataBackendLazyTensors.R
@@ -0,0 +1,41 @@
+test_that("DataBackendDataset", {
+  ds = dataset(
+    initialize = function() {
+      self$x = torch_randn(100, 10)
+      self$y = torch_randn(100, 1)
+    },
+    .getitem = function(i) {
+      list(x = self$x[i, ], y = self$y[i])
+    },
+    .length = function() {
+      nrow(self$x)
+    }
+  )()
+
+  tbl = as_lazy_tensors(ds, list(x = c(NA, 10), y = c(NA, 1)))
+  tbl$row_id = 1:100
+
+  be = DataBackendLazyTensors$new(tbl, primary_key = "row_id", converter = list(y = as.numeric), cache = "y")
+
+  expect_data_backend(be)
+
+  be$data(1, "y")
+
+  be$data(2, c("x", "y"))
+
+  be$head()
+
+
+  withr::with_options(list(mlr3torch.data_loading = TRUE), {
+    be$data(1, c("x", "y"))
+  })
+
+  learner = lrn("regr.mlp", batch_size = 32, epochs = 1)
+
+  task = as_task_regr(be, target = "y")
+  learner$train(task)
+})
+
+test_that("mlp works with it", {
+  learner = lrn("classif.mlp")
+})
diff --git a/tests/testthat/test_lazy_tensor.R b/tests/testthat/test_lazy_tensor.R
index b74208f8c..95881b083 100644
--- a/tests/testthat/test_lazy_tensor.R
+++ b/tests/testthat/test_lazy_tensor.R
@@ -3,8 +3,6 @@ test_that("prototype", {
   expect_class(proto, "lazy_tensor")
   expect_true(length(proto) == 0L)
   expect_error(dd(proto))
-
-  expect_error(materialize(lazy_tensor()), "Cannot materialize")
 })
 
 test_that("input checks", {
diff --git a/tests/testthat/test_materialize.R b/tests/testthat/test_materialize.R
index 170f673a4..f1fc5a8ae 100644
--- a/tests/testthat/test_materialize.R
+++ b/tests/testthat/test_materialize.R
@@ -17,8 +17,6 @@ test_that("materialize works on lazy_tensor", {
 
   expect_equal(torch_cat(map(output_meta_list, function(x) x$unsqueeze(1)), dim = 1L)$shape, output_meta_tnsr$shape)
   expect_true(output_meta_tnsr$device == torch_device("meta"))
-
-  expect_error(materialize(lazy_tensor()), "Cannot materialize ")
 })
 
 test_that("materialize works with differing shapes (hence uses .getitem)", {
@@ -75,7 +73,7 @@ test_that("materialize works with same shapes and .getitem method", {
 })
 
 test_that("materialize_internal works", {
-  expect_error(materialize_internal(lazy_tensor()), "Cannot materialize ")
+  expect_error(materialize_internal(lazy_tensor()), "Cannot access data descriptor")
   task = tsk("lazy_iris")
   x = task$data(1:2, cols = "x")[[1L]]
   res1 = materialize(x)
@@ -184,3 +182,8 @@ test_that("PipeOpFeatureUnion can properly check whether two lazy tensors are id
 
   expect_error(graph$train(task), "cannot aggregate different features sharing")
 })
+
+test_that("0-length", {
+  expect_equal(torch_empty(0L), materialize(lazy_tensor(), rbind = TRUE))
+  expect_equal(list(), materialize(lazy_tensor(), rbind = FALSE))
+})
diff --git a/tests/testthat/test_shape.R b/tests/testthat/test_shape.R
index dbafcb4dc..b1670a96e 100644
--- a/tests/testthat/test_shape.R
+++ b/tests/testthat/test_shape.R
@@ -21,4 +21,6 @@ test_that("assert_shape and friends", {
   expect_error(assert_shape(c(NA, 1, 2), len = 2))
   # NULL is ok even when len is specified
   expect_true(check_shape(NULL, null_ok = TRUE, len = 2))
+  # NA is valid shape
+  expect_true(check_shape(NA))
 })
diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R
index bfd20f409..fdc4f13f4 100644
--- a/tests/testthat/test_utils.R
+++ b/tests/testthat/test_utils.R
@@ -61,6 +61,7 @@ test_that("order_named_args works", {
   expect_error(order_named_args(function(..., x) NULL, list(2, 3, x = 1)), regexp = "`...` must")
   expect_error(order_named_args(function(y, ..., x) NULL, list(y = 4, 2, 3, x = 1)), regexp = "`...` must")
 })
+
 test_that("shape_to_str works", {
   expect_equal(shape_to_str(1), "(1)")
   expect_equal(shape_to_str(c(1, 2)), "(1,2)")

From b012de9e51bb9349809076814d06dadb7ce234f3 Mon Sep 17 00:00:00 2001
From: Sebastian Fischer <sebf.fischer@gmail.com>
Date: Wed, 16 Apr 2025 10:17:19 +0200
Subject: [PATCH 2/6] ...

---
 TODO.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/TODO.md b/TODO.md
index cb92919fe..ac96fb010 100644
--- a/TODO.md
+++ b/TODO.md
@@ -19,4 +19,6 @@
 
   tbl = data.table(x = x_lt, y = y_lt)
   ```
-* Add checks on usage of `DataBackendLazyTensors` in `task_dataset`
\ No newline at end of file
+* Add checks on usage of `DataBackendLazyTensors` in `task_dataset`
+* Add an optimization so that truth values don't have to be loaded twice during resampling, i.e.
+  once for making the predictions and once for retrieving the truth column.
\ No newline at end of file

From 521fdd7905686dda67d4dd9ae2c62bab28a83c1e Mon Sep 17 00:00:00 2001
From: Sebastian Fischer <sebf.fischer@gmail.com>
Date: Wed, 16 Apr 2025 17:06:54 +0200
Subject: [PATCH 3/6] ...

---
 NAMESPACE                                    |   3 +
 R/DataBackendLazyTensors.R                   |  71 +++++--
 R/lazy_tensor.R                              |  10 +
 R/materialize.R                              |  28 ++-
 R/utils.R                                    |   5 +-
 man/DataBackendLazyTensors.Rd                | 104 +++++++++
 man/mlr_learners_torch.Rd                    |   1 +
 tests/testthat/test_DataBackendLazyTensors.R | 210 +++++++++++++++++--
 8 files changed, 394 insertions(+), 38 deletions(-)
 create mode 100644 man/DataBackendLazyTensors.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 5d6fbe5c1..194cbde74 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -7,6 +7,7 @@ S3method("[[<-",lazy_tensor)
 S3method(as.data.table,DictionaryMlr3torchCallbacks)
 S3method(as.data.table,DictionaryMlr3torchLosses)
 S3method(as.data.table,DictionaryMlr3torchOptimizers)
+S3method(as_data_backend,dataset)
 S3method(as_data_descriptor,dataset)
 S3method(as_lazy_tensor,DataDescriptor)
 S3method(as_lazy_tensor,dataset)
@@ -29,6 +30,7 @@ S3method(as_torch_optimizer,torch_optimizer_generator)
 S3method(c,lazy_tensor)
 S3method(col_info,DataBackendLazy)
 S3method(col_info,DataBackendLazyTensors)
+S3method(distinct_values,lazy_tensor)
 S3method(format,lazy_tensor)
 S3method(hash_input,TorchIngressToken)
 S3method(hash_input,lazy_tensor)
@@ -71,6 +73,7 @@ export(CallbackSetTB)
 export(CallbackSetUnfreeze)
 export(ContextTorch)
 export(DataBackendLazy)
+export(DataBackendLazyTensors)
 export(DataDescriptor)
 export(LearnerTorch)
 export(LearnerTorchFeatureless)
diff --git a/R/DataBackendLazyTensors.R b/R/DataBackendLazyTensors.R
index 8fb94f626..d02897323 100644
--- a/R/DataBackendLazyTensors.R
+++ b/R/DataBackendLazyTensors.R
@@ -1,9 +1,37 @@
 
-#' @title Data Backend for Lazy Tensors
+#' @title Special Backend for Lazy Tensors
 #' @description
-#' Special **experimental** data backend that converts [`lazy_tensor`] columns to their R representation.
-#' However, [`LearnerTorch`] can directly operate on the lazy tensors.
+#' This backend essentially allows you to use a [`torch::dataset`] directly with
+#' an [`mlr3::Learner`].
+#'
+#' * The data cannot contain missing values, as [`lazy_tensor`]s do not support them.
+#'   For this reason, calling `$missings()` will always return `0` for all columns.
+#' * The `$distinct()` method will consider two lazy tensors that refer to the same element of a
+#'   [`DataDescriptor`] to be identical.
+#'   This means, that it might be underreporting the number of distinct values of lazy tensor columns.
+#'
 #' @export
+#' @examplesIf torch::torch_is_installed()
+#' # used as feature in all backends
+#' x = torch_randn(100, 10)
+#' # regression
+#' ds_regr = tensor_dataset(x = x, y = torch_randn(100, 1))
+#' be_regr = as_data_backend(ds_regr, converter = list(y = as.numeric))
+#' be_regr$head()
+#'
+#'
+#' # binary classification: underlying target tensor must be float in [0, 1]
+#' ds_binary = tensor_dataset(x = x, y = torch_randint(0, 2, c(100, 1))$float())
+#' be_binary = as_data_backend(ds_binary, converter = list(
+#'   y = function(x) factor(as.integer(x), levels = c(0, 1), labels = c("A", "yes"))
+#' ))
+#' be_binary$head()
+#'
+#' # multi-class classification: underlying target tensor must be integer in [1, K]
+#' ds_multiclass = tensor_dataset(x = x, y = torch_randint(1, 4, size = c(100, 1)))
+#' be_multiclass = as_data_backend(ds_multiclass, converter = list(y = as.numeric))
+#' be_multiclass$head()
+
 DataBackendLazyTensors = R6Class("DataBackendLazyTensors",
   cloneable = FALSE,
   inherit = DataBackendDataTable,
@@ -62,7 +90,7 @@ DataBackendLazyTensors = R6Class("DataBackendLazyTensors",
         return(super$head(n))
       }
 
-      self$data(n, self$colnames)
+      self$data(seq_len(n), self$colnames)
     },
     missings = function(rows, cols) {
       set_names(rep(0L, length(cols)), cols)
@@ -73,8 +101,14 @@ DataBackendLazyTensors = R6Class("DataBackendLazyTensors",
     .load_and_cache = function(rows, cols) {
       # Process columns that need conversion
       tbl = super$data(rows, cols)
-      for (nm in intersect(names(private$.converter), names(tbl))) {
-        converted = private$.converter[[nm]](materialize(tbl[[nm]], rbind = TRUE))
+      cols_to_convert = intersect(names(private$.converter), names(tbl))
+      tbl_to_mat = tbl[, cols_to_convert, with = FALSE]
+      tbl_mat = materialize(tbl_to_mat, rbind = TRUE)
+
+      # empty `rows` need no special casing: materialize() returns empty tensors for 0-length lazy tensors
+
+      for (nm in cols_to_convert) {
+        converted = private$.converter[[nm]](tbl_mat[[nm]])
         tbl[[nm]] = converted
 
         if (nm %in% private$.cached_cols) {
@@ -90,16 +124,27 @@ DataBackendLazyTensors = R6Class("DataBackendLazyTensors",
 )
 
 #' @export
-col_info.DataBackendLazyTensors = function(x, ...) { # nolint
-  first_row = x$head(1L)
-  types = map_chr(first_row, function(x) class(x)[1L])
-  discrete = setdiff(names(types)[types %chin% c("factor", "ordered")], x$primary_key)
-  levels = insert_named(named_list(names(types)), map(first_row[discrete], levels))
-  data.table(id = names(types), type = unname(types), levels = levels, key = "id")
+as_data_backend.dataset = function(x, dataset_shapes = NULL, ...) {
+  tbl = as_lazy_tensors(x, dataset_shapes = dataset_shapes) # NULL: shapes are inferred via `.getbatch`
+  tbl$row_id = seq_len(nrow(tbl))
+  DataBackendLazyTensors$new(tbl, primary_key = "row_id", ...)
 }
 
 #' @export
-as_data_backend.dataset = function(x, dataset_shapes, primary_key ...) {
+as_task_classif.dataset = function(x, dataset_shapes, target, ...) {
+  # TODO
+}
 
+#' @export
+as_task_regr.dataset = function(x, dataset_shapes, target, converter, ...) {
+  # TODO
+}
 
+#' @export
+col_info.DataBackendLazyTensors = function(x, ...) { # nolint
+  first_row = x$head(1L)
+  types = map_chr(first_row, function(x) class(x)[1L])
+  discrete = setdiff(names(types)[types %chin% c("factor", "ordered")], x$primary_key)
+  levels = insert_named(named_list(names(types)), map(first_row[, discrete, with = FALSE], levels))
+  data.table(id = names(types), type = unname(types), levels = levels, key = "id")
 }
\ No newline at end of file
diff --git a/R/lazy_tensor.R b/R/lazy_tensor.R
index 079cf436d..00b397575 100644
--- a/R/lazy_tensor.R
+++ b/R/lazy_tensor.R
@@ -352,3 +352,13 @@ rep.lazy_tensor = function(x, ...) {
 rep_len.lazy_tensor = function(x, ...) {
   set_class(NextMethod(), c("lazy_tensor", "list"))
 }
+
+
+#' @export
+distinct_values.lazy_tensor = function(x, drop = TRUE, na_rm = TRUE) {
+  if (!length(x)) {
+    return(x)
+  }
+  ids = distinct_values(map_int(x, 1))
+  lazy_tensor(dd(x), ids)
+}
\ No newline at end of file
diff --git a/R/materialize.R b/R/materialize.R
index 185baa5b2..a2294b47a 100644
--- a/R/materialize.R
+++ b/R/materialize.R
@@ -44,13 +44,6 @@
 materialize = function(x, device = "cpu", rbind = FALSE, ...) {
   assert_choice(device, mlr_reflections$torch$devices)
   assert_flag(rbind)
-  if (length(x) == 0L) {
-    if (rbind) {
-      return(torch_empty(0L))
-    } else {
-      return(list())
-    }
-  }
   UseMethod("materialize")
 }
 
@@ -70,6 +63,13 @@ materialize.list = function(x, device = "cpu", rbind = FALSE, cache = "auto", ..
 
   map(x, function(col) {
     if (is_lazy_tensor(col)) {
+      if (length(col) == 0L) {
+        if (rbind) {
+          return(torch_empty(0L))
+        } else {
+          return(list())
+        }
+      }
       materialize_internal(col, device = device, cache = cache, rbind = rbind)
     } else {
       col
@@ -83,12 +83,26 @@ materialize.list = function(x, device = "cpu", rbind = FALSE, cache = "auto", ..
 #' @method materialize data.frame
 #' @export
 materialize.data.frame = function(x, device = "cpu", rbind = FALSE, cache = "auto", ...) { # nolint
+  if (nrow(x) == 0L) {
+    # Zero rows: short-circuit. Lazy tensor columns become an empty tensor
+    # (rbind) or an empty list; every other column is passed through as-is.
+    return(map(as.list(x), function(col) {
+      if (!is_lazy_tensor(col)) col else if (rbind) torch_empty(0L) else list()
+    }))
+  }
   materialize(as.list(x), device = device, rbind = rbind, cache = cache)
 }
 
 
 #' @export
 materialize.lazy_tensor = function(x, device = "cpu", rbind = FALSE, ...) { # nolint
+  if (length(x) == 0L) {
+    if (rbind) {
+      return(torch_empty(0L))
+    } else {
+      return(list())
+    }
+  }
   materialize_internal(x = x, device = device, cache = NULL, rbind = rbind)
 }
 
diff --git a/R/utils.R b/R/utils.R
index 684d8993b..4ec8f8030 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -198,7 +198,10 @@ list_to_batch = function(tensors) {
 }
 
 auto_cache_lazy_tensors = function(lts) {
-  any(duplicated(map_chr(lts, function(x) dd(x)$dataset_hash)))
+  # TRUE iff at least two non-empty lazy tensors share the same dataset hash;
+  # empty lazy tensors are skipped because their data descriptor cannot be accessed.
+  if (length(lts) <= 1L) return(FALSE)
+  anyDuplicated(unlist(map_if(lts, function(x) length(x) > 0, function(x) dd(x)$dataset_hash))) > 0L
 }
 
 #' Replace the head of a network
diff --git a/man/DataBackendLazyTensors.Rd b/man/DataBackendLazyTensors.Rd
new file mode 100644
index 000000000..6ae1817a4
--- /dev/null
+++ b/man/DataBackendLazyTensors.Rd
@@ -0,0 +1,104 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/DataBackendLazyTensors.R
+\name{DataBackendLazyTensors}
+\alias{DataBackendLazyTensors}
+\title{Data Backend for Lazy Tensors}
+\description{
+Special \strong{experimental} data backend that converts \code{\link{lazy_tensor}} columns to their R representation.
+However, \code{\link{LearnerTorch}} can directly operate on the lazy tensors.
+\itemize{
+\item The data cannot contain missing values, as \code{\link{lazy_tensor}}s do not support them.
+For this reason, calling \verb{$missings()} will always return \code{0} for all columns.
+\item The \verb{$distinct()} method will consider two lazy tensors that refer to the same element of a
+\code{\link{DataDescriptor}} to be identical.
+This means, that it might be underreporting the number of distinct values of lazy tensor columns.
+}
+}
+\examples{
+# regression
+ds = tensor_dataset(x = torch_randn(100, 10), y = torch_randn(100, 1))
+tbl = as
+}
+\section{Super classes}{
+\code{\link[mlr3:DataBackend]{mlr3::DataBackend}} -> \code{\link[mlr3:DataBackendDataTable]{mlr3::DataBackendDataTable}} -> \code{DataBackendLazyTensors}
+}
+\section{Methods}{
+\subsection{Public methods}{
+\itemize{
+\item \href{#method-DataBackendLazyTensors-new}{\code{DataBackendLazyTensors$new()}}
+\item \href{#method-DataBackendLazyTensors-data}{\code{DataBackendLazyTensors$data()}}
+\item \href{#method-DataBackendLazyTensors-head}{\code{DataBackendLazyTensors$head()}}
+\item \href{#method-DataBackendLazyTensors-missings}{\code{DataBackendLazyTensors$missings()}}
+}
+}
+\if{html}{\out{
+<details open><summary>Inherited methods</summary>
+<ul>
+<li><span class="pkg-link" data-pkg="mlr3" data-topic="DataBackend" data-id="format"><a href='../../mlr3/html/DataBackend.html#method-DataBackend-format'><code>mlr3::DataBackend$format()</code></a></span></li>
+<li><span class="pkg-link" data-pkg="mlr3" data-topic="DataBackend" data-id="print"><a href='../../mlr3/html/DataBackend.html#method-DataBackend-print'><code>mlr3::DataBackend$print()</code></a></span></li>
+<li><span class="pkg-link" data-pkg="mlr3" data-topic="DataBackendDataTable" data-id="distinct"><a href='../../mlr3/html/DataBackendDataTable.html#method-DataBackendDataTable-distinct'><code>mlr3::DataBackendDataTable$distinct()</code></a></span></li>
+</ul>
+</details>
+}}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-DataBackendLazyTensors-new"></a>}}
+\if{latex}{\out{\hypertarget{method-DataBackendLazyTensors-new}{}}}
+\subsection{Method \code{new()}}{
+Create a new instance of this \link[R6:R6Class]{R6} class.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{DataBackendLazyTensors$new(
+  data,
+  primary_key,
+  converter,
+  cache = names(converter)
+)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{data}}{(\code{data.table})\cr
+Data containing (among others) \code{\link{lazy_tensor}} columns.}
+
+\item{\code{primary_key}}{(\code{character(1)})\cr
+Name of the column used as primary key.}
+
+\item{\code{converter}}{(named \code{list()} of \code{function}s)\cr
+A named list of functions that convert the lazy tensor columns to their R representation.
+The names must be the names of the columns that need conversion.}
+
+\item{\code{cache}}{(\code{character()})\cr
+Names of the columns that should be cached.
+Per default, all columns that are converted are cached.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-DataBackendLazyTensors-data"></a>}}
+\if{latex}{\out{\hypertarget{method-DataBackendLazyTensors-data}{}}}
+\subsection{Method \code{data()}}{
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{DataBackendLazyTensors$data(rows, cols)}\if{html}{\out{</div>}}
+}
+
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-DataBackendLazyTensors-head"></a>}}
+\if{latex}{\out{\hypertarget{method-DataBackendLazyTensors-head}{}}}
+\subsection{Method \code{head()}}{
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{DataBackendLazyTensors$head(n = 6L)}\if{html}{\out{</div>}}
+}
+
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-DataBackendLazyTensors-missings"></a>}}
+\if{latex}{\out{\hypertarget{method-DataBackendLazyTensors-missings}{}}}
+\subsection{Method \code{missings()}}{
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{DataBackendLazyTensors$missings(rows, cols)}\if{html}{\out{</div>}}
+}
+
+}
+}
diff --git a/man/mlr_learners_torch.Rd b/man/mlr_learners_torch.Rd
index 3f1d3e6d0..cbb497d59 100644
--- a/man/mlr_learners_torch.Rd
+++ b/man/mlr_learners_torch.Rd
@@ -183,6 +183,7 @@ For networks with more than one input, the names must correspond to the inputs o
 }
 
 Moreover, one needs to pay attention respect the row ids of the provided task.
+It is strongly recommended to use the \code{\link{task_dataset}} class to create the dataset.
 }
 
 It is also possible to overwrite the private \code{.dataloader()} method.
diff --git a/tests/testthat/test_DataBackendLazyTensors.R b/tests/testthat/test_DataBackendLazyTensors.R
index 614ee5fd4..c248c9b67 100644
--- a/tests/testthat/test_DataBackendLazyTensors.R
+++ b/tests/testthat/test_DataBackendLazyTensors.R
@@ -1,41 +1,217 @@
-test_that("DataBackendDataset", {
+test_that("correct input checks", {
+
+})
+
+test_that("main API works", {
+  # regression target
+  ds = tensor_dataset(
+    x = torch_tensor(matrix(100:1, nrow = 100, ncol = 1)),
+    y = torch_tensor(as.matrix(1:100, nrow = 100, ncol = 1))
+  )
+
+  be = as_data_backend(ds, converter = list(y = as.numeric), dataset_shapes = list(x = c(NA, 1), y = c(NA, 1)))
+
+  # converted data
+
+  batch1 = be$data(1, c("x", "y"))
+  expect_class(batch1$x, "lazy_tensor")
+  expect_equal(length(batch1$x), 1)
+  expect_equal(materialize(batch1$x, rbind = TRUE), torch_tensor(matrix(100L, nrow = 1, ncol = 1)))
+  expect_equal(batch1$y, 1)
+
+  batch2 = be$data(2:1, c("x", "y"))
+  expect_class(batch2$x, "lazy_tensor")
+  expect_equal(length(batch2$x), 2)
+  expect_equal(materialize(batch2$x, rbind = TRUE), torch_tensor(matrix(100:99, nrow = 2, ncol = 1)))
+  expect_equal(batch2$y, c(2, 1))
+
+  # lt data
+  batch_lt1 = withr::with_options(list(mlr3torch.data_loading = TRUE), {
+    be$data(1, c("x", "y"))
+  })
+  expect_class(batch_lt1$x, "lazy_tensor")
+  expect_equal(length(batch_lt1$x), 1)
+  expect_equal(materialize(batch_lt1$x, rbind = TRUE), torch_tensor(matrix(100L, nrow = 1, ncol = 1)))
+  # y is still a lazy tensor
+  expect_class(batch_lt1$y, "lazy_tensor")
+  expect_equal(length(batch_lt1$y), 1)
+
+  batch_lt2 = withr::with_options(list(mlr3torch.data_loading = TRUE), {
+    be$data(2:1, c("x", "y"))
+  })
+  expect_class(batch_lt2$x, "lazy_tensor")
+  expect_equal(length(batch_lt2$x), 2)
+  expect_equal(materialize(batch_lt2$x, rbind = TRUE), torch_tensor(matrix(100:99, nrow = 2, ncol = 1)))
+  # y is still a lazy tensor
+  expect_class(batch_lt2$y, "lazy_tensor")
+  expect_equal(length(batch_lt2$y), 2)
+
+  # missings
+  expect_equal(be$missings(1:100, c("y", "x")), c(y = 0, x = 0))
+  expect_equal(be$missings(1:100, "y"), c(y = 0))
+  expect_equal(be$missings(1:100, "x"), c(x = 0))
+
+  # head
+  tbl = be$head(n = 3)
+  expect_data_table(tbl, nrow = 3, ncol = 3)
+  expect_class(tbl$x, "lazy_tensor")
+  expect_equal(materialize(tbl$x, rbind = TRUE), torch_tensor(matrix(100:98, nrow = 3, ncol = 1)))
+  expect_class(tbl$y, "numeric")
+  expect_equal(tbl$row_id, as.numeric(1:3))
+  expect_class(tbl$row_id, "integer")
+  expect_equal(tbl$row_id, 1:3)
+
+  # distinct values: this can be expensive
+  dist = be$distinct(1:3, c("x", "y", "row_id"))
+  expect_list(dist, len = 3)
+  expect_equal(materialize(dist$x, rbind = TRUE), torch_tensor(matrix(100:98, nrow = 3, ncol = 1)))
+  expect_equal(dist$y, c(1, 2, 3))
+  expect_equal(dist$row_id, 1:3)
+})
+
+test_that("classif target works", {
   ds = dataset(
     initialize = function() {
-      self$x = torch_randn(100, 10)
-      self$y = torch_randn(100, 1)
+      self$x = torch_tensor(matrix(100:1, nrow = 100, ncol = 1))
+      self$y = torch_tensor(matrix(rep(c(0, 1), each = 50), nrow = 100, ncol = 1))
     },
     .getitem = function(i) {
-      list(x = self$x[i, ], y = self$y[i])
+      list(x = self$x[i], y = self$y[i])
     },
     .length = function() {
       nrow(self$x)
     }
   )()
 
-  tbl = as_lazy_tensors(ds, list(x = c(NA, 10), y = c(NA, 1)))
+  tbl = as_lazy_tensors(ds, list(x = c(NA, 1), y = c(NA, 1)))
   tbl$row_id = 1:100
 
-  be = DataBackendLazyTensors$new(tbl, primary_key = "row_id", converter = list(y = as.numeric), cache = "y")
+  be = DataBackendLazyTensors$new(tbl, primary_key = "row_id", converter = list(
+    y = function(x) factor(as.integer(x), levels = c(0, 1), labels = c("yes", "no"))
+  ))
+  batch = be$data(c(1, 2, 51, 52), c("x", "y", "row_id"))
+  expect_class(batch$y, "factor")
+  expect_equal(batch$y, factor(c("yes", "yes", "no", "no"), levels = c("yes", "no")))
 
-  expect_data_backend(be)
+  batch_lt = withr::with_options(list(mlr3torch.data_loading = TRUE), {
+    be$data(c(1, 2, 51, 52), c("x", "y", "row_id"))
+  })
+  expect_class(batch_lt$y, "lazy_tensor")
+  expect_equal(length(batch_lt$y), 4)
+  expect_equal(materialize(batch_lt$y, rbind = TRUE), torch_tensor(matrix(c(1, 1, 0, 0), nrow = 4, ncol = 1)))
+})
 
-  be$data(1, "y")
+test_that("errors when weird preprocessing", {
+  # test following example pipeops:
+  # - target trafo
+  # - fix factors
+  # - smote
 
-  be$data(2, c("x", "y"))
+})
 
-  be$head()
+test_that("caching works", {
+  dsc = dataset(
+    initialize = function() {
+      self$x = torch_tensor(matrix(100:1, nrow = 100, ncol = 1))
+      self$y = torch_tensor(as.matrix(1:100, nrow = 100, ncol = 1))
+      self$counter = 0
+    },
+    .getitem = function(i) {
+      self$counter = self$counter + 1
+      list(x = self$x[i], y = self$y[i])
+    },
+    .length = function() {
+      nrow(self$x)
+    }
+  )
 
+  ds = dsc()
 
-  withr::with_options(list(mlr3torch.data_loading = TRUE), {
-    be$data(1, c("x", "y"))
-  })
+  be = as_data_backend(ds, dataset_shapes = list(x = c(NA, 1), y = c(NA, 1)),
+    converter = list(y = as.integer), cache = "y")
+
+  check = function(be, ds, rows, cols, n) {
+    counter_prev = ds$counter
+    tbl = be$data(rows, cols)
+    observed_n = ds$counter - counter_prev
+    expect_equal(observed_n, n)
+
+    expect_equal(materialize(tbl$x, rbind = TRUE), ds$x[rows])
+    expect_equal(tbl$y, as.integer(ds$y[rows]))
+  }
+  check(be, ds, 1, c("x", "y"), 1)
+  # y is no in the cache, so .getitem() is not called on $data()
+  check(be, ds, 1, "y", 0)
+
+  # but x is not cached, so we still need to call .getitem below
+  check(be, ds, 1, c("x", "y"), 1)
+
+  # more than one row also works
+  check(be, ds, 2:1, "y", 1)
+  check(be, ds, c(3, 1), "y", 1)
+  check(be, ds, 1:3, "y", 0)
+
+  # when caching more than one, we materialize only once per batch
+  be2 = as_data_backend(ds, dataset_shapes = list(x = c(NA, 1), y = c(NA, 1)),
+    converter = list(y = as.integer, x = as.integer), cache = c("y", "x"))
 
-  learner = lrn("regr.mlp", batch_size = 32, epochs = 1)
+  check2 = function(be, ds, rows, cols, n) {
+    counter_prev = ds$counter
+    tbl = be$data(rows, cols)
+    observed_n = ds$counter - counter_prev
+    expect_equal(observed_n, n)
 
+    expect_equal(tbl$y, as.integer(ds$y[rows]))
+    expect_equal(tbl$x, as.integer(ds$x[rows]))
+  }
+
+  check2(be2, ds, 1, c("x", "y"), 1)
+  check2(be2, ds, 1, c("x", "y"), 0)
+  check2(be2, ds, 2:1, c("x", "y"), 1)
+  check2(be2, ds, 2, c("x", "y"), 0)
+})
+
+test_that("can train a regression learner", {
+  ds = tensor_dataset(
+    x = torch_tensor(matrix(100:1, nrow = 100, ncol = 1))$float(),
+    y = torch_tensor(as.matrix(1:100, nrow = 100, ncol = 1))$float()
+  )
+
+  be = as_data_backend(ds, dataset_shapes = list(x = c(NA, 1), y = c(NA, 1)),
+    converter = list(y = as.numeric))
   task = as_task_regr(be, target = "y")
-  learner$train(task)
+
+  learner = lrn("regr.mlp", epochs = 200, batch_size = 100, jit_trace = TRUE, opt.lr = 1, seed = 1)
+  rr = resample(task, learner, rsmp("insample"))
+  expect_true(rr$aggregate(msr("regr.rmse")) < 3)
 })
 
-test_that("mlp works with it", {
-  learner = lrn("classif.mlp")
+test_that("can train a binary classification learner", {
+  ds = tensor_dataset(
+    x = torch_tensor(matrix(100:1, nrow = 100, ncol = 1))$float(),
+    y = torch_tensor(as.matrix(1:100, nrow = 100, ncol = 1))$float()
+  )
+
+  be = as_data_backend(ds, dataset_shapes = list(x = c(NA, 1), y = c(NA, 1)),
+    converter = list(y = as.numeric))
+  task = as_task_regr(be, target = "y")
+
+  learner = lrn("regr.mlp", epochs = 200, batch_size = 100, jit_trace = TRUE, opt.lr = 1, seed = 1)
+  rr = resample(task, learner, rsmp("insample"))
+  expect_true(rr$aggregate(msr("regr.rmse")) < 3)
 })
+
+test_that("can train a multiclass classification learner", {
+  ds = tensor_dataset(
+    x = torch_tensor(matrix(100:1, nrow = 100, ncol = 1))$float(),
+    y = torch_tensor(matrix(rep(c(0, 1), each = 50), nrow = 100, ncol = 1))$float()
+  )
+
+  be = as_data_backend(ds, dataset_shapes = list(x = c(NA, 1), y = c(NA, 1)),
+    converter = list(y = function(x) factor(as.integer(x), levels = c(0, 1), labels = c("yes", "no"))))
+  task = as_task_classif(be, target = "y")
+
+  learner = lrn("classif.mlp", epochs = 200, batch_size = 100, jit_trace = TRUE, opt.lr = 1, seed = 1)
+  rr = resample(task, learner, rsmp("insample"))
+  expect_true(rr$aggregate(msr("regr.rmse")) < 3)
+})
\ No newline at end of file

From 9aa26f16dbb8a929d08991e24041b1f50efeecff Mon Sep 17 00:00:00 2001
From: Sebastian Fischer <sebf.fischer@gmail.com>
Date: Thu, 17 Apr 2025 08:27:53 +0200
Subject: [PATCH 4/6] ...

---
 NAMESPACE                     |  2 ++
 R/DataBackendLazyTensors.R    |  2 --
 R/DataDescriptor.R            |  8 +++-----
 R/utils.R                     |  1 +
 man/DataBackendLazyTensors.Rd | 28 +++++++++++++++++++++++-----
 5 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 4228002d1..1ed76e181 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -14,6 +14,8 @@ S3method(as_lazy_tensor,dataset)
 S3method(as_lazy_tensor,numeric)
 S3method(as_lazy_tensor,torch_tensor)
 S3method(as_lazy_tensors,dataset)
+S3method(as_task_classif,dataset)
+S3method(as_task_regr,dataset)
 S3method(as_torch_callback,R6ClassGenerator)
 S3method(as_torch_callback,TorchCallback)
 S3method(as_torch_callback,character)
diff --git a/R/DataBackendLazyTensors.R b/R/DataBackendLazyTensors.R
index d02897323..21697b860 100644
--- a/R/DataBackendLazyTensors.R
+++ b/R/DataBackendLazyTensors.R
@@ -105,8 +105,6 @@ DataBackendLazyTensors = R6Class("DataBackendLazyTensors",
       tbl_to_mat = tbl[, cols_to_convert, with = FALSE]
       tbl_mat = materialize(tbl_to_mat, rbind = TRUE)
 
-      if (!length(rows)) browser()
-
       for (nm in cols_to_convert) {
         converted = private$.converter[[nm]](tbl_mat[[nm]])
         tbl[[nm]] = converted
diff --git a/R/DataDescriptor.R b/R/DataDescriptor.R
index 11410b480..8377bdca4 100644
--- a/R/DataDescriptor.R
+++ b/R/DataDescriptor.R
@@ -234,12 +234,10 @@ assert_compatible_shapes = function(shapes, dataset) {
     }
   })
 
-  if (is.null(dataset$.getbatch)) {
-    example = map(example, function(x) x$unsqueeze(1))
-  }
-
   iwalk(shapes, function(dataset_shape, name) {
-    if (!is.null(dataset_shape) && !test_equal(shapes[[name]][-1], example[[name]]$shape[-1L])) {
+    observed_shape = example[[name]]$shape
+    observed_shape[1] = NA
+    if (!is.null(dataset_shape) && !test_equal(shapes[[name]], observed_shape)) {
       expected_shape = example[[name]]$shape
       expected_shape[1] = NA
       stopf(paste0("First batch from dataset is incompatible with the provided shape of %s:\n",
diff --git a/R/utils.R b/R/utils.R
index c6cdbec0d..2680785fc 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -304,6 +304,7 @@ infer_shapes = function(shapes_in, param_vals, output_names, fn, rowwise, id) {
 }
 
 get_or_check_dataset_shapes = function(dataset, dataset_shapes) {
+  dataset_shapes = map(dataset_shapes, as.integer)
   if (is.null(dataset_shapes)) {
     if (is.null(dataset$.getbatch)) {
       stopf("dataset_shapes must be provided if dataset does not have a `.getbatch` method.")
diff --git a/man/DataBackendLazyTensors.Rd b/man/DataBackendLazyTensors.Rd
index 6ae1817a4..9ac930281 100644
--- a/man/DataBackendLazyTensors.Rd
+++ b/man/DataBackendLazyTensors.Rd
@@ -2,10 +2,10 @@
 % Please edit documentation in R/DataBackendLazyTensors.R
 \name{DataBackendLazyTensors}
 \alias{DataBackendLazyTensors}
-\title{Data Backend for Lazy Tensors}
+\title{Special Backend for Lazy Tensors}
 \description{
-Special \strong{experimental} data backend that converts \code{\link{lazy_tensor}} columns to their R representation.
-However, \code{\link{LearnerTorch}} can directly operate on the lazy tensors.
+This backend essentially allows you to use a \code{\link[torch:dataset]{torch::dataset}} directly with
+an \code{\link[mlr3:Learner]{mlr3::Learner}}.
 \itemize{
 \item The data cannot contain missing values, as \code{\link{lazy_tensor}}s do not support them.
 For this reason, calling \verb{$missings()} will always return \code{0} for all columns.
@@ -15,9 +15,27 @@ This means, that it might be underreporting the number of distinct values of laz
 }
 }
 \examples{
+\dontshow{if (torch::torch_is_installed()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+# used as feature in all backends
+x = torch_randn(100, 10)
 # regression
-ds = tensor_dataset(x = torch_randn(100, 10), y = torch_randn(100, 1))
-tbl = as
+ds_regr = tensor_dataset(x = x, y = torch_randn(100, 1))
+be_regr = as_data_backend(ds_regr, converter = list(y = as.numeric))
+be_regr$head()
+
+
+# binary classification: underlying target tensor must be float in [0, 1]
+ds_binary = tensor_dataset(x = x, y = torch_randint(0, 2, c(100, 1))$float())
+be_binary = as_data_backend(ds_binary, converter = list(
+  y = function(x) factor(as.integer(x), levels = c(0, 1), labels = c("A", "yes"))
+))
+be_binary$head()
+
+# multi-class classification: underlying target tensor must be integer in [1, K]
+ds_multiclass = tensor_dataset(x = x, y = torch_randint(1, 4, size = c(100, 1)))
+be_multiclass = as_data_backend(ds_multiclass, converter = list(y = as.numeric))
+be_multiclass$head()
+\dontshow{\}) # examplesIf}
 }
 \section{Super classes}{
 \code{\link[mlr3:DataBackend]{mlr3::DataBackend}} -> \code{\link[mlr3:DataBackendDataTable]{mlr3::DataBackendDataTable}} -> \code{DataBackendLazyTensors}

From 06595c78193529eaa418883847c620b6ca40d3b0 Mon Sep 17 00:00:00 2001
From: Sebastian Fischer <sebf.fischer@gmail.com>
Date: Thu, 17 Apr 2025 11:11:16 +0200
Subject: [PATCH 5/6] fixes

---
 R/DataBackendLazyTensors.R                   | 32 +++++++++
 R/DataDescriptor.R                           | 23 ++++---
 R/learner_torch_methods.R                    |  4 ++
 R/utils.R                                    |  5 +-
 tests/testthat/test_DataBackendLazyTensors.R | 68 ++++++++++++++------
 5 files changed, 101 insertions(+), 31 deletions(-)

diff --git a/R/DataBackendLazyTensors.R b/R/DataBackendLazyTensors.R
index 21697b860..2680634cc 100644
--- a/R/DataBackendLazyTensors.R
+++ b/R/DataBackendLazyTensors.R
@@ -96,6 +96,12 @@ DataBackendLazyTensors = R6Class("DataBackendLazyTensors",
       set_names(rep(0L, length(cols)), cols)
     }
   ),
+  active = list(
+    converter = function(rhs) {
+      assert_ro_binding(rhs)
+      private$.converter
+    }
+  ),
   private = list(
     # call this function only with rows that are not in the cache yet
     .load_and_cache = function(rows, cols) {
@@ -145,4 +151,30 @@ col_info.DataBackendLazyTensors = function(x, ...) { # nolint
   discrete = setdiff(names(types)[types %chin% c("factor", "ordered")], x$primary_key)
   levels = insert_named(named_list(names(types)), map(first_row[, discrete, with = FALSE], levels))
   data.table(id = names(types), type = unname(types), levels = levels, key = "id")
+}
+
+
+# conservative check that avoids that a pseudo-lazy-tensor is preprocessed by some pipeop
+# @param be
+#   the backend
+# @param candidates
+#   the feature and target names
+# @param visited
+#  Union of all colnames already visited
+# @return visited
+check_lazy_tensors_backend = function(be, candidates, visited = character()) {
+  if (inherits(be, "DataBackendRbind") || inherits(be, "DataBackendCbind")) {
+    bs = be$.__enclos_env__$private$.data
+    # first we check b2, then b1, because b2 possibly overshadows some b1 rows/cols
+    visited = check_lazy_tensors_backend(bs$b2, candidates, visited)
+    check_lazy_tensors_backend(bs$b1, candidates, visited)
+  } else {
+    if (inherits(be, "DataBackendLazyTensors")) {
+      if (any(names(be$converter) %in% visited)) {
+        converter_cols = names(be$converter)[names(be$converter) %in% visited]
+        stopf("A converter column ('%s') from a DataBackendLazyTensors was presumably preprocessed by some PipeOp. This can cause inefficiencies and is therefore not allowed. If you want to preprocess them, please directly encode them as R types.", paste0(converter_cols, collapse = ", ")) # nolint
+      }
+    }
+    union(visited, intersect(candidates, be$colnames))
+  }
 }
\ No newline at end of file
diff --git a/R/DataDescriptor.R b/R/DataDescriptor.R
index 8377bdca4..6a1d65740 100644
--- a/R/DataDescriptor.R
+++ b/R/DataDescriptor.R
@@ -217,13 +217,14 @@ infer_shapes_from_getbatch = function(ds) {
 }
 
 assert_compatible_shapes = function(shapes, dataset) {
-  assert_shapes(shapes, null_ok = TRUE, unknown_batch = TRUE, named = TRUE)
+  shapes = assert_shapes(shapes, null_ok = TRUE, unknown_batch = TRUE, named = TRUE, coerce = TRUE)
 
   # prevent user from e.g. forgetting to wrap the return in a list
-  example = if (is.null(dataset$.getbatch)) {
-    dataset$.getitem(1L)
-  } else {
+  has_getbatch = !is.null(dataset$.getbatch)
+  example = if (has_getbatch) {
     dataset$.getbatch(1L)
+  } else {
+    dataset$.getitem(1L)
   }
   if (!test_list(example, names = "unique") || !test_permutation(names(example), names(shapes))) {
     stopf("Dataset must return a list with named elements that are a permutation of the dataset_shapes names.")
@@ -236,13 +237,15 @@ assert_compatible_shapes = function(shapes, dataset) {
 
   iwalk(shapes, function(dataset_shape, name) {
     observed_shape = example[[name]]$shape
-    observed_shape[1] = NA
-    if (!is.null(dataset_shape) && !test_equal(shapes[[name]], observed_shape)) {
-      expected_shape = example[[name]]$shape
-      expected_shape[1] = NA
+    if (has_getbatch) {
+      observed_shape[1L] = NA_integer_
+    } else {
+      observed_shape = c(NA_integer_, observed_shape)
+    }
+    if (!is.null(dataset_shape) && !test_equal(observed_shape, dataset_shape)) {
       stopf(paste0("First batch from dataset is incompatible with the provided shape of %s:\n",
-        "* Provided shape: %s.\n* Expected shape: %s."), name,
-        shape_to_str(unname(shapes[name])), shape_to_str(list(expected_shape)))
+        "* Provided shape: %s.\n* Observed shape: %s."), name,
+        shape_to_str(unname(shapes[name])), shape_to_str(list(observed_shape)))
     }
   })
 }
diff --git a/R/learner_torch_methods.R b/R/learner_torch_methods.R
index 79cebaa4a..7259bf587 100644
--- a/R/learner_torch_methods.R
+++ b/R/learner_torch_methods.R
@@ -18,8 +18,10 @@ learner_torch_predict = function(self, private, super, task, param_vals) {
   private$.encode_prediction(predict_tensor = predict_tensor, task = task)
 }
 
+
 learner_torch_train = function(self, private, super, task, param_vals) {
   # Here, all param_vals (like seed = "random" or device = "auto") have already been resolved
+  check_lazy_tensors_backend(task$backend, c(task$feature_names, task$target_names))
   dataset_train = private$.dataset(task, param_vals)
   dataset_train = as_multi_tensor_dataset(dataset_train, param_vals)
   loader_train = private$.dataloader(dataset_train, param_vals)
@@ -356,3 +358,5 @@ as_multi_tensor_dataset = function(dataset, param_vals) {
     dataset
   }
 }
+
+
diff --git a/R/utils.R b/R/utils.R
index 2680785fc..bcb17af66 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -191,9 +191,9 @@ list_to_batch = function(tensors) {
 
 auto_cache_lazy_tensors = function(lts) {
   if (length(lts) <= 1L) {
-    FALSE
+    return(FALSE)
   }
-  anyDuplicated(unlist(map_if(lts, function(x) length(x) > 0, function(x) dd(x)$dataset_hash)))
+  anyDuplicated(unlist(map_if(lts, function(x) length(x) > 0, function(x) dd(x)$dataset_hash))) > 0L
 }
 
 #' Replace the head of a network
@@ -304,7 +304,6 @@ infer_shapes = function(shapes_in, param_vals, output_names, fn, rowwise, id) {
 }
 
 get_or_check_dataset_shapes = function(dataset, dataset_shapes) {
-  dataset_shapes = map(dataset_shapes, as.integer)
   if (is.null(dataset_shapes)) {
     if (is.null(dataset$.getbatch)) {
       stopf("dataset_shapes must be provided if dataset does not have a `.getbatch` method.")
diff --git a/tests/testthat/test_DataBackendLazyTensors.R b/tests/testthat/test_DataBackendLazyTensors.R
index c248c9b67..d1272ed5a 100644
--- a/tests/testthat/test_DataBackendLazyTensors.R
+++ b/tests/testthat/test_DataBackendLazyTensors.R
@@ -53,7 +53,7 @@ test_that("main API works", {
 
   # head
   tbl = be$head(n = 3)
-  expect_data_table(tbl, nrow = 3, ncol = 3)
+  expect_data_table(tbl, nrows = 3, ncols = 3)
   expect_class(tbl$x, "lazy_tensor")
   expect_equal(materialize(tbl$x, rbind = TRUE), torch_tensor(matrix(100:98, nrow = 3, ncol = 1)))
   expect_class(tbl$y, "numeric")
@@ -136,8 +136,12 @@ test_that("caching works", {
     observed_n = ds$counter - counter_prev
     expect_equal(observed_n, n)
 
-    expect_equal(materialize(tbl$x, rbind = TRUE), ds$x[rows])
-    expect_equal(tbl$y, as.integer(ds$y[rows]))
+    if ("x" %in% cols) {
+      expect_equal(materialize(tbl$x, rbind = TRUE), ds$x[rows])
+    }
+    if ("y" %in% cols) {
+      expect_equal(tbl$y, as.integer(ds$y[rows]))
+    }
   }
   check(be, ds, 1, c("x", "y"), 1)
   # y is no in the cache, so .getitem() is not called on $data()
@@ -145,6 +149,8 @@ test_that("caching works", {
 
   # but x is not cached, so we still need to call .getitem below
   check(be, ds, 1, c("x", "y"), 1)
+  # lazy tensor causes no materialization
+  check(be, ds, 1, "x", 0)
 
   # more than one row also works
   check(be, ds, 2:1, "y", 1)
@@ -172,46 +178,72 @@ test_that("caching works", {
 })
 
 test_that("can train a regression learner", {
+  x = torch_randn(100, 1)
+  y = x + torch_randn(100, 1)
   ds = tensor_dataset(
-    x = torch_tensor(matrix(100:1, nrow = 100, ncol = 1))$float(),
-    y = torch_tensor(as.matrix(1:100, nrow = 100, ncol = 1))$float()
+    x = x,
+    y = y
   )
 
   be = as_data_backend(ds, dataset_shapes = list(x = c(NA, 1), y = c(NA, 1)),
     converter = list(y = as.numeric))
   task = as_task_regr(be, target = "y")
 
-  learner = lrn("regr.mlp", epochs = 200, batch_size = 100, jit_trace = TRUE, opt.lr = 1, seed = 1)
+  learner = lrn("regr.mlp", epochs = 10, batch_size = 100, jit_trace = TRUE, opt.lr = 1, seed = 1)
   rr = resample(task, learner, rsmp("insample"))
-  expect_true(rr$aggregate(msr("regr.rmse")) < 3)
+  expect_true(rr$aggregate(msr("regr.rmse")) < 1.5)
 })
 
 test_that("can train a binary classification learner", {
   ds = tensor_dataset(
     x = torch_tensor(matrix(100:1, nrow = 100, ncol = 1))$float(),
-    y = torch_tensor(as.matrix(1:100, nrow = 100, ncol = 1))$float()
+    y = torch_tensor(rep(0:1, each = 50))$float()$unsqueeze(2L)
   )
 
   be = as_data_backend(ds, dataset_shapes = list(x = c(NA, 1), y = c(NA, 1)),
-    converter = list(y = as.numeric))
-  task = as_task_regr(be, target = "y")
+    converter = list(y = function(x) factor(as.integer(x), levels = c(1, 0), labels = c("yes", "no"))))
+  task = as_task_classif(be, target = "y")
 
-  learner = lrn("regr.mlp", epochs = 200, batch_size = 100, jit_trace = TRUE, opt.lr = 1, seed = 1)
+  learner = lrn("classif.mlp", epochs = 10, batch_size = 100, jit_trace = TRUE, opt.lr = 10, seed = 1)
   rr = resample(task, learner, rsmp("insample"))
-  expect_true(rr$aggregate(msr("regr.rmse")) < 3)
+  expect_true(rr$aggregate(msr("classif.ce")) < 0.1)
 })
 
 test_that("can train a multiclass classification learner", {
   ds = tensor_dataset(
     x = torch_tensor(matrix(100:1, nrow = 100, ncol = 1))$float(),
-    y = torch_tensor(matrix(rep(c(0, 1), each = 50), nrow = 100, ncol = 1))$float()
+    y = torch_tensor(rep(1:4, each = 25))
   )
 
-  be = as_data_backend(ds, dataset_shapes = list(x = c(NA, 1), y = c(NA, 1)),
-    converter = list(y = function(x) factor(as.integer(x), levels = c(0, 1), labels = c("yes", "no"))))
+  be = as_data_backend(ds, dataset_shapes = list(x = c(NA, 1), y = NA),
+    converter = list(y = function(x) factor(as.integer(x), levels = 1:4, labels = c("a", "b", "c", "d"))))
   task = as_task_classif(be, target = "y")
 
-  learner = lrn("classif.mlp", epochs = 200, batch_size = 100, jit_trace = TRUE, opt.lr = 1, seed = 1)
+  learner = lrn("classif.mlp", epochs = 10, batch_size = 100, jit_trace = TRUE, opt.lr = 0.2, seed = 1,
+    neurons = 100)
   rr = resample(task, learner, rsmp("insample"))
-  expect_true(rr$aggregate(msr("regr.rmse")) < 3)
-})
\ No newline at end of file
+  # just ensures that we lear something
+  expect_true(rr$aggregate(msr("classif.ce")) < 0.6)
+})
+
+test_that("check_lazy_tensors_backend works", {
+  ds = tensor_dataset(
+    x = torch_tensor(matrix(100:1, nrow = 100, ncol = 1))$float(),
+    y = torch_tensor(as.matrix(1:100, nrow = 100, ncol = 1))$float()
+  )
+
+  be = as_data_backend(ds, dataset_shapes = list(x = c(NA, 1), y = c(NA, 1)),
+    converter = list(y = as.numeric))
+  task_orig = as_task_regr(be, target = "y")
+
+  expect_error(check_lazy_tensors_backend(task_orig$backend, c("x", "y")),
+    regexp = NA)
+
+  task1 = task_orig$clone(deep = TRUE)$cbind(data.table(y = 1:100))
+  expect_error(check_lazy_tensors_backend(task1$backend, c("x", "y")),
+    regexp = "A converter column ('y')", fixed = TRUE)
+
+  task2 = task_orig$clone(deep = TRUE)$rbind(data.table(x = as_lazy_tensor(1), y = 2, row_id = 999))
+  expect_error(check_lazy_tensors_backend(task2$backend, c("x", "y")),
+    regexp = "A converter column ('y')", fixed = TRUE)
+})

From 61dc13b883829e0d23dc4f9f23e127938cd9f5c5 Mon Sep 17 00:00:00 2001
From: Sebastian Fischer <sebf.fischer@gmail.com>
Date: Thu, 24 Apr 2025 06:30:41 +0200
Subject: [PATCH 6/6] ...

---
 R/DataBackendLazyTensors.R                   |  93 ++++++++++++++---
 R/materialize.R                              |   4 +-
 TODO.md                                      |   3 +-
 man/DataBackendLazyTensors.Rd                |   3 +-
 tests/testthat/test_DataBackendLazyTensors.R | 104 +++++++++++++++++--
 5 files changed, 181 insertions(+), 26 deletions(-)

diff --git a/R/DataBackendLazyTensors.R b/R/DataBackendLazyTensors.R
index 2680634cc..2328a86b4 100644
--- a/R/DataBackendLazyTensors.R
+++ b/R/DataBackendLazyTensors.R
@@ -36,6 +36,7 @@ DataBackendLazyTensors = R6Class("DataBackendLazyTensors",
   cloneable = FALSE,
   inherit = DataBackendDataTable,
   public = list(
+    chunk_size = NULL,
     #' @description
     #' Create a new instance of this [R6][R6::R6Class] class.
     #' @param data (`data.table`)\cr
@@ -48,10 +49,12 @@ DataBackendLazyTensors = R6Class("DataBackendLazyTensors",
     #' @param cache (`character()`)\cr
     #'   Names of the columns that should be cached.
     #'   Per default, all columns that are converted are cached.
-    initialize = function(data, primary_key, converter, cache = names(converter)) {
+    #' @param chunk_size (`integer(1)`)\cr Number of rows that are materialized at once when converting lazy tensors.
+    initialize = function(data, primary_key, converter, cache = names(converter), chunk_size = 100) {
       private$.converter = assert_list(converter, types = "function", any.missing = FALSE)
       assert_subset(names(converter), colnames(data))
       private$.cached_cols = assert_subset(cache, names(converter))
+      self$chunk_size = assert_int(chunk_size, lower = 1L)
       walk(names(private$.converter), function(nm) {
         if (!inherits(data[[nm]], "lazy_tensor")) {
           stopf("Column '%s' is not a lazy tensor.", nm)
@@ -69,18 +72,25 @@ DataBackendLazyTensors = R6Class("DataBackendLazyTensors",
         # no caching, no materialization as this is called in the training loop
         return(super$data(rows, cols))
       }
-      if (all(cols %in% names(private$.data_cache))) {
-        cache_hit = private$.data_cache[list(rows), cols, on = self$primary_key, with = FALSE]
+      if (all(intersect(cols, private$.cached_cols) %in% names(private$.data_cache))) {
+        expensive_cols = intersect(cols, private$.cached_cols)
+        other_cols = setdiff(cols, expensive_cols)
+        cache_hit = private$.data_cache[list(rows), expensive_cols, on = self$primary_key, with = FALSE]
         complete = complete.cases(cache_hit)
         cache_hit = cache_hit[complete]
         if (nrow(cache_hit) == length(rows)) {
-          return(cache_hit)
+          tbl = cbind(cache_hit, super$data(rows, other_cols))
+          setcolorder(tbl, cols)
+          return(tbl)
         }
-        combined = rbindlist(list(cache_hit, private$.load_and_cache(rows[!complete], cols)))
+        combined = rbindlist(list(cache_hit, private$.load_and_cache(rows[!complete], expensive_cols)))
         reorder = vector("integer", nrow(combined))
         reorder[complete] = seq_len(nrow(cache_hit))
         reorder[!complete] = nrow(cache_hit) + seq_len(nrow(combined) - nrow(cache_hit))
-        return(combined[reorder])
+
+        tbl = cbind(combined[reorder], super$data(rows, other_cols))
+        setcolorder(tbl, cols)
+        return(tbl)
       }
 
       private$.load_and_cache(rows, cols)
@@ -109,7 +119,17 @@ DataBackendLazyTensors = R6Class("DataBackendLazyTensors",
       tbl = super$data(rows, cols)
       cols_to_convert = intersect(names(private$.converter), names(tbl))
       tbl_to_mat = tbl[, cols_to_convert, with = FALSE]
-      tbl_mat = materialize(tbl_to_mat, rbind = TRUE)
+      # materialize in chunks of at most self$chunk_size rows, then concatenate the chunks per column
+      n = nrow(tbl_to_mat)
+      chunks = split(seq_len(n), rep(seq_len(ceiling(n / self$chunk_size)), each = self$chunk_size, length.out = n))
+      tbl_mat = if (n == 0) {
+        # no rows: one empty tensor per column to convert (a single one cannot be named for several columns)
+        set_names(rep(list(torch_empty(0)), ncol(tbl_to_mat)), names(tbl_to_mat))
+      } else {
+        set_names(lapply(transpose_list(lapply(chunks, function(chunk) {
+          materialize(tbl_to_mat[chunk, ], rbind = TRUE)
+        })), torch_cat, dim = 1L), names(tbl_to_mat))
+      }
 
       for (nm in cols_to_convert) {
         converted = private$.converter[[nm]](tbl_mat[[nm]])
@@ -135,13 +155,62 @@ as_data_backend.dataset = function(x, dataset_shapes, ...) {
 }
 
 #' @export
-as_task_classif.dataset = function(x, dataset_shapes, target, ...) {
-  # TODO
+as_task_classif.dataset = function(x, target, levels, converter = NULL, dataset_shapes = NULL, chunk_size = 100, cache = names(converter), ...) {
+  # Builds a data backend from the torch dataset via as_data_backend() and converts it to a classification task.
+  if (length(x) < 2) {
+    stopf("Dataset must have at least 2 rows.")
+  }
+  if (is.null(converter)) {
+    # Inspect a batch of two rows to validate the target before deriving the default converter.
+    y = dataloader(x, batch_size = 2)$.iter()$.next()[[target]]
+    if (length(levels) == 2) {
+      if (y$dtype != torch_float()) {
+        stopf("Target must be a float tensor, but has dtype %s", y$dtype)
+      }
+      if (!test_equal(y$shape, c(2L, 1L))) {
+        stopf("Target must be a float tensor of shape (batch_size, 1), but has shape (batch_size, %s)",
+          paste(y$shape[-1L], collapse = ", "))
+      }
+      converter = set_names(list(crate(function(x) factor(as.integer(x), levels = 0:1, labels = levels), levels)), target)
+    } else {
+      if (y$dtype != torch_int()) {
+        stopf("Target must be an integer tensor, but has dtype %s", y$dtype)
+      }
+      if (!test_equal(y$shape, 2L)) {
+        stopf("Target must be an integer tensor of shape (batch_size), but has shape (batch_size, %s)",
+          paste(y$shape[-1L], collapse = ", "))
+      }
+      # Fix the codes to 1..K (assumes 1-based class indices): a subset of rows need not contain every class.
+      converter = set_names(list(crate(function(x) factor(as.integer(x), levels = seq_along(levels), labels = levels), levels)), target)
+    }
+  }
+  dataset_shapes = get_or_check_dataset_shapes(x, dataset_shapes)
+  be = as_data_backend(x, dataset_shapes, converter = converter, cache = cache, chunk_size = chunk_size)
+  as_task_classif(be, target = target, ...)
 }
 
 #' @export
-as_task_regr.dataset = function(x, dataset_shapes, target, converter, ...) {
-  # TODO
+as_task_regr.dataset = function(x, target, converter = NULL, dataset_shapes = NULL, chunk_size = 100, cache = names(converter), ...) {
+  if (length(x) < 2) {
+    stopf("Dataset must have at least 2 rows.")
+  }
+  # Validate the target on a batch of two rows: it must be a float tensor of shape (2, 1).
+  y = dataloader(x, batch_size = 2)$.iter()$.next()[[target]]
+  if (y$dtype != torch_float()) {
+    stopf("Target must be a float tensor, but has dtype %s", y$dtype)
+  }
+  if (!test_equal(y$shape, c(2L, 1L))) {
+    stopf("Target must be a float tensor of shape (batch_size, 1), but has shape (batch_size, %s)",
+      paste(y$shape[-1L], collapse = ", "))
+  }
+
+  # Without a user-supplied converter the target is converted with as.numeric.
+  if (is.null(converter)) {
+    converter = set_names(list(as.numeric), target)
+  }
+  dataset_shapes = get_or_check_dataset_shapes(x, dataset_shapes)
+  be = as_data_backend(x, dataset_shapes, converter = converter, cache = cache, chunk_size = chunk_size)
+  as_task_regr(be, target = target, ...)
 }
 
 #' @export
@@ -177,4 +246,4 @@ check_lazy_tensors_backend = function(be, candidates, visited = character()) {
     }
     union(visited, intersect(candidates, be$colnames))
   }
-}
\ No newline at end of file
+}
diff --git a/R/materialize.R b/R/materialize.R
index a2294b47a..ee113830d 100644
--- a/R/materialize.R
+++ b/R/materialize.R
@@ -106,7 +106,7 @@ materialize.lazy_tensor = function(x, device = "cpu", rbind = FALSE, ...) { # no
   materialize_internal(x = x, device = device, cache = NULL, rbind = rbind)
 }
 
-get_input = function(ds, ids, varying_shapes, rbind) {
+get_input = function(ds, ids, varying_shapes) {
   if (is.null(ds$.getbatch)) { # .getindex is never NULL but a function that errs if it was not defined
     x = map(ids, function(id) map(ds$.getitem(id), function(x) x$unsqueeze(1)))
     if (varying_shapes) {
@@ -201,7 +201,7 @@ materialize_internal = function(x, device = "cpu", cache = NULL, rbind) {
   }
 
   if (!do_caching || !input_hit) {
-    input = get_input(ds, ids, varying_shapes, rbind)
+    input = get_input(ds, ids, varying_shapes)
   }
 
   if (do_caching && !input_hit) {
diff --git a/TODO.md b/TODO.md
index ac96fb010..62d953809 100644
--- a/TODO.md
+++ b/TODO.md
@@ -21,4 +21,5 @@
   ```
 * Add checks on usage of `DataBackendLazyTensors` in `task_dataset`
 * Add optimization that truths values don't have to be loaded twice during resampling, i.e.
-  once for making the predictions and once for retrieving the truth column.
\ No newline at end of file
+  once for making the predictions and once for retrieving the truth column.
+* Only allow caching converter columns in `DataBackendLazyTensors` (probably just remove the `cache` parameter)
\ No newline at end of file
diff --git a/man/DataBackendLazyTensors.Rd b/man/DataBackendLazyTensors.Rd
index 9ac930281..42880dec7 100644
--- a/man/DataBackendLazyTensors.Rd
+++ b/man/DataBackendLazyTensors.Rd
@@ -68,7 +68,8 @@ Create a new instance of this \link[R6:R6Class]{R6} class.
   data,
   primary_key,
   converter,
-  cache = names(converter)
+  cache = names(converter),
+  chunk_size = 100
 )}\if{html}{\out{</div>}}
 }
 
diff --git a/tests/testthat/test_DataBackendLazyTensors.R b/tests/testthat/test_DataBackendLazyTensors.R
index d1272ed5a..06fb91574 100644
--- a/tests/testthat/test_DataBackendLazyTensors.R
+++ b/tests/testthat/test_DataBackendLazyTensors.R
@@ -1,7 +1,3 @@
-test_that("correct input checks", {
-
-})
-
 test_that("main API works", {
   # regression target
   ds = tensor_dataset(
@@ -102,11 +98,71 @@ test_that("classif target works", {
 })
 
 test_that("errors when weird preprocessing", {
-  # test following example pipeops:
-  # - target trafo
-  # - fix factors
-  # - smote
+})
+
+test_that("chunking works", {
+  ds = dataset(
+    initialize = function() {
+      self$x = torch_tensor(matrix(100:1, nrow = 100, ncol = 1))
+      self$y = torch_tensor(matrix(1:100, nrow = 100, ncol = 1))
+      self$counter = 0
+    },
+    .getbatch = function(i) {
+      # counts batch retrievals; the expectations below assume one retrieval per chunk
+      self$counter = self$counter + 1
+      list(x = self$x[i, drop = FALSE], y = self$y[i, drop = FALSE])
+    },
+    .length = function() nrow(self$x)
+  )()
 
+  be = as_data_backend(ds, dataset_shapes = list(x = c(NA, 1), y = c(NA, 1)), chunk_size = 3,
+    converter = list(y = as.numeric))
+  # with chunk_size = 3, three rows fit into one chunk and seven rows need ceiling(7 / 3) = 3 chunks
+  counter_prev = ds$counter
+  be$data(1:3, c("x", "y"))
+  expect_equal(ds$counter, counter_prev + 1)
+  counter_prev = ds$counter
+  res = be$data(4:10, c("x", "y"))
+  expect_equal(ds$counter, counter_prev + 3)
+  expect_equal(res$y, 4:10)
+})
+
+test_that("can retrieve 0 rows", {
+  features = torch_tensor(matrix(100:1, nrow = 100, ncol = 1))
+  targets = torch_tensor(matrix(1:100, nrow = 100, ncol = 1))
+  # only y has a converter, so x is expected to stay a lazy tensor
+  backend = as_data_backend(tensor_dataset(x = features, y = targets),
+    dataset_shapes = list(x = c(NA, 1), y = c(NA, 1)), converter = list(y = as.numeric))
+
+  empty = backend$data(integer(0), c("x", "y", "row_id"))
+  expect_data_table(empty, nrows = 0, ncols = 3)
+  expect_equal(empty$row_id, integer(0))
+  expect_class(empty$y, "numeric")
+  expect_class(empty$x, "lazy_tensor")
+})
+
+test_that("task converters work", {
+  # regression target
+  ds = tensor_dataset(
+    x = torch_tensor(matrix(100:1, nrow = 100, ncol = 1))$float(),
+    y = torch_tensor(matrix(1:100, nrow = 100, ncol = 1))$float()
+  )
+  task = as_task_regr(ds, target = "y", converter = list(y = as.numeric))
+  expect_data_table(task$data(integer(0)), nrows = 0L, ncols = 2L)
+  expect_equal(task$head(2)$y, 1:2)
+  expect_equal(task$feature_names, "x")
+  expect_equal(task$target_names, "y")
+  expect_task(task)
+
+  # binary classification
+  # the float codes 0 / 1 are expected to map to the first / second entry of `levels`
+  ds = tensor_dataset(
+    x = torch_tensor(matrix(100:1, nrow = 100, ncol = 1))$float(),
+    y = torch_tensor(rep(0:1, times = 50))$float()$unsqueeze(2L)
+  )
+  task = as_task_classif(ds, target = "y", levels = c("yes", "no"))
+  expect_task(task)
+  expect_equal(task$head()$y, factor(rep(c("yes", "no"), times = 3), levels = c("yes", "no")))
 })
 
 test_that("caching works", {
@@ -147,8 +203,8 @@ test_that("caching works", {
   # y is no in the cache, so .getitem() is not called on $data()
   check(be, ds, 1, "y", 0)
 
-  # but x is not cached, so we still need to call .getitem below
-  check(be, ds, 1, c("x", "y"), 1)
+  # everything is in the cache
+  check(be, ds, 1, c("x", "y"), 0)
   # lazy tensor causes no materialization
   check(be, ds, 1, "x", 0)
 
@@ -247,3 +303,31 @@ test_that("check_lazy_tensors_backend works", {
   expect_error(check_lazy_tensors_backend(task2$backend, c("x", "y")),
     regexp = "A converter column ('y')", fixed = TRUE)
 })
+
+
+test_that("repeated task$head() is served from the cache", {
+  ds = dataset(
+    initialize = function() {
+      self$x = torch_randn(100, 3)
+      self$y = torch_randn(100, 1)
+      self$counter = 0L
+    },
+    .getbatch = function(i) {
+      # count every batch retrieval so the test can detect cache misses
+      self$counter = self$counter + 1L
+      list(x = self$x[i, drop = FALSE], y = self$y[i, drop = FALSE])
+    },
+    .length = function() 100
+  )()
+
+  task = as_task_regr(ds, target = "y")
+
+  # the first call may have to materialize the target column
+  first = task$head()
+  expect_data_table(first, nrows = 6L)
+  # afterwards the converted target is expected to be cached, so the dataset must not be touched
+  counter = ds$counter
+  second = task$head()
+  expect_equal(ds$counter, counter)
+  expect_equal(second$y, first$y)
+})