diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 46235ab22..0dada7d23 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -180,6 +180,7 @@ add_library(
   src/config.cpp
   src/cuda_event.cpp
   src/integrations/cudf/bloom_filter.cu
+  src/integrations/cudf/pack.cpp
   src/integrations/cudf/partition.cpp
   src/integrations/cudf/utils.cpp
   src/memory/buffer.cpp
diff --git a/cpp/include/rapidsmpf/integrations/cudf/pack.hpp b/cpp/include/rapidsmpf/integrations/cudf/pack.hpp
new file mode 100644
index 000000000..79786218c
--- /dev/null
+++ b/cpp/include/rapidsmpf/integrations/cudf/pack.hpp
@@ -0,0 +1,111 @@
+/**
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#pragma once
+
+#include <memory>
+
+#include <cudf/table/table_view.hpp>
+#include <rmm/cuda_stream_view.hpp>
+
+#include <rapidsmpf/memory/buffer.hpp>
+#include <rapidsmpf/memory/memory_reservation.hpp>
+#include <rapidsmpf/memory/memory_type.hpp>
+#include <rapidsmpf/memory/packed_data.hpp>
+
+namespace rapidsmpf {
+
+
+/**
+ * @brief Pack a cudf table view into a contiguous buffer using chunked packing.
+ *
+ * This function serializes the given table view into a `PackedData` object
+ * using a bounce buffer for chunked transfer. This is useful when packing to
+ * host memory to avoid allocating temporary device memory for the entire table.
+ *
+ * @param table The table view to pack.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param bounce_buffer Device buffer used as intermediate storage during chunked packing.
+ * @param pack_temp_mr Temporary memory resource used for packing.
+ * @param reservation Memory reservation to use for allocating the packed data buffer.
+ * @return A unique pointer to the packed data containing the serialized table.
+ *
+ * @throws rapidsmpf::reservation_error If the allocation size exceeds the reservation.
+ *
+ * @see cudf::chunked_pack
+ */
+[[nodiscard]] std::unique_ptr<PackedData> chunked_pack(
+    cudf::table_view const& table,
+    rmm::cuda_stream_view stream,
+    rmm::device_buffer& bounce_buffer,
+    rmm::device_async_resource_ref pack_temp_mr,
+    MemoryReservation& reservation
+);
+
+namespace detail {
+
+/**
+ * @brief Pack a cudf table view into a contiguous buffer of the specified memory type.
+ *
+ * - Device:
+ * Uses cudf::pack(). Returns a `Buffer` with a `rmm::device_buffer`.
+ *
+ * - Pinned Host:
+ * Uses cudf::pack() with a pinned mr as device mr. Returns a `Buffer` with a pinned
+ * `HostBuffer`.
+ *
+ * - Host:
+ * Uses cudf::chunked_pack() with a device bounce buffer, if available, otherwise uses a
+ * pinned bounce buffer. Returns a `Buffer` with a `HostBuffer`.
+ *
+ * This function serializes the given table view into a `PackedData` object
+ * with the data buffer residing in the memory type specified by the template parameter.
+ * The memory for the packed data is allocated using the provided reservation.
+ *
+ * @tparam Destination The destination memory type for the packed data buffer.
+ * @param table The table view to pack.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param reservation Memory reservation to use for allocating the packed data buffer.
+ *                    Must match the destination memory type.
+ * @return A unique pointer to the packed data containing the serialized table.
+ *
+ * @throws std::invalid_argument If the reservation's memory type does not match
+ * Destination.
+ * @throws rapidsmpf::reservation_error If the allocation size exceeds the reservation.
+ *
+ * @see rapidsmpf::pack
+ * @see cudf::pack
+ */
+template <MemoryType Destination>
+[[nodiscard]] std::unique_ptr<PackedData> pack(
+    cudf::table_view const& table,
+    rmm::cuda_stream_view stream,
+    MemoryReservation& reservation
+);
+
+}  // namespace detail
+
+/**
+ * @brief Pack a cudf table view into a contiguous buffer.
+ *
+ * This function serializes the given table view into a `PackedData` object
+ * with the data buffer residing in the memory type of the provided reservation.
+ * The memory for the packed data is allocated using the provided reservation.
+ *
+ * @param table The table view to pack.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param reservation Memory reservation to use for allocating the packed data buffer.
+ * @return A unique pointer to the packed data containing the serialized table.
+ *
+ * @throws rapidsmpf::reservation_error If the allocation size exceeds the reservation.
+ *
+ * @see cudf::pack
+ */
+[[nodiscard]] std::unique_ptr<PackedData> pack(
+    cudf::table_view const& table,
+    rmm::cuda_stream_view stream,
+    MemoryReservation& reservation
+);
+
+}  // namespace rapidsmpf
diff --git a/cpp/include/rapidsmpf/memory/buffer_resource.hpp b/cpp/include/rapidsmpf/memory/buffer_resource.hpp
index 3b1dc9a6b..e96ecebcb 100644
--- a/cpp/include/rapidsmpf/memory/buffer_resource.hpp
+++ b/cpp/include/rapidsmpf/memory/buffer_resource.hpp
@@ -136,6 +136,22 @@ class BufferResource {
      */
     [[nodiscard]] rmm::host_async_resource_ref pinned_mr();
 
+    /**
+     * @brief Get the RMM pinned host memory resource as a device resource reference.
+     *
+     * @return Reference to the RMM resource used for pinned host allocations.
+     */
+    [[nodiscard]] rmm::device_async_resource_ref pinned_mr_as_device();
+
+    /**
+     * @brief Check if pinned memory is available.
+     *
+     * @return true if pinned memory is available, false otherwise.
+     */
+    [[nodiscard]] bool is_pinned_memory_available() const noexcept {
+        return pinned_mr_ != PinnedMemoryResource::Disabled;
+    }
+
     /**
      * @brief Retrieves the memory availability function for a given memory type.
      *
@@ -293,15 +309,18 @@ class BufferResource {
     );
 
     /**
-     * @brief Move device buffer data into a Buffer.
+     * @brief Move rmm::device_buffer (resides in device or pinned host memory) into a
+     * Buffer.
      *
      * This operation is cheap; no copy is performed. The resulting Buffer resides in
-     * device memory.
+     * device memory or pinned host memory.
      *
      * If @p stream differs from the device buffer's current stream:
      *   - @p stream is synchronized with the device buffer's current stream, and
      *   - the device buffer's current stream is updated to @p stream.
      *
+     * @note If the @p data is empty, the resulting Buffer will be DEVICE memory type.
+     *
      * @param data Unique pointer to the device buffer.
      * @param stream CUDA stream associated with the new Buffer. Use or synchronize with
      * this stream when operating on the Buffer.
diff --git a/cpp/include/rapidsmpf/memory/host_buffer.hpp b/cpp/include/rapidsmpf/memory/host_buffer.hpp
index 3ad35d4a4..7078d1c72 100644
--- a/cpp/include/rapidsmpf/memory/host_buffer.hpp
+++ b/cpp/include/rapidsmpf/memory/host_buffer.hpp
@@ -209,7 +209,7 @@ class HostBuffer {
     static HostBuffer from_rmm_device_buffer(
         std::unique_ptr<rmm::device_buffer> pinned_host_buffer,
         rmm::cuda_stream_view stream,
-        PinnedMemoryResource& mr
+        rmm::host_async_resource_ref mr
     );
 
   private:
diff --git a/cpp/include/rapidsmpf/memory/memory_type.hpp b/cpp/include/rapidsmpf/memory/memory_type.hpp
index 621909d1b..410d97daf 100644
--- a/cpp/include/rapidsmpf/memory/memory_type.hpp
+++ b/cpp/include/rapidsmpf/memory/memory_type.hpp
@@ -10,6 +10,8 @@
 #include <ranges>
 #include <span>
 
+#include <rapidsmpf/utils/misc.hpp>
+
 namespace rapidsmpf {
 
 /// @brief Enum representing the type of memory sorted in decreasing order of preference.
@@ -66,6 +68,53 @@ static_assert(std::ranges::equal(
     leq_memory_types(static_cast<MemoryType>(-1)), std::ranges::empty_view<MemoryType>{}
 ));
 
+/**
+ * @brief Get the memory types that are device accessible.
+ *
+ * @return A span of memory types that are device accessible.
+ */
+constexpr std::span<MemoryType const> device_accessible_memory_types() noexcept {
+    return std::span{MEMORY_TYPES}.first<2>();
+}
+
+static_assert(std::ranges::equal(
+    device_accessible_memory_types(),
+    std::array{MemoryType::DEVICE, MemoryType::PINNED_HOST}
+));
+
+/**
+ * @brief Check if a memory type is device accessible.
+ *
+ * @param mem_type The memory type to check.
+ * @return true if the memory type is device accessible, false otherwise.
+ */
+constexpr bool is_device_accessible(MemoryType mem_type) noexcept {
+    return contains(device_accessible_memory_types(), mem_type);
+}
+
+/**
+ * @brief Get the memory types that are host accessible.
+ *
+ * @return A span of memory types that are host accessible.
+ */
+constexpr std::span<MemoryType const> host_accessible_memory_types() {
+    return std::span{MEMORY_TYPES}.last<2>();
+}
+
+static_assert(std::ranges::equal(
+    host_accessible_memory_types(), std::array{MemoryType::PINNED_HOST, MemoryType::HOST}
+));
+
+/**
+ * @brief Check if a memory type is host accessible.
+ *
+ * @param mem_type The memory type to check.
+ * @return true if the memory type is host accessible, false otherwise.
+ */
+constexpr bool is_host_accessible(MemoryType mem_type) noexcept {
+    return contains(host_accessible_memory_types(), mem_type);
+}
+
 /**
  * @brief Get the name of a MemoryType.
  *
diff --git a/cpp/src/integrations/cudf/pack.cpp b/cpp/src/integrations/cudf/pack.cpp
new file mode 100644
index 000000000..73cd33815
--- /dev/null
+++ b/cpp/src/integrations/cudf/pack.cpp
@@ -0,0 +1,229 @@
+/**
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <cudf/contiguous_split.hpp>
+
+#include <rapidsmpf/cuda_stream.hpp>
+#include <rapidsmpf/error.hpp>
+#include <rapidsmpf/integrations/cudf/pack.hpp>
+#include <rapidsmpf/integrations/cudf/utils.hpp>
+#include <rapidsmpf/memory/buffer.hpp>
+#include <rapidsmpf/memory/buffer_resource.hpp>
+#include <rapidsmpf/memory/packed_data.hpp>
+#include <rapidsmpf/utils/string.hpp>
+
+namespace rapidsmpf {
+
+namespace detail {
+
+template <>
+std::unique_ptr<PackedData> pack<MemoryType::DEVICE>(
+    cudf::table_view const& table,
+    rmm::cuda_stream_view stream,
+    MemoryReservation& reservation
+) {
+    RAPIDSMPF_EXPECTS(
+        reservation.mem_type() == MemoryType::DEVICE,
+        "pack<DEVICE> requires a device memory reservation",
+        std::invalid_argument
+    );
+    auto packed_columns = cudf::pack(table, stream, reservation.br()->device_mr());
+    reservation.br()->release(reservation, packed_columns.gpu_data->size());
+
+    return std::make_unique<PackedData>(
+        std::move(packed_columns.metadata),
+        reservation.br()->move(std::move(packed_columns.gpu_data), stream)
+    );
+}
+
+template <>
+std::unique_ptr<PackedData> pack<MemoryType::PINNED_HOST>(
+    cudf::table_view const& table,
+    rmm::cuda_stream_view stream,
+    MemoryReservation& reservation
+) {
+    RAPIDSMPF_EXPECTS(
+        reservation.mem_type() == MemoryType::PINNED_HOST,
+        "pack<PINNED_HOST> requires a pinned host memory reservation",
+        std::invalid_argument
+    );
+    // benchmarks shows that using cudf::pack with pinned mr is sufficient.
+
+    auto packed_columns =
+        cudf::pack(table, stream, reservation.br()->pinned_mr_as_device());
+    reservation.br()->release(reservation, packed_columns.gpu_data->size());
+
+    // packed table is now in rmm::device_buffer. We need to move it to a
+    // HostBuffer as Pinned memory allocations are only used with HostBuffers.
+
+    auto host_buffer = reservation.br()->move(std::move(packed_columns.gpu_data), stream);
+    return std::make_unique<PackedData>(
+        std::move(packed_columns.metadata), std::move(host_buffer)
+    );
+}
+
+constexpr size_t cudf_chunked_pack_min_buffer_size = 1024 * 1024;  // 1MB
+
+/**
+ * @brief Packing to host memory uses chunked packing with a bounce buffer.
+ *
+ * Algorithm:
+ * 1. Special case: empty tables return immediately with empty packed data.
+ * 2. Estimate the table size (est_size), with a minimum of 1MB.
+ * 3. Try to reserve device memory for est_size with overbooking allowed.
+ * 4. If available device memory (reservation - overbooking) >= 1MB,
+ *    use chunked packing with the device bounce buffer.
+ * 5. Otherwise, if pinned memory is available, retry with pinned memory (steps 3-4).
+ * 6. If all attempts fail, throw an error.
+ */
+template <>
+std::unique_ptr<PackedData> pack<MemoryType::HOST>(
+    cudf::table_view const& table,
+    rmm::cuda_stream_view stream,
+    MemoryReservation& reservation
+) {
+    RAPIDSMPF_EXPECTS(
+        reservation.mem_type() == MemoryType::HOST,
+        "pack<HOST> requires a host memory reservation",
+        std::invalid_argument
+    );
+
+    auto br = reservation.br();
+    // special case for empty table
+    if (table.num_rows() == 0) {
+        cudf::chunked_pack cpack(
+            table, cudf_chunked_pack_min_buffer_size, stream, br->device_mr()
+        );
+        RAPIDSMPF_EXPECTS(
+            cpack.get_total_contiguous_size() == 0,
+            "empty table should have 0 contiguous size",
+            std::runtime_error
+        );
+        return std::make_unique<PackedData>(
+            cpack.build_metadata(), br->allocate(0, stream, reservation)
+        );
+    }
+
+    // estimate the size of the table with a minimum of 1MiB
+    auto const est_size = std::max(
+        estimated_memory_usage(table, stream), cudf_chunked_pack_min_buffer_size
+    );
+
+    // max available memory for bounce buffer
+    auto max_availble = [](size_t res_sz, size_t ob) -> size_t {
+        return res_sz > ob ? res_sz - ob : 0;
+    };
+
+    // try device memory first
+    auto [dev_res, dev_ob] =
+        br->reserve(MemoryType::DEVICE, est_size, AllowOverbooking::YES);
+
+    auto dev_avail = max_availble(dev_res.size(), dev_ob);
+    if (dev_avail >= cudf_chunked_pack_min_buffer_size) {
+        rmm::device_buffer bounce_buffer(dev_avail, stream, br->device_mr());
+        dev_res.clear();
+        return chunked_pack(table, stream, bounce_buffer, br->device_mr(), reservation);
+    }
+    dev_res.clear();  // Release unused device reservation.
+
+    if (br->is_pinned_memory_available()) {  // try pinned memory as fallback
+        auto [pinned_res, pinned_ob] =
+            br->reserve(MemoryType::PINNED_HOST, est_size, AllowOverbooking::YES);
+
+        auto pinned_avail = max_availble(pinned_res.size(), pinned_ob);
+        if (pinned_avail >= cudf_chunked_pack_min_buffer_size) {
+            rmm::device_buffer bounce_buffer(
+                pinned_avail, stream, br->pinned_mr_as_device()
+            );
+            pinned_res.clear();
+            return chunked_pack(
+                table, stream, bounce_buffer, br->pinned_mr_as_device(), reservation
+            );
+        }
+    }
+
+    // all attempts failed.
+    RAPIDSMPF_FAIL(
+        "Failed to allocate bounce buffer for chunked packing to host memory.",
+        std::runtime_error
+    );
+}
+
+}  // namespace detail
+
+std::unique_ptr<PackedData> pack(
+    cudf::table_view const& table,
+    rmm::cuda_stream_view stream,
+    MemoryReservation& reservation
+) {
+    switch (reservation.mem_type()) {
+    case MemoryType::DEVICE:
+        return detail::pack<MemoryType::DEVICE>(table, stream, reservation);
+    case MemoryType::PINNED_HOST:
+        return detail::pack<MemoryType::PINNED_HOST>(table, stream, reservation);
+    case MemoryType::HOST:
+        return detail::pack<MemoryType::HOST>(table, stream, reservation);
+    default:
+        RAPIDSMPF_FAIL("unknown memory type");
+    }
+}
+
+std::unique_ptr<PackedData> chunked_pack(
+    cudf::table_view const& table,
+    rmm::cuda_stream_view stream,
+    rmm::device_buffer& bounce_buffer,
+    rmm::device_async_resource_ref pack_temp_mr,
+    MemoryReservation& reservation
+) {
+    auto br = reservation.br();
+
+    cudf::chunked_pack cpack(table, bounce_buffer.size(), stream, pack_temp_mr);
+    const auto packed_size = cpack.get_total_contiguous_size();
+
+    // Handle the case where packing allocates slightly more than the
+    // input size. This can occur because cudf uses aligned allocations,
+    // which may exceed the requested size. To accommodate this, we
+    // allow some wiggle room.
+    if (packed_size > reservation.size()) {
+        auto const wiggle_room = 1024 * static_cast<std::size_t>(table.num_columns());
+        if (packed_size <= reservation.size() + wiggle_room) {
+            reservation =
+                br->reserve(reservation.mem_type(), packed_size, AllowOverbooking::YES)
+                    .first;
+        }
+    }
+
+    auto dest_buf = br->allocate(packed_size, stream, reservation);
+
+    if (packed_size > 0) {
+        dest_buf->write_access([&](std::byte* dest_ptr,
+                                   rmm::cuda_stream_view dest_buf_stream) {
+            // join the destination stream with the bounce buffer stream so that the
+            // copies can be queued
+            cuda_stream_join(dest_buf_stream, bounce_buffer.stream());
+            bounce_buffer.set_stream(dest_buf_stream);
+
+            std::size_t off = 0;
+            cudf::device_span<uint8_t> bounce_buf_span(
+                static_cast<std::uint8_t*>(bounce_buffer.data()), bounce_buffer.size()
+            );
+            while (cpack.has_next()) {
+                auto const bytes_copied = cpack.next(bounce_buf_span);
+                RAPIDSMPF_CUDA_TRY(cudaMemcpyAsync(
+                    dest_ptr + off,
+                    bounce_buf_span.data(),
+                    bytes_copied,
+                    cudaMemcpyDefault,
+                    dest_buf_stream
+                ));
+                off += bytes_copied;
+            }
+        });
+    }
+
+    return std::make_unique<PackedData>(cpack.build_metadata(), std::move(dest_buf));
+}
+
+}  // namespace rapidsmpf
diff --git a/cpp/src/memory/buffer_resource.cpp b/cpp/src/memory/buffer_resource.cpp
index fe1f020d0..dafedea86 100644
--- a/cpp/src/memory/buffer_resource.cpp
+++ b/cpp/src/memory/buffer_resource.cpp
@@ -7,6 +7,8 @@
 #include <stdexcept>
 #include <utility>
 
+#include <cuda/memory>
+
 #include <rapidsmpf/cuda_stream.hpp>
 #include <rapidsmpf/error.hpp>
 #include <rapidsmpf/memory/buffer_resource.hpp>
@@ -80,6 +82,13 @@ rmm::host_async_resource_ref BufferResource::pinned_mr() {
     return *pinned_mr_;
 }
 
+rmm::device_async_resource_ref BufferResource::pinned_mr_as_device() {
+    RAPIDSMPF_EXPECTS(
+        pinned_mr_, "no pinned memory resource is available", std::invalid_argument
+    );
+    return *pinned_mr_;
+}
+
 std::pair<MemoryReservation, std::size_t> BufferResource::reserve(
     MemoryType mem_type, std::size_t size, AllowOverbooking allow_overbooking
 ) {
@@ -185,7 +194,18 @@ std::unique_ptr<Buffer> BufferResource::move(
         cuda_stream_join(stream, upstream);
         data->set_stream(stream);
     }
-    return std::unique_ptr<Buffer>(new Buffer(std::move(data), MemoryType::DEVICE));
+    // data() will be host accessible if data.data() is in pinned host memory
+    if (cuda::is_host_accessible(data->data())) {
+        return std::unique_ptr<Buffer>(new Buffer(
+            std::make_unique<HostBuffer>(
+                HostBuffer::from_rmm_device_buffer(std::move(data), stream, pinned_mr())
+            ),
+            stream,
+            MemoryType::PINNED_HOST
+        ));
+    } else {  // if data is in device memory OR empty
+        return std::unique_ptr<Buffer>(new Buffer(std::move(data), MemoryType::DEVICE));
+    }
 }
 
 std::unique_ptr<Buffer> BufferResource::move(
diff --git a/cpp/src/memory/host_buffer.cpp b/cpp/src/memory/host_buffer.cpp
index 4c2594257..0bda5769d 100644
--- a/cpp/src/memory/host_buffer.cpp
+++ b/cpp/src/memory/host_buffer.cpp
@@ -141,7 +141,7 @@ HostBuffer HostBuffer::from_owned_vector(
 HostBuffer HostBuffer::from_rmm_device_buffer(
     std::unique_ptr<rmm::device_buffer> pinned_host_buffer,
     rmm::cuda_stream_view stream,
-    PinnedMemoryResource& mr
+    rmm::host_async_resource_ref mr
 ) {
     RAPIDSMPF_EXPECTS(
         pinned_host_buffer != nullptr,
diff --git a/cpp/src/streaming/cudf/table_chunk.cpp b/cpp/src/streaming/cudf/table_chunk.cpp
index 3d62f09f0..e0c0c4acf 100644
--- a/cpp/src/streaming/cudf/table_chunk.cpp
+++ b/cpp/src/streaming/cudf/table_chunk.cpp
@@ -7,6 +7,7 @@
 
 #include <cudf/contiguous_split.hpp>
 
+#include <rapidsmpf/integrations/cudf/pack.hpp>
 #include <rapidsmpf/integrations/cudf/utils.hpp>
 #include <rapidsmpf/memory/buffer.hpp>
 #include <rapidsmpf/streaming/cudf/table_chunk.hpp>
@@ -176,41 +177,24 @@ TableChunk TableChunk::copy(MemoryReservation& reservation) const {
                 br->release(reservation, data_alloc_size(MemoryType::DEVICE));
                 return TableChunk(std::move(table), stream());
             }
-        case MemoryType::HOST:
         case MemoryType::PINNED_HOST:
-            // Case 2.
+            // if data is not in a packed format (cudf table, non-owning table_view,
+            // etc), use packing; otherwise use buffer_copy below.
             if (packed_data_ == nullptr) {
-                // We use libcudf's pack() to serialize `table_view()` into a
-                // packed_columns and then we move the packed_columns' gpu_data to a
-                // new host buffer.
-                // TODO: use `cudf::chunked_pack()` with a bounce buffer. Currently,
-                // `cudf::pack()` allocates device memory we haven't reserved.
-                auto packed_columns = cudf::pack(table_view(), stream(), br->device_mr());
-                auto packed_data = std::make_unique<PackedData>(
-                    std::move(packed_columns.metadata),
-                    br->move(std::move(packed_columns.gpu_data), stream())
+                return detail::pack<MemoryType::PINNED_HOST>(
+                    table_view(), stream(), reservation
+                );
+            }
+            break;  // use buffer_copy below.
+        case MemoryType::HOST:
+            // if data is not in a packed format (cudf table, non-owning table_view,
+            // etc), use packing; otherwise use buffer_copy below.
+            if (packed_data_ == nullptr) {
+                return detail::pack<MemoryType::HOST>(
+                    table_view(), stream(), reservation
                 );
-
-                // Handle the case where `cudf::pack` allocates slightly more than the
-                // input size. This can occur because cudf uses aligned allocations,
-                // which may exceed the requested size. To accommodate this, we
-                // allow some wiggle room.
-                if (packed_data->data->size > reservation.size()) {
-                    auto const wiggle_room =
-                        1024 * static_cast<std::size_t>(table_view().num_columns());
-                    if (packed_data->data->size <= reservation.size() + wiggle_room) {
-                        reservation = br->reserve(
-                                            MemoryType::HOST,
-                                            packed_data->data->size,
-                                            AllowOverbooking::YES
-                        )
-                                          .first;
-                    }
-                }
-                packed_data->data = br->move(std::move(packed_data->data), reservation);
-                return TableChunk(std::move(packed_data));
             }
-            break;
+            break;  // use buffer_copy below.
         default:
             RAPIDSMPF_FAIL("MemoryType: unknown");
         }
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 54810cf46..1a35a291a 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -79,6 +79,7 @@ target_sources(
           test_communicator.cpp
           test_config.cpp
           test_cuda_stream.cpp
+          test_cudf_pack.cpp
           test_cudf_utils.cpp
           test_cupti_monitor.cpp
           test_error_macros.cpp
diff --git a/cpp/tests/test_cudf_pack.cpp b/cpp/tests/test_cudf_pack.cpp
new file mode 100644
index 000000000..d719df8d1
--- /dev/null
+++ b/cpp/tests/test_cudf_pack.cpp
@@ -0,0 +1,210 @@
+/**
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <memory>
+
+#include <gtest/gtest.h>
+
+#include <cudf/contiguous_split.hpp>
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/table_utilities.hpp>
+
+#include <rapidsmpf/integrations/cudf/pack.hpp>
+#include <rapidsmpf/memory/buffer.hpp>
+#include <rapidsmpf/memory/buffer_resource.hpp>
+#include <rapidsmpf/memory/packed_data.hpp>
+
+#include "utils.hpp"
+
+using namespace rapidsmpf;
+
+namespace {
+
+/**
+ * @brief Verify that packed data can be unpacked and matches the expected table.
+ *
+ * @param expect The expected table to compare against.
+ * @param packed_data The packed data to verify.
+ * @param expected_mem_type The expected memory type of the packed data.
+ * @param br The buffer resource to use for moving data if needed.
+ * @param stream The CUDA stream to use.
+ */
+void verify_packed_data(
+    cudf::table const& expect,
+    std::unique_ptr<PackedData>& packed_data,
+    MemoryType expected_mem_type,
+    BufferResource* br,
+    rmm::cuda_stream_view stream
+) {
+    EXPECT_NE(packed_data, nullptr);
+    EXPECT_NE(packed_data->metadata, nullptr);
+    EXPECT_NE(packed_data->data, nullptr);
+
+    if (expect.num_rows() == 0) {  // Skip unpacking for empty tables
+        EXPECT_FALSE(packed_data->metadata->empty());
+        return;
+    }
+
+    EXPECT_EQ(packed_data->data->mem_type(), expected_mem_type);
+    EXPECT_FALSE(packed_data->empty());
+
+    // if the destination memory type is host, we need to move the data to device
+    if (!is_device_accessible(expected_mem_type)) {
+        auto res = br->reserve_or_fail(packed_data->data->size, MemoryType::DEVICE);
+        packed_data->data = br->move(std::move(packed_data->data), res);
+    }
+
+    auto unpacked = cudf::unpack(
+        packed_data->metadata->data(),
+        reinterpret_cast<std::uint8_t const*>(packed_data->data->data())
+    );
+
+    stream.synchronize();
+
+    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expect, unpacked);
+}
+
+}  // namespace
+
+class CudfPackTest
+    : public cudf::test::BaseFixtureWithParam<std::tuple<MemoryType, std::size_t>> {
+  protected:
+    void SetUp() override {
+        std::tie(mem_type_, num_rows_) = GetParam();
+
+        stream_ = cudf::get_default_stream();
+
+        if (mem_type_ == MemoryType::PINNED_HOST
+            && !is_pinned_memory_resources_supported())
+        {
+            GTEST_SKIP() << "Pinned memory resources are not supported on this system";
+        }
+
+        br_ = std::make_unique<BufferResource>(
+            cudf::get_current_device_resource_ref(),
+            PinnedMemoryResource::make_if_available()
+        );
+    }
+
+    static constexpr std::int64_t seed = 42;
+    MemoryType mem_type_;
+    std::size_t num_rows_;
+    rmm::cuda_stream_view stream_;
+    std::unique_ptr<BufferResource> br_;
+};
+
+INSTANTIATE_TEST_SUITE_P(
+    MemoryTypesAndRows,
+    CudfPackTest,
+    ::testing::Combine(
+        ::testing::Values(MemoryType::DEVICE, MemoryType::PINNED_HOST, MemoryType::HOST),
+        ::testing::Values(0, 1, 100, 1000000)
+    ),
+    [](const ::testing::TestParamInfo<std::tuple<MemoryType, std::size_t>>& info) {
+        auto mem_type = std::get<0>(info.param);
+        auto num_rows = std::get<1>(info.param);
+        return std::string(to_string(mem_type)) + "_" + std::to_string(num_rows);
+    }
+);
+
+TEST_P(CudfPackTest, PackAndUnpack) {
+    cudf::table expect = random_table_with_index(seed, num_rows_, 0, 1000);
+
+    auto cudf_packed = cudf::pack(expect, stream_, br_->device_mr());
+    std::size_t packed_size = cudf_packed.gpu_data->size();
+
+    auto reservation = br_->reserve_or_fail(packed_size, mem_type_);
+    auto packed_data = pack(expect.view(), stream_, reservation);
+
+    EXPECT_NO_FATAL_FAILURE(
+        verify_packed_data(expect, packed_data, mem_type_, br_.get(), stream_)
+    );
+}
+
+// test chunked pack and unpack with device bounce buffer
+TEST_P(CudfPackTest, ChunkedPackAndUnpackDevice) {
+    cudf::table expect = random_table_with_index(seed, num_rows_, 0, 1000);
+
+    auto cudf_packed = cudf::pack(expect, stream_, br_->device_mr());
+    std::size_t packed_size = cudf_packed.gpu_data->size();
+
+    auto reservation = br_->reserve_or_fail(packed_size, mem_type_);
+
+    rmm::device_async_resource_ref pack_temp_mr = br_->device_mr();
+    rmm::device_buffer bounce_buffer(1_MiB, stream_, pack_temp_mr);
+    auto packed_data =
+        chunked_pack(expect.view(), stream_, bounce_buffer, pack_temp_mr, reservation);
+
+    EXPECT_NO_FATAL_FAILURE(
+        verify_packed_data(expect, packed_data, mem_type_, br_.get(), stream_)
+    );
+}
+
+// test chunked pack and unpack with pinned bounce buffer
+TEST_P(CudfPackTest, ChunkedPackAndUnpackPinned) {
+    if (!rapidsmpf::is_pinned_memory_resources_supported()) {
+        GTEST_SKIP() << "Skipping test for non-pinned memory type";
+    }
+
+    cudf::table expect = random_table_with_index(seed, num_rows_, 0, 1000);
+
+    auto cudf_packed = cudf::pack(expect, stream_, br_->device_mr());
+    std::size_t packed_size = cudf_packed.gpu_data->size();
+
+    auto reservation = br_->reserve_or_fail(packed_size, mem_type_);
+
+    rmm::device_async_resource_ref pack_temp_mr = br_->pinned_mr_as_device();
+    rmm::device_buffer bounce_buffer(1_MiB, stream_, pack_temp_mr);
+    auto packed_data =
+        chunked_pack(expect.view(), stream_, bounce_buffer, pack_temp_mr, reservation);
+
+    EXPECT_NO_FATAL_FAILURE(
+        verify_packed_data(expect, packed_data, mem_type_, br_.get(), stream_)
+    );
+}
+
+/**
+ * @brief Test pack<HOST> with zero device memory available.
+ *
+ * This test verifies that packing to host memory works when no device memory is
+ * available for the bounce buffer, falling back to pinned memory. If pinned memory
+ * is also unavailable, the test expects an exception.
+ */
+TEST(CudfPackHostTest, PackToHostWithZeroDeviceMemory) {
+    static constexpr std::int64_t seed = 42;
+    static constexpr std::size_t num_rows = 1000000;
+
+    auto stream = cudf::get_default_stream();
+
+    // Create a buffer resource with 0 device memory available.
+    auto pinned_mr = PinnedMemoryResource::make_if_available();
+    auto br = std::make_unique<BufferResource>(
+        cudf::get_current_device_resource_ref(),
+        pinned_mr,
+        std::unordered_map<MemoryType, BufferResource::MemoryAvailable>{
+            {MemoryType::DEVICE, []() -> std::int64_t { return 0; }}
+        }
+    );
+
+    cudf::table expect = random_table_with_index(seed, num_rows, 0, 1000);
+
+    // Get packed size using cudf::pack (we need device memory for this estimation).
+    auto cudf_packed = cudf::pack(expect, stream, br->device_mr());
+    std::size_t packed_size = cudf_packed.gpu_data->size();
+
+    auto reservation = br->reserve_or_fail(packed_size, MemoryType::HOST);
+
+    if (is_pinned_memory_resources_supported()) {
+        auto packed_data = pack(expect.view(), stream, reservation);
+        EXPECT_NO_FATAL_FAILURE(
+            verify_packed_data(expect, packed_data, MemoryType::HOST, br.get(), stream)
+        );
+    } else {
+        EXPECT_THROW(
+            std::ignore = pack(expect.view(), stream, reservation), std::runtime_error
+        );
+    }
+}