From b45e3cdcbc44dcf6438a6e2d5382dcf824a0e2f3 Mon Sep 17 00:00:00 2001 From: Yuan Date: Tue, 17 Mar 2026 22:11:31 +0000 Subject: [PATCH] [VL] adding Kafka read support Signed-off-by: Yuan --- backends-velox/pom.xml | 26 +++++++++ cpp/velox/operators/reader/KafkaReader.cc | 30 ++++++++++ cpp/velox/operators/reader/KafkaReader.h | 65 +++++++++++++++++++++ cpp/velox/substrait/SubstraitToVeloxPlan.cc | 7 +++ 4 files changed, 128 insertions(+) create mode 100644 cpp/velox/operators/reader/KafkaReader.cc create mode 100644 cpp/velox/operators/reader/KafkaReader.h diff --git a/backends-velox/pom.xml b/backends-velox/pom.xml index eb5830bddb61..bf4c731903bc 100644 --- a/backends-velox/pom.xml +++ b/backends-velox/pom.xml @@ -592,5 +592,31 @@ + + kafka + + false + + + + org.apache.gluten + gluten-kafka + ${project.version} + + + org.apache.gluten + gluten-kafka + ${project.version} + test-jar + test + + + org.apache.spark + spark-sql-kafka-0-10_${scala.binary.version} + ${spark.version} + provided + + + diff --git a/cpp/velox/operators/reader/KafkaReader.cc b/cpp/velox/operators/reader/KafkaReader.cc new file mode 100644 index 000000000000..1bc4a9680b79 --- /dev/null +++ b/cpp/velox/operators/reader/KafkaReader.cc @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "KafkaReader.h" + +namespace gluten { + +// Implementation placeholder for KafkaReader +// This file will contain the actual implementation of Kafka reading logic +// including: +// - Kafka consumer initialization +// - Message polling and deserialization +// - Offset management +// - Error handling and retry logic + +} // namespace gluten diff --git a/cpp/velox/operators/reader/KafkaReader.h b/cpp/velox/operators/reader/KafkaReader.h new file mode 100644 index 000000000000..32d8b41bebcb --- /dev/null +++ b/cpp/velox/operators/reader/KafkaReader.h @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "velox/connectors/Connector.h" +#include "velox/exec/Operator.h" + +namespace gluten { + +/// Kafka reader source operator for streaming Kafka data. +/// Placeholder only: getOutput() always returns nullptr and no Kafka consumer is created here yet. +/// NOTE(review): noMoreSplits_ and hasSplit_ are never assigned in this class, so isFinished() always returns false; confirm how a pipeline using this operator is expected to terminate. +class KafkaReader : public facebook::velox::exec::SourceOperator { + public: + KafkaReader( + int32_t operatorId, + facebook::velox::exec::DriverCtx* driverCtx, + const std::shared_ptr& planNode) + : SourceOperator( + driverCtx, + planNode->outputType(), + operatorId, + planNode->id(), + "KafkaReader") {} + + facebook::velox::RowVectorPtr getOutput() override { + // TODO: Implement actual Kafka reading logic (currently a stub that returns nullptr) + // This should: + // 1. Connect to Kafka broker + // 2. Read messages from the specified topic/partition + // 3. Convert Kafka messages to RowVector format + // 4. Handle offset management + return nullptr; + } + + facebook::velox::BlockingReason isBlocked( + facebook::velox::ContinueFuture* future) override { + return facebook::velox::BlockingReason::kNotBlocked; + } + + bool isFinished() override { + return noMoreSplits_ && !hasSplit_; + } + + private: + bool noMoreSplits_ = false; + bool hasSplit_ = false; +}; + +} // namespace gluten diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc index adb7fc5f45b6..86a0f9a85979 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc +++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc @@ -1682,6 +1682,13 @@ std::string SubstraitToVeloxPlanConverter::findFuncSpec(uint64_t id) { } int32_t SubstraitToVeloxPlanConverter::getStreamIndex(const ::substrait::ReadRel& sRead) { + // Check if this is a Kafka stream + if (sRead.stream_kafka()) { + // For Kafka streams, we don't use the iterator pattern + // Return -1 to indicate this should be handled as a regular scan + return -1; + } + if (sRead.has_local_files()) { const auto& fileList = sRead.local_files().items(); if 
(fileList.size() == 0) {