From f8eab28c5dac47a917c92ed3ddfe79132be4d7ca Mon Sep 17 00:00:00 2001 From: arnavb Date: Mon, 18 Aug 2025 05:35:55 +0000 Subject: [PATCH 1/2] update --- .../column/values/plain/BinaryPlainValuesReader.java | 8 ++++++++ .../parquet/column/values/plain/PlainValuesWriter.java | 10 +++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/plain/BinaryPlainValuesReader.java b/parquet-column/src/main/java/org/apache/parquet/column/values/plain/BinaryPlainValuesReader.java index 6ce2f31a43..569d0af675 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/values/plain/BinaryPlainValuesReader.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/values/plain/BinaryPlainValuesReader.java @@ -27,6 +27,14 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + * ValuesReader for variable-length {@code BYTE_ARRAY} columns. + * + *

When the column carries the logical type {@code DECIMAL}, the bytes read + * here are the big-endian two's-complement form of the un-scaled integer. + * This reader slices the requested number of bytes without reordering them and + * returns them as a {@link org.apache.parquet.io.api.Binary}. + */ public class BinaryPlainValuesReader extends ValuesReader { private static final Logger LOG = LoggerFactory.getLogger(BinaryPlainValuesReader.class); private ByteBufferInputStream in; diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/plain/PlainValuesWriter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/plain/PlainValuesWriter.java index c7069bc092..3965286f32 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/values/plain/PlainValuesWriter.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/values/plain/PlainValuesWriter.java @@ -32,7 +32,15 @@ import org.slf4j.LoggerFactory; /** - * Plain encoding except for booleans + * Plain encoding except for booleans. + * + *

Endianness note for DECIMAL: when a DECIMAL value is stored in a + * BYTE_ARRAY column, the {@link org.apache.parquet.io.api.Binary} + * passed to this writer already contains the big-endian two's-complement bytes + * of the un-scaled integer (the same bytes produced by + * {@link java.math.BigInteger#toByteArray()}). This writer keeps those bytes + * exactly as they are and only adds the 4-byte little-endian length prefix + * required by the PLAIN encoding. Bytes are not re-ordered..

*/ public class PlainValuesWriter extends ValuesWriter { private static final Logger LOG = LoggerFactory.getLogger(PlainValuesWriter.class); From b3430d0cf5a3509472167d34b0b829d72f5cea39 Mon Sep 17 00:00:00 2001 From: arnavb Date: Mon, 18 Aug 2025 05:39:23 +0000 Subject: [PATCH 2/2] update --- .../apache/parquet/column/values/plain/PlainValuesWriter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/plain/PlainValuesWriter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/plain/PlainValuesWriter.java index 3965286f32..ec725597cf 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/values/plain/PlainValuesWriter.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/values/plain/PlainValuesWriter.java @@ -40,7 +40,7 @@ * of the un-scaled integer (the same bytes produced by * {@link java.math.BigInteger#toByteArray()}). This writer keeps those bytes * exactly as they are and only adds the 4-byte little-endian length prefix - * required by the PLAIN encoding. Bytes are not re-ordered..

+ * required by the PLAIN encoding. Bytes are not re-ordered.

*/ public class PlainValuesWriter extends ValuesWriter { private static final Logger LOG = LoggerFactory.getLogger(PlainValuesWriter.class);