Skip to content

Commit 9f68d08

Browse files
authored
GH-762: Implement VectorAppender for RunEndEncodedVector (#884)
## What's Changed Implement VectorAppender for RunEndEncodedVector Closes #762.
1 parent b8a23dd commit 9f68d08

File tree

2 files changed

+166
-0
lines changed

2 files changed

+166
-0
lines changed

vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,17 @@
2424
import org.apache.arrow.memory.util.MemoryUtil;
2525
import org.apache.arrow.util.Preconditions;
2626
import org.apache.arrow.vector.BaseFixedWidthVector;
27+
import org.apache.arrow.vector.BaseIntVector;
2728
import org.apache.arrow.vector.BaseLargeVariableWidthVector;
2829
import org.apache.arrow.vector.BaseVariableWidthVector;
2930
import org.apache.arrow.vector.BaseVariableWidthViewVector;
31+
import org.apache.arrow.vector.BigIntVector;
3032
import org.apache.arrow.vector.BitVector;
3133
import org.apache.arrow.vector.BitVectorHelper;
3234
import org.apache.arrow.vector.ExtensionTypeVector;
35+
import org.apache.arrow.vector.IntVector;
3336
import org.apache.arrow.vector.NullVector;
37+
import org.apache.arrow.vector.SmallIntVector;
3438
import org.apache.arrow.vector.ValueVector;
3539
import org.apache.arrow.vector.compare.TypeEqualsVisitor;
3640
import org.apache.arrow.vector.compare.VectorVisitor;
@@ -39,6 +43,7 @@
3943
import org.apache.arrow.vector.complex.LargeListVector;
4044
import org.apache.arrow.vector.complex.ListVector;
4145
import org.apache.arrow.vector.complex.NonNullableStructVector;
46+
import org.apache.arrow.vector.complex.RunEndEncodedVector;
4247
import org.apache.arrow.vector.complex.UnionVector;
4348

4449
/** Utility to append two vectors together. */
@@ -698,4 +703,98 @@ public ValueVector visit(ExtensionTypeVector<?> deltaVector, Void value) {
698703
deltaVector.getUnderlyingVector().accept(underlyingAppender, null);
699704
return targetVector;
700705
}
706+
707+
@Override
708+
public ValueVector visit(RunEndEncodedVector deltaVector, Void value) {
709+
Preconditions.checkArgument(
710+
typeVisitor.equals(deltaVector),
711+
"The deltaVector to append must have the same type as the targetVector");
712+
713+
if (deltaVector.getValueCount() == 0) {
714+
return targetVector; // optimization, nothing to append, return
715+
}
716+
717+
RunEndEncodedVector targetEncodedVector = (RunEndEncodedVector) targetVector;
718+
719+
final int targetLogicalValueCount = targetEncodedVector.getValueCount();
720+
721+
// Append the values vector first.
722+
VectorAppender valueAppender = new VectorAppender(targetEncodedVector.getValuesVector());
723+
deltaVector.getValuesVector().accept(valueAppender, null);
724+
725+
// Then append the run-ends vector.
726+
BaseIntVector targetRunEndsVector = (BaseIntVector) targetEncodedVector.getRunEndsVector();
727+
BaseIntVector deltaRunEndsVector = (BaseIntVector) deltaVector.getRunEndsVector();
728+
appendRunEndsVector(targetRunEndsVector, deltaRunEndsVector, targetLogicalValueCount);
729+
730+
targetEncodedVector.setValueCount(targetLogicalValueCount + deltaVector.getValueCount());
731+
return targetVector;
732+
}
733+
734+
private void appendRunEndsVector(
735+
BaseIntVector targetRunEndsVector,
736+
BaseIntVector deltaRunEndsVector,
737+
int targetLogicalValueCount) {
738+
int targetPhysicalValueCount = targetRunEndsVector.getValueCount();
739+
int newPhysicalValueCount = targetPhysicalValueCount + deltaRunEndsVector.getValueCount();
740+
741+
// make sure there is enough capacity
742+
while (targetVector.getValueCapacity() < newPhysicalValueCount) {
743+
targetVector.reAlloc();
744+
}
745+
746+
// append validity buffer
747+
BitVectorHelper.concatBits(
748+
targetRunEndsVector.getValidityBuffer(),
749+
targetRunEndsVector.getValueCount(),
750+
deltaRunEndsVector.getValidityBuffer(),
751+
deltaRunEndsVector.getValueCount(),
752+
targetRunEndsVector.getValidityBuffer());
753+
754+
// shift and append data buffer
755+
shiftAndAppendRunEndsDataBuffer(
756+
targetRunEndsVector,
757+
targetPhysicalValueCount,
758+
deltaRunEndsVector.getDataBuffer(),
759+
targetLogicalValueCount,
760+
deltaRunEndsVector.getValueCount());
761+
762+
targetRunEndsVector.setValueCount(newPhysicalValueCount);
763+
}
764+
765+
private void shiftAndAppendRunEndsDataBuffer(
766+
BaseIntVector toRunEndVector,
767+
int toIndex,
768+
ArrowBuf fromRunEndBuffer,
769+
int offset,
770+
int physicalLength) {
771+
ArrowBuf toRunEndBuffer = toRunEndVector.getDataBuffer();
772+
if (toRunEndVector instanceof SmallIntVector) {
773+
byte typeWidth = SmallIntVector.TYPE_WIDTH;
774+
for (int i = 0; i < physicalLength; i++) {
775+
toRunEndBuffer.setShort(
776+
(long) (i + toIndex) * typeWidth,
777+
fromRunEndBuffer.getShort((long) (i) * typeWidth) + offset);
778+
}
779+
780+
} else if (toRunEndVector instanceof IntVector) {
781+
byte typeWidth = IntVector.TYPE_WIDTH;
782+
for (int i = 0; i < physicalLength; i++) {
783+
toRunEndBuffer.setInt(
784+
(long) (i + toIndex) * typeWidth,
785+
fromRunEndBuffer.getInt((long) (i) * typeWidth) + offset);
786+
}
787+
788+
} else if (toRunEndVector instanceof BigIntVector) {
789+
byte typeWidth = BigIntVector.TYPE_WIDTH;
790+
for (int i = 0; i < physicalLength; i++) {
791+
toRunEndBuffer.setLong(
792+
(long) (i + toIndex) * typeWidth,
793+
fromRunEndBuffer.getLong((long) (i) * typeWidth) + offset);
794+
}
795+
} else {
796+
throw new IllegalArgumentException(
797+
"Run-end vector and must be of type int with size 16, 32, or 64 bits.");
798+
}
799+
}
701800
}

vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import org.apache.arrow.vector.complex.FixedSizeListVector;
4848
import org.apache.arrow.vector.complex.LargeListVector;
4949
import org.apache.arrow.vector.complex.ListVector;
50+
import org.apache.arrow.vector.complex.RunEndEncodedVector;
5051
import org.apache.arrow.vector.complex.StructVector;
5152
import org.apache.arrow.vector.complex.UnionVector;
5253
import org.apache.arrow.vector.holders.NullableBigIntHolder;
@@ -1025,6 +1026,72 @@ public void testAppendDenseUnionVectorMismatch() {
10251026
}
10261027
}
10271028

1029+
@Test
1030+
public void testAppendRunEndEncodedVector() {
1031+
final FieldType reeFieldType = FieldType.notNullable(ArrowType.RunEndEncoded.INSTANCE);
1032+
final Field runEndsField =
1033+
new Field("runEnds", FieldType.notNullable(Types.MinorType.INT.getType()), null);
1034+
final Field valuesField = Field.nullable("values", Types.MinorType.INT.getType());
1035+
final List<Field> children = Arrays.asList(runEndsField, valuesField);
1036+
1037+
final Field targetField = new Field("target", reeFieldType, children);
1038+
final Field deltaField = new Field("delta", reeFieldType, children);
1039+
try (RunEndEncodedVector target = new RunEndEncodedVector(targetField, allocator, null);
1040+
RunEndEncodedVector delta = new RunEndEncodedVector(deltaField, allocator, null)) {
1041+
1042+
// populate target
1043+
target.allocateNew();
1044+
// data: [1, 1, 2, null, 3, 3, 3] (7 values)
1045+
// values: [1, 2, null, 3]
1046+
// runEnds: [2, 3, 4, 7]
1047+
ValueVectorDataPopulator.setVector((IntVector) target.getValuesVector(), 1, 2, null, 3);
1048+
ValueVectorDataPopulator.setVector((IntVector) target.getRunEndsVector(), 2, 3, 4, 7);
1049+
target.setValueCount(7);
1050+
1051+
// populate delta
1052+
delta.allocateNew();
1053+
// data: [3, 4, 4, 5, null, null] (6 values)
1054+
// values: [3, 4, 5, null]
1055+
// runEnds: [1, 3, 4, 6]
1056+
ValueVectorDataPopulator.setVector((IntVector) delta.getValuesVector(), 3, 4, 5, null);
1057+
ValueVectorDataPopulator.setVector((IntVector) delta.getRunEndsVector(), 1, 3, 4, 6);
1058+
delta.setValueCount(6);
1059+
1060+
VectorAppender appender = new VectorAppender(target);
1061+
delta.accept(appender, null);
1062+
1063+
assertEquals(13, target.getValueCount());
1064+
1065+
final Field expectedField = new Field("expected", reeFieldType, children);
1066+
try (RunEndEncodedVector expected = new RunEndEncodedVector(expectedField, allocator, null)) {
1067+
expected.allocateNew();
1068+
// expected data: [1, 1, 2, null, 3, 3, 3, 3, 4, 4, 5, null, null] (13 values)
1069+
// expected values: [1, 2, null, 3, 3, 4, 5, null]
1070+
// expected runEnds: [2, 3, 4, 7, 8, 10, 11, 13]
1071+
ValueVectorDataPopulator.setVector(
1072+
(IntVector) expected.getValuesVector(), 1, 2, null, 3, 3, 4, 5, null);
1073+
ValueVectorDataPopulator.setVector(
1074+
(IntVector) expected.getRunEndsVector(), 2, 3, 4, 7, 8, 10, 11, 13);
1075+
expected.setValueCount(13);
1076+
1077+
assertVectorsEqual(expected, target);
1078+
}
1079+
1080+
// Check that delta is unchanged.
1081+
final Field expectedDeltaField = new Field("expectedDelta", reeFieldType, children);
1082+
try (RunEndEncodedVector expectedDelta =
1083+
new RunEndEncodedVector(expectedDeltaField, allocator, null)) {
1084+
expectedDelta.allocateNew();
1085+
ValueVectorDataPopulator.setVector(
1086+
(IntVector) expectedDelta.getValuesVector(), 3, 4, 5, null);
1087+
ValueVectorDataPopulator.setVector(
1088+
(IntVector) expectedDelta.getRunEndsVector(), 1, 3, 4, 6);
1089+
expectedDelta.setValueCount(6);
1090+
assertVectorsEqual(expectedDelta, delta);
1091+
}
1092+
}
1093+
}
1094+
10281095
@Test
10291096
public void testAppendVectorNegative() {
10301097
final int vectorLength = 10;

0 commit comments

Comments
 (0)