From fd3c8a40d81a4fc2cf0a30e3a99d87a1bc97e93a Mon Sep 17 00:00:00 2001
From: zhixingheyi-tian
Date: Wed, 1 Dec 2021 15:02:01 +0800
Subject: [PATCH] Add complex data types validation for ORC file format in Arrow side

---
 .../dataset/file/TestFileSystemDataset.java   |  40 ++++++++++++++++++
 .../test/resources/data/struct_example.orc    | Bin 0 -> 738 bytes
 2 files changed, 40 insertions(+)
 create mode 100644 java/dataset/src/test/resources/data/struct_example.orc

diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java b/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java
index 3480a3cd8c6c..3b80c114145f 100644
--- a/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java
+++ b/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java
@@ -231,6 +231,46 @@ public void testStructTypeRead() throws Exception {
     AutoCloseables.close(vsr, allocator);
   }
 
+  /**
+   * Scans column {@code _1} of {@code data/struct_example.orc} using the ORC file format and
+   * checks the number of rows loaded and the schema reported by the factory.
+   */
+  @Test
+  public void testOrcStructTypeRead() throws Exception {
+    // try-with-resources releases the allocator and the root even when an assertion fails.
+    try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE)) {
+      FileSystemDatasetFactory factory = new FileSystemDatasetFactory(allocator,
+          NativeMemoryPool.getDefault(), OrcFileFormat.createDefault(),
+          "file://" + resourcePath("data/struct_example.orc"));
+      // Presumably: projected columns, no filter, batch size of 100 -- confirm against ScanOptions.
+      ScanOptions options = new ScanOptions(new String[] {"_1"}, Filter.EMPTY, 100);
+      Schema schema = factory.inspect();
+      NativeDataset dataset = factory.finish(schema);
+      NativeScanner nativeScanner = dataset.newScan(options);
+      // NOTE(review): factory, dataset, scanner, task and iterator are left unclosed, as before;
+      // confirm whether they own native resources that should be released here.
+      List<? extends ScanTask> scanTasks = collect(nativeScanner.scan());
+      Assert.assertEquals(1, scanTasks.size());
+      ScanTask scanTask = scanTasks.get(0);
+      ScanTask.BatchIterator itr = scanTask.execute();
+
+      try (VectorSchemaRoot vsr = VectorSchemaRoot.create(schema, allocator)) {
+        VectorLoader loader = new VectorLoader(vsr);
+        int rowCount = 0;
+        while (itr.hasNext()) {
+          // Close each batch as soon as the loader has consumed it.
+          try (ArrowRecordBatch next = itr.next()) {
+            loader.load(next);
+          }
+          rowCount += vsr.getRowCount();
+        }
+        Assert.assertEquals(50, rowCount);
+        Assert.assertEquals(1, schema.getFields().size());
+        Assert.assertEquals("_1", schema.getFields().get(0).getName());
+      }
+    }
+  }
+
   @Test
   public void testStructTypeReadWithEmptyProjector() throws Exception {
     RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);
diff --git a/java/dataset/src/test/resources/data/struct_example.orc b/java/dataset/src/test/resources/data/struct_example.orc
new file mode 100644
index 0000000000000000000000000000000000000000..4847869cc7a357402ab1d08549b0379c6958ea3f
GIT binary patch
literal 738
zcmaLUOHRT-90u^|w9s)xtwrO6g~4t#E*j?H16CR~t^`@JF{B!zMk53g4&VulM{uP(
zkHErq_!*qIWUXBB%y5zg7oDvc*F%^f5L>J~;g{QF=Ptb$dr3Rb}?SOu$LHLT8uh1IYcR>Nvo4Xa@dtbsMKCjX_d
z2G+nDSOaUCVQIcywtqY;M?^5RS2Pvtxb{T_9f-PZFQOu_O(WlSXs-kDOmQO+<(M*3
zENUBzAM^as@?80Gs(tra_ULWb{y5D33uS)v;nx;zoryPEKi^Tw0;|NHSEB=&EZZ%m
zzT4ZS63T+4#CYrlb{p<)aWR@)rP(;UydI|0aW=V4XT!<#W^^BybShKXP%X?>nU8{xyJ?{|9bwJ&y-q>umr

literal 0
HcmV?d00001