From 0a3d1867da37096babd7bd09d53ebda79bf349f2 Mon Sep 17 00:00:00 2001 From: Kevin Rathbun Date: Tue, 15 Apr 2025 10:22:46 -0400 Subject: [PATCH 1/5] Fixes bug with getting disk usage of the ROOT table Code did not consider ROOT table, and was always returning 0 for the disk usage as it would scan the ROOT table if the input was the METADATA table and would scan the METADATA table otherwise. Should have been scanning ZK if the input was ROOT. Changed to use Ample instead, which handles this issue nicely. --- .../accumulo/server/util/TableDiskUsage.java | 82 ++++++++----------- 1 file changed, 36 insertions(+), 46 deletions(-) diff --git a/server/base/src/main/java/org/apache/accumulo/server/util/TableDiskUsage.java b/server/base/src/main/java/org/apache/accumulo/server/util/TableDiskUsage.java index 4284f8f8d0d..9278ba64c3f 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/util/TableDiskUsage.java +++ b/server/base/src/main/java/org/apache/accumulo/server/util/TableDiskUsage.java @@ -36,23 +36,16 @@ import org.apache.accumulo.core.client.Accumulo; import org.apache.accumulo.core.client.AccumuloClient; -import org.apache.accumulo.core.client.Scanner; import org.apache.accumulo.core.client.TableNotFoundException; import org.apache.accumulo.core.clientImpl.ClientContext; -import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.TableId; -import org.apache.accumulo.core.data.Value; -import org.apache.accumulo.core.dataImpl.KeyExtent; -import org.apache.accumulo.core.metadata.MetadataTable; -import org.apache.accumulo.core.metadata.RootTable; -import org.apache.accumulo.core.metadata.TabletFile; +import org.apache.accumulo.core.fate.zookeeper.ZooCache; +import org.apache.accumulo.core.metadata.StoredTabletFile; import org.apache.accumulo.core.metadata.schema.DataFileValue; import org.apache.accumulo.core.metadata.schema.MetadataSchema; -import org.apache.accumulo.core.security.Authorizations; import org.apache.accumulo.core.trace.TraceUtil; import org.apache.accumulo.core.util.NumUtil; import org.apache.accumulo.server.cli.ServerUtilOpts; -import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -203,47 +196,44 @@ public static Map,Long> getDiskUsage(Set tableIds, // For each table ID for (TableId tableId : tableIds) { - // if the table to compute usage is for the metadata table itself then we need to scan the - // root table, else we scan the metadata table - try (Scanner mdScanner = tableId.equals(MetadataTable.ID) - ? client.createScanner(RootTable.NAME, Authorizations.EMPTY) - : client.createScanner(MetadataTable.NAME, Authorizations.EMPTY)) { - mdScanner.fetchColumnFamily(MetadataSchema.TabletsSection.DataFileColumnFamily.NAME); - mdScanner.setRange(new KeyExtent(tableId, null, null).toMetaRange()); - - final Set files = new HashSet<>(); - - // Read each file referenced by that table - for (Map.Entry entry : mdScanner) { - final TabletFile file = - new TabletFile(new Path(entry.getKey().getColumnQualifier().toString())); - - // get the table referenced by the file which may not be the same as the current - // table we are scanning if the file is shared between multiple tables - final TableId fileTableRef = file.getTableId(); - - // if this is a ref to a different table than the one we are scanning then we need - // to make sure the table is also linked for this shared file if the table is - // part of the set of tables we are running du on so we can track shared usages - if (!fileTableRef.equals(tableId) && tableIds.contains(fileTableRef)) { - // link the table and the shared file for computing shared sizes - tdu.linkFileAndTable(fileTableRef, file.getFileName()); - } - - // link the file to the table we are scanning for - tdu.linkFileAndTable(tableId, file.getFileName()); - - // add the file size for the table if not already seen for this scan - if (files.add(file)) { - // This tracks the file size for individual files for computing shared file statistics - // later - tdu.addFileSize(file.getFileName(), - new DataFileValue(entry.getValue().get()).getSize()); + // read the metadata + try (var tabletsMetadata = + ((ClientContext) client).getAmple().readTablets().forTable(tableId).build()) { + final Set allFiles = new HashSet<>(); + + for (var tm : tabletsMetadata) { + final Map tmFiles = tm.getFilesMap(); + + for (var file : tmFiles.entrySet()) { + final var stf = file.getKey(); + final var dataFileValue = file.getValue(); + + // get the table referenced by the file which may not be the same as the current + // table we are scanning if the file is shared between multiple tables + final TableId fileTableRef = stf.getTableId(); + + // if this is a ref to a different table than the one we are scanning then we need + // to make sure the table is also linked for this shared file if the table is + // part of the set of tables we are running du on so we can track shared usages + if (!fileTableRef.equals(tableId) && tableIds.contains(fileTableRef)) { + // link the table and the shared file for computing shared sizes + tdu.linkFileAndTable(fileTableRef, stf.getFileName()); + } + + // link the file to the table we are scanning for + tdu.linkFileAndTable(tableId, stf.getFileName()); + + // add the file size for the table if not already seen for this scan + if (allFiles.add(stf)) { + // This tracks the file size for individual files for computing shared file statistics + // later + tdu.addFileSize(stf.getFileName(), dataFileValue.getSize()); + } } } // Track tables that are empty with no metadata - if (files.isEmpty()) { + if (allFiles.isEmpty()) { emptyTableIds.add(tableId); } } From 92172cfb52e87f474ede8371ce073c45aca7f779 Mon Sep 17 00:00:00 2001 From: Kevin Rathbun Date: Tue, 15 Apr 2025 10:30:02 -0400 Subject: [PATCH 2/5] formatting --- .../java/org/apache/accumulo/server/util/TableDiskUsage.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/base/src/main/java/org/apache/accumulo/server/util/TableDiskUsage.java b/server/base/src/main/java/org/apache/accumulo/server/util/TableDiskUsage.java index 9278ba64c3f..a33f7d3caa9 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/util/TableDiskUsage.java +++ b/server/base/src/main/java/org/apache/accumulo/server/util/TableDiskUsage.java @@ -39,7 +39,6 @@ import org.apache.accumulo.core.client.TableNotFoundException; import org.apache.accumulo.core.clientImpl.ClientContext; import org.apache.accumulo.core.data.TableId; -import org.apache.accumulo.core.fate.zookeeper.ZooCache; import org.apache.accumulo.core.metadata.StoredTabletFile; import org.apache.accumulo.core.metadata.schema.DataFileValue; import org.apache.accumulo.core.metadata.schema.MetadataSchema; From 02f2e270d791f38e2641b52a3f13b56a3a93b2ea Mon Sep 17 00:00:00 2001 From: Kevin Rathbun Date: Wed, 16 Apr 2025 14:17:29 -0400 Subject: [PATCH 3/5] rewrote TableDiskUsageTest --- .../server/util/TableDiskUsageTest.java | 229 ++++++++++-------- 1 file changed, 123 insertions(+), 106 deletions(-) diff --git a/server/base/src/test/java/org/apache/accumulo/server/util/TableDiskUsageTest.java b/server/base/src/test/java/org/apache/accumulo/server/util/TableDiskUsageTest.java index a275424f742..0c99d33560f 100644 --- a/server/base/src/test/java/org/apache/accumulo/server/util/TableDiskUsageTest.java +++ b/server/base/src/test/java/org/apache/accumulo/server/util/TableDiskUsageTest.java @@ -21,7 +21,9 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; +import java.util.ArrayList; import java.util.HashMap; +import java.util.Iterator; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -31,20 +33,15 @@ import java.util.stream.Collectors; import org.apache.accumulo.core.Constants; -import org.apache.accumulo.core.client.Scanner; -import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.clientImpl.ClientContext; import org.apache.accumulo.core.data.TableId; -import org.apache.accumulo.core.data.Value; -import org.apache.accumulo.core.dataImpl.KeyExtent; import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.RootTable; -import org.apache.accumulo.core.metadata.TabletFile; +import org.apache.accumulo.core.metadata.StoredTabletFile; +import org.apache.accumulo.core.metadata.schema.Ample; import org.apache.accumulo.core.metadata.schema.DataFileValue; -import org.apache.accumulo.core.metadata.schema.MetadataSchema; -import org.apache.accumulo.core.security.Authorizations; -import org.apache.accumulo.server.ServerContext; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.Text; +import org.apache.accumulo.core.metadata.schema.TabletMetadata; +import org.apache.accumulo.core.metadata.schema.TabletsMetadata; import org.easymock.EasyMock; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -75,17 +72,20 @@ public static void beforeClass() { @Test public void testSingleTableMultipleTablets() throws Exception { - final ServerContext client = EasyMock.createMock(ServerContext.class); - final Scanner scanner = EasyMock.createMock(Scanner.class); - mockScan(client, scanner, 1); + final ClientContext client = EasyMock.createMock(ClientContext.class); + EasyMock.expect(client.getTableIdToNameMap()).andReturn(tableIdToNameMap); + final TabletsMetadata mockTabletsMetadata = mockTabletsMetadata(client, tableId1); - Map tableEntries = new HashMap<>(); - appendFileMetadata(tableEntries, getTabletFile(tableId1, tabletName1, "C0001.rf"), 1024); - appendFileMetadata(tableEntries, getTabletFile(tableId1, tabletName1, "C0002.rf"), 1024); - appendFileMetadata(tableEntries, getTabletFile(tableId1, tabletName2, "C0003.rf"), 2048); - mockTableScan(scanner, tableEntries, tableId1); + List realTabletsMetadata = new ArrayList<>(); + appendFileMetadata(realTabletsMetadata, + getTabletFile(volume1, tableId1, tabletName1, "C0001.rf"), tableId1, 1024); + appendFileMetadata(realTabletsMetadata, + getTabletFile(volume1, tableId1, tabletName1, "C0002.rf"), tableId1, 1024); + appendFileMetadata(realTabletsMetadata, + getTabletFile(volume1, tableId1, tabletName2, "C0003.rf"), tableId1, 2048); + mockTabletsMetadataIter(mockTabletsMetadata, realTabletsMetadata.iterator()); - EasyMock.replay(client, scanner); + EasyMock.replay(client, mockTabletsMetadata); Map,Long> result = TableDiskUsage.getDiskUsage(tableSet(tableId1), client); @@ -97,25 +97,27 @@ public void testSingleTableMultipleTablets() throws Exception { assertTrue(firstResult.getKey().contains(getTableName(tableId1))); assertEquals(4096, firstResult.getValue()); - EasyMock.verify(client, scanner); + EasyMock.verify(client, mockTabletsMetadata); } @Test public void testMultipleVolumes() throws Exception { - final ServerContext client = EasyMock.createMock(ServerContext.class); - final Scanner scanner = EasyMock.createMock(Scanner.class); - mockScan(client, scanner, 1); + final ClientContext client = EasyMock.createMock(ClientContext.class); + EasyMock.expect(client.getTableIdToNameMap()).andReturn(tableIdToNameMap); + final TabletsMetadata mockTabletsMetadata = mockTabletsMetadata(client, tableId1); - Map tableEntries = new HashMap<>(); - appendFileMetadata(tableEntries, getTabletFile(tableId1, tabletName1, "C0001.rf"), 1024); - appendFileMetadata(tableEntries, getTabletFile(tableId1, tabletName1, "C0002.rf"), 1024); - appendFileMetadata(tableEntries, getTabletFile(volume2, tableId1, tabletName2, "C0003.rf"), - 2048); - appendFileMetadata(tableEntries, getTabletFile(volume2, tableId1, tabletName2, "C0004.rf"), - 10000); - mockTableScan(scanner, tableEntries, tableId1); + List realTabletsMetadata = new ArrayList<>(); + appendFileMetadata(realTabletsMetadata, + getTabletFile(volume1, tableId1, tabletName1, "C0001.rf"), tableId1, 1024); + appendFileMetadata(realTabletsMetadata, + getTabletFile(volume1, tableId1, tabletName1, "C0002.rf"), tableId1, 1024); + appendFileMetadata(realTabletsMetadata, + getTabletFile(volume2, tableId1, tabletName2, "C0003.rf"), tableId1, 2048); + appendFileMetadata(realTabletsMetadata, + getTabletFile(volume2, tableId1, tabletName2, "C0004.rf"), tableId1, 10000); + mockTabletsMetadataIter(mockTabletsMetadata, realTabletsMetadata.iterator()); - EasyMock.replay(client, scanner); + EasyMock.replay(client, mockTabletsMetadata); Map,Long> result = TableDiskUsage.getDiskUsage(tableSet(tableId1), client); @@ -126,24 +128,22 @@ public void testMultipleVolumes() throws Exception { assertEquals(1, firstResult.getKey().size()); assertEquals(14096, firstResult.getValue()); - EasyMock.verify(client, scanner); + EasyMock.verify(client, mockTabletsMetadata); } @Test public void testMetadataTable() throws Exception { - final ServerContext client = EasyMock.createMock(ServerContext.class); - final Scanner scanner = EasyMock.createMock(Scanner.class); - - // Expect root table instead to be scanned - EasyMock.expect(client.createScanner(RootTable.NAME, Authorizations.EMPTY)).andReturn(scanner); + final ClientContext client = EasyMock.createMock(ClientContext.class); EasyMock.expect(client.getTableIdToNameMap()).andReturn(tableIdToNameMap); + final TabletsMetadata mockTabletsMetadata = mockTabletsMetadata(client, MetadataTable.ID); - Map tableEntries = new HashMap<>(); - appendFileMetadata(tableEntries, - getTabletFile(MetadataTable.ID, MetadataTable.NAME, "C0001.rf"), 1024); - mockTableScan(scanner, tableEntries, MetadataTable.ID); + List realTabletsMetadata = new ArrayList<>(); + appendFileMetadata(realTabletsMetadata, + getTabletFile(volume1, MetadataTable.ID, MetadataTable.NAME, "C0001.rf"), MetadataTable.ID, + 1024); + mockTabletsMetadataIter(mockTabletsMetadata, realTabletsMetadata.iterator()); - EasyMock.replay(client, scanner); + EasyMock.replay(client, mockTabletsMetadata); Map,Long> result = TableDiskUsage.getDiskUsage(tableSet(MetadataTable.ID), client); @@ -154,21 +154,23 @@ public void testMetadataTable() throws Exception { result.entrySet().stream().findFirst().orElseThrow(); assertEquals(1024, firstResult.getValue()); - EasyMock.verify(client, scanner); + EasyMock.verify(client, mockTabletsMetadata); } @Test public void testDuplicateFile() throws Exception { - final ServerContext client = EasyMock.createMock(ServerContext.class); - final Scanner scanner = EasyMock.createMock(Scanner.class); - mockScan(client, scanner, 1); + final ClientContext client = EasyMock.createMock(ClientContext.class); + EasyMock.expect(client.getTableIdToNameMap()).andReturn(tableIdToNameMap); + final TabletsMetadata mockTabletsMetadata = mockTabletsMetadata(client, tableId1); - Map tableEntries = new HashMap<>(); - appendFileMetadata(tableEntries, getTabletFile(tableId1, tabletName1, "C0001.rf"), 1024); - appendFileMetadata(tableEntries, getTabletFile(tableId1, tabletName1, "C0001.rf"), 1024); - mockTableScan(scanner, tableEntries, tableId1); + List realTabletsMetadata = new ArrayList<>(); + appendFileMetadata(realTabletsMetadata, + getTabletFile(volume1, tableId1, tabletName1, "C0001.rf"), tableId1, 1024); + appendFileMetadata(realTabletsMetadata, + getTabletFile(volume1, tableId1, tabletName1, "C0001.rf"), tableId1, 1024); + mockTabletsMetadataIter(mockTabletsMetadata, realTabletsMetadata.iterator()); - EasyMock.replay(client, scanner); + EasyMock.replay(client, mockTabletsMetadata); Map,Long> result = TableDiskUsage.getDiskUsage(tableSet(tableId1), client); @@ -180,19 +182,19 @@ public void testDuplicateFile() throws Exception { assertTrue(firstResult.getKey().contains(getTableName(tableId1))); assertEquals(1024, firstResult.getValue()); - EasyMock.verify(client, scanner); + EasyMock.verify(client, mockTabletsMetadata); } @Test public void testEmptyTable() throws Exception { - final ServerContext client = EasyMock.createMock(ServerContext.class); - final Scanner scanner = EasyMock.createMock(Scanner.class); - mockScan(client, scanner, 1); + final ClientContext client = EasyMock.createMock(ClientContext.class); + EasyMock.expect(client.getTableIdToNameMap()).andReturn(tableIdToNameMap); + final TabletsMetadata mockTabletsMetadata = mockTabletsMetadata(client, tableId1); - Map tableEntries = new HashMap<>(); - mockTableScan(scanner, tableEntries, tableId1); + List realTabletsMetadata = new ArrayList<>(); + mockTabletsMetadataIter(mockTabletsMetadata, realTabletsMetadata.iterator()); - EasyMock.replay(client, scanner); + EasyMock.replay(client, mockTabletsMetadata); Map,Long> result = TableDiskUsage.getDiskUsage(tableSet(tableId1), client); @@ -203,34 +205,44 @@ public void testEmptyTable() throws Exception { assertEquals(1, firstResult.getKey().size()); assertEquals(0, firstResult.getValue()); - EasyMock.verify(client, scanner); + EasyMock.verify(client, mockTabletsMetadata); } @Test public void testMultipleTables() throws Exception { - final ServerContext client = EasyMock.createMock(ServerContext.class); - final Scanner scanner = EasyMock.createMock(Scanner.class); - mockScan(client, scanner, 3); - - Map tableEntries1 = new HashMap<>(); - appendFileMetadata(tableEntries1, getTabletFile(tableId1, tabletName1, "C0001.rf"), 1024); - appendFileMetadata(tableEntries1, getTabletFile(tableId1, tabletName1, "C0002.rf"), 4096); - mockTableScan(scanner, tableEntries1, tableId1); - - Map tableEntries2 = new HashMap<>(); - appendFileMetadata(tableEntries2, getTabletFile(tableId2, tabletName2, "C0003.rf"), 2048); - appendFileMetadata(tableEntries2, getTabletFile(tableId2, tabletName2, "C0004.rf"), 3000); - mockTableScan(scanner, tableEntries2, tableId2); + final ClientContext client = EasyMock.createMock(ClientContext.class); + EasyMock.expect(client.getTableIdToNameMap()).andReturn(tableIdToNameMap); - Map tableEntries3 = new HashMap<>(); + final TabletsMetadata mockTabletsMetadata1 = mockTabletsMetadata(client, tableId1); + List realTabletsMetadata1 = new ArrayList<>(); + appendFileMetadata(realTabletsMetadata1, + getTabletFile(volume1, tableId1, tabletName1, "C0001.rf"), tableId1, 1024); + appendFileMetadata(realTabletsMetadata1, + getTabletFile(volume1, tableId1, tabletName1, "C0002.rf"), tableId1, 4096); + mockTabletsMetadataIter(mockTabletsMetadata1, realTabletsMetadata1.iterator()); + + final TabletsMetadata mockTabletsMetadata2 = mockTabletsMetadata(client, tableId2); + List realTabletsMetadata2 = new ArrayList<>(); + appendFileMetadata(realTabletsMetadata2, + getTabletFile(volume1, tableId2, tabletName2, "C0003.rf"), tableId2, 2048); + appendFileMetadata(realTabletsMetadata2, + getTabletFile(volume1, tableId2, tabletName2, "C0004.rf"), tableId2, 3000); + mockTabletsMetadataIter(mockTabletsMetadata2, realTabletsMetadata2.iterator()); + + final TabletsMetadata mockTabletsMetadata3 = mockTabletsMetadata(client, tableId3); + List realTabletsMetadata3 = new ArrayList<>(); // shared file - appendFileMetadata(tableEntries3, getTabletFile(tableId2, tabletName2, "C0003.rf"), 2048); - appendFileMetadata(tableEntries3, getTabletFile(tableId3, tabletName3, "C0005.rf"), 84520); - appendFileMetadata(tableEntries3, getTabletFile(tableId3, tabletName3, "C0006.rf"), 3000); - appendFileMetadata(tableEntries3, getTabletFile(tableId3, tabletName4, "C0007.rf"), 98456); - mockTableScan(scanner, tableEntries3, tableId3); - - EasyMock.replay(client, scanner); + appendFileMetadata(realTabletsMetadata3, + getTabletFile(volume1, tableId2, tabletName2, "C0003.rf"), tableId2, 2048); + appendFileMetadata(realTabletsMetadata3, + getTabletFile(volume1, tableId3, tabletName3, "C0005.rf"), tableId3, 84520); + appendFileMetadata(realTabletsMetadata3, + getTabletFile(volume1, tableId3, tabletName3, "C0006.rf"), tableId3, 3000); + appendFileMetadata(realTabletsMetadata3, + getTabletFile(volume1, tableId3, tabletName4, "C0007.rf"), tableId3, 98456); + mockTabletsMetadataIter(mockTabletsMetadata3, realTabletsMetadata3.iterator()); + + EasyMock.replay(client, mockTabletsMetadata1, mockTabletsMetadata2, mockTabletsMetadata3); Map,Long> result = TableDiskUsage.getDiskUsage(tableSet(tableId1, tableId2, tableId3), client); @@ -252,7 +264,7 @@ public void testMultipleTables() throws Exception { assertEquals(2048, result.get(tableNameSet(tableId2, tableId3))); assertEquals(185976, result.get(tableNameSet(tableId3))); - EasyMock.verify(client, scanner); + EasyMock.verify(client, mockTabletsMetadata1, mockTabletsMetadata2, mockTabletsMetadata3); } private static TreeSet tableNameSet(TableId... tableIds) { @@ -276,36 +288,41 @@ private static String getTableName(TableId tableId) { return tableIdToNameMap.get(tableId); } - private static void appendFileMetadata(Map tableEntries, TabletFile file, long size) { - tableEntries.put( - new Key(new Text(file.getTableId() + "<"), - MetadataSchema.TabletsSection.DataFileColumnFamily.NAME, file.getMetaInsertText()), - new DataFileValue(size, 1).encodeAsValue()); + private static void appendFileMetadata(List realTabletsMetadata, + StoredTabletFile file, TableId id, long size) throws Exception { + Map files = Map.of(file, new DataFileValue(size, 1)); + TabletMetadata tm = EasyMock.createMock(TabletMetadata.class); + EasyMock.expect(tm.getFilesMap()).andReturn(files); + EasyMock.replay(tm); + realTabletsMetadata.add(tm); } - private static TabletFile getTabletFile(String volume, TableId tableId, String tablet, + private static StoredTabletFile getTabletFile(String volume, TableId tableId, String tablet, String fileName) { - return new TabletFile(new Path( - volume + Constants.HDFS_TABLES_DIR + "/" + tableId + "/" + tablet + "/" + fileName)); + return new StoredTabletFile( + volume + Constants.HDFS_TABLES_DIR + "/" + tableId + "/" + tablet + "/" + fileName); } - private static TabletFile getTabletFile(TableId tableId, String tablet, String fileName) { - return getTabletFile(volume1, tableId, tablet, fileName); - } - - private void mockScan(ServerContext client, Scanner scanner, int times) throws Exception { - EasyMock.expect(client.createScanner(MetadataTable.NAME, Authorizations.EMPTY)) - .andReturn(scanner).times(times); - EasyMock.expect(client.getTableIdToNameMap()).andReturn(tableIdToNameMap); + private TabletsMetadata mockTabletsMetadata(ClientContext client, TableId tableId) + throws Exception { + final Ample ample = EasyMock.createMock(Ample.class); + final TabletsMetadata.TableOptions tableOptions = + EasyMock.createMock(TabletsMetadata.TableOptions.class); + final TabletsMetadata.TableRangeOptions tableRangeOptions = + EasyMock.createMock(TabletsMetadata.TableRangeOptions.class); + final TabletsMetadata tabletsMetadata = EasyMock.createMock(TabletsMetadata.class); + EasyMock.expect(client.getAmple()).andReturn(ample); + EasyMock.expect(ample.readTablets()).andReturn(tableOptions); + EasyMock.expect(tableOptions.forTable(tableId)).andReturn(tableRangeOptions); + EasyMock.expect(tableRangeOptions.build()).andReturn(tabletsMetadata); + EasyMock.replay(ample, tableOptions, tableRangeOptions); + return tabletsMetadata; } - private void mockTableScan(Scanner scanner, Map tableEntries, TableId tableId) { - scanner.fetchColumnFamily(MetadataSchema.TabletsSection.DataFileColumnFamily.NAME); - EasyMock.expectLastCall().once(); - scanner.setRange(new KeyExtent(tableId, null, null).toMetaRange()); - EasyMock.expectLastCall().once(); - EasyMock.expect(scanner.iterator()).andReturn(tableEntries.entrySet().iterator()); - scanner.close(); - EasyMock.expectLastCall().once(); + private void mockTabletsMetadataIter(TabletsMetadata tabletsMetadata, + Iterator tableEntries) { + EasyMock.expect(tabletsMetadata.iterator()).andReturn(tableEntries); + tabletsMetadata.close(); + EasyMock.expectLastCall().andAnswer(() -> null); } } From ab6206d19fc6cb6985a38baebf902ff23a8a2c4a Mon Sep 17 00:00:00 2001 From: Kevin Rathbun Date: Thu, 17 Apr 2025 15:46:13 -0400 Subject: [PATCH 4/5] fetch FILES, avoid mocking TabletMetadata, misc test cleanup --- .../accumulo/server/util/TableDiskUsage.java | 6 +- .../server/util/TableDiskUsageTest.java | 76 +++++++++++-------- 2 files changed, 48 insertions(+), 34 deletions(-) diff --git a/server/base/src/main/java/org/apache/accumulo/server/util/TableDiskUsage.java b/server/base/src/main/java/org/apache/accumulo/server/util/TableDiskUsage.java index a33f7d3caa9..029ad0bd80f 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/util/TableDiskUsage.java +++ b/server/base/src/main/java/org/apache/accumulo/server/util/TableDiskUsage.java @@ -18,6 +18,8 @@ */ package org.apache.accumulo.server.util; +import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.FILES; + import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; @@ -196,8 +198,8 @@ public static Map,Long> getDiskUsage(Set tableIds, // For each table ID for (TableId tableId : tableIds) { // read the metadata - try (var tabletsMetadata = - ((ClientContext) client).getAmple().readTablets().forTable(tableId).build()) { + try (var tabletsMetadata = ((ClientContext) client).getAmple().readTablets().forTable(tableId) + .fetch(FILES).build()) { final Set allFiles = new HashSet<>(); for (var tm : tabletsMetadata) { diff --git a/server/base/src/test/java/org/apache/accumulo/server/util/TableDiskUsageTest.java b/server/base/src/test/java/org/apache/accumulo/server/util/TableDiskUsageTest.java index 0c99d33560f..e0dd6bb1628 100644 --- a/server/base/src/test/java/org/apache/accumulo/server/util/TableDiskUsageTest.java +++ b/server/base/src/test/java/org/apache/accumulo/server/util/TableDiskUsageTest.java @@ -18,28 +18,35 @@ */ package org.apache.accumulo.server.util; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; +import java.util.EnumSet; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.SortedMap; import java.util.SortedSet; +import java.util.TreeMap; import java.util.TreeSet; import java.util.stream.Collectors; import org.apache.accumulo.core.Constants; import org.apache.accumulo.core.clientImpl.ClientContext; +import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.TableId; +import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.RootTable; import org.apache.accumulo.core.metadata.StoredTabletFile; import org.apache.accumulo.core.metadata.schema.Ample; import org.apache.accumulo.core.metadata.schema.DataFileValue; +import org.apache.accumulo.core.metadata.schema.MetadataSchema; import org.apache.accumulo.core.metadata.schema.TabletMetadata; import org.apache.accumulo.core.metadata.schema.TabletsMetadata; import org.easymock.EasyMock; @@ -78,11 +85,11 @@ public void testSingleTableMultipleTablets() throws Exception { List realTabletsMetadata = new ArrayList<>(); appendFileMetadata(realTabletsMetadata, - getTabletFile(volume1, tableId1, tabletName1, "C0001.rf"), tableId1, 1024); + getTabletFile(volume1, tableId1, tabletName1, "C0001.rf"), 1024); appendFileMetadata(realTabletsMetadata, - getTabletFile(volume1, tableId1, tabletName1, "C0002.rf"), tableId1, 1024); + getTabletFile(volume1, tableId1, tabletName1, "C0002.rf"), 1024); appendFileMetadata(realTabletsMetadata, - getTabletFile(volume1, tableId1, tabletName2, "C0003.rf"), tableId1, 2048); + getTabletFile(volume1, tableId1, tabletName2, "C0003.rf"), 2048); mockTabletsMetadataIter(mockTabletsMetadata, realTabletsMetadata.iterator()); EasyMock.replay(client, mockTabletsMetadata); @@ -108,13 +115,13 @@ public void testMultipleVolumes() throws Exception { List realTabletsMetadata = new ArrayList<>(); appendFileMetadata(realTabletsMetadata, - getTabletFile(volume1, tableId1, tabletName1, "C0001.rf"), tableId1, 1024); + getTabletFile(volume1, tableId1, tabletName1, "C0001.rf"), 1024); appendFileMetadata(realTabletsMetadata, - getTabletFile(volume1, tableId1, tabletName1, "C0002.rf"), tableId1, 1024); + getTabletFile(volume1, tableId1, tabletName1, "C0002.rf"), 1024); appendFileMetadata(realTabletsMetadata, - getTabletFile(volume2, tableId1, tabletName2, "C0003.rf"), tableId1, 2048); + getTabletFile(volume2, tableId1, tabletName2, "C0003.rf"), 2048); appendFileMetadata(realTabletsMetadata, - getTabletFile(volume2, tableId1, tabletName2, "C0004.rf"), tableId1, 10000); + getTabletFile(volume2, tableId1, tabletName2, "C0004.rf"), 10000); mockTabletsMetadataIter(mockTabletsMetadata, realTabletsMetadata.iterator()); EasyMock.replay(client, mockTabletsMetadata); @@ -139,8 +146,7 @@ public void testMetadataTable() throws Exception { List realTabletsMetadata = new ArrayList<>(); appendFileMetadata(realTabletsMetadata, - getTabletFile(volume1, MetadataTable.ID, MetadataTable.NAME, "C0001.rf"), MetadataTable.ID, - 1024); + getTabletFile(volume1, MetadataTable.ID, MetadataTable.NAME, "C0001.rf"), 1024); mockTabletsMetadataIter(mockTabletsMetadata, realTabletsMetadata.iterator()); EasyMock.replay(client, mockTabletsMetadata); @@ -165,9 +171,9 @@ public void testDuplicateFile() throws Exception { List realTabletsMetadata = new ArrayList<>(); appendFileMetadata(realTabletsMetadata, - getTabletFile(volume1, tableId1, tabletName1, "C0001.rf"), tableId1, 1024); + getTabletFile(volume1, tableId1, tabletName1, "C0001.rf"), 1024); appendFileMetadata(realTabletsMetadata, - getTabletFile(volume1, tableId1, tabletName1, "C0001.rf"), tableId1, 1024); + getTabletFile(volume1, tableId1, tabletName1, "C0001.rf"), 1024); mockTabletsMetadataIter(mockTabletsMetadata, realTabletsMetadata.iterator()); EasyMock.replay(client, mockTabletsMetadata); @@ -216,30 +222,30 @@ public void testMultipleTables() throws Exception { final TabletsMetadata mockTabletsMetadata1 = mockTabletsMetadata(client, tableId1); List realTabletsMetadata1 = new ArrayList<>(); appendFileMetadata(realTabletsMetadata1, - getTabletFile(volume1, tableId1, tabletName1, "C0001.rf"), tableId1, 1024); + getTabletFile(volume1, tableId1, tabletName1, "C0001.rf"), 1024); appendFileMetadata(realTabletsMetadata1, - getTabletFile(volume1, tableId1, tabletName1, "C0002.rf"), tableId1, 4096); + getTabletFile(volume1, tableId1, tabletName1, "C0002.rf"), 4096); mockTabletsMetadataIter(mockTabletsMetadata1, realTabletsMetadata1.iterator()); final TabletsMetadata mockTabletsMetadata2 = mockTabletsMetadata(client, tableId2); List realTabletsMetadata2 = new ArrayList<>(); appendFileMetadata(realTabletsMetadata2, - getTabletFile(volume1, tableId2, tabletName2, "C0003.rf"), tableId2, 2048); + getTabletFile(volume1, tableId2, tabletName2, "C0003.rf"), 2048); appendFileMetadata(realTabletsMetadata2, - getTabletFile(volume1, tableId2, tabletName2, "C0004.rf"), tableId2, 3000); + getTabletFile(volume1, tableId2, tabletName2, "C0004.rf"), 3000); mockTabletsMetadataIter(mockTabletsMetadata2, realTabletsMetadata2.iterator()); final TabletsMetadata mockTabletsMetadata3 = mockTabletsMetadata(client, tableId3); List realTabletsMetadata3 = new ArrayList<>(); // shared file appendFileMetadata(realTabletsMetadata3, - getTabletFile(volume1, tableId2, tabletName2, "C0003.rf"), tableId2, 2048); + getTabletFile(volume1, tableId2, tabletName2, "C0003.rf"), 2048); appendFileMetadata(realTabletsMetadata3, - getTabletFile(volume1, tableId3, tabletName3, "C0005.rf"), tableId3, 84520); + getTabletFile(volume1, tableId3, tabletName3, "C0005.rf"), 84520); appendFileMetadata(realTabletsMetadata3, - getTabletFile(volume1, tableId3, tabletName3, "C0006.rf"), tableId3, 3000); + getTabletFile(volume1, tableId3, tabletName3, "C0006.rf"), 3000); appendFileMetadata(realTabletsMetadata3, - getTabletFile(volume1, tableId3, tabletName4, "C0007.rf"), tableId3, 98456); + getTabletFile(volume1, tableId3, tabletName4, "C0007.rf"), 98456); mockTabletsMetadataIter(mockTabletsMetadata3, realTabletsMetadata3.iterator()); EasyMock.replay(client, mockTabletsMetadata1, mockTabletsMetadata2, mockTabletsMetadata3); @@ -268,7 +274,7 @@ public void testMultipleTables() throws Exception { } private static TreeSet tableNameSet(TableId... tableIds) { - return Set.of(tableIds).stream().map(tableId -> getTableName(tableId)) + return Set.of(tableIds).stream().map(TableDiskUsageTest::getTableName) .collect(Collectors.toCollection(TreeSet::new)); } @@ -281,7 +287,7 @@ private static Set tableSet(TableId... tableIds) { private static Long getTotalUsage(Map,Long> result, TableId tableId) { return result.entrySet().stream() .filter(entry -> entry.getKey().contains(getTableName(tableId))) - .mapToLong(entry -> entry.getValue()).sum(); + .mapToLong(Map.Entry::getValue).sum(); } private static String getTableName(TableId tableId) { @@ -289,11 +295,16 @@ private static String getTableName(TableId tableId) { } private static void appendFileMetadata(List realTabletsMetadata, - StoredTabletFile file, TableId id, long size) throws Exception { - Map files = Map.of(file, new DataFileValue(size, 1)); - TabletMetadata tm = EasyMock.createMock(TabletMetadata.class); - EasyMock.expect(tm.getFilesMap()).andReturn(files); - EasyMock.replay(tm); + StoredTabletFile file, long size) { + Key key = new Key((file.getTableId() + "<").getBytes(UTF_8), + MetadataSchema.TabletsSection.DataFileColumnFamily.STR_NAME.getBytes(UTF_8), + file.getMetaInsert().getBytes(UTF_8), 123L); + Value val = new DataFileValue(size, 1).encodeAsValue(); + SortedMap map = new TreeMap<>(); + map.put(key, val); + + TabletMetadata tm = TabletMetadata.convertRow(map.entrySet().iterator(), + EnumSet.of(TabletMetadata.ColumnType.FILES), true); realTabletsMetadata.add(tm); } @@ -303,25 +314,26 @@ private static StoredTabletFile getTabletFile(String volume, TableId tableId, St volume + Constants.HDFS_TABLES_DIR + "/" + tableId + "/" + tablet + "/" + fileName); } - private TabletsMetadata mockTabletsMetadata(ClientContext client, TableId tableId) - throws Exception { + private TabletsMetadata mockTabletsMetadata(ClientContext client, TableId tableId) { final Ample ample = EasyMock.createMock(Ample.class); final TabletsMetadata.TableOptions tableOptions = EasyMock.createMock(TabletsMetadata.TableOptions.class); final TabletsMetadata.TableRangeOptions tableRangeOptions = EasyMock.createMock(TabletsMetadata.TableRangeOptions.class); + final TabletsMetadata.Options options = EasyMock.createMock(TabletsMetadata.Options.class); final TabletsMetadata tabletsMetadata = EasyMock.createMock(TabletsMetadata.class); EasyMock.expect(client.getAmple()).andReturn(ample); EasyMock.expect(ample.readTablets()).andReturn(tableOptions); EasyMock.expect(tableOptions.forTable(tableId)).andReturn(tableRangeOptions); - EasyMock.expect(tableRangeOptions.build()).andReturn(tabletsMetadata); - EasyMock.replay(ample, tableOptions, tableRangeOptions); + EasyMock.expect(tableRangeOptions.fetch(TabletMetadata.ColumnType.FILES)).andReturn(options); + EasyMock.expect(options.build()).andReturn(tabletsMetadata); + EasyMock.replay(ample, tableOptions, tableRangeOptions, options); return tabletsMetadata; } private void mockTabletsMetadataIter(TabletsMetadata tabletsMetadata, - Iterator tableEntries) { - EasyMock.expect(tabletsMetadata.iterator()).andReturn(tableEntries); + Iterator realTabletsMetadata) { + EasyMock.expect(tabletsMetadata.iterator()).andReturn(realTabletsMetadata); tabletsMetadata.close(); EasyMock.expectLastCall().andAnswer(() -> null); } From 04ea11035cd0e2affb8078985c0e148e02eb92fe Mon Sep 17 00:00:00 2001 From: Kevin Rathbun Date: Thu, 17 Apr 2025 16:04:13 -0400 Subject: [PATCH 5/5] trivial change to appendFileMetadata --- .../apache/accumulo/server/util/TableDiskUsageTest.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/server/base/src/test/java/org/apache/accumulo/server/util/TableDiskUsageTest.java b/server/base/src/test/java/org/apache/accumulo/server/util/TableDiskUsageTest.java index e0dd6bb1628..5b9ee7fed2a 100644 --- a/server/base/src/test/java/org/apache/accumulo/server/util/TableDiskUsageTest.java +++ b/server/base/src/test/java/org/apache/accumulo/server/util/TableDiskUsageTest.java @@ -18,7 +18,6 @@ */ package org.apache.accumulo.server.util; -import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -49,6 +48,7 @@ import org.apache.accumulo.core.metadata.schema.MetadataSchema; import org.apache.accumulo.core.metadata.schema.TabletMetadata; import org.apache.accumulo.core.metadata.schema.TabletsMetadata; +import org.apache.hadoop.io.Text; import org.easymock.EasyMock; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -296,9 +296,8 @@ private static String getTableName(TableId tableId) { private static void appendFileMetadata(List realTabletsMetadata, StoredTabletFile file, long size) { - Key key = new Key((file.getTableId() + "<").getBytes(UTF_8), - MetadataSchema.TabletsSection.DataFileColumnFamily.STR_NAME.getBytes(UTF_8), - file.getMetaInsert().getBytes(UTF_8), 123L); + Key key = new Key(new Text(file.getTableId() + "<"), + MetadataSchema.TabletsSection.DataFileColumnFamily.NAME, file.getMetaInsertText()); Value val = new DataFileValue(size, 1).encodeAsValue(); SortedMap map = new TreeMap<>(); map.put(key, val);