diff --git a/server/base/src/main/java/org/apache/accumulo/server/util/TableDiskUsage.java b/server/base/src/main/java/org/apache/accumulo/server/util/TableDiskUsage.java index 4284f8f8d0d..029ad0bd80f 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/util/TableDiskUsage.java +++ b/server/base/src/main/java/org/apache/accumulo/server/util/TableDiskUsage.java @@ -18,6 +18,8 @@ */ package org.apache.accumulo.server.util; +import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.FILES; + import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; @@ -36,23 +38,15 @@ import org.apache.accumulo.core.client.Accumulo; import org.apache.accumulo.core.client.AccumuloClient; -import org.apache.accumulo.core.client.Scanner; import org.apache.accumulo.core.client.TableNotFoundException; import org.apache.accumulo.core.clientImpl.ClientContext; -import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.TableId; -import org.apache.accumulo.core.data.Value; -import org.apache.accumulo.core.dataImpl.KeyExtent; -import org.apache.accumulo.core.metadata.MetadataTable; -import org.apache.accumulo.core.metadata.RootTable; -import org.apache.accumulo.core.metadata.TabletFile; +import org.apache.accumulo.core.metadata.StoredTabletFile; import org.apache.accumulo.core.metadata.schema.DataFileValue; import org.apache.accumulo.core.metadata.schema.MetadataSchema; -import org.apache.accumulo.core.security.Authorizations; import org.apache.accumulo.core.trace.TraceUtil; import org.apache.accumulo.core.util.NumUtil; import org.apache.accumulo.server.cli.ServerUtilOpts; -import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -203,47 +197,44 @@ public static Map,Long> getDiskUsage(Set tableIds, // For each table ID for (TableId tableId : tableIds) { - // if the table to compute usage is for the metadata table itself then we need to scan the - // root table, else we scan the metadata table - try (Scanner mdScanner = tableId.equals(MetadataTable.ID) - ? client.createScanner(RootTable.NAME, Authorizations.EMPTY) - : client.createScanner(MetadataTable.NAME, Authorizations.EMPTY)) { - mdScanner.fetchColumnFamily(MetadataSchema.TabletsSection.DataFileColumnFamily.NAME); - mdScanner.setRange(new KeyExtent(tableId, null, null).toMetaRange()); - - final Set files = new HashSet<>(); - - // Read each file referenced by that table - for (Map.Entry entry : mdScanner) { - final TabletFile file = - new TabletFile(new Path(entry.getKey().getColumnQualifier().toString())); - - // get the table referenced by the file which may not be the same as the current - // table we are scanning if the file is shared between multiple tables - final TableId fileTableRef = file.getTableId(); - - // if this is a ref to a different table than the one we are scanning then we need - // to make sure the table is also linked for this shared file if the table is - // part of the set of tables we are running du on so we can track shared usages - if (!fileTableRef.equals(tableId) && tableIds.contains(fileTableRef)) { - // link the table and the shared file for computing shared sizes - tdu.linkFileAndTable(fileTableRef, file.getFileName()); - } - - // link the file to the table we are scanning for - tdu.linkFileAndTable(tableId, file.getFileName()); - - // add the file size for the table if not already seen for this scan - if (files.add(file)) { - // This tracks the file size for individual files for computing shared file statistics - // later - tdu.addFileSize(file.getFileName(), - new DataFileValue(entry.getValue().get()).getSize()); + // read the metadata + try (var tabletsMetadata = ((ClientContext) client).getAmple().readTablets().forTable(tableId) + .fetch(FILES).build()) { + final Set allFiles = new HashSet<>(); + + for (var tm : tabletsMetadata) { + final Map tmFiles = tm.getFilesMap(); + + for (var file : tmFiles.entrySet()) { + final var stf = file.getKey(); + final var dataFileValue = file.getValue(); + + // get the table referenced by the file which may not be the same as the current + // table we are scanning if the file is shared between multiple tables + final TableId fileTableRef = stf.getTableId(); + + // if this is a ref to a different table than the one we are scanning then we need + // to make sure the table is also linked for this shared file if the table is + // part of the set of tables we are running du on so we can track shared usages + if (!fileTableRef.equals(tableId) && tableIds.contains(fileTableRef)) { + // link the table and the shared file for computing shared sizes + tdu.linkFileAndTable(fileTableRef, stf.getFileName()); + } + + // link the file to the table we are scanning for + tdu.linkFileAndTable(tableId, stf.getFileName()); + + // add the file size for the table if not already seen for this scan + if (allFiles.add(stf)) { + // This tracks the file size for individual files for computing shared file statistics + // later + tdu.addFileSize(stf.getFileName(), dataFileValue.getSize()); + } } } // Track tables that are empty with no metadata - if (files.isEmpty()) { + if (allFiles.isEmpty()) { emptyTableIds.add(tableId); } } diff --git a/server/base/src/test/java/org/apache/accumulo/server/util/TableDiskUsageTest.java b/server/base/src/test/java/org/apache/accumulo/server/util/TableDiskUsageTest.java index a275424f742..5b9ee7fed2a 100644 --- a/server/base/src/test/java/org/apache/accumulo/server/util/TableDiskUsageTest.java +++ b/server/base/src/test/java/org/apache/accumulo/server/util/TableDiskUsageTest.java @@ -21,29 +21,33 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; +import java.util.ArrayList; +import java.util.EnumSet; import java.util.HashMap; +import java.util.Iterator; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.SortedMap; import java.util.SortedSet; +import java.util.TreeMap; import java.util.TreeSet; import java.util.stream.Collectors; import org.apache.accumulo.core.Constants; -import org.apache.accumulo.core.client.Scanner; +import org.apache.accumulo.core.clientImpl.ClientContext; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.TableId; import org.apache.accumulo.core.data.Value; -import org.apache.accumulo.core.dataImpl.KeyExtent; import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.RootTable; -import org.apache.accumulo.core.metadata.TabletFile; +import org.apache.accumulo.core.metadata.StoredTabletFile; +import org.apache.accumulo.core.metadata.schema.Ample; import org.apache.accumulo.core.metadata.schema.DataFileValue; import org.apache.accumulo.core.metadata.schema.MetadataSchema; -import org.apache.accumulo.core.security.Authorizations; -import org.apache.accumulo.server.ServerContext; -import org.apache.hadoop.fs.Path; +import org.apache.accumulo.core.metadata.schema.TabletMetadata; +import org.apache.accumulo.core.metadata.schema.TabletsMetadata; import org.apache.hadoop.io.Text; import org.easymock.EasyMock; import org.junit.jupiter.api.BeforeAll; @@ -75,17 +79,20 @@ public static void beforeClass() { @Test public void testSingleTableMultipleTablets() throws Exception { - final ServerContext client = EasyMock.createMock(ServerContext.class); - final Scanner scanner = EasyMock.createMock(Scanner.class); - mockScan(client, scanner, 1); + final ClientContext client = EasyMock.createMock(ClientContext.class); + EasyMock.expect(client.getTableIdToNameMap()).andReturn(tableIdToNameMap); + final TabletsMetadata mockTabletsMetadata = mockTabletsMetadata(client, tableId1); - Map tableEntries = new HashMap<>(); - appendFileMetadata(tableEntries, getTabletFile(tableId1, tabletName1, "C0001.rf"), 1024); - appendFileMetadata(tableEntries, getTabletFile(tableId1, tabletName1, "C0002.rf"), 1024); - appendFileMetadata(tableEntries, getTabletFile(tableId1, tabletName2, "C0003.rf"), 2048); - mockTableScan(scanner, tableEntries, tableId1); + List realTabletsMetadata = new ArrayList<>(); + appendFileMetadata(realTabletsMetadata, + getTabletFile(volume1, tableId1, tabletName1, "C0001.rf"), 1024); + appendFileMetadata(realTabletsMetadata, + getTabletFile(volume1, tableId1, tabletName1, "C0002.rf"), 1024); + appendFileMetadata(realTabletsMetadata, + getTabletFile(volume1, tableId1, tabletName2, "C0003.rf"), 2048); + mockTabletsMetadataIter(mockTabletsMetadata, realTabletsMetadata.iterator()); - EasyMock.replay(client, scanner); + EasyMock.replay(client, mockTabletsMetadata); Map,Long> result = TableDiskUsage.getDiskUsage(tableSet(tableId1), client); @@ -97,25 +104,27 @@ public void testSingleTableMultipleTablets() throws Exception { assertTrue(firstResult.getKey().contains(getTableName(tableId1))); assertEquals(4096, firstResult.getValue()); - EasyMock.verify(client, scanner); + EasyMock.verify(client, mockTabletsMetadata); } @Test public void testMultipleVolumes() throws Exception { - final ServerContext client = EasyMock.createMock(ServerContext.class); - final Scanner scanner = EasyMock.createMock(Scanner.class); - mockScan(client, scanner, 1); + final ClientContext client = EasyMock.createMock(ClientContext.class); + EasyMock.expect(client.getTableIdToNameMap()).andReturn(tableIdToNameMap); + final TabletsMetadata mockTabletsMetadata = mockTabletsMetadata(client, tableId1); - Map tableEntries = new HashMap<>(); - appendFileMetadata(tableEntries, getTabletFile(tableId1, tabletName1, "C0001.rf"), 1024); - appendFileMetadata(tableEntries, getTabletFile(tableId1, tabletName1, "C0002.rf"), 1024); - appendFileMetadata(tableEntries, getTabletFile(volume2, tableId1, tabletName2, "C0003.rf"), - 2048); - appendFileMetadata(tableEntries, getTabletFile(volume2, tableId1, tabletName2, "C0004.rf"), - 10000); - mockTableScan(scanner, tableEntries, tableId1); + List realTabletsMetadata = new ArrayList<>(); + appendFileMetadata(realTabletsMetadata, + getTabletFile(volume1, tableId1, tabletName1, "C0001.rf"), 1024); + appendFileMetadata(realTabletsMetadata, + getTabletFile(volume1, tableId1, tabletName1, "C0002.rf"), 1024); + appendFileMetadata(realTabletsMetadata, + getTabletFile(volume2, tableId1, tabletName2, "C0003.rf"), 2048); + appendFileMetadata(realTabletsMetadata, + getTabletFile(volume2, tableId1, tabletName2, "C0004.rf"), 10000); + mockTabletsMetadataIter(mockTabletsMetadata, realTabletsMetadata.iterator()); - EasyMock.replay(client, scanner); + EasyMock.replay(client, mockTabletsMetadata); Map,Long> result = TableDiskUsage.getDiskUsage(tableSet(tableId1), client); @@ -126,24 +135,21 @@ public void testMultipleVolumes() throws Exception { assertEquals(1, firstResult.getKey().size()); assertEquals(14096, firstResult.getValue()); - EasyMock.verify(client, scanner); + EasyMock.verify(client, mockTabletsMetadata); } @Test public void testMetadataTable() throws Exception { - final ServerContext client = EasyMock.createMock(ServerContext.class); - final Scanner scanner = EasyMock.createMock(Scanner.class); - - // Expect root table instead to be scanned - EasyMock.expect(client.createScanner(RootTable.NAME, Authorizations.EMPTY)).andReturn(scanner); + final ClientContext client = EasyMock.createMock(ClientContext.class); EasyMock.expect(client.getTableIdToNameMap()).andReturn(tableIdToNameMap); + final TabletsMetadata mockTabletsMetadata = mockTabletsMetadata(client, MetadataTable.ID); - Map tableEntries = new HashMap<>(); - appendFileMetadata(tableEntries, - getTabletFile(MetadataTable.ID, MetadataTable.NAME, "C0001.rf"), 1024); - mockTableScan(scanner, tableEntries, MetadataTable.ID); + List realTabletsMetadata = new ArrayList<>(); + appendFileMetadata(realTabletsMetadata, + getTabletFile(volume1, MetadataTable.ID, MetadataTable.NAME, "C0001.rf"), 1024); + mockTabletsMetadataIter(mockTabletsMetadata, realTabletsMetadata.iterator()); - EasyMock.replay(client, scanner); + EasyMock.replay(client, mockTabletsMetadata); Map,Long> result = TableDiskUsage.getDiskUsage(tableSet(MetadataTable.ID), client); @@ -154,21 +160,23 @@ public void testMetadataTable() throws Exception { result.entrySet().stream().findFirst().orElseThrow(); assertEquals(1024, firstResult.getValue()); - EasyMock.verify(client, scanner); + EasyMock.verify(client, mockTabletsMetadata); } @Test public void testDuplicateFile() throws Exception { - final ServerContext client = EasyMock.createMock(ServerContext.class); - final Scanner scanner = EasyMock.createMock(Scanner.class); - mockScan(client, scanner, 1); + final ClientContext client = EasyMock.createMock(ClientContext.class); + EasyMock.expect(client.getTableIdToNameMap()).andReturn(tableIdToNameMap); + final TabletsMetadata mockTabletsMetadata = mockTabletsMetadata(client, tableId1); - Map tableEntries = new HashMap<>(); - appendFileMetadata(tableEntries, getTabletFile(tableId1, tabletName1, "C0001.rf"), 1024); - appendFileMetadata(tableEntries, getTabletFile(tableId1, tabletName1, "C0001.rf"), 1024); - mockTableScan(scanner, tableEntries, tableId1); + List realTabletsMetadata = new ArrayList<>(); + appendFileMetadata(realTabletsMetadata, + getTabletFile(volume1, tableId1, tabletName1, "C0001.rf"), 1024); + appendFileMetadata(realTabletsMetadata, + getTabletFile(volume1, tableId1, tabletName1, "C0001.rf"), 1024); + mockTabletsMetadataIter(mockTabletsMetadata, realTabletsMetadata.iterator()); - EasyMock.replay(client, scanner); + EasyMock.replay(client, mockTabletsMetadata); Map,Long> result = TableDiskUsage.getDiskUsage(tableSet(tableId1), client); @@ -180,19 +188,19 @@ public void testDuplicateFile() throws Exception { assertTrue(firstResult.getKey().contains(getTableName(tableId1))); assertEquals(1024, firstResult.getValue()); - EasyMock.verify(client, scanner); + EasyMock.verify(client, mockTabletsMetadata); } @Test public void testEmptyTable() throws Exception { - final ServerContext client = EasyMock.createMock(ServerContext.class); - final Scanner scanner = EasyMock.createMock(Scanner.class); - mockScan(client, scanner, 1); + final ClientContext client = EasyMock.createMock(ClientContext.class); + EasyMock.expect(client.getTableIdToNameMap()).andReturn(tableIdToNameMap); + final TabletsMetadata mockTabletsMetadata = mockTabletsMetadata(client, tableId1); - Map tableEntries = new HashMap<>(); - mockTableScan(scanner, tableEntries, tableId1); + List realTabletsMetadata = new ArrayList<>(); + mockTabletsMetadataIter(mockTabletsMetadata, realTabletsMetadata.iterator()); - EasyMock.replay(client, scanner); + EasyMock.replay(client, mockTabletsMetadata); Map,Long> result = TableDiskUsage.getDiskUsage(tableSet(tableId1), client); @@ -203,34 +211,44 @@ public void testEmptyTable() throws Exception { assertEquals(1, firstResult.getKey().size()); assertEquals(0, firstResult.getValue()); - EasyMock.verify(client, scanner); + EasyMock.verify(client, mockTabletsMetadata); } @Test public void testMultipleTables() throws Exception { - final ServerContext client = EasyMock.createMock(ServerContext.class); - final Scanner scanner = EasyMock.createMock(Scanner.class); - mockScan(client, scanner, 3); - - Map tableEntries1 = new HashMap<>(); - appendFileMetadata(tableEntries1, getTabletFile(tableId1, tabletName1, "C0001.rf"), 1024); - appendFileMetadata(tableEntries1, getTabletFile(tableId1, tabletName1, "C0002.rf"), 4096); - mockTableScan(scanner, tableEntries1, tableId1); - - Map tableEntries2 = new HashMap<>(); - appendFileMetadata(tableEntries2, getTabletFile(tableId2, tabletName2, "C0003.rf"), 2048); - appendFileMetadata(tableEntries2, getTabletFile(tableId2, tabletName2, "C0004.rf"), 3000); - mockTableScan(scanner, tableEntries2, tableId2); + final ClientContext client = EasyMock.createMock(ClientContext.class); + EasyMock.expect(client.getTableIdToNameMap()).andReturn(tableIdToNameMap); - Map tableEntries3 = new HashMap<>(); + final TabletsMetadata mockTabletsMetadata1 = mockTabletsMetadata(client, tableId1); + List realTabletsMetadata1 = new ArrayList<>(); + appendFileMetadata(realTabletsMetadata1, + getTabletFile(volume1, tableId1, tabletName1, "C0001.rf"), 1024); + appendFileMetadata(realTabletsMetadata1, + getTabletFile(volume1, tableId1, tabletName1, "C0002.rf"), 4096); + mockTabletsMetadataIter(mockTabletsMetadata1, realTabletsMetadata1.iterator()); + + final TabletsMetadata mockTabletsMetadata2 = mockTabletsMetadata(client, tableId2); + List realTabletsMetadata2 = new ArrayList<>(); + appendFileMetadata(realTabletsMetadata2, + getTabletFile(volume1, tableId2, tabletName2, "C0003.rf"), 2048); + appendFileMetadata(realTabletsMetadata2, + getTabletFile(volume1, tableId2, tabletName2, "C0004.rf"), 3000); + mockTabletsMetadataIter(mockTabletsMetadata2, realTabletsMetadata2.iterator()); + + final TabletsMetadata mockTabletsMetadata3 = mockTabletsMetadata(client, tableId3); + List realTabletsMetadata3 = new ArrayList<>(); // shared file - appendFileMetadata(tableEntries3, getTabletFile(tableId2, tabletName2, "C0003.rf"), 2048); - appendFileMetadata(tableEntries3, getTabletFile(tableId3, tabletName3, "C0005.rf"), 84520); - appendFileMetadata(tableEntries3, getTabletFile(tableId3, tabletName3, "C0006.rf"), 3000); - appendFileMetadata(tableEntries3, getTabletFile(tableId3, tabletName4, "C0007.rf"), 98456); - mockTableScan(scanner, tableEntries3, tableId3); - - EasyMock.replay(client, scanner); + appendFileMetadata(realTabletsMetadata3, + getTabletFile(volume1, tableId2, tabletName2, "C0003.rf"), 2048); + appendFileMetadata(realTabletsMetadata3, + getTabletFile(volume1, tableId3, tabletName3, "C0005.rf"), 84520); + appendFileMetadata(realTabletsMetadata3, + getTabletFile(volume1, tableId3, tabletName3, "C0006.rf"), 3000); + appendFileMetadata(realTabletsMetadata3, + getTabletFile(volume1, tableId3, tabletName4, "C0007.rf"), 98456); + mockTabletsMetadataIter(mockTabletsMetadata3, realTabletsMetadata3.iterator()); + + EasyMock.replay(client, mockTabletsMetadata1, mockTabletsMetadata2, mockTabletsMetadata3); Map,Long> result = TableDiskUsage.getDiskUsage(tableSet(tableId1, tableId2, tableId3), client); @@ -252,11 +270,11 @@ public void testMultipleTables() throws Exception { assertEquals(2048, result.get(tableNameSet(tableId2, tableId3))); assertEquals(185976, result.get(tableNameSet(tableId3))); - EasyMock.verify(client, scanner); + EasyMock.verify(client, mockTabletsMetadata1, mockTabletsMetadata2, mockTabletsMetadata3); } private static TreeSet tableNameSet(TableId... tableIds) { - return Set.of(tableIds).stream().map(tableId -> getTableName(tableId)) + return Set.of(tableIds).stream().map(TableDiskUsageTest::getTableName) .collect(Collectors.toCollection(TreeSet::new)); } @@ -269,43 +287,53 @@ private static Set tableSet(TableId... tableIds) { private static Long getTotalUsage(Map,Long> result, TableId tableId) { return result.entrySet().stream() .filter(entry -> entry.getKey().contains(getTableName(tableId))) - .mapToLong(entry -> entry.getValue()).sum(); + .mapToLong(Map.Entry::getValue).sum(); } private static String getTableName(TableId tableId) { return tableIdToNameMap.get(tableId); } - private static void appendFileMetadata(Map tableEntries, TabletFile file, long size) { - tableEntries.put( - new Key(new Text(file.getTableId() + "<"), - MetadataSchema.TabletsSection.DataFileColumnFamily.NAME, file.getMetaInsertText()), - new DataFileValue(size, 1).encodeAsValue()); + private static void appendFileMetadata(List realTabletsMetadata, + StoredTabletFile file, long size) { + Key key = new Key(new Text(file.getTableId() + "<"), + MetadataSchema.TabletsSection.DataFileColumnFamily.NAME, file.getMetaInsertText()); + Value val = new DataFileValue(size, 1).encodeAsValue(); + SortedMap map = new TreeMap<>(); + map.put(key, val); + + TabletMetadata tm = TabletMetadata.convertRow(map.entrySet().iterator(), + EnumSet.of(TabletMetadata.ColumnType.FILES), true); + realTabletsMetadata.add(tm); } - private static TabletFile getTabletFile(String volume, TableId tableId, String tablet, + private static StoredTabletFile getTabletFile(String volume, TableId tableId, String tablet, String fileName) { - return new TabletFile(new Path( - volume + Constants.HDFS_TABLES_DIR + "/" + tableId + "/" + tablet + "/" + fileName)); + return new StoredTabletFile( + volume + Constants.HDFS_TABLES_DIR + "/" + tableId + "/" + tablet + "/" + fileName); } - private static TabletFile getTabletFile(TableId tableId, String tablet, String fileName) { - return getTabletFile(volume1, tableId, tablet, fileName); - } - - private void mockScan(ServerContext client, Scanner scanner, int times) throws Exception { - EasyMock.expect(client.createScanner(MetadataTable.NAME, Authorizations.EMPTY)) - .andReturn(scanner).times(times); - EasyMock.expect(client.getTableIdToNameMap()).andReturn(tableIdToNameMap); + private TabletsMetadata mockTabletsMetadata(ClientContext client, TableId tableId) { + final Ample ample = EasyMock.createMock(Ample.class); + final TabletsMetadata.TableOptions tableOptions = + EasyMock.createMock(TabletsMetadata.TableOptions.class); + final TabletsMetadata.TableRangeOptions tableRangeOptions = + EasyMock.createMock(TabletsMetadata.TableRangeOptions.class); + final TabletsMetadata.Options options = EasyMock.createMock(TabletsMetadata.Options.class); + final TabletsMetadata tabletsMetadata = EasyMock.createMock(TabletsMetadata.class); + EasyMock.expect(client.getAmple()).andReturn(ample); + EasyMock.expect(ample.readTablets()).andReturn(tableOptions); + EasyMock.expect(tableOptions.forTable(tableId)).andReturn(tableRangeOptions); + EasyMock.expect(tableRangeOptions.fetch(TabletMetadata.ColumnType.FILES)).andReturn(options); + EasyMock.expect(options.build()).andReturn(tabletsMetadata); + EasyMock.replay(ample, tableOptions, tableRangeOptions, options); + return tabletsMetadata; } - private void mockTableScan(Scanner scanner, Map tableEntries, TableId tableId) { - scanner.fetchColumnFamily(MetadataSchema.TabletsSection.DataFileColumnFamily.NAME); - EasyMock.expectLastCall().once(); - scanner.setRange(new KeyExtent(tableId, null, null).toMetaRange()); - EasyMock.expectLastCall().once(); - EasyMock.expect(scanner.iterator()).andReturn(tableEntries.entrySet().iterator()); - scanner.close(); - EasyMock.expectLastCall().once(); + private void mockTabletsMetadataIter(TabletsMetadata tabletsMetadata, + Iterator realTabletsMetadata) { + EasyMock.expect(tabletsMetadata.iterator()).andReturn(realTabletsMetadata); + tabletsMetadata.close(); + EasyMock.expectLastCall().andAnswer(() -> null); } }