Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
5.1
* Use LWTs for all auto-repair history mutations (CASSANDRA-20996)
* Add cqlsh autocompletion for the identity mapping feature (CASSANDRA-20021)
* Add DDL Guardrail enabling administrators to disallow creation/modification of keyspaces with durable_writes = false (CASSANDRA-20913)
* Optimize Counter, Meter and Histogram metrics using thread local counters (CASSANDRA-20250)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,27 +150,27 @@ public class AutoRepairUtils
COL_DELETE_HOSTS, COL_DELETE_HOSTS_UPDATE_TIME, COL_REPAIR_TYPE, COL_HOST_ID);

final static String DEL_AUTO_REPAIR_HISTORY = String.format(
"DELETE FROM %s.%s WHERE %s = ? AND %s = ?"
"DELETE FROM %s.%s WHERE %s = ? AND %s = ? IF EXISTS"
, SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE,
COL_HOST_ID);

final static String RECORD_START_REPAIR_HISTORY = String.format(
"UPDATE %s.%s SET %s= ?, repair_turn = ? WHERE %s = ? AND %s = ?"
"UPDATE %s.%s SET %s= ?, repair_turn = ? WHERE %s = ? AND %s = ? IF EXISTS"
, SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_START_TS,
COL_REPAIR_TYPE, COL_HOST_ID);

final static String RECORD_FINISH_REPAIR_HISTORY = String.format(
"UPDATE %s.%s SET %s= ?, %s=false WHERE %s = ? AND %s = ?"
"UPDATE %s.%s SET %s= ?, %s=false WHERE %s = ? AND %s = ? IF EXISTS"
, SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_FINISH_TS,
COL_FORCE_REPAIR, COL_REPAIR_TYPE, COL_HOST_ID);

final static String CLEAR_DELETE_HOSTS = String.format(
"UPDATE %s.%s SET %s= {} WHERE %s = ? AND %s = ?"
"UPDATE %s.%s SET %s= {} WHERE %s = ? AND %s = ? IF EXISTS"
, SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_DELETE_HOSTS,
COL_REPAIR_TYPE, COL_HOST_ID);

final static String SET_FORCE_REPAIR = String.format(
"UPDATE %s.%s SET %s=true WHERE %s = ? AND %s = ?"
"UPDATE %s.%s SET %s=true WHERE %s = ? AND %s = ? IF EXISTS"
, SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_FORCE_REPAIR,
COL_REPAIR_TYPE, COL_HOST_ID);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -538,4 +538,46 @@ public void testSkipSystemTraces()
{
assertFalse(AutoRepairUtils.shouldConsiderKeyspace(Keyspace.open(SchemaConstants.TRACE_KEYSPACE_NAME)));
}

@Test
public void testAutoRepairHistoryOutOfOrderDeleteRaceCondition()
{
// Setup: Create a node that will be deleted
UUID nodeToDelete = UUID.randomUUID();
UUID votingNode = UUID.randomUUID();

// Insert an initial row for the node to be deleted
QueryProcessor.executeInternal(String.format(
"INSERT INTO %s.%s (repair_type, host_id, repair_start_ts, repair_finish_ts) VALUES ('%s', %s, 100, 200)",
SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY,
repairType, nodeToDelete));

// Verify the row exists
UntypedResultSet beforeDelete = QueryProcessor.executeInternal(String.format(
"SELECT * FROM %s.%s WHERE repair_type = '%s' AND host_id = %s",
SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY,
repairType, nodeToDelete));
assertNotNull(beforeDelete);
assertEquals(1, beforeDelete.size());

// Simulate the race condition:
// 1. First, the delete is executed (this should create a tombstone)
AutoRepairUtils.deleteAutoRepairHistory(repairType, nodeToDelete);

// 2. Then, a vote to delete arrives after the row has already been deleted
AutoRepairUtils.addHostIdToDeleteHosts(repairType, votingNode, nodeToDelete);

// 3. Simulate other operations that can be called by other nodes in the cluster for the deleted node
AutoRepairUtils.clearDeleteHosts(repairType, nodeToDelete);
AutoRepairUtils.setForceRepair(repairType, nodeToDelete);

// Verify that the row is still deleted despite the out-of-order operations
UntypedResultSet afterRace = QueryProcessor.executeInternal(String.format(
"SELECT * FROM %s.%s WHERE repair_type = '%s' AND host_id = %s",
SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY,
repairType, nodeToDelete));
assertNotNull(afterRace);
// The row should not exist - the delete should win despite the vote arriving later
assertEquals("Row should remain deleted despite out-of-order vote", 0, afterRace.size());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ public void prepare()
QueryProcessor.executeInternal(String.format(
"TRUNCATE %s.%s",
SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_PRIORITY));
QueryProcessor.executeInternal(String.format(
"INSERT INTO %s.%s (host_id, repair_type) VALUES (%s, '%s')",
SchemaConstants.DISTRIBUTED_KEYSPACE_NAME, SystemDistributedKeyspace.AUTO_REPAIR_HISTORY,
StorageService.instance.getHostIdForEndpoint(InetAddressAndPort.getLocalHost()), repairTypeStr));
}

@Test
Expand Down