Commit 3ab7bb9

GODRIVER-3638 Prohibit using failpoints on sharded topologies.

1 parent 5b79d94

8 files changed, 93 insertions(+), 37 deletions(-)

internal/integration/crud_prose_test.go
Lines changed: 7 additions & 2 deletions

@@ -499,7 +499,10 @@ func TestClientBulkWriteProse(t *testing.T) {
 		assert.Equal(mt, 1, opsCnt[1], "expected %d secondEvent.command.ops, got: %d", 1, opsCnt[1])
 	})
 
-	mt.Run("5. MongoClient.bulkWrite collects WriteConcernErrors across batches", func(mt *mtest.T) {
+	// TODO(GODRIVER-3328): FailPoints are not currently reliable on sharded
+	// topologies. Allow running on sharded topologies once that is fixed.
+	noShardedOpts := mtest.NewOptions().Topologies(mtest.Single, mtest.ReplicaSet, mtest.LoadBalanced)
+	mt.RunOpts("5. MongoClient.bulkWrite collects WriteConcernErrors across batches", noShardedOpts, func(mt *mtest.T) {
 		var eventCnt int
 		monitor := &event.CommandMonitor{
 			Started: func(_ context.Context, e *event.CommandStartedEvent) {
@@ -715,7 +718,9 @@ func TestClientBulkWriteProse(t *testing.T) {
 		assert.Equal(mt, 1, getMoreCalled, "expected %d getMore call, got: %d", 1, getMoreCalled)
 	})
 
-	mt.Run("9. MongoClient.bulkWrite handles a getMore error", func(mt *mtest.T) {
+	// TODO(GODRIVER-3328): FailPoints are not currently reliable on sharded
+	// topologies. Allow running on sharded topologies once that is fixed.
+	mt.RunOpts("9. MongoClient.bulkWrite handles a getMore error", noShardedOpts, func(mt *mtest.T) {
 		var getMoreCalled int
 		var killCursorsCalled int
 		monitor := &event.CommandMonitor{

internal/integration/csot_prose_test.go
Lines changed: 13 additions & 3 deletions

@@ -238,7 +238,10 @@ func TestCSOTProse_GridFS(t *testing.T) {
 	mt := mtest.New(t, mtest.NewOptions().CreateClient(false))
 
 	mt.RunOpts("6. gridfs - upload", mtest.NewOptions().MinServerVersion("4.4"), func(mt *mtest.T) {
-		mt.Run("uploads via openUploadStream can be timed out", func(mt *mtest.T) {
+		// TODO(GODRIVER-3328): FailPoints are not currently reliable on sharded
+		// topologies. Allow running on sharded topologies once that is fixed.
+		noShardedOpts := mtest.NewOptions().Topologies(mtest.Single, mtest.ReplicaSet, mtest.LoadBalanced)
+		mt.RunOpts("uploads via openUploadStream can be timed out", noShardedOpts, func(mt *mtest.T) {
 			// Drop and re-create the db.fs.files and db.fs.chunks collections.
 			err := mt.Client.Database("db").Collection("fs.files").Drop(context.Background())
 			assert.NoError(mt, err, "failed to drop files")
@@ -298,7 +301,9 @@ func TestCSOTProse_GridFS(t *testing.T) {
 			assert.Error(t, err, context.DeadlineExceeded)
 		})
 
-		mt.Run("Aborting an upload stream can be timed out", func(mt *mtest.T) {
+		// TODO(GODRIVER-3328): FailPoints are not currently reliable on sharded
+		// topologies. Allow running on sharded topologies once that is fixed.
+		mt.RunOpts("Aborting an upload stream can be timed out", noShardedOpts, func(mt *mtest.T) {
 			// Drop and re-create the db.fs.files and db.fs.chunks collections.
 			err := mt.Client.Database("db").Collection("fs.files").Drop(context.Background())
 			assert.NoError(mt, err, "failed to drop files")
@@ -414,7 +419,12 @@ func TestCSOTProse_GridFS(t *testing.T) {
 	})
 
 	const test62 = "6.2 gridfs - upload with operation-level timeout"
-	mt.RunOpts(test62, mtest.NewOptions().MinServerVersion("4.4"), func(mt *mtest.T) {
+	mtOpts := mtest.NewOptions().
+		MinServerVersion("4.4").
+		// TODO(GODRIVER-3328): FailPoints are not currently reliable on sharded
+		// topologies. Allow running on sharded topologies once that is fixed.
+		Topologies(mtest.Single, mtest.ReplicaSet, mtest.LoadBalanced)
+	mt.RunOpts(test62, mtOpts, func(mt *mtest.T) {
 		// Drop and re-create the db.fs.files and db.fs.chunks collections.
 		err := mt.Client.Database("db").Collection("fs.files").Drop(context.Background())
 		assert.NoError(mt, err, "failed to drop files")

internal/integration/mtest/mongotest.go
Lines changed: 34 additions & 18 deletions

@@ -60,24 +60,25 @@ type T struct {
 	*testing.T
 
 	// members for only this T instance
-	createClient      *bool
-	createCollection  *bool
-	runOn             []RunOnBlock
-	mockDeployment    *drivertest.MockDeployment // nil if the test is not being run against a mock
-	mockResponses     []bson.D
-	createdColls      []*Collection // collections created in this test
-	proxyDialer       *proxyDialer
-	dbName, collName  string
-	failPointNames    []string
-	minServerVersion  string
-	maxServerVersion  string
-	validTopologies   []TopologyKind
-	auth              *bool
-	enterprise        *bool
-	dataLake          *bool
-	ssl               *bool
-	collCreateOpts    *options.CreateCollectionOptionsBuilder
-	requireAPIVersion *bool
+	createClient             *bool
+	createCollection         *bool
+	runOn                    []RunOnBlock
+	mockDeployment           *drivertest.MockDeployment // nil if the test is not being run against a mock
+	mockResponses            []bson.D
+	createdColls             []*Collection // collections created in this test
+	proxyDialer              *proxyDialer
+	dbName, collName         string
+	failPointNames           []string
+	minServerVersion         string
+	maxServerVersion         string
+	validTopologies          []TopologyKind
+	auth                     *bool
+	enterprise               *bool
+	dataLake                 *bool
+	ssl                      *bool
+	collCreateOpts           *options.CreateCollectionOptionsBuilder
+	requireAPIVersion        *bool
+	allowFailPointsOnSharded bool
 
 	// options copied to sub-tests
 	clientType ClientType
@@ -501,6 +502,21 @@ func (t *T) ClearCollections() {
 // SetFailPoint sets a fail point for the client associated with T. Commands to create the failpoint will appear
 // in command monitoring channels. The fail point will automatically be disabled after this test has run.
 func (t *T) SetFailPoint(fp failpoint.FailPoint) {
+	// Do not allow failpoints to be used on sharded topologies unless
+	// specifically configured to allow it.
+	//
+	// On sharded topologies, failpoints are applied to only a single mongoS. If
+	// the driver is connected to multiple mongoS instances, there's a
+	// possibility a different mongoS will be selected for a subsequent command.
+	// In that case, the failpoint is effectively ignored, leading to a test
+	// failure that is extremely difficult to diagnose.
+	//
+	// TODO(GODRIVER-3328): Remove this once we set failpoints on every mongoS
+	// in sharded topologies.
+	if testContext.topoKind == Sharded && !t.allowFailPointsOnSharded {
+		t.Fatalf("cannot use failpoints with sharded topologies unless AllowFailPointsOnSharded is set")
+	}
+
 	// ensure mode fields are int32
 	if modeMap, ok := fp.Mode.(map[string]any); ok {
 		var key string
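
The comment in SetFailPoint alludes to the eventual fix tracked in GODRIVER-3328: configure the failpoint on every mongoS rather than only one. Below is a minimal sketch of that idea, not part of this commit; setFailPointOnEveryMongos is a hypothetical helper, and the v2 module paths and mongo.Connect signature are assumptions.

package failpointutil

import (
	"context"

	"go.mongodb.org/mongo-driver/v2/bson"
	"go.mongodb.org/mongo-driver/v2/mongo"
	"go.mongodb.org/mongo-driver/v2/mongo/options"
)

// setFailPointOnEveryMongos runs a configureFailPoint command document against
// each mongoS host individually, so no routing choice can bypass the failpoint.
func setFailPointOnEveryMongos(ctx context.Context, hosts []string, fp bson.D) error {
	for _, host := range hosts {
		// directConnection=true pins the client to this single mongoS.
		uri := "mongodb://" + host + "/?directConnection=true"
		client, err := mongo.Connect(options.Client().ApplyURI(uri))
		if err != nil {
			return err
		}
		// Failpoints are configured through the admin database.
		cmdErr := client.Database("admin").RunCommand(ctx, fp).Err()
		_ = client.Disconnect(ctx)
		if cmdErr != nil {
			return cmdErr
		}
	}
	return nil
}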

internal/integration/mtest/options.go
Lines changed: 15 additions & 0 deletions

@@ -281,3 +281,18 @@ func (op *Options) RequireAPIVersion(rav bool) *Options {
 	})
 	return op
 }
+
+// AllowFailPointsOnSharded bypasses the check for failpoints used on sharded
+// topologies.
+//
+// Failpoints are generally unreliable on sharded topologies, but can be used if
+// the failpoint is explicitly applied to every mongoS node in the cluster.
+//
+// TODO(GODRIVER-3328): Remove this option once we set failpoints on every
+// mongoS in sharded topologies.
+func (op *Options) AllowFailPointsOnSharded() *Options {
+	op.optFuncs = append(op.optFuncs, func(t *T) {
+		t.allowFailPointsOnSharded = true
+	})
+	return op
+}
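
For sharded-only tests that still need failpoints, such as the "retrying in sharded cluster" tests below, opting in looks like the following. This is a usage sketch with a hypothetical test name; the option chain mirrors the call sites updated in this commit, and the internal mtest import is only resolvable inside the driver repo.

package integration

import (
	"testing"

	"go.mongodb.org/mongo-driver/v2/internal/integration/mtest"
)

func TestShardedWithFailPoints(t *testing.T) {
	mt := mtest.New(t, mtest.NewOptions().CreateClient(false))

	// Opting in acknowledges that the failpoint is set on a single mongoS and
	// accepts the routing caveat described in T.SetFailPoint.
	mtOpts := mtest.NewOptions().
		Topologies(mtest.Sharded).
		MinServerVersion("4.2").
		AllowFailPointsOnSharded()
	mt.RunOpts("sharded test that sets a failpoint", mtOpts, func(mt *mtest.T) {
		// mt.SetFailPoint(...) is permitted here and no longer fails the test.
	})
}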

internal/integration/retryable_reads_prose_test.go
Lines changed: 12 additions & 9 deletions

@@ -34,14 +34,17 @@ func TestRetryableReadsProse(t *testing.T) {
 		SetPoolMonitor(tpm.PoolMonitor).SetHeartbeatInterval(500 * time.Millisecond).
 		SetHosts(hosts[:1])
 
-	mtOpts := mtest.NewOptions().ClientOptions(clientOpts).MinServerVersion("4.3")
-	mt := mtest.New(t, mtOpts)
-
-	mt.Run("PoolClearedError retryability", func(mt *mtest.T) {
-		if mtest.ClusterTopologyKind() == mtest.LoadBalanced {
-			mt.Skip("skipping as load balanced topology has different pool clearing behavior")
-		}
-
+	mt := mtest.New(t, mtest.NewOptions().ClientOptions(clientOpts))
+
+	mtOpts := mtest.NewOptions().
+		MinServerVersion("4.3").
+		// Load-balanced topologies have different pool-clearing behavior, so
+		// don't run this test on load-balanced topologies.
+		//
+		// TODO(GODRIVER-3328): FailPoints are not currently reliable on sharded
+		// topologies. Allow running on sharded topologies once that is fixed.
+		Topologies(mtest.Single, mtest.ReplicaSet)
+	mt.RunOpts("PoolClearedError retryability", mtOpts, func(mt *mtest.T) {
 		// Insert a document to test collection.
 		_, err := mt.Coll.InsertOne(context.Background(), bson.D{{"x", 1}})
 		assert.Nil(mt, err, "InsertOne error: %v", err)
@@ -106,7 +109,7 @@ func TestRetryableReadsProse(t *testing.T) {
 		}
 	})
 
-	mtOpts = mtest.NewOptions().Topologies(mtest.Sharded).MinServerVersion("4.2")
+	mtOpts = mtest.NewOptions().Topologies(mtest.Sharded).MinServerVersion("4.2").AllowFailPointsOnSharded()
 	mt.RunOpts("retrying in sharded cluster", mtOpts, func(mt *mtest.T) {
 		tests := []struct {
 			name string

internal/integration/retryable_writes_prose_test.go
Lines changed: 7 additions & 3 deletions

@@ -155,8 +155,12 @@ func TestRetryableWritesProse(t *testing.T) {
 		SetPoolMonitor(tpm.PoolMonitor).SetHeartbeatInterval(500 * time.Millisecond).
 		SetHosts(hosts[:1])
 
-	mtPceOpts := mtest.NewOptions().ClientOptions(pceOpts).MinServerVersion("4.3").
-		Topologies(mtest.ReplicaSet, mtest.Sharded)
+	mtPceOpts := mtest.NewOptions().
+		ClientOptions(pceOpts).
+		MinServerVersion("4.3").
+		// TODO(GODRIVER-3328): FailPoints are not currently reliable on sharded
+		// topologies. Allow running on sharded topologies once that is fixed.
+		Topologies(mtest.ReplicaSet)
 	mt.RunOpts("PoolClearedError retryability", mtPceOpts, func(mt *mtest.T) {
 		// Force Find to block for 1 second once.
 		mt.SetFailPoint(failpoint.FailPoint{
@@ -287,7 +291,7 @@ func TestRetryableWritesProse(t *testing.T) {
 		require.True(mt, err.(mongo.WriteException).HasErrorCode(int(shutdownInProgressErrorCode)))
 	})
 
-	mtOpts = mtest.NewOptions().Topologies(mtest.Sharded).MinServerVersion("4.2")
+	mtOpts = mtest.NewOptions().Topologies(mtest.Sharded).MinServerVersion("4.2").AllowFailPointsOnSharded()
 	mt.RunOpts("retrying in sharded cluster", mtOpts, func(mt *mtest.T) {
 		tests := []struct {
 			name string

internal/integration/sdam_prose_test.go
Lines changed: 4 additions & 1 deletion

@@ -98,7 +98,10 @@ func TestSDAMProse(t *testing.T) {
 		SetAppName("streamingRttTest")
 	mtOpts := mtest.NewOptions().
 		MinServerVersion("4.4").
-		ClientOptions(clientOpts)
+		ClientOptions(clientOpts).
+		// TODO(GODRIVER-3328): FailPoints are not currently reliable on sharded
+		// clusters. Remove this exclusion once we fix that.
+		Topologies(mtest.Single, mtest.ReplicaSet, mtest.LoadBalanced)
 	mt.RunOpts("rtt is continuously updated", mtOpts, func(mt *mtest.T) {
 		// Test that the RTT monitor updates the RTT for server descriptions.
 

internal/integration/server_selection_prose_test.go
Lines changed: 1 addition & 1 deletion

@@ -112,7 +112,7 @@ func TestServerSelectionProse(t *testing.T) {
 
 	mt := mtest.New(t, mtest.NewOptions().CreateClient(false))
 
-	mtOpts := mtest.NewOptions().Topologies(mtest.Sharded).MinServerVersion("4.9")
+	mtOpts := mtest.NewOptions().Topologies(mtest.Sharded).MinServerVersion("4.9").AllowFailPointsOnSharded()
 	mt.RunOpts("operationCount-based selection within latency window, with failpoint", mtOpts, func(mt *mtest.T) {
 		_, err := mt.Coll.InsertOne(context.Background(), bson.D{})
 		require.NoError(mt, err, "InsertOne() error")
