@@ -2,7 +2,9 @@ package operator
 
 import (
 	"context"
+	goerrors "errors"
 	"fmt"
+	"k8s.io/apimachinery/pkg/api/errors"
 	"slices"
 	"sort"
 	"strings"
@@ -11,7 +13,6 @@ import (
 	"github.com/hashicorp/go-multierror"
 	"go.uber.org/zap"
 	"golang.org/x/xerrors"
-	"k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/util/intstr"
 	"k8s.io/utils/ptr"
@@ -1611,6 +1612,7 @@ func (r *ShardedClusterReconcileHelper) cleanOpsManagerState(ctx context.Context
 	}
 
 	logDiffOfProcessNames(processNames, r.getHealthyProcessNames(), log.With("ctx", "cleanOpsManagerState"))
+	// we're onDelete, we cannot requeue, so we need to wait
 	if err := om.WaitForReadyState(conn, r.getHealthyProcessNames(), false, log); err != nil {
 		return err
 	}
@@ -1849,13 +1851,12 @@ func (r *ShardedClusterReconcileHelper) updateOmDeploymentShardedCluster(ctx con
 
 	healthyProcessesToWaitForReadyState := r.getHealthyProcessNamesToWaitForReadyState(conn, log)
 	logDiffOfProcessNames(processNames, healthyProcessesToWaitForReadyState, log.With("ctx", "updateOmDeploymentShardedCluster"))
-	if err = om.WaitForReadyState(conn, healthyProcessesToWaitForReadyState, isRecovering, log); err != nil {
-		if !isRecovering {
-			if shardsRemoving {
-				return workflow.Pending("automation agents haven't reached READY state: shards removal in progress: %v", err)
-			}
-			return workflow.Failed(err)
+
+	if !isRecovering {
+		if workflowStatus := om.CheckForReadyState(conn, healthyProcessesToWaitForReadyState, log); !workflowStatus.IsOK() {
+			return workflowStatus
 		}
+	} else {
 		logWarnIgnoredDueToRecovery(log, err)
 	}
 
@@ -1873,12 +1874,16 @@ func (r *ShardedClusterReconcileHelper) updateOmDeploymentShardedCluster(ctx con
 
 		healthyProcessesToWaitForReadyState := r.getHealthyProcessNamesToWaitForReadyState(conn, log)
 		logDiffOfProcessNames(processNames, healthyProcessesToWaitForReadyState, log.With("ctx", "shardsRemoving"))
-		if err = om.WaitForReadyState(conn, healthyProcessesToWaitForReadyState, isRecovering, log); err != nil {
-			if !isRecovering {
-				return workflow.Failed(xerrors.Errorf("automation agents haven't reached READY state while cleaning replica set and processes: %w", err))
-			}
+		if isRecovering {
 			logWarnIgnoredDueToRecovery(log, err)
 		}
+		if err = om.CheckForReadyStateReturningError(conn, healthyProcessesToWaitForReadyState, log); err != nil {
+			pendingErr := om.PendingErr{}
+			if ok := goerrors.As(err, &pendingErr); ok {
+				return workflow.Pending(pendingErr.Error())
+			}
+			return workflow.Failed(err)
+		}
 	}
 
 	currentHosts := r.getAllHostnames(false)
@@ -2042,8 +2047,13 @@ func (r *ShardedClusterReconcileHelper) publishDeployment(ctx context.Context, c
 
 	healthyProcessesToWaitForReadyState = r.getHealthyProcessNamesToWaitForReadyState(conn, log)
 	logDiffOfProcessNames(opts.processNames, healthyProcessesToWaitForReadyState, log.With("ctx", "publishDeployment"))
-	if err := om.WaitForReadyState(conn, healthyProcessesToWaitForReadyState, isRecovering, log); err != nil {
-		return nil, shardsRemoving, workflow.Failed(err)
+
+	if !isRecovering {
+		if workflowStatus := om.CheckForReadyState(conn, healthyProcessesToWaitForReadyState, log); workflowStatus != workflow.OK() {
+			return nil, shardsRemoving, workflowStatus
+		}
+	} else {
+		log.Warnf("Ignoring checking for ready state due to recovering")
 	}
 
 	if additionalReconciliationRequired {