Skip to content

Commit a8e1967

Browse files
authored
[Feature] Add ResignLeadership to plan execution (#687)
1 parent cb1f17a commit a8e1967

File tree

7 files changed

+164
-1
lines changed

7 files changed

+164
-1
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
- Add support for spec.ClusterDomain to be able to use FQDN in ArangoDB cluster communication
55
- Add Version Check feature with extended Upgrade checks
66
- Fix Upgrade failures recovery
7+
- Add ResignLeadership action before Upgrade, Restart and Shutdown actions
78

89
## [1.1.3](https://github.com/arangodb/kube-arangodb/tree/1.1.3) (2020-12-16)
910
- Add v2alpha1 API for ArangoDeployment and ArangoDeploymentReplication

pkg/apis/deployment/v1/plan.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ const (
5151
ActionTypeCleanOutMember ActionType = "CleanOutMember"
5252
// ActionTypeShutdownMember causes a member to be shutdown and removed from the cluster.
5353
ActionTypeShutdownMember ActionType = "ShutdownMember"
54+
// ActionTypeResignLeadership causes a member to resign leadership.
55+
ActionTypeResignLeadership ActionType = "ResignLeadership"
5456
// ActionTypeRotateMember causes a member to be shutdown and have its pod removed.
5557
ActionTypeRotateMember ActionType = "RotateMember"
5658
// ActionTypeRotateStartMember causes a member to be shutdown and have its pod removed. Does not wait for the pod to recover.

pkg/apis/deployment/v2alpha1/plan.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ const (
5151
ActionTypeCleanOutMember ActionType = "CleanOutMember"
5252
// ActionTypeShutdownMember causes a member to be shutdown and removed from the cluster.
5353
ActionTypeShutdownMember ActionType = "ShutdownMember"
54+
// ActionTypeResignLeadership causes a member to resign leadership.
55+
ActionTypeResignLeadership ActionType = "ResignLeadership"
5456
// ActionTypeRotateMember causes a member to be shutdown and have its pod removed.
5557
ActionTypeRotateMember ActionType = "RotateMember"
5658
// ActionTypeRotateStartMember causes a member to be shutdown and have its pod removed. Does not wait for the pod to recover.
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
//
2+
// DISCLAIMER
3+
//
4+
// Copyright 2021 ArangoDB GmbH, Cologne, Germany
5+
//
6+
// Licensed under the Apache License, Version 2.0 (the "License");
7+
// you may not use this file except in compliance with the License.
8+
// You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing, software
13+
// distributed under the License is distributed on an "AS IS" BASIS,
14+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
// See the License for the specific language governing permissions and
16+
// limitations under the License.
17+
//
18+
// Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
//
20+
// Author Adam Janikowski
21+
//
22+
23+
package reconcile
24+
25+
import (
26+
"context"
27+
28+
"github.com/arangodb/go-driver"
29+
"github.com/arangodb/kube-arangodb/pkg/util/arangod"
30+
31+
"github.com/arangodb/kube-arangodb/pkg/util/errors"
32+
33+
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
34+
"github.com/rs/zerolog"
35+
)
36+
37+
func init() {
38+
registerAction(api.ActionTypeResignLeadership, newResignLeadershipAction)
39+
}
40+
41+
// newResignLeadershipAction creates a new Action that implements the given
42+
// planned ResignLeadership action.
43+
func newResignLeadershipAction(log zerolog.Logger, action api.Action, actionCtx ActionContext) Action {
44+
a := &actionResignLeadership{}
45+
46+
a.actionImpl = newActionImplDefRef(log, action, actionCtx, shutdownMemberTimeout)
47+
48+
return a
49+
}
50+
51+
// actionResignLeadership implements an ResignLeadershipAction.
52+
type actionResignLeadership struct {
53+
actionImpl
54+
}
55+
56+
// Start performs the start of the ReasignLeadership process on DBServer.
57+
func (a *actionResignLeadership) Start(ctx context.Context) (bool, error) {
58+
log := a.log
59+
group := a.action.Group
60+
m, ok := a.actionCtx.GetMemberStatusByID(a.action.MemberID)
61+
if !ok {
62+
log.Error().Msg("No such member")
63+
return true, nil
64+
}
65+
66+
if a.actionCtx.GetSpec().Mode.Get() != api.DeploymentModeCluster {
67+
log.Debug().Msg("Resign only allowed in cluster mode")
68+
return true, nil
69+
}
70+
71+
client, err := a.actionCtx.GetDatabaseClient(ctx)
72+
if err != nil {
73+
log.Error().Err(err).Msgf("Unable to get client")
74+
return true, errors.WithStack(err)
75+
}
76+
77+
switch group {
78+
case api.ServerGroupDBServers:
79+
cluster, err := client.Cluster(ctx)
80+
if err != nil {
81+
log.Error().Err(err).Msgf("Unable to get cluster client")
82+
return true, errors.WithStack(err)
83+
}
84+
85+
var jobID string
86+
jobCtx := driver.WithJobIDResponse(ctx, &jobID)
87+
log.Debug().Msg("Temporary shutdown, resign leadership")
88+
if err := cluster.ResignServer(jobCtx, m.ID); err != nil {
89+
log.Debug().Err(err).Msg("Failed to resign server")
90+
return true, errors.WithStack(err)
91+
}
92+
93+
m.CleanoutJobID = jobID
94+
95+
if err := a.actionCtx.UpdateMember(m); err != nil {
96+
return true, errors.WithStack(err)
97+
}
98+
99+
return false, nil
100+
default:
101+
return true, nil
102+
}
103+
}
104+
105+
// CheckProgress checks if Job is completed.
106+
func (a *actionResignLeadership) CheckProgress(ctx context.Context) (bool, bool, error) {
107+
log := a.log
108+
109+
m, ok := a.actionCtx.GetMemberStatusByID(a.action.MemberID)
110+
if !ok {
111+
log.Error().Msg("No such member")
112+
return true, false, nil
113+
}
114+
115+
agency, err := a.actionCtx.GetAgency(ctx)
116+
if err != nil {
117+
log.Debug().Err(err).Msg("Failed to create agency client")
118+
return false, false, errors.WithStack(err)
119+
}
120+
121+
c, err := a.actionCtx.GetDatabaseClient(ctx)
122+
if err != nil {
123+
log.Debug().Err(err).Msg("Failed to create member client")
124+
return false, false, errors.WithStack(err)
125+
}
126+
127+
jobStatus, err := arangod.CleanoutServerJobStatus(ctx, m.CleanoutJobID, c, agency)
128+
if err != nil {
129+
if driver.IsNotFound(err) {
130+
log.Debug().Err(err).Msg("Job not found, but proceeding")
131+
return true, false, nil
132+
}
133+
log.Debug().Err(err).Msg("Failed to fetch job status")
134+
return false, false, errors.WithStack(err)
135+
}
136+
137+
if jobStatus.IsFailed() {
138+
m.CleanoutJobID = ""
139+
if err := a.actionCtx.UpdateMember(m); err != nil {
140+
return false, false, errors.WithStack(err)
141+
}
142+
log.Error().Msg("Resign server job failed")
143+
return true, false, nil
144+
}
145+
146+
if jobStatus.IsFinished() {
147+
m.CleanoutJobID = ""
148+
if err := a.actionCtx.UpdateMember(m); err != nil {
149+
return false, false, errors.WithStack(err)
150+
}
151+
return true, false, nil
152+
}
153+
154+
return false, false, nil
155+
}

pkg/deployment/reconcile/plan_builder.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,7 @@ func createRotateMemberPlan(log zerolog.Logger, member api.MemberStatus,
300300
Msg("Creating rotation plan")
301301
plan := api.Plan{
302302
api.NewAction(api.ActionTypeCleanTLSKeyfileCertificate, group, member.ID, "Remove server keyfile and enforce renewal/recreation"),
303+
api.NewAction(api.ActionTypeResignLeadership, group, member.ID, reason),
303304
api.NewAction(api.ActionTypeRotateMember, group, member.ID, reason),
304305
api.NewAction(api.ActionTypeWaitForMemberUp, group, member.ID),
305306
api.NewAction(api.ActionTypeWaitForMemberInSync, group, member.ID),

pkg/deployment/reconcile/plan_builder_rotate_upgrade.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -362,8 +362,9 @@ func createUpgradeMemberPlan(log zerolog.Logger, member api.MemberStatus,
362362
)
363363
}
364364
plan = append(plan,
365+
api.NewAction(api.ActionTypeResignLeadership, group, member.ID, reason),
365366
api.NewAction(upgradeAction, group, member.ID, reason),
366367
api.NewAction(api.ActionTypeWaitForMemberUp, group, member.ID),
367368
)
368-
return plan
369+
return withMaintenance(plan...)
369370
}

pkg/deployment/reconcile/plan_builder_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -720,6 +720,7 @@ func TestCreatePlan(t *testing.T) {
720720
},
721721
ExpectedPlan: []api.Action{
722722
api.NewAction(api.ActionTypeCleanTLSKeyfileCertificate, api.ServerGroupAgents, "", "Remove server keyfile and enforce renewal/recreation"),
723+
api.NewAction(api.ActionTypeResignLeadership, api.ServerGroupAgents, ""),
723724
api.NewAction(api.ActionTypeRotateMember, api.ServerGroupAgents, ""),
724725
api.NewAction(api.ActionTypeWaitForMemberUp, api.ServerGroupAgents, ""),
725726
api.NewAction(api.ActionTypeWaitForMemberInSync, api.ServerGroupAgents, ""),

0 commit comments

Comments
 (0)