Skip to content

Commit a0f656b

Browse files
committed
add updater for EKS AMI
1 parent 1628652 commit a0f656b

File tree

8 files changed

+450
-0
lines changed

8 files changed

+450
-0
lines changed

cluster-update/LICENSE

Whitespace-only changes.

cluster-update/action.yml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Composite GitHub Action that updates the AMI of every managed
# nodegroup in an EKS cluster by building and running the bundled Go CLI.
name: 'EKS managed NodeGroup AMI Updater'
description: 'Updates the AMI used by managed EKS NodeGroups'
author: 'Cameron Larsen @cam3ron2'
branding:
  icon: 'terminal'
  color: 'blue'

inputs:
  cluster_name:
    description: 'name of the cluster we will be updating'
    required: true
  region:
    description: 'Region of k8s resources'
    required: true

runs:
  using: "composite"
  steps:
    # Toolchain for building the updater CLI.
    - uses: actions/setup-go@v3
      with:
        go-version: '1.18.1'
    # setup.sh builds/prepares the CLI; run.sh executes it with the inputs.
    - run: ${{ github.action_path }}/setup.sh
      shell: bash
    - run: ${{ github.action_path }}/run.sh ${{ inputs.cluster_name }} ${{ inputs.region }}
      id: open
      shell: bash

cluster-update/cmd/root.go

Lines changed: 354 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,354 @@
1+
/*
2+
Copyright © 2022 Cameron Larsen <cameron.larsen@kuali.co>
3+
4+
*/
5+
package cmd
6+
7+
import (
8+
"fmt"
9+
"log"
10+
"math/rand"
11+
"os"
12+
"time"
13+
14+
"github.com/aws/aws-sdk-go/aws"
15+
"github.com/aws/aws-sdk-go/aws/credentials"
16+
"github.com/aws/aws-sdk-go/aws/session"
17+
"github.com/aws/aws-sdk-go/service/eks"
18+
"github.com/spf13/cobra"
19+
)
20+
21+
// rootCmd represents the base command when called without any subcommands
22+
var (
23+
// init flag vars
24+
region string
25+
clusterName string
26+
quick bool
27+
envExist bool
28+
29+
rootCmd = &cobra.Command{
30+
Use: "clusterupdate",
31+
Short: "Update EKS Cluster AMI's",
32+
Long: `A CLI tool to be used with github actions to update
33+
EKS cluster AMI's. For example:
34+
clusterupdate --quick --region us-east-1 --cluster-name my-cluster`,
35+
Args: cobra.MinimumNArgs(0),
36+
Run: func(cmd *cobra.Command, args []string) {
37+
}, // do things here
38+
}
39+
)
40+
41+
// Execute adds all child commands to the root command and sets flags appropriately.
42+
// This is called by main.main(). It only needs to happen once to the rootCmd.
43+
func Execute() {
44+
err := rootCmd.Execute()
45+
if err != nil {
46+
os.Exit(1)
47+
}
48+
}
49+
50+
func init() {
51+
// Here you will define your flags and configuration settings.
52+
cobra.OnInitialize(initConfig)
53+
54+
rootCmd.PersistentFlags().StringVarP(&region, "region", "r", "us-east-1", "AWS region (required)")
55+
rootCmd.PersistentFlags().StringVarP(&clusterName, "cluster-name", "c", "", "EKS cluster name (required)")
56+
rootCmd.PersistentFlags().BoolVar(&quick, "quick", true, "Target empty nodegroups first for quick update")
57+
58+
// mark some flags as required
59+
rootCmd.MarkFlagRequired("region")
60+
rootCmd.MarkFlagRequired("cluster-name")
61+
62+
}
63+
64+
func waitUntilActive(attempts int, sleep time.Duration, clusterName string, client *eks.EKS) bool {
65+
// set max backoff of 1m
66+
var maxTime = time.Duration(60000000000)
67+
active, state := isClusterActive(clusterName, client)
68+
log.Printf(" * %s state is: '%s'", clusterName, state)
69+
if active == true {
70+
return true
71+
}
72+
73+
if attempts--; attempts > 0 {
74+
// Add some randomness to prevent creating a Thundering Herd
75+
jitter := time.Duration(rand.Int63n(int64(sleep)))
76+
sleep = sleep + jitter/2
77+
log.Printf(" * Checking again in %s", sleep)
78+
time.Sleep(sleep)
79+
sleep = sleep * 2
80+
if sleep > maxTime {
81+
sleep = maxTime
82+
}
83+
return waitUntilActive(attempts, sleep, clusterName, client)
84+
}
85+
86+
return false
87+
}
88+
89+
func waitUntilUpdateComplete(attempts int, sleep time.Duration, nodeGroup, clusterName string, client *eks.EKS) bool {
90+
// set max backoff of 1m
91+
var maxTime = time.Duration(60000000000)
92+
active, state := isUpdateComplete(nodeGroup, clusterName, client)
93+
if active == true {
94+
log.Printf(" * %s state: '%s' - resuming work.", nodeGroup, state)
95+
return true
96+
} else {
97+
log.Printf(" * %s state: '%s'", nodeGroup, state)
98+
}
99+
100+
if attempts--; attempts > 0 {
101+
// Add some randomness to prevent creating a Thundering Herd
102+
jitter := time.Duration(rand.Int63n(int64(sleep)))
103+
sleep = sleep + jitter/2
104+
time.Sleep(sleep)
105+
sleep = sleep * 2
106+
if sleep > maxTime {
107+
sleep = maxTime
108+
}
109+
return waitUntilUpdateComplete(attempts, sleep, nodeGroup, clusterName, client)
110+
}
111+
112+
return false
113+
}
114+
115+
func isEnvExist(key string) bool {
116+
// verify if env var is set
117+
if _, ok := os.LookupEnv(key); ok {
118+
return true
119+
}
120+
return false
121+
}
122+
123+
func isUpdateComplete(nodeGroup string, clusterName string, client *eks.EKS) (bool, string) {
124+
// checks update status for a specified nodegroup
125+
var (
126+
describeNodegroupOutput *eks.DescribeNodegroupOutput
127+
err error
128+
)
129+
describeNodegroupOutput, err = client.DescribeNodegroup(&eks.DescribeNodegroupInput{
130+
ClusterName: &clusterName,
131+
NodegroupName: &nodeGroup,
132+
})
133+
if err != nil {
134+
log.Fatalf("%v", err)
135+
}
136+
137+
if *describeNodegroupOutput.Nodegroup.Status == "ACTIVE" {
138+
return true, *describeNodegroupOutput.Nodegroup.Status
139+
}
140+
141+
return false, *describeNodegroupOutput.Nodegroup.Status
142+
}
143+
144+
func isClusterExist(clusterName string, client *eks.EKS) bool {
145+
// verify if cluster exists in specified region
146+
_, err := client.DescribeCluster(&eks.DescribeClusterInput{
147+
Name: &clusterName,
148+
})
149+
if err != nil {
150+
log.Fatalf("%v", err)
151+
return false
152+
}
153+
154+
return true
155+
}
156+
157+
func isClusterActive(clusterName string, client *eks.EKS) (bool, string) {
158+
// verify if cluster is ready
159+
var (
160+
clusterStatus string
161+
describeClusterOutput *eks.DescribeClusterOutput
162+
err error
163+
)
164+
describeClusterOutput, err = client.DescribeCluster(&eks.DescribeClusterInput{
165+
Name: &clusterName,
166+
})
167+
if err != nil {
168+
log.Fatalf("%v", err)
169+
return false, "ERROR"
170+
}
171+
clusterStatus = aws.StringValue(describeClusterOutput.Cluster.Status)
172+
if clusterStatus == "ACTIVE" {
173+
return true, clusterStatus
174+
}
175+
return false, clusterStatus
176+
}
177+
178+
func updateNodeGroup(nodeGroup, clusterName string, client *eks.EKS) bool {
179+
// updates a specified nodegroup
180+
_, err := client.UpdateNodegroupVersion(&eks.UpdateNodegroupVersionInput{
181+
ClusterName: &clusterName,
182+
NodegroupName: &nodeGroup,
183+
})
184+
if err != nil {
185+
log.Fatalf("%v", err)
186+
return false
187+
}
188+
189+
return true
190+
}
191+
192+
func listNodeGroups(clusterName string, client *eks.EKS, nextToken string) []string {
193+
// list nodegroups in a specified cluster
194+
var (
195+
nodeGroups []string
196+
listNodegroupsOutput *eks.ListNodegroupsOutput
197+
err error
198+
)
199+
200+
if nextToken == "" {
201+
listNodegroupsOutput, err = client.ListNodegroups(&eks.ListNodegroupsInput{
202+
ClusterName: &clusterName,
203+
MaxResults: aws.Int64(2),
204+
})
205+
} else {
206+
listNodegroupsOutput, err = client.ListNodegroups(&eks.ListNodegroupsInput{
207+
ClusterName: &clusterName,
208+
MaxResults: aws.Int64(2),
209+
NextToken: aws.String(nextToken),
210+
})
211+
}
212+
213+
if err != nil {
214+
log.Fatalf("%v", err)
215+
}
216+
217+
for _, nodeGroupName := range listNodegroupsOutput.Nodegroups {
218+
nodeGroups = append(nodeGroups, *nodeGroupName)
219+
}
220+
221+
if listNodegroupsOutput.NextToken != nil {
222+
additional := listNodeGroups(clusterName, client, aws.StringValue(listNodegroupsOutput.NextToken))
223+
for _, nodeGroup := range additional {
224+
nodeGroups = append(nodeGroups, nodeGroup)
225+
}
226+
}
227+
228+
return nodeGroups
229+
}
230+
231+
func describeNodeGroups(nodeGroups []string, client *eks.EKS) map[string]*eks.Nodegroup {
232+
// describe a list of nodegroups; returns map
233+
var (
234+
nodeGroupsMap = make(map[string]*eks.Nodegroup)
235+
describeNodegroupsOutput *eks.DescribeNodegroupOutput
236+
err error
237+
)
238+
239+
for _, nodeGroup := range nodeGroups {
240+
describeNodegroupsOutput, err = client.DescribeNodegroup(&eks.DescribeNodegroupInput{
241+
ClusterName: &clusterName,
242+
NodegroupName: &nodeGroup,
243+
})
244+
if err != nil {
245+
log.Fatalf("%v", err)
246+
}
247+
nodeGroupsMap[nodeGroup] = describeNodegroupsOutput.Nodegroup
248+
}
249+
250+
return nodeGroupsMap
251+
}
252+
253+
func startSession(config *aws.Config) *session.Session {
254+
// start a session
255+
sess, err := session.NewSession(config)
256+
if err != nil {
257+
log.Fatalf("%v", err)
258+
}
259+
return sess
260+
}
261+
262+
func initConfig() {
263+
rand.Seed(time.Now().UnixNano())
264+
265+
// get AWS credentials
266+
var (
267+
creds *credentials.Credentials
268+
err error
269+
order []string
270+
last []string
271+
)
272+
// if env vars are set use them, otherwise use profile
273+
if isEnvExist("AWS_ACCESS_KEY_ID") || isEnvExist("AWS_SECRET_ACCESS_KEY") {
274+
creds = credentials.NewEnvCredentials()
275+
} else if isEnvExist("AWS_PROFILE") {
276+
creds = credentials.NewSharedCredentials("", os.Getenv("AWS_PROFILE"))
277+
} else {
278+
err = fmt.Errorf("Either both 'AWS_ACCESS_KEY_ID' and 'AWS_SECRET_ACCESS_KEY' or 'AWS_PROFILE' are required")
279+
log.Fatalf("%v", err)
280+
}
281+
282+
// Retrieve the credentials value
283+
credValue, err := creds.Get()
284+
if err != nil {
285+
log.Fatalf("%v", err)
286+
}
287+
288+
if credValue.AccessKeyID == "" || credValue.SecretAccessKey == "" {
289+
log.Fatalf("Unable to retrieve AWS credentials")
290+
} else {
291+
log.Printf("AWS credentials retrieved")
292+
}
293+
294+
awsConfig := aws.NewConfig().
295+
WithRegion(region).
296+
WithCredentials(creds).
297+
WithDisableSSL(false).
298+
WithMaxRetries(20)
299+
300+
session := startSession(awsConfig)
301+
eksClient := eks.New(session)
302+
if eksClient != nil {
303+
log.Printf("EKS client initialized")
304+
}
305+
306+
if isClusterExist(clusterName, eksClient) {
307+
log.Printf("Located cluster %s in region %s", clusterName, region)
308+
} else {
309+
log.Fatalf("Unable to locate cluser %s in region %s", clusterName, region)
310+
}
311+
312+
log.Printf("Dumping nodegroups in cluster %s", clusterName)
313+
314+
nodeGroups := listNodeGroups(clusterName, eksClient, "")
315+
316+
if quick == true {
317+
nodeGroupsDetail := describeNodeGroups(nodeGroups, eksClient)
318+
319+
for _, ng := range nodeGroupsDetail {
320+
if *ng.ScalingConfig.DesiredSize == 0 {
321+
log.Printf("NodeGroup %v has 0 nodes, prioritizing for upgrade", *ng.NodegroupName)
322+
order = append(order, *ng.NodegroupName)
323+
} else {
324+
last = append(last, *ng.NodegroupName)
325+
}
326+
}
327+
328+
for _, i := range last {
329+
order = append(order, i)
330+
}
331+
} else {
332+
order = nodeGroups
333+
}
334+
335+
log.Printf("Pausing until %v state is 'ACTIVE'", clusterName)
336+
waitUntilActive(60, time.Second, clusterName, eksClient)
337+
338+
// begin updates
339+
for _, nodeGroup := range order {
340+
// wait for cluster to be ready
341+
log.Printf("Pausing until NodeGroup %v state is 'ACTIVE'", nodeGroup)
342+
waitUntilUpdateComplete(60, time.Second, nodeGroup, clusterName, eksClient)
343+
344+
if updateNodeGroup(nodeGroup, clusterName, eksClient) {
345+
log.Printf("Updated nodegroup %s", nodeGroup)
346+
}
347+
348+
// wait for cluster to be ready
349+
log.Printf("Pausing until NodeGroup %v state is 'ACTIVE'", nodeGroup)
350+
waitUntilUpdateComplete(60, time.Second, nodeGroup, clusterName, eksClient)
351+
}
352+
353+
log.Printf("Updates complete!")
354+
}

cluster-update/go.mod

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
module clusterupdate

go 1.18

require (
	github.com/aws/aws-sdk-go v1.44.10
	github.com/spf13/cobra v1.4.0
)

require (
	github.com/inconshreveable/mousetrap v1.0.0 // indirect
	github.com/jmespath/go-jmespath v0.4.0 // indirect
	github.com/spf13/pflag v1.0.5 // indirect
)

0 commit comments

Comments
 (0)