Skip to content

Commit 4e42650

Browse files
committed
UPSTREAM: 8520: update node info processors to include unschedulable nodes (kubernetes#8520)
* pass allNodes to node info provider Process This change passes all the nodes to the mixed node info provider processor that is called from `RunOnce`. The change is to allow unschedulable and unready nodes to be processed as bad candidates during the node info template generation. The Process function has been updated to separate nodes into good and bad candidates to make the filtering match the original intent. * add --scale-from-unschedulable flag This change introduces a flag which will instruct the CA to ignore a node's `.spec.unschedulable` field when creating a node template for considering which node group to scale.
1 parent c53c715 commit 4e42650

File tree

7 files changed

+38
-11
lines changed

7 files changed

+38
-11
lines changed

cluster-autoscaler/FAQ.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1028,6 +1028,7 @@ The following startup parameters are supported for cluster autoscaler:
10281028
| `debugging-snapshot-enabled` | Whether the debugging snapshot of cluster autoscaler feature is enabled. | false
10291029
| `node-delete-delay-after-taint` | How long to wait before deleting a node after tainting it. | 5 seconds
10301030
| `enable-provisioning-requests` | Whether the clusterautoscaler will be handling the ProvisioningRequest CRs. | false
1031+
| `scale-from-unschedulable` | Whether CA should ignore a node's .spec.unschedulable field when creating a node template used to consider scaling a node group. | false |
10311032

10321033
# Troubleshooting
10331034

cluster-autoscaler/config/autoscaling_options.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,9 @@ type AutoscalingOptions struct {
234234
BalancingLabels []string
235235
// AWSUseStaticInstanceList tells if AWS cloud provider use static instance type list or dynamically fetch from remote APIs.
236236
AWSUseStaticInstanceList bool
237+
// ScaleFromUnschedulable tells the autoscaler to ignore a node's .spec.unschedulable field when creating a node template.
238+
// Specifically, this will cause the autoscaler to set the node template's .spec.unschedulable field to false.
239+
ScaleFromUnschedulable bool
237240
// GCEOptions contain autoscaling options specific to GCE cloud provider.
238241
GCEOptions GCEOptions
239242
// KubeClientOpts specify options for kube client

cluster-autoscaler/core/static_autoscaler.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr
358358
return typedErr.AddPrefix("failed to initialize RemainingPdbTracker: ")
359359
}
360360

361-
nodeInfosForGroups, autoscalerError := a.processors.TemplateNodeInfoProvider.Process(autoscalingContext, readyNodes, daemonsets, a.taintConfig, currentTime)
361+
nodeInfosForGroups, autoscalerError := a.processors.TemplateNodeInfoProvider.Process(autoscalingContext, allNodes, daemonsets, a.taintConfig, currentTime)
362362
if autoscalerError != nil {
363363
klog.Errorf("Failed to get node infos for groups: %v", autoscalerError)
364364
return autoscalerError.AddPrefix("failed to build node infos for node groups: ")

cluster-autoscaler/main.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ var (
225225
balancingIgnoreLabelsFlag = multiStringFlag("balancing-ignore-label", "Specifies a label to ignore in addition to the basic and cloud-provider set of labels when comparing if two node groups are similar")
226226
balancingLabelsFlag = multiStringFlag("balancing-label", "Specifies a label to use for comparing if two node groups are similar, rather than the built in heuristics. Setting this flag disables all other comparison logic, and cannot be combined with --balancing-ignore-label.")
227227
awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. AWS only")
228+
scaleFromUnschedulable = flag.Bool("scale-from-unschedulable", false, "Specifies that the CA should ignore a node's .spec.unschedulable field in node templates when considering to scale a node group.")
228229

229230
// GCE specific flags
230231
concurrentGceRefreshes = flag.Int("gce-concurrent-refreshes", 1, "Maximum number of concurrent refreshes per cloud object type.")
@@ -418,6 +419,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
418419
},
419420
NodeDeletionDelayTimeout: *nodeDeletionDelayTimeout,
420421
AWSUseStaticInstanceList: *awsUseStaticInstanceList,
422+
ScaleFromUnschedulable: *scaleFromUnschedulable,
421423
GCEOptions: config.GCEOptions{
422424
ConcurrentRefreshes: *concurrentGceRefreshes,
423425
MigInstancesMinRefreshWaitTime: *gceMigInstancesMinRefreshWaitTime,

cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor.go

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,18 @@ func (p *MixedTemplateNodeInfoProvider) Process(ctx *context.AutoscalingContext,
7878
result := make(map[string]*framework.NodeInfo)
7979
seenGroups := make(map[string]bool)
8080

81+
// Sort nodes into good and bad candidates for templates. The bad candidates will be processed
82+
// at the end of this function as a last resort for a node info template.
83+
goodCandidates := make([]*apiv1.Node, 0)
84+
badCandidates := make([]*apiv1.Node, 0)
85+
for _, node := range nodes {
86+
if isNodeGoodTemplateCandidate(node, now) {
87+
goodCandidates = append(goodCandidates, node)
88+
} else {
89+
badCandidates = append(badCandidates, node)
90+
}
91+
}
92+
8193
// processNode returns information whether the nodeTemplate was generated and if there was an error.
8294
processNode := func(node *apiv1.Node) (bool, string, caerror.AutoscalerError) {
8395
nodeGroup, err := ctx.CloudProvider.NodeGroupForNode(node)
@@ -103,11 +115,7 @@ func (p *MixedTemplateNodeInfoProvider) Process(ctx *context.AutoscalingContext,
103115
return false, "", nil
104116
}
105117

106-
for _, node := range nodes {
107-
// Broken nodes might have some stuff missing. Skipping.
108-
if !isNodeGoodTemplateCandidate(node, now) {
109-
continue
110-
}
118+
for _, node := range goodCandidates {
111119
added, id, typedErr := processNode(node)
112120
if typedErr != nil {
113121
return map[string]*framework.NodeInfo{}, typedErr
@@ -158,11 +166,7 @@ func (p *MixedTemplateNodeInfoProvider) Process(ctx *context.AutoscalingContext,
158166
}
159167

160168
// Last resort - unready/unschedulable nodes.
161-
for _, node := range nodes {
162-
// Allowing broken nodes
163-
if isNodeGoodTemplateCandidate(node, now) {
164-
continue
165-
}
169+
for _, node := range badCandidates {
166170
added, _, typedErr := processNode(node)
167171
if typedErr != nil {
168172
return map[string]*framework.NodeInfo{}, typedErr

cluster-autoscaler/simulator/node_info_utils.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,12 @@ func createSanitizedNode(node *apiv1.Node, newName string, taintConfig *taints.T
112112
}
113113
newNode.Labels[apiv1.LabelHostname] = newName
114114

115+
if taintConfig != nil {
116+
if taintConfig.ShouldScaleFromUnschedulable() {
117+
newNode.Spec.Unschedulable = false
118+
}
119+
}
120+
115121
if taintConfig != nil {
116122
newNode.Spec.Taints = taints.SanitizeTaints(newNode.Spec.Taints, *taintConfig)
117123
}

cluster-autoscaler/utils/taints/taints.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,11 @@ type TaintConfig struct {
9696
startupTaintPrefixes []string
9797
statusTaintPrefixes []string
9898
explicitlyReportedTaints TaintKeySet
99+
// The scaleFromUnschedulable field helps to inform the CA when
100+
// to ignore .spec.unschedulable for a node. It is being added to this
101+
// struct for convenience as it will be used in similar places that check
102+
// for taints to ignore.
103+
scaleFromUnschedulable bool
99104
}
100105

101106
// NewTaintConfig returns the taint config extracted from options
@@ -127,6 +132,7 @@ func NewTaintConfig(opts config.AutoscalingOptions) TaintConfig {
127132
startupTaintPrefixes: []string{IgnoreTaintPrefix, StartupTaintPrefix},
128133
statusTaintPrefixes: []string{StatusTaintPrefix},
129134
explicitlyReportedTaints: explicitlyReportedTaints,
135+
scaleFromUnschedulable: opts.ScaleFromUnschedulable,
130136
}
131137
}
132138

@@ -146,6 +152,11 @@ func (tc TaintConfig) IsStatusTaint(taint string) bool {
146152
return matchesAnyPrefix(tc.statusTaintPrefixes, taint)
147153
}
148154

155+
// ShouldScaleFromUnschedulable returns whether a node's .spec.unschedulable field should be ignored.
156+
func (tc TaintConfig) ShouldScaleFromUnschedulable() bool {
157+
return tc.scaleFromUnschedulable
158+
}
159+
149160
func (tc TaintConfig) isExplicitlyReportedTaint(taint string) bool {
150161
_, ok := tc.explicitlyReportedTaints[taint]
151162
return ok

0 commit comments

Comments
 (0)