Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cluster-autoscaler/FAQ.md
Original file line number Diff line number Diff line change
Expand Up @@ -1022,6 +1022,7 @@ The following startup parameters are supported for cluster autoscaler:
| `grpc-expander-cert` | Path to cert used by gRPC server over TLS | |
| `grpc-expander-url` | URL to reach gRPC expander server. | |
| `ignore-daemonsets-utilization` | Should CA ignore DaemonSet pods when calculating resource utilization for scaling down | |
| `ignore-node-unschedulable` | Should CA ignore a node's `.spec.unschedulable` field in node templates when considering to scale a node group | false |
| `ignore-mirror-pods-utilization` | Should CA ignore Mirror pods when calculating resource utilization for scaling down | |
| `ignore-taint` | Specifies a taint to ignore in node templates when considering to scale a node group (Deprecated, use startup-taints instead) | [] |
| `initial-node-group-backoff-duration` | initialNodeGroupBackoffDuration is the duration of first backoff after a new node failed to start. | 5m0s |
Expand Down
3 changes: 3 additions & 0 deletions cluster-autoscaler/config/autoscaling_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,9 @@ type AutoscalingOptions struct {
BalancingLabels []string
// AWSUseStaticInstanceList tells if AWS cloud provider use static instance type list or dynamically fetch from remote APIs.
AWSUseStaticInstanceList bool
// IgnoreNodeUnschedulable tells the autoscaler to ignore a node's .spec.unschedulable field when creating a node template.
// Specifically, this will cause the autoscaler to set the node template's .spec.unschedulable field to false.
IgnoreNodeUnschedulable bool
// GCEOptions contain autoscaling options specific to GCE cloud provider.
GCEOptions GCEOptions
// KubeClientOpts specify options for kube client
Expand Down
2 changes: 2 additions & 0 deletions cluster-autoscaler/config/flags/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ var (
balancingIgnoreLabelsFlag = multiStringFlag("balancing-ignore-label", "Specifies a label to ignore in addition to the basic and cloud-provider set of labels when comparing if two node groups are similar")
balancingLabelsFlag = multiStringFlag("balancing-label", "Specifies a label to use for comparing if two node groups are similar, rather than the built in heuristics. Setting this flag disables all other comparison logic, and cannot be combined with --balancing-ignore-label.")
awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. AWS only")
ignoreNodeUnschedulable = flag.Bool("ignore-node-unschedulable", false, "Specifies that the CA should ignore a node's .spec.unschedulable field in node templates when considering to scale a node group.")

// GCE specific flags
concurrentGceRefreshes = flag.Int("gce-concurrent-refreshes", 1, "Maximum number of concurrent refreshes per cloud object type.")
Expand Down Expand Up @@ -351,6 +352,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
},
NodeDeletionDelayTimeout: *nodeDeletionDelayTimeout,
AWSUseStaticInstanceList: *awsUseStaticInstanceList,
IgnoreNodeUnschedulable: *ignoreNodeUnschedulable,
GCEOptions: config.GCEOptions{
ConcurrentRefreshes: *concurrentGceRefreshes,
MigInstancesMinRefreshWaitTime: *gceMigInstancesMinRefreshWaitTime,
Expand Down
2 changes: 1 addition & 1 deletion cluster-autoscaler/core/static_autoscaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr
return typedErr.AddPrefix("failed to initialize RemainingPdbTracker: ")
}

nodeInfosForGroups, autoscalerError := a.processors.TemplateNodeInfoProvider.Process(autoscalingCtx, readyNodes, daemonsets, a.taintConfig, currentTime)
nodeInfosForGroups, autoscalerError := a.processors.TemplateNodeInfoProvider.Process(autoscalingCtx, allNodes, daemonsets, a.taintConfig, currentTime)
if autoscalerError != nil {
klog.Errorf("Failed to get node infos for groups: %v", autoscalerError)
return autoscalerError.AddPrefix("failed to build node infos for node groups: ")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,18 @@ func (p *MixedTemplateNodeInfoProvider) Process(autoscalingCtx *ca_context.Autos
result := make(map[string]*framework.NodeInfo)
seenGroups := make(map[string]bool)

// Split nodes into good and bad candidates for templates. The bad candidates are processed
// at the end of this function, as a last resort for a node info template.
goodCandidates := make([]*apiv1.Node, 0)
badCandidates := make([]*apiv1.Node, 0)
for _, node := range nodes {
if isNodeGoodTemplateCandidate(node, now) {
goodCandidates = append(goodCandidates, node)
} else {
badCandidates = append(badCandidates, node)
}
}

// processNode returns information whether the nodeTemplate was generated and if there was an error.
processNode := func(node *apiv1.Node) (bool, string, caerror.AutoscalerError) {
nodeGroup, err := autoscalingCtx.CloudProvider.NodeGroupForNode(node)
Expand All @@ -103,11 +115,7 @@ func (p *MixedTemplateNodeInfoProvider) Process(autoscalingCtx *ca_context.Autos
return false, "", nil
}

for _, node := range nodes {
// Broken nodes might have some stuff missing. Skipping.
if !isNodeGoodTemplateCandidate(node, now) {
continue
}
for _, node := range goodCandidates {
added, id, typedErr := processNode(node)
if typedErr != nil {
return map[string]*framework.NodeInfo{}, typedErr
Expand Down Expand Up @@ -156,11 +164,7 @@ func (p *MixedTemplateNodeInfoProvider) Process(autoscalingCtx *ca_context.Autos
}

// Last resort - unready/unschedulable nodes.
for _, node := range nodes {
// Allowing broken nodes
if isNodeGoodTemplateCandidate(node, now) {
continue
}
for _, node := range badCandidates {
added, _, typedErr := processNode(node)
if typedErr != nil {
return map[string]*framework.NodeInfo{}, typedErr
Expand Down
4 changes: 4 additions & 0 deletions cluster-autoscaler/simulator/node_info_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ func createSanitizedNode(node *apiv1.Node, newName string, taintConfig *taints.T
}
newNode.Labels[apiv1.LabelHostname] = newName

if taintConfig != nil && taintConfig.ShouldIgnoreNodeUnschedulable() {
newNode.Spec.Unschedulable = false
}

if taintConfig != nil {
newNode.Spec.Taints = taints.SanitizeTaints(newNode.Spec.Taints, *taintConfig)
}
Expand Down
11 changes: 11 additions & 0 deletions cluster-autoscaler/utils/taints/taints.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ type TaintConfig struct {
startupTaintPrefixes []string
statusTaintPrefixes []string
explicitlyReportedTaints TaintKeySet
// ignoreNodeUnschedulable tells the CA whether to ignore
// .spec.unschedulable for a node. It lives on this struct for
// convenience, because it is used in the same places that check
// for taints to ignore.
ignoreNodeUnschedulable bool
}

// NewTaintConfig returns the taint config extracted from options
Expand Down Expand Up @@ -128,6 +133,7 @@ func NewTaintConfig(opts config.AutoscalingOptions) TaintConfig {
startupTaintPrefixes: []string{IgnoreTaintPrefix, StartupTaintPrefix},
statusTaintPrefixes: []string{StatusTaintPrefix},
explicitlyReportedTaints: explicitlyReportedTaints,
ignoreNodeUnschedulable: opts.IgnoreNodeUnschedulable,
}
}

Expand All @@ -147,6 +153,11 @@ func (tc TaintConfig) IsStatusTaint(taint string) bool {
return matchesAnyPrefix(tc.statusTaintPrefixes, taint)
}

// ShouldIgnoreNodeUnschedulable reports whether a node's .spec.unschedulable
// field should be ignored. It returns the ignoreNodeUnschedulable setting that
// NewTaintConfig copies from AutoscalingOptions.IgnoreNodeUnschedulable.
func (tc TaintConfig) ShouldIgnoreNodeUnschedulable() bool {
return tc.ignoreNodeUnschedulable
}

func (tc TaintConfig) isExplicitlyReportedTaint(taint string) bool {
_, ok := tc.explicitlyReportedTaints[taint]
return ok
Expand Down
Loading