diff --git a/cluster-autoscaler/FAQ.md b/cluster-autoscaler/FAQ.md index 62c235c2745..f564e010540 100644 --- a/cluster-autoscaler/FAQ.md +++ b/cluster-autoscaler/FAQ.md @@ -1022,6 +1022,7 @@ The following startup parameters are supported for cluster autoscaler: | `grpc-expander-cert` | Path to cert used by gRPC server over TLS | | | `grpc-expander-url` | URL to reach gRPC expander server. | | | `ignore-daemonsets-utilization` | Should CA ignore DaemonSet pods when calculating resource utilization for scaling down | | +| `ignore-node-unschedulable` | Should CA ignore a node's .spec.unschedulable field when creating a node template for considering to scale a node group. | false | | `ignore-mirror-pods-utilization` | Should CA ignore Mirror pods when calculating resource utilization for scaling down | | | `ignore-taint` | Specifies a taint to ignore in node templates when considering to scale a node group (Deprecated, use startup-taints instead) | [] | | `initial-node-group-backoff-duration` | initialNodeGroupBackoffDuration is the duration of first backoff after a new node failed to start. | 5m0s | diff --git a/cluster-autoscaler/config/autoscaling_options.go b/cluster-autoscaler/config/autoscaling_options.go index a144580989c..30a06541c3a 100644 --- a/cluster-autoscaler/config/autoscaling_options.go +++ b/cluster-autoscaler/config/autoscaling_options.go @@ -230,6 +230,9 @@ type AutoscalingOptions struct { BalancingLabels []string // AWSUseStaticInstanceList tells if AWS cloud provider use static instance type list or dynamically fetch from remote APIs. AWSUseStaticInstanceList bool + // IgnoreNodeUnschedulable tells the autoscaler to ignore a node's .spec.unschedulable field when creating a node template. + // Specifically, this will cause the autoscaler to set the node template's .spec.unschedulable field to false. + IgnoreNodeUnschedulable bool // GCEOptions contain autoscaling options specific to GCE cloud provider. 
GCEOptions GCEOptions // KubeClientOpts specify options for kube client diff --git a/cluster-autoscaler/config/flags/flags.go b/cluster-autoscaler/config/flags/flags.go index 0f7209ebbb1..d16a255b3a9 100644 --- a/cluster-autoscaler/config/flags/flags.go +++ b/cluster-autoscaler/config/flags/flags.go @@ -167,6 +167,7 @@ var ( balancingIgnoreLabelsFlag = multiStringFlag("balancing-ignore-label", "Specifies a label to ignore in addition to the basic and cloud-provider set of labels when comparing if two node groups are similar") balancingLabelsFlag = multiStringFlag("balancing-label", "Specifies a label to use for comparing if two node groups are similar, rather than the built in heuristics. Setting this flag disables all other comparison logic, and cannot be combined with --balancing-ignore-label.") awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. AWS only") + ignoreNodeUnschedulable = flag.Bool("ignore-node-unschedulable", false, "Specifies that the CA should ignore a node's .spec.unschedulable field in node templates when considering to scale a node group.") // GCE specific flags concurrentGceRefreshes = flag.Int("gce-concurrent-refreshes", 1, "Maximum number of concurrent refreshes per cloud object type.") @@ -351,6 +352,7 @@ func createAutoscalingOptions() config.AutoscalingOptions { }, NodeDeletionDelayTimeout: *nodeDeletionDelayTimeout, AWSUseStaticInstanceList: *awsUseStaticInstanceList, + IgnoreNodeUnschedulable: *ignoreNodeUnschedulable, GCEOptions: config.GCEOptions{ ConcurrentRefreshes: *concurrentGceRefreshes, MigInstancesMinRefreshWaitTime: *gceMigInstancesMinRefreshWaitTime, diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go index 86d4ab9826a..9b089a319c7 100644 --- a/cluster-autoscaler/core/static_autoscaler.go +++ b/cluster-autoscaler/core/static_autoscaler.go @@ -348,7 +348,7 @@ func (a *StaticAutoscaler) 
RunOnce(currentTime time.Time) caerrors.AutoscalerErr return typedErr.AddPrefix("failed to initialize RemainingPdbTracker: ") } - nodeInfosForGroups, autoscalerError := a.processors.TemplateNodeInfoProvider.Process(autoscalingCtx, readyNodes, daemonsets, a.taintConfig, currentTime) + nodeInfosForGroups, autoscalerError := a.processors.TemplateNodeInfoProvider.Process(autoscalingCtx, allNodes, daemonsets, a.taintConfig, currentTime) if autoscalerError != nil { klog.Errorf("Failed to get node infos for groups: %v", autoscalerError) return autoscalerError.AddPrefix("failed to build node infos for node groups: ") diff --git a/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor.go b/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor.go index 4b297372e7b..304a05039e3 100644 --- a/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor.go +++ b/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor.go @@ -78,6 +78,18 @@ func (p *MixedTemplateNodeInfoProvider) Process(autoscalingCtx *ca_context.Autos result := make(map[string]*framework.NodeInfo) seenGroups := make(map[string]bool) + // Sort nodes into good and bad candidates for templates. The bad candidates will be processed + // at the end of this function as a last resort for a node info template. + goodCandidates := make([]*apiv1.Node, 0) + badCandidates := make([]*apiv1.Node, 0) + for _, node := range nodes { + if isNodeGoodTemplateCandidate(node, now) { + goodCandidates = append(goodCandidates, node) + } else { + badCandidates = append(badCandidates, node) + } + } + // processNode returns information whether the nodeTemplate was generated and if there was an error. 
processNode := func(node *apiv1.Node) (bool, string, caerror.AutoscalerError) { nodeGroup, err := autoscalingCtx.CloudProvider.NodeGroupForNode(node) @@ -103,11 +115,7 @@ func (p *MixedTemplateNodeInfoProvider) Process(autoscalingCtx *ca_context.Autos return false, "", nil } - for _, node := range nodes { - // Broken nodes might have some stuff missing. Skipping. - if !isNodeGoodTemplateCandidate(node, now) { - continue - } + for _, node := range goodCandidates { added, id, typedErr := processNode(node) if typedErr != nil { return map[string]*framework.NodeInfo{}, typedErr @@ -156,11 +164,7 @@ func (p *MixedTemplateNodeInfoProvider) Process(autoscalingCtx *ca_context.Autos } // Last resort - unready/unschedulable nodes. - for _, node := range nodes { - // Allowing broken nodes - if isNodeGoodTemplateCandidate(node, now) { - continue - } + for _, node := range badCandidates { added, _, typedErr := processNode(node) if typedErr != nil { return map[string]*framework.NodeInfo{}, typedErr diff --git a/cluster-autoscaler/simulator/node_info_utils.go b/cluster-autoscaler/simulator/node_info_utils.go index ffa8f33be53..8903cb54a83 100644 --- a/cluster-autoscaler/simulator/node_info_utils.go +++ b/cluster-autoscaler/simulator/node_info_utils.go @@ -115,6 +115,10 @@ func createSanitizedNode(node *apiv1.Node, newName string, taintConfig *taints.T } newNode.Labels[apiv1.LabelHostname] = newName + if taintConfig != nil && taintConfig.ShouldIgnoreNodeUnschedulable() { + newNode.Spec.Unschedulable = false + } + if taintConfig != nil { newNode.Spec.Taints = taints.SanitizeTaints(newNode.Spec.Taints, *taintConfig) } diff --git a/cluster-autoscaler/utils/taints/taints.go b/cluster-autoscaler/utils/taints/taints.go index acae356089d..7910ab4ca1b 100644 --- a/cluster-autoscaler/utils/taints/taints.go +++ b/cluster-autoscaler/utils/taints/taints.go @@ -97,6 +97,11 @@ type TaintConfig struct { startupTaintPrefixes []string statusTaintPrefixes []string explicitlyReportedTaints 
TaintKeySet + // ignoreNodeUnschedulable tells the CA whether to ignore a node's + // .spec.unschedulable field. It lives in this struct for convenience, + // because it is used in similar places that check for taints + // to ignore. + ignoreNodeUnschedulable bool } // NewTaintConfig returns the taint config extracted from options @@ -128,6 +133,7 @@ func NewTaintConfig(opts config.AutoscalingOptions) TaintConfig { startupTaintPrefixes: []string{IgnoreTaintPrefix, StartupTaintPrefix}, statusTaintPrefixes: []string{StatusTaintPrefix}, explicitlyReportedTaints: explicitlyReportedTaints, + ignoreNodeUnschedulable: opts.IgnoreNodeUnschedulable, } } @@ -147,6 +153,11 @@ func (tc TaintConfig) IsStatusTaint(taint string) bool { return matchesAnyPrefix(tc.statusTaintPrefixes, taint) } +// ShouldIgnoreNodeUnschedulable returns whether a node's .spec.unschedulable field should be ignored. +func (tc TaintConfig) ShouldIgnoreNodeUnschedulable() bool { + return tc.ignoreNodeUnschedulable +} + func (tc TaintConfig) isExplicitlyReportedTaint(taint string) bool { _, ok := tc.explicitlyReportedTaints[taint] return ok