Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
307 changes: 197 additions & 110 deletions scheduler/reconciler/filters.go
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, and what do we think about backports? The prospect of merge conflicts always sets me on edge, but previous refactors (e.g. #26042 and #26324) were not backported... I assume that's why you targeted 1.11.x+ent and 2.0.x? (PS shouldn't need 2.0.x+ent, because CE->Ent merge will catch that).

Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,171 @@ func (set allocSet) filterOldTerminalAllocs(a ReconcilerState) (remain, ignore a
return remain, ignored
}

// allocCategory represents the classification bucket for an allocation.
// Each category corresponds to one of the allocSets produced by
// filterByTainted (untainted, migrate, lost, disconnecting, reconnecting,
// ignore, expiring).
type allocCategory string

const (
categoryUntainted allocCategory = "untainted" // alloc needs no changes; left where it is
categoryMigrate allocCategory = "migrate" // alloc has been marked for migration
categoryLost allocCategory = "lost" // alloc is on a lost/GC'd node and must be replaced
categoryDisconnecting allocCategory = "disconnecting" // alloc is on a node that has disconnected
categoryReconnecting allocCategory = "reconnecting" // alloc needs to go through reconnect handling
categoryIgnore allocCategory = "ignore" // alloc requires no further reconciler action
categoryExpiring allocCategory = "expiring" // alloc's disconnect window has expired
)

// allocContext holds the per-allocation data used by classification rules.
// It is built once per allocation inside filterByTainted and passed to each
// rule condition in order until one matches.
type allocContext struct {
alloc *structs.Allocation // the allocation being classified
shouldReconnect bool // result of alloc.NeedsToReconnect(), computed only for unknown/running/failed client statuses
taintedNode *structs.Node // entry from state.TaintedNodes; nil either when the node is not tainted or when it was GC'd (see nodeIsTainted)
nodeIsTainted bool // true when alloc.NodeID was present in state.TaintedNodes, distinguishing a GC'd node (nil value, key present) from an untainted one
now time.Time // state.Now, used for expiry and disconnect-timeout checks
}

type classificationRule struct {
condition func(allocContext) bool
category allocCategory
Comment on lines +125 to +126
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suuuper nit-picky, but reversing the order of these both a) is alphabetical, and b) puts the simpler information (category) first. the latter I think makes the already way more readable classification rule slice just that extra little bit more so.

Suggested change
condition func(allocContext) bool
category allocCategory
category allocCategory
condition func(allocContext) bool

}

// classificationRules are evaluated in order; first match wins.
var classificationRules = []classificationRule{
// Failed allocs that need to reconnect and are still desired to run.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Failed allocs that need reconnect and are still desired to run.
// Failed allocs that need to reconnect and are still desired to run.

{
condition: func(ctx allocContext) bool {
return ctx.shouldReconnect &&
ctx.alloc.DesiredStatus == structs.AllocDesiredStatusRun &&
ctx.alloc.ClientStatus == structs.AllocClientStatusFailed
},
category: categoryReconnecting,
Comment on lines +133 to +138
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

e.g. per my previous comment

Suggested change
condition: func(ctx allocContext) bool {
return ctx.shouldReconnect &&
ctx.alloc.DesiredStatus == structs.AllocDesiredStatusRun &&
ctx.alloc.ClientStatus == structs.AllocClientStatusFailed
},
category: categoryReconnecting,
category: categoryReconnecting,
condition: func(ctx allocContext) bool {
return ctx.shouldReconnect &&
ctx.alloc.DesiredStatus == structs.AllocDesiredStatusRun &&
ctx.alloc.ClientStatus == structs.AllocClientStatusFailed
},

category is always just the one line, so my brain latches on to it a little easier before the variable-length condition.

(I repeat: this is super nit-picky 😋)

},
// Server-terminal allocs should be ignored when they are not reconnecting.
{
condition: func(ctx allocContext) bool {
return ctx.alloc.TerminalStatus() && !ctx.shouldReconnect && ctx.alloc.ServerTerminalStatus()
},
category: categoryIgnore,
},
// Terminal canaries marked for migration.
{
condition: func(ctx allocContext) bool {
return ctx.alloc.TerminalStatus() && !ctx.shouldReconnect &&
ctx.alloc.DeploymentStatus.IsCanary() && ctx.alloc.DesiredTransition.ShouldMigrate()
},
category: categoryMigrate,
},
// Terminal allocs that are not reconnecting are untainted.
{
condition: func(ctx allocContext) bool {
return ctx.alloc.TerminalStatus() && !ctx.shouldReconnect
},
category: categoryUntainted,
},
// Expired allocs are handled before reconnecting/disconnect paths.
{
condition: func(ctx allocContext) bool {
return ctx.alloc.Expired(ctx.now)
},
category: categoryExpiring,
},
// Failed reconnects that the server already marked to stop should be ignored.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Failed reconnects that server already marked to stop should be ignored.
// Failed reconnects that the server already marked to stop should be ignored.

I assume this refers to the nomad server/leader?

{
condition: func(ctx allocContext) bool {
return ctx.shouldReconnect &&
ctx.alloc.ClientStatus == structs.AllocClientStatusFailed &&
ctx.alloc.DesiredStatus == structs.AllocDesiredStatusStop
},
category: categoryIgnore,
},
// Disconnected node, but alloc already marked unknown, so no changes needed.
{
condition: func(ctx allocContext) bool {
return ctx.taintedNode != nil &&
ctx.taintedNode.Status == structs.NodeStatusDisconnected &&
ctx.alloc.ClientStatus == structs.AllocClientStatusUnknown
},
category: categoryUntainted,
Comment on lines +178 to +185
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A bonus from these conditions being so well-contained is that it's easier for me to isolate where wording confuses me.

This is saying "Sure, the node is tainted (can't handle allocs), but the alloc has the appropriate status, so it's fine -- we don't need to do anything with it." Right? I.e. the alloc already in the state that we want/expect it to be.

Short of renaming untainted, maybe the comment could elaborate on this a bit?

Suggested change
// Disconnected node and alloc already unknown.
{
condition: func(ctx allocContext) bool {
return ctx.taintedNode != nil &&
ctx.taintedNode.Status == structs.NodeStatusDisconnected &&
ctx.alloc.ClientStatus == structs.AllocClientStatusUnknown
},
category: categoryUntainted,
// Disconnected node, but alloc already marked unknown, so no changes needed.
{
category: categoryUntainted,
condition: func(ctx allocContext) bool {
return ctx.taintedNode != nil &&
ctx.taintedNode.Status == structs.NodeStatusDisconnected &&
ctx.alloc.ClientStatus == structs.AllocClientStatusUnknown
},

or something like that?

},
// Disconnected pending allocs should be replaced immediately.
{
condition: func(ctx allocContext) bool {
return ctx.taintedNode != nil &&
ctx.taintedNode.Status == structs.NodeStatusDisconnected &&
ctx.alloc.ClientStatus == structs.AllocClientStatusPending
},
category: categoryLost,
},
// Disconnected allocs with nil disconnect block or lost_after=0 are lost.
{
condition: func(ctx allocContext) bool {
return ctx.taintedNode != nil &&
ctx.taintedNode.Status == structs.NodeStatusDisconnected &&
ctx.alloc.DisconnectTimeout(ctx.now) == ctx.now
},
category: categoryLost,
},
// Remaining disconnected allocs are disconnecting.
{
condition: func(ctx allocContext) bool {
return ctx.taintedNode != nil && ctx.taintedNode.Status == structs.NodeStatusDisconnected
},
category: categoryDisconnecting,
},
// Non-terminal allocs marked for migration always migrate.
{
condition: func(ctx allocContext) bool {
return ctx.alloc.DesiredTransition.ShouldMigrate()
},
category: categoryMigrate,
},
// Ready or untainted nodes with reconnecting allocs.
{
condition: func(ctx allocContext) bool {
return (!ctx.nodeIsTainted || (ctx.taintedNode != nil && ctx.taintedNode.Status == structs.NodeStatusReady)) &&
ctx.shouldReconnect
},
category: categoryReconnecting,
},
// Ready or untainted nodes.
{
condition: func(ctx allocContext) bool {
return !ctx.nodeIsTainted || (ctx.taintedNode != nil && ctx.taintedNode.Status == structs.NodeStatusReady)
},
category: categoryUntainted,
},
// GC'd (tainted map entry with nil node) nodes are lost.
{
condition: func(ctx allocContext) bool {
return ctx.taintedNode == nil
},
category: categoryLost,
},
// Terminal node allocations that cannot be replaced.
{
condition: func(ctx allocContext) bool {
return ctx.taintedNode.TerminalStatus() &&
!ctx.alloc.ReplaceOnDisconnect() &&
ctx.alloc.ClientStatus == structs.AllocClientStatusUnknown
},
category: categoryUntainted,
},
// Still-running allocs on terminal nodes that won't be replaced keep disconnecting.
{
condition: func(ctx allocContext) bool {
return ctx.taintedNode.TerminalStatus() &&
Comment on lines +250 to +252
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

missing a comment on this one

!ctx.alloc.ReplaceOnDisconnect() &&
ctx.alloc.ClientStatus == structs.AllocClientStatusRunning
},
category: categoryDisconnecting,
},
// Remaining terminal node allocs are lost.
{
condition: func(ctx allocContext) bool {
return ctx.taintedNode.TerminalStatus()
},
category: categoryLost,
},
}

// filterByTainted takes a set of tainted nodes and filters the allocation set
// into the following groups:
// 1. Those that exist on untainted nodes
Expand All @@ -117,128 +282,50 @@ func (set allocSet) filterByTainted(state ClusterState) (untainted, migrate, los
ignore = make(allocSet)
expiring = make(allocSet)

// Create a map for quick category assignment.
categoryMap := map[allocCategory]*allocSet{
categoryUntainted: &untainted,
categoryMigrate: &migrate,
categoryLost: &lost,
categoryDisconnecting: &disconnecting,
categoryReconnecting: &reconnecting,
categoryIgnore: &ignore,
categoryExpiring: &expiring,
}

for _, alloc := range set {
shouldReconnect := false
// Build context for classification rules.
ctx := allocContext{
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In Go, I see ctx and I think context.Context, and in general, the word "context" is pretty overloaded.

Can we name it something more specific? Maybe like classificationData to match classificationRule?

Suggested change
ctx := allocContext{
// Gather data to pass to classification rules.
data := classificationData{

and all the other ctx references to match.

alloc: alloc,
now: state.Now,
}

// Only compute reconnect for unknown, running, and failed since they
// need to go through the reconnect logic.
// Compute shouldReconnect for unknown/running/failed allocs.
if alloc.ClientStatus == structs.AllocClientStatusUnknown ||
alloc.ClientStatus == structs.AllocClientStatusRunning ||
alloc.ClientStatus == structs.AllocClientStatusFailed {
shouldReconnect = alloc.NeedsToReconnect()
}

// Failed allocs that need to be reconnected must be added to
// reconnecting so that they can be handled as a failed reconnect.
if shouldReconnect &&
alloc.DesiredStatus == structs.AllocDesiredStatusRun &&
alloc.ClientStatus == structs.AllocClientStatusFailed {
reconnecting[alloc.ID] = alloc
continue
}

if alloc.TerminalStatus() && !shouldReconnect {
// Server-terminal allocs, if they should not reconnect,
// are probably stopped replacements and should be ignored
if alloc.ServerTerminalStatus() {
ignore[alloc.ID] = alloc
continue
}

// Terminal canaries that have been marked for migration need to be
// migrated, otherwise we block deployments from progressing by
// counting them as running canaries.
if alloc.DeploymentStatus.IsCanary() && alloc.DesiredTransition.ShouldMigrate() {
migrate[alloc.ID] = alloc
continue
}

// Terminal allocs, if not reconnect, are always untainted as they
// should never be migrated.
untainted[alloc.ID] = alloc
continue
}

// The alloc is expired. These end up getting added to "lost" but this
// skips having to evaluate the reconnecting logic for these allocs.
if alloc.Expired(state.Now) {
expiring[alloc.ID] = alloc
continue
}

// Ignore failed allocs that need to be reconnected and that have been
// marked to stop by the server.
if shouldReconnect &&
alloc.ClientStatus == structs.AllocClientStatusFailed &&
alloc.DesiredStatus == structs.AllocDesiredStatusStop {
ignore[alloc.ID] = alloc
continue
ctx.shouldReconnect = alloc.NeedsToReconnect()
}

taintedNode, nodeIsTainted := state.TaintedNodes[alloc.NodeID]
if taintedNode != nil && taintedNode.Status == structs.NodeStatusDisconnected {
// ignore allocs already marked unknown, they should already have a followup eval
if alloc.ClientStatus == structs.AllocClientStatusUnknown {
untainted[alloc.ID] = alloc
continue
}
// If the alloc is pending mark it lost so it is replaced immediately.
if alloc.ClientStatus == structs.AllocClientStatusPending {
lost[alloc.ID] = alloc
continue
// Get node taint information, preserving whether key existed so we can
// distinguish missing-node from GC'd-node semantics.
ctx.taintedNode, ctx.nodeIsTainted = state.TaintedNodes[alloc.NodeID]

// Apply classification rules in order (first match wins).
classified := false
for _, rule := range classificationRules {
if rule.condition(ctx) {
Comment on lines +316 to +317
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is just beautiful 🥲

targetSet := categoryMap[rule.category]
(*targetSet)[alloc.ID] = alloc
classified = true
break
}
// Allocs with nil disconnect blocks or disconnect.lost_after == 0 are lost
if alloc.DisconnectTimeout(state.Now) == state.Now {
lost[alloc.ID] = alloc
continue
}
disconnecting[alloc.ID] = alloc
continue
}

// Non-terminal allocs that should migrate should always migrate
if alloc.DesiredTransition.ShouldMigrate() {
migrate[alloc.ID] = alloc
continue
}

if !nodeIsTainted || (taintedNode != nil && taintedNode.Status == structs.NodeStatusReady) {
// Filter allocs on a node that is now re-connected to be resumed.
if shouldReconnect {
reconnecting[alloc.ID] = alloc
continue
}

// Otherwise, Node is untainted so alloc is untainted
// Default category if no rule matched.
if !classified {
untainted[alloc.ID] = alloc
continue
}

// Allocs on GC'd (nil) or lost nodes are Lost
if taintedNode == nil {
lost[alloc.ID] = alloc
continue
}

// Allocs on terminal nodes that can't be rescheduled need to be treated
// differently than those that can.
if taintedNode.TerminalStatus() {
if !alloc.ReplaceOnDisconnect() {
if alloc.ClientStatus == structs.AllocClientStatusUnknown {
untainted[alloc.ID] = alloc
continue
} else if alloc.ClientStatus == structs.AllocClientStatusRunning {
disconnecting[alloc.ID] = alloc
continue
}
}

lost[alloc.ID] = alloc
continue
}

// All other allocs are untainted
untainted[alloc.ID] = alloc
}

return
Expand Down
Loading
Loading