From 605395dee953f30233953a88d35f03005628f319 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:26:09 +0200 Subject: [PATCH 01/37] core: support batch job timeouts via new field max_run_duration --- api/tasks.go | 1 + nomad/batch_timeout_shims.go | 20 +++ nomad/batchtimeout/watcher.go | 170 ++++++++++++++++++ nomad/batchtimeout/watcher_test.go | 275 +++++++++++++++++++++++++++++ nomad/leader.go | 3 + nomad/server.go | 13 ++ nomad/structs/structs.go | 17 ++ nomad/structs/structs_test.go | 75 ++++++++ 8 files changed, 574 insertions(+) create mode 100644 nomad/batch_timeout_shims.go create mode 100644 nomad/batchtimeout/watcher.go create mode 100644 nomad/batchtimeout/watcher_test.go diff --git a/api/tasks.go b/api/tasks.go index a0624df90dc..a2aebd8a76c 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -507,6 +507,7 @@ type TaskGroup struct { Meta map[string]string `hcl:"meta,block"` Services []*Service `hcl:"service,block"` ShutdownDelay *time.Duration `mapstructure:"shutdown_delay" hcl:"shutdown_delay,optional"` + MaxRunDuration *time.Duration `mapstructure:"max_run_duration" hcl:"max_run_duration,optional"` // Deprecated: StopAfterClientDisconnect is deprecated in Nomad 1.8 and ignored in Nomad 1.10. Use Disconnect.StopOnClientAfter. StopAfterClientDisconnect *time.Duration `mapstructure:"stop_after_client_disconnect" hcl:"stop_after_client_disconnect,optional"` // Deprecated: MaxClientDisconnect is deprecated in Nomad 1.8.0 and ignored in Nomad 1.10. Use Disconnect.LostAfter. diff --git a/nomad/batch_timeout_shims.go b/nomad/batch_timeout_shims.go new file mode 100644 index 00000000000..57f1709a32c --- /dev/null +++ b/nomad/batch_timeout_shims.go @@ -0,0 +1,20 @@ +// Copyright IBM Corp. 2015, 2025 +// SPDX-License-Identifier: BUSL-1.1 + +package nomad + +import ( + "github.com/hashicorp/nomad/nomad/batchtimeout" + "github.com/hashicorp/nomad/nomad/structs" +) + +type batchTimeoutRaftShim struct { + s *Server +} + +var _ batchtimeout.RaftApplier = batchTimeoutRaftShim{} + +func (b batchTimeoutRaftShim) UpdateAllocDesiredTransition(req *structs.AllocUpdateDesiredTransitionRequest) (uint64, error) { + _, index, err := b.s.raftApply(structs.AllocUpdateDesiredTransitionRequestType, req) + return index, err +} diff --git a/nomad/batchtimeout/watcher.go b/nomad/batchtimeout/watcher.go new file mode 100644 index 00000000000..e6aa7ac7c7d --- /dev/null +++ b/nomad/batchtimeout/watcher.go @@ -0,0 +1,170 @@ +// Copyright IBM Corp. 
2015, 2025 +// SPDX-License-Identifier: BUSL-1.1 + +package batchtimeout + +import ( + "time" + + log "github.com/hashicorp/go-hclog" + + "github.com/hashicorp/nomad/helper/pointer" + "github.com/hashicorp/nomad/nomad/state" + "github.com/hashicorp/nomad/nomad/structs" +) + +const ( + defaultScanInterval = 5 * time.Second + + timeoutDescription = "allocation exceeded max_run_duration" +) + +type RaftApplier interface { + UpdateAllocDesiredTransition(req *structs.AllocUpdateDesiredTransitionRequest) (uint64, error) +} + +type Watcher struct { + logger log.Logger + raft RaftApplier + state *state.StateStore + scanInterval time.Duration + enabled bool +} + +func NewWatcher(logger log.Logger, raft RaftApplier, scanInterval time.Duration) *Watcher { + if scanInterval <= 0 { + scanInterval = defaultScanInterval + } + + return &Watcher{ + logger: logger.Named("batch_timeout_watcher"), + raft: raft, + scanInterval: scanInterval, + } +} + +func (w *Watcher) SetEnabled(enabled bool, st *state.StateStore) { + w.enabled = enabled + if st != nil { + w.state = st + } + + if enabled && w.state != nil { + go w.watch() + } +} + +func (w *Watcher) watch() { + ticker := time.NewTicker(w.scanInterval) + defer ticker.Stop() + + for { + if !w.enabled { + return + } + + w.scan(time.Now().UTC()) + + <-ticker.C + } +} + +func (w *Watcher) scan(now time.Time) { + if w.state == nil { + return + } + + iter, err := w.state.Allocs(nil, state.SortDefault) + if err != nil { + w.logger.Warn("failed to iterate allocations", "error", err) + return + } + + transitions := make(map[string]*structs.DesiredTransition) + + for { + raw := iter.Next() + if raw == nil { + break + } + + alloc := raw.(*structs.Allocation) + if !shouldStopAlloc(now, alloc) { + continue + } + + transitions[alloc.ID] = &structs.DesiredTransition{ + NoShutdownDelay: pointer.Of(false), + } + } + + if len(transitions) == 0 { + return + } + + req := &structs.AllocUpdateDesiredTransitionRequest{ + Allocs: transitions, + } + + if _, err := w.raft.UpdateAllocDesiredTransition(req); err != nil { + w.logger.Warn("failed to stop timed out allocations", "error", err, "count", len(transitions)) + return + } + + w.logger.Debug("stopped timed out allocations", "count", len(transitions)) +} + +func shouldStopAlloc(now time.Time, alloc *structs.Allocation) bool { + if alloc == nil || alloc.Job == nil { + return false + } + + if alloc.DesiredStatus != structs.AllocDesiredStatusRun { + return false + } + + if alloc.ClientStatus != structs.AllocClientStatusRunning { + return false + } + + if alloc.ClientTerminalStatus() || alloc.ServerTerminalStatus() { + return false + } + + tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) + if tg == nil || tg.MaxRunDuration == nil || *tg.MaxRunDuration <= 0 { + return false + } + + switch alloc.Job.Type { + case structs.JobTypeBatch, structs.JobTypeSysBatch: + default: + return false + } + + startedAt, ok := allocRunningSince(alloc) + if !ok { + return false + } + + return !startedAt.Add(*tg.MaxRunDuration).After(now) +} + +func allocRunningSince(alloc *structs.Allocation) (time.Time, bool) { + var latest time.Time + + for _, ts := range alloc.TaskStates { + if ts == nil || ts.State != structs.TaskStateRunning || ts.StartedAt.IsZero() { + return time.Time{}, false + } + if ts.StartedAt.After(latest) { + latest = ts.StartedAt + } + } + + if latest.IsZero() { + return time.Time{}, false + } + + return latest, true +} diff --git a/nomad/batchtimeout/watcher_test.go b/nomad/batchtimeout/watcher_test.go new file mode 100644 index 
00000000000..673e6898210 --- /dev/null +++ b/nomad/batchtimeout/watcher_test.go @@ -0,0 +1,275 @@ +// Copyright IBM Corp. 2015, 2025 +// SPDX-License-Identifier: BUSL-1.1 + +package batchtimeout + +import ( + "testing" + "time" + + "github.com/hashicorp/nomad/helper/pointer" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" +) + +func TestShouldStopAlloc(t *testing.T) { + t.Parallel() + + now := time.Now().UTC() + + cases := []struct { + name string + allocFn func() *structs.Allocation + expected bool + }{ + { + name: "batch alloc times out after max run duration", + allocFn: func() *structs.Allocation { + alloc := mock.Alloc() + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: now.Add(-10 * time.Minute), + }, + } + return alloc + }, + expected: true, + }, + { + name: "sysbatch alloc times out after max run duration", + allocFn: func() *structs.Allocation { + alloc := mock.SysBatchAlloc() + alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.TaskStates = map[string]*structs.TaskState{ + "ping-example": { + State: structs.TaskStateRunning, + StartedAt: now.Add(-10 * time.Minute), + }, + } + return alloc + }, + expected: true, + }, + { + name: "alloc without max run duration does not time out", + allocFn: func() *structs.Allocation { + alloc := mock.Alloc() + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = nil + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: now.Add(-10 * time.Minute), + }, + } + return alloc + }, + expected: false, + }, + { + name: "service alloc does not time out", + allocFn: func() *structs.Allocation { + alloc := mock.Alloc() + alloc.Job.Type = structs.JobTypeService + alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: now.Add(-10 * time.Minute), + }, + } + return alloc + }, + expected: false, + }, + { + name: "pending alloc does not time out", + allocFn: func() *structs.Allocation { + alloc := mock.Alloc() + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) + alloc.ClientStatus = structs.AllocClientStatusPending + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: now.Add(-10 * time.Minute), + }, + } + return alloc + }, + expected: false, + }, + { + name: "stopping alloc does not time out again", + allocFn: func() *structs.Allocation { + alloc := mock.Alloc() + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DesiredStatus = structs.AllocDesiredStatusStop + 
alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: now.Add(-10 * time.Minute), + }, + } + return alloc + }, + expected: false, + }, + { + name: "alloc within max run duration does not time out", + allocFn: func() *structs.Allocation { + alloc := mock.Alloc() + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(15 * time.Minute) + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: now.Add(-10 * time.Minute), + }, + } + return alloc + }, + expected: false, + }, + { + name: "alloc with no running task state does not time out", + allocFn: func() *structs.Allocation { + alloc := mock.Alloc() + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStatePending, + StartedAt: now.Add(-10 * time.Minute), + }, + } + return alloc + }, + expected: false, + }, + { + name: "alloc with zero start time does not time out", + allocFn: func() *structs.Allocation { + alloc := mock.Alloc() + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + }, + } + return alloc + }, + expected: false, + }, + { + name: "alloc with mixed task states does not time out until all tasks are running", + allocFn: func() *structs.Allocation { + alloc := mock.Alloc() + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, &structs.Task{Name: "sidecar"}) + alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: now.Add(-10 * time.Minute), + }, + "sidecar": { + State: structs.TaskStatePending, + }, + } + return alloc + }, + expected: false, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + must.Eq(t, tc.expected, shouldStopAlloc(now, tc.allocFn())) + }) + } +} + +func TestAllocRunningSince(t *testing.T) { + t.Parallel() + + now := time.Now().UTC() + + t.Run("returns latest running task start time", func(t *testing.T) { + alloc := mock.Alloc() + alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: now.Add(-10 * time.Minute), + }, + "sidecar": { + State: structs.TaskStateRunning, + StartedAt: now.Add(-5 * time.Minute), + }, + } + + startedAt, ok := allocRunningSince(alloc) + must.True(t, ok) + must.Eq(t, now.Add(-5*time.Minute), startedAt) + }) + + t.Run("returns false when task states are missing", func(t *testing.T) { + alloc := mock.Alloc() + alloc.TaskStates = nil + + _, ok := allocRunningSince(alloc) + must.False(t, ok) + }) + + t.Run("returns false when any task is not running", func(t *testing.T) { + alloc := mock.Alloc() + alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: 
structs.TaskStateRunning, + StartedAt: now.Add(-10 * time.Minute), + }, + "sidecar": { + State: structs.TaskStatePending, + }, + } + + _, ok := allocRunningSince(alloc) + must.False(t, ok) + }) + + t.Run("returns false when any running task has zero start time", func(t *testing.T) { + alloc := mock.Alloc() + alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + }, + } + + _, ok := allocRunningSince(alloc) + must.False(t, ok) + }) +} diff --git a/nomad/leader.go b/nomad/leader.go index 26a5f1d9521..1855fc2bda4 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -412,6 +412,9 @@ func (s *Server) establishLeadership(stopCh chan struct{}) error { // Enable the deployment watcher, since we are now the leader s.deploymentWatcher.SetEnabled(true, s.State()) + // Enable the batch timeout watcher, since we are now the leader + s.batchTimeoutWatcher.SetEnabled(true, s.State()) + // Enable the NodeDrainer s.nodeDrainer.SetEnabled(true, s.State()) diff --git a/nomad/server.go b/nomad/server.go index c45047dffcb..ba49483be79 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -43,6 +43,7 @@ import ( "github.com/hashicorp/nomad/helper/tlsutil" "github.com/hashicorp/nomad/lib/auth/oidc" "github.com/hashicorp/nomad/nomad/auth" + "github.com/hashicorp/nomad/nomad/batchtimeout" "github.com/hashicorp/nomad/nomad/deploymentwatcher" "github.com/hashicorp/nomad/nomad/drainer" "github.com/hashicorp/nomad/nomad/lock" @@ -229,6 +230,10 @@ type Server struct { // make the required calls to continue to transition the deployment. deploymentWatcher *deploymentwatcher.Watcher + // batchTimeoutWatcher is used to stop batch and sysbatch allocations that + // exceed their configured max_run_duration. + batchTimeoutWatcher *batchtimeout.Watcher + // nodeDrainer is used to drain allocations from nodes. nodeDrainer *drainer.NodeDrainer @@ -527,6 +532,9 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, consulConfigFunc // updates when the processes SetEnabled is triggered. go s.evalBroker.enabledNotifier.Run() + // Setup the batch timeout watcher. + s.setupBatchTimeoutWatcher() + // Setup the node drainer. s.setupNodeDrainer() @@ -1184,6 +1192,11 @@ func (s *Server) setupVolumeWatcher() error { // setupNodeDrainer creates a node drainer which will be enabled when a server // becomes a leader. +func (s *Server) setupBatchTimeoutWatcher() { + shim := batchTimeoutRaftShim{s} + s.batchTimeoutWatcher = batchtimeout.NewWatcher(s.logger, shim, 0) +} + func (s *Server) setupNodeDrainer() { // Create a shim around Raft requests shim := drainerShim{s} diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index df142bab0dc..33ea1b3835d 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -6867,6 +6867,11 @@ type TaskGroup struct { // group services in consul and stopping tasks. ShutdownDelay *time.Duration + // MaxRunDuration is the maximum amount of time a batch or sysbatch task + // group allocation may run after entering the running state before Nomad + // stops it. 
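+ // For example (an illustrative jobspec snippet, not part of this change), + // a batch job's group stanza could set max_run_duration = "30m" so Nomad + // stops the allocation once it has been running for 30 minutes.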
+ MaxRunDuration *time.Duration + // StopAfterClientDisconnect, if set, configures the client to stop the task group // after this duration since the last known good heartbeat // To be deprecated after 1.8.0 infavor of Disconnect.StopOnClientAfter @@ -7054,6 +7059,18 @@ func (tg *TaskGroup) Validate(j *Job) error { } } + if tg.MaxRunDuration != nil { + if *tg.MaxRunDuration <= 0 { + mErr = multierror.Append(mErr, errors.New("MaxRunDuration must be greater than zero")) + } + + switch j.Type { + case JobTypeBatch, JobTypeSysBatch: + default: + mErr = multierror.Append(mErr, fmt.Errorf("Job type %q does not allow max_run_duration", j.Type)) + } + } + for idx, constr := range tg.Constraints { if err := constr.Validate(); err != nil { outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index 6f77ff76b76..e49f6d19d99 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -1543,6 +1543,81 @@ func TestTaskGroup_Validate(t *testing.T) { }, jobType: JobTypeSystem, }, + { + name: "invalid max run duration for service job", + tg: &TaskGroup{ + Name: "web", + Count: 1, + MaxRunDuration: pointer.Of(5 * time.Minute), + Tasks: []*Task{ + {Name: "web"}, + }, + }, + expErr: []string{ + `Job type "service" does not allow max_run_duration`, + }, + jobType: JobTypeService, + }, + { + name: "invalid non-positive max run duration for batch job", + tg: &TaskGroup{ + Name: "web", + Count: 1, + MaxRunDuration: pointer.Of(time.Duration(0)), + Tasks: []*Task{ + {Name: "web"}, + }, + }, + expErr: []string{ + "MaxRunDuration must be greater than zero", + }, + jobType: JobTypeBatch, + }, + { + name: "valid max run duration for batch job", + tg: &TaskGroup{ + Name: "web", + Count: 1, + MaxRunDuration: pointer.Of(5 * time.Minute), + Tasks: []*Task{ + { + Name: "web", + Driver: "mock_driver", + Resources: DefaultResources(), + LogConfig: DefaultLogConfig(), + }, + }, + RestartPolicy: NewRestartPolicy(JobTypeBatch), + ReschedulePolicy: NewReschedulePolicy(JobTypeBatch), + EphemeralDisk: DefaultEphemeralDisk(), + }, + jobType: JobTypeBatch, + }, + { + name: "valid max run duration for sysbatch job", + tg: &TaskGroup{ + Name: "web", + Count: 1, + MaxRunDuration: pointer.Of(5 * time.Minute), + Tasks: []*Task{ + { + Name: "web", + Driver: "mock_driver", + Resources: DefaultResources(), + LogConfig: DefaultLogConfig(), + }, + }, + RestartPolicy: &RestartPolicy{ + Attempts: 0, + Interval: 5 * time.Second, + Delay: 5 * time.Second, + Mode: RestartPolicyModeFail, + RenderTemplates: false, + }, + EphemeralDisk: DefaultEphemeralDisk(), + }, + jobType: JobTypeSysBatch, + }, { name: "duplicated por label", tg: &TaskGroup{ From 2388417e8e56e5da07b93cbc5ac89e4ba844426e Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:30:14 +0200 Subject: [PATCH 02/37] refine batch timeout watcher --- nomad/batch_timeout_shims.go | 4 +- nomad/batchtimeout/watcher.go | 86 ++++++++++++++++------ nomad/batchtimeout/watcher_test.go | 112 +++++++++++++++++++++++++++++ nomad/state/state_store_test.go | 21 +++++- 4 files changed, 197 insertions(+), 26 deletions(-) diff --git a/nomad/batch_timeout_shims.go b/nomad/batch_timeout_shims.go index 57f1709a32c..82e7520fdca 100644 --- a/nomad/batch_timeout_shims.go +++ b/nomad/batch_timeout_shims.go @@ -14,7 +14,7 @@ type batchTimeoutRaftShim struct { var _ batchtimeout.RaftApplier = batchTimeoutRaftShim{} -func (b 
batchTimeoutRaftShim) UpdateAllocDesiredTransition(req *structs.AllocUpdateDesiredTransitionRequest) (uint64, error) { - _, index, err := b.s.raftApply(structs.AllocUpdateDesiredTransitionRequestType, req) +func (b batchTimeoutRaftShim) ApplyPlanResults(req *structs.ApplyPlanResultsRequest) (uint64, error) { + _, index, err := b.s.raftApply(structs.ApplyPlanResultsRequestType, req) return index, err } diff --git a/nomad/batchtimeout/watcher.go b/nomad/batchtimeout/watcher.go index e6aa7ac7c7d..8dc907e43ac 100644 --- a/nomad/batchtimeout/watcher.go +++ b/nomad/batchtimeout/watcher.go @@ -4,11 +4,11 @@ package batchtimeout import ( + "sync" "time" log "github.com/hashicorp/go-hclog" - "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" ) @@ -20,7 +20,7 @@ const ( ) type RaftApplier interface { - UpdateAllocDesiredTransition(req *structs.AllocUpdateDesiredTransitionRequest) (uint64, error) + ApplyPlanResults(req *structs.ApplyPlanResultsRequest) (uint64, error) } type Watcher struct { @@ -28,7 +28,11 @@ type Watcher struct { raft RaftApplier state *state.StateStore scanInterval time.Duration - enabled bool + + mu sync.Mutex + enabled bool + stopCh chan struct{} + running bool } func NewWatcher(logger log.Logger, raft RaftApplier, scanInterval time.Duration) *Watcher { @@ -44,43 +48,78 @@ func NewWatcher(logger log.Logger, raft RaftApplier, scanInterval time.Duration) } func (w *Watcher) SetEnabled(enabled bool, st *state.StateStore) { - w.enabled = enabled + w.mu.Lock() + defer w.mu.Unlock() + if st != nil { w.state = st } - if enabled && w.state != nil { - go w.watch() + if enabled { + w.enabled = true + if w.running || w.state == nil { + return + } + + w.stopCh = make(chan struct{}) + w.running = true + go w.watch(w.stopCh) + return + } + + w.enabled = false + if !w.running { + return } + + close(w.stopCh) + w.stopCh = nil + w.running = false } -func (w *Watcher) watch() { +func (w *Watcher) watch(stopCh <-chan struct{}) { ticker := time.NewTicker(w.scanInterval) defer ticker.Stop() + defer w.markStopped(stopCh) for { - if !w.enabled { + w.scan(time.Now().UTC()) + + select { + case <-ticker.C: + case <-stopCh: return } + } +} - w.scan(time.Now().UTC()) +func (w *Watcher) markStopped(stopCh <-chan struct{}) { + w.mu.Lock() + defer w.mu.Unlock() - <-ticker.C + if w.stopCh == stopCh { + w.stopCh = nil } + w.running = false } func (w *Watcher) scan(now time.Time) { - if w.state == nil { + w.mu.Lock() + st := w.state + enabled := w.enabled + w.mu.Unlock() + + if !enabled || st == nil { return } - iter, err := w.state.Allocs(nil, state.SortDefault) + iter, err := st.Allocs(nil, state.SortDefault) if err != nil { w.logger.Warn("failed to iterate allocations", "error", err) return } - transitions := make(map[string]*structs.DesiredTransition) + var stopped []*structs.AllocationDiff for { raw := iter.Next() @@ -93,25 +132,28 @@ func (w *Watcher) scan(now time.Time) { continue } - transitions[alloc.ID] = &structs.DesiredTransition{ - NoShutdownDelay: pointer.Of(false), - } + stopped = append(stopped, &structs.AllocationDiff{ + ID: alloc.ID, + DesiredDescription: timeoutDescription, + ClientStatus: structs.AllocClientStatusFailed, + }) } - if len(transitions) == 0 { + if len(stopped) == 0 { return } - req := &structs.AllocUpdateDesiredTransitionRequest{ - Allocs: transitions, + req := &structs.ApplyPlanResultsRequest{ + AllocsStopped: stopped, + UpdatedAt: now.UnixNano(), } - if _, err := 
w.raft.UpdateAllocDesiredTransition(req); err != nil { - w.logger.Warn("failed to stop timed out allocations", "error", err, "count", len(transitions)) + if _, err := w.raft.ApplyPlanResults(req); err != nil { + w.logger.Warn("failed to stop timed out allocations", "error", err, "count", len(stopped)) return } - w.logger.Debug("stopped timed out allocations", "count", len(transitions)) + w.logger.Debug("stopped timed out allocations", "count", len(stopped)) } func shouldStopAlloc(now time.Time, alloc *structs.Allocation) bool { diff --git a/nomad/batchtimeout/watcher_test.go b/nomad/batchtimeout/watcher_test.go index 673e6898210..c73f410ac90 100644 --- a/nomad/batchtimeout/watcher_test.go +++ b/nomad/batchtimeout/watcher_test.go @@ -4,11 +4,14 @@ package batchtimeout import ( + "sync" "testing" "time" + "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" "github.com/shoenig/test/must" ) @@ -273,3 +276,112 @@ func TestAllocRunningSince(t *testing.T) { must.False(t, ok) }) } + +func TestWatcherSetEnabled(t *testing.T) { + t.Parallel() + + raft := &mockRaftApplier{} + w := NewWatcher(hclog.NewNullLogger(), raft, time.Hour) + + st := state.TestStateStore(t) + + w.SetEnabled(true, st) + must.True(t, w.enabled) + must.True(t, w.running) + must.NotNil(t, w.stopCh) + + firstStopCh := w.stopCh + + w.SetEnabled(true, st) + must.True(t, w.running) + must.Eq(t, firstStopCh, w.stopCh) + + w.SetEnabled(false, st) + must.False(t, w.enabled) +} + +func TestWatcherScanAppliesTimedOutAllocs(t *testing.T) { + t.Parallel() + + st := state.TestStateStore(t) + raft := &mockRaftApplier{} + w := NewWatcher(hclog.NewNullLogger(), raft, time.Hour) + w.state = st + w.enabled = true + + batchAlloc := mock.Alloc() + batchAlloc.Job.Type = structs.JobTypeBatch + batchAlloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) + batchAlloc.ClientStatus = structs.AllocClientStatusRunning + batchAlloc.DesiredStatus = structs.AllocDesiredStatusRun + batchAlloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: time.Now().UTC().Add(-10 * time.Minute), + }, + } + + sysbatchAlloc := mock.SysBatchAlloc() + sysbatchAlloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) + sysbatchAlloc.ClientStatus = structs.AllocClientStatusRunning + sysbatchAlloc.DesiredStatus = structs.AllocDesiredStatusRun + sysbatchAlloc.TaskStates = map[string]*structs.TaskState{ + "ping-example": { + State: structs.TaskStateRunning, + StartedAt: time.Now().UTC().Add(-10 * time.Minute), + }, + } + + okAlloc := mock.Alloc() + okAlloc.Job.Type = structs.JobTypeBatch + okAlloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(30 * time.Minute) + okAlloc.ClientStatus = structs.AllocClientStatusRunning + okAlloc.DesiredStatus = structs.AllocDesiredStatusRun + okAlloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: time.Now().UTC().Add(-10 * time.Minute), + }, + } + + must.NoError(t, st.UpsertJob(structs.MsgTypeTestSetup, 100, nil, batchAlloc.Job)) + must.NoError(t, st.UpsertJob(structs.MsgTypeTestSetup, 101, nil, sysbatchAlloc.Job)) + must.NoError(t, st.UpsertJob(structs.MsgTypeTestSetup, 102, nil, okAlloc.Job)) + must.NoError(t, st.UpsertAllocs(structs.MsgTypeTestSetup, 103, []*structs.Allocation{ + batchAlloc, + sysbatchAlloc, + okAlloc, + })) + + w.scan(time.Now().UTC()) + + 
must.NotNil(t, raft.lastReq) + must.SliceLen(t, 2, raft.lastReq.AllocsStopped) + + got := map[string]*structs.AllocationDiff{} + for _, diff := range raft.lastReq.AllocsStopped { + got[diff.ID] = diff + } + + must.MapContainsKey(t, got, batchAlloc.ID) + must.MapContainsKey(t, got, sysbatchAlloc.ID) + must.MapNotContainsKey(t, got, okAlloc.ID) + + must.Eq(t, timeoutDescription, got[batchAlloc.ID].DesiredDescription) + must.Eq(t, structs.AllocClientStatusFailed, got[batchAlloc.ID].ClientStatus) + must.Eq(t, timeoutDescription, got[sysbatchAlloc.ID].DesiredDescription) + must.Eq(t, structs.AllocClientStatusFailed, got[sysbatchAlloc.ID].ClientStatus) +} + +type mockRaftApplier struct { + mu sync.Mutex + lastReq *structs.ApplyPlanResultsRequest +} + +func (m *mockRaftApplier) ApplyPlanResults(req *structs.ApplyPlanResultsRequest) (uint64, error) { + m.mu.Lock() + defer m.mu.Unlock() + + m.lastReq = req + return 1, nil +} diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index a04e1262119..b069523dd0a 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -171,6 +171,15 @@ func TestStateStore_UpsertPlanResults_AllocationsDenormalized(t *testing.T) { DesiredDescription: "desired desc", ClientStatus: structs.AllocClientStatusLost, } + + timeoutStoppedAlloc := mock.Alloc() + timeoutStoppedAlloc.Job = job + timeoutStoppedAlloc.JobID = job.ID + timeoutStoppedAllocDiff := &structs.AllocationDiff{ + ID: timeoutStoppedAlloc.ID, + DesiredDescription: "allocation exceeded max_run_duration", + ClientStatus: structs.AllocClientStatusFailed, + } preemptedAlloc := mock.Alloc() preemptedAlloc.Job = job preemptedAlloc.JobID = job.ID @@ -181,7 +190,7 @@ func TestStateStore_UpsertPlanResults_AllocationsDenormalized(t *testing.T) { must.NoError(t, state.UpsertJob(structs.MsgTypeTestSetup, 900, nil, job)) must.NoError(t, state.UpsertAllocs( - structs.MsgTypeTestSetup, 999, []*structs.Allocation{stoppedAlloc, preemptedAlloc})) + structs.MsgTypeTestSetup, 999, []*structs.Allocation{stoppedAlloc, timeoutStoppedAlloc, preemptedAlloc})) // modify job and ensure that stopped and preempted alloc point to original Job mJob := job.Copy() @@ -198,7 +207,7 @@ func TestStateStore_UpsertPlanResults_AllocationsDenormalized(t *testing.T) { // Create a plan result res := structs.ApplyPlanResultsRequest{ AllocsUpdated: []*structs.Allocation{alloc}, - AllocsStopped: []*structs.AllocationDiff{stoppedAllocDiff}, + AllocsStopped: []*structs.AllocationDiff{stoppedAllocDiff, timeoutStoppedAllocDiff}, Job: mJob, EvalID: eval.ID, AllocsPreempted: []*structs.AllocationDiff{preemptedAllocDiff}, @@ -227,6 +236,14 @@ func TestStateStore_UpsertPlanResults_AllocationsDenormalized(t *testing.T) { test.Eq(t, planModifyIndex, updatedStoppedAlloc.AllocModifyIndex) test.Eq(t, job.TaskGroups, updatedStoppedAlloc.Job.TaskGroups) + updatedTimeoutStoppedAlloc, err := state.AllocByID(ws, timeoutStoppedAlloc.ID) + must.NoError(t, err) + test.Eq(t, timeoutStoppedAllocDiff.DesiredDescription, updatedTimeoutStoppedAlloc.DesiredDescription) + test.Eq(t, structs.AllocDesiredStatusStop, updatedTimeoutStoppedAlloc.DesiredStatus) + test.Eq(t, timeoutStoppedAlloc.ClientStatus, updatedTimeoutStoppedAlloc.ClientStatus) + test.Eq(t, planModifyIndex, updatedTimeoutStoppedAlloc.AllocModifyIndex) + test.Eq(t, job.TaskGroups, updatedTimeoutStoppedAlloc.Job.TaskGroups) + updatedPreemptedAlloc, err := state.AllocByID(ws, preemptedAlloc.ID) must.NoError(t, err) test.Eq(t, structs.AllocDesiredStatusEvict, 
updatedPreemptedAlloc.DesiredStatus) From 617fe2b0419ecca327d07de3807115e366dfe5a8 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:31:25 +0200 Subject: [PATCH 03/37] client status --- nomad/state/state_store.go | 20 +++++++++++++++----- nomad/state/state_store_test.go | 2 +- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index f660d9ad94d..3c452d6f5c5 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -4054,7 +4054,9 @@ func (s *StateStore) nestedUpdateAllocFromClient(txn *txn, index uint64, alloc * // Pull in anything the client is the authority on copyAlloc.ClientStatus = alloc.ClientStatus - copyAlloc.ClientDescription = alloc.ClientDescription + if alloc.ClientDescription != "" { + copyAlloc.ClientDescription = alloc.ClientDescription + } copyAlloc.TaskStates = alloc.TaskStates copyAlloc.NetworkStatus = alloc.NetworkStatus @@ -4288,10 +4290,18 @@ func (s *StateStore) upsertAllocsImpl(index uint64, allocs []*structs.Allocation // Keep the clients task states alloc.TaskStates = exist.TaskStates - // If the scheduler is marking this allocation as lost or unknown we do not - // want to reuse the status of the existing allocation. - if alloc.ClientStatus != structs.AllocClientStatusLost && - alloc.ClientStatus != structs.AllocClientStatusUnknown { + // If the scheduler is marking this allocation as lost, unknown, or another + // explicit terminal client status, preserve the scheduler-provided status + // and description instead of reusing the existing allocation values. + switch alloc.ClientStatus { + case structs.AllocClientStatusLost, + structs.AllocClientStatusUnknown, + structs.AllocClientStatusFailed, + structs.AllocClientStatusComplete: + if alloc.ClientDescription == "" { + alloc.ClientDescription = exist.ClientDescription + } + default: alloc.ClientStatus = exist.ClientStatus alloc.ClientDescription = exist.ClientDescription } diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index b069523dd0a..4db517a6e82 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -240,7 +240,7 @@ func TestStateStore_UpsertPlanResults_AllocationsDenormalized(t *testing.T) { must.NoError(t, err) test.Eq(t, timeoutStoppedAllocDiff.DesiredDescription, updatedTimeoutStoppedAlloc.DesiredDescription) test.Eq(t, structs.AllocDesiredStatusStop, updatedTimeoutStoppedAlloc.DesiredStatus) - test.Eq(t, timeoutStoppedAlloc.ClientStatus, updatedTimeoutStoppedAlloc.ClientStatus) + test.Eq(t, timeoutStoppedAllocDiff.ClientStatus, updatedTimeoutStoppedAlloc.ClientStatus) test.Eq(t, planModifyIndex, updatedTimeoutStoppedAlloc.AllocModifyIndex) test.Eq(t, job.TaskGroups, updatedTimeoutStoppedAlloc.Job.TaskGroups) From 1f4c4dba8d8cf222a8615874162c9f013d96a2a3 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:39:13 +0200 Subject: [PATCH 04/37] integration-style test --- nomad/batch_timeout_test.go | 120 ++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 nomad/batch_timeout_test.go diff --git a/nomad/batch_timeout_test.go b/nomad/batch_timeout_test.go new file mode 100644 index 00000000000..61e1ef99f1e --- /dev/null +++ b/nomad/batch_timeout_test.go @@ -0,0 +1,120 @@ +// Copyright IBM Corp. 
2015, 2025 +// SPDX-License-Identifier: BUSL-1.1 + +package nomad + +import ( + "testing" + "time" + + "github.com/hashicorp/nomad/helper/pointer" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/testutil" + "github.com/shoenig/test/must" +) + +func Test_BatchTimeoutWatcher_StopsExpiredBatchAlloc(t *testing.T) { + t.Parallel() + + srv, cleanup := TestServer(t, nil) + t.Cleanup(cleanup) + + testutil.WaitForLeader(t, srv.RPC) + + job := mock.Job() + job.Type = structs.JobTypeBatch + job.TaskGroups[0].MaxRunDuration = pointer.Of(50 * time.Millisecond) + job.Canonicalize() + + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.Namespace = job.Namespace + alloc.TaskGroup = job.TaskGroups[0].Name + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.TaskStates = map[string]*structs.TaskState{ + job.TaskGroups[0].Tasks[0].Name: { + State: structs.TaskStateRunning, + StartedAt: time.Now().UTC().Add(-1 * time.Second), + }, + } + + state := srv.fsm.State() + must.NoError(t, state.UpsertJobSummary(998, mock.JobSummary(job.ID))) + must.NoError(t, state.UpsertJob(structs.MsgTypeTestSetup, 999, nil, job)) + must.NoError(t, state.UpsertAllocs(structs.MsgTypeTestSetup, 1000, []*structs.Allocation{alloc})) + + testutil.WaitForResult(func() (bool, error) { + out, err := state.AllocByID(nil, alloc.ID) + if err != nil { + return false, err + } + if out == nil { + return false, nil + } + + return out.DesiredStatus == structs.AllocDesiredStatusStop && + out.DesiredDescription == structs.AllocTimeoutReasonMaxRunDuration && + out.ClientStatus == structs.AllocClientStatusFailed, nil + }, func(err error) { + out, lookupErr := state.AllocByID(nil, alloc.ID) + if lookupErr != nil { + t.Fatalf("failed to lookup alloc after waiting: %v", lookupErr) + } + t.Fatalf("timed out waiting for batch timeout watcher to stop alloc: %v; alloc=%#v", err, out) + }) +} + +func Test_BatchTimeoutWatcher_StopsExpiredSysBatchAlloc(t *testing.T) { + t.Parallel() + + srv, cleanup := TestServer(t, nil) + t.Cleanup(cleanup) + + testutil.WaitForLeader(t, srv.RPC) + + job := mock.SystemBatchJob() + job.TaskGroups[0].MaxRunDuration = pointer.Of(50 * time.Millisecond) + job.Canonicalize() + + alloc := mock.SysBatchAlloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.Namespace = job.Namespace + alloc.TaskGroup = job.TaskGroups[0].Name + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.TaskStates = map[string]*structs.TaskState{ + job.TaskGroups[0].Tasks[0].Name: { + State: structs.TaskStateRunning, + StartedAt: time.Now().UTC().Add(-1 * time.Second), + }, + } + + state := srv.fsm.State() + must.NoError(t, state.UpsertJobSummary(998, mock.JobSummary(job.ID))) + must.NoError(t, state.UpsertJob(structs.MsgTypeTestSetup, 999, nil, job)) + must.NoError(t, state.UpsertAllocs(structs.MsgTypeTestSetup, 1000, []*structs.Allocation{alloc})) + + testutil.WaitForResult(func() (bool, error) { + out, err := state.AllocByID(nil, alloc.ID) + if err != nil { + return false, err + } + if out == nil { + return false, nil + } + + return out.DesiredStatus == structs.AllocDesiredStatusStop && + out.DesiredDescription == structs.AllocTimeoutReasonMaxRunDuration && + out.ClientStatus == structs.AllocClientStatusFailed, nil + }, func(err error) { + out, lookupErr := state.AllocByID(nil, alloc.ID) + if lookupErr != nil { + t.Fatalf("failed to lookup 
alloc after waiting: %v", lookupErr) + } + t.Fatalf("timed out waiting for batch timeout watcher to stop sysbatch alloc: %v; alloc=%#v", err, out) + }) +} From 180aea3eba98d90967964fa643be9c1d88c485da Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:39:23 +0200 Subject: [PATCH 05/37] event stream --- nomad/state/events.go | 9 ++++++- nomad/state/events_test.go | 51 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/nomad/state/events.go b/nomad/state/events.go index 07ea50c3acf..0211885165d 100644 --- a/nomad/state/events.go +++ b/nomad/state/events.go @@ -357,12 +357,19 @@ func eventFromChange(change memdb.Change) (structs.Event, bool) { // remove job info to help keep size of alloc event down alloc.Job = nil + allocEvent := &structs.AllocationEvent{Allocation: alloc} + if alloc.DesiredStatus == structs.AllocDesiredStatusStop && + alloc.DesiredDescription == structs.AllocTimeoutReasonMaxRunDuration { + allocEvent.Timeout = true + allocEvent.TimeoutReason = alloc.DesiredDescription + } + return structs.Event{ Topic: structs.TopicAllocation, Key: after.ID, FilterKeys: filterKeys, Namespace: after.Namespace, - Payload: &structs.AllocationEvent{Allocation: alloc}, + Payload: allocEvent, }, true case "jobs": after, ok := change.After.(*structs.Job) diff --git a/nomad/state/events_test.go b/nomad/state/events_test.go index 8cbab8f26ea..73b9d1ed26f 100644 --- a/nomad/state/events_test.go +++ b/nomad/state/events_test.go @@ -538,6 +538,57 @@ func TestEventsFromChanges_ApplyPlanResultsRequestType(t *testing.T) { must.Len(t, 1, evals) must.Len(t, 1, jobs) must.Len(t, 1, deploys) + + for _, e := range allocs { + allocEvent := e.Payload.(*structs.AllocationEvent) + must.False(t, allocEvent.Timeout) + must.Eq(t, "", allocEvent.TimeoutReason) + } +} + +func TestEventsFromChanges_ApplyPlanResultsRequestType_TimeoutStoppedAlloc(t *testing.T) { + ci.Parallel(t) + s := TestStateStoreCfg(t, TestStateStorePublisher(t)) + defer s.StopEventBroker() + + mockJob := mock.Job() + must.NoError(t, s.UpsertJob(structs.MsgTypeTestSetup, 9, nil, mockJob)) + + stoppedAlloc := mock.Alloc() + stoppedAlloc.Job = mockJob + stoppedAlloc.JobID = mockJob.ID + + must.NoError(t, s.UpsertAllocs(structs.MsgTypeTestSetup, 10, []*structs.Allocation{stoppedAlloc})) + + msgType := structs.ApplyPlanResultsRequestType + req := &structs.ApplyPlanResultsRequest{ + AllocsStopped: []*structs.AllocationDiff{{ + ID: stoppedAlloc.ID, + DesiredDescription: structs.AllocTimeoutReasonMaxRunDuration, + ClientStatus: structs.AllocClientStatusFailed, + }}, + Job: mockJob, + } + + must.NoError(t, s.UpsertPlanResults(msgType, 100, req)) + + events := WaitForEvents(t, s, 100, 1, 1*time.Second) + must.Len(t, 2, events) + + var allocEvent *structs.AllocationEvent + for _, e := range events { + must.Eq(t, structs.TypePlanResult, e.Type) + if e.Topic == structs.TopicAllocation { + allocEvent = e.Payload.(*structs.AllocationEvent) + } + } + + must.NotNil(t, allocEvent) + must.True(t, allocEvent.Timeout) + must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, allocEvent.TimeoutReason) + must.Eq(t, structs.AllocDesiredStatusStop, allocEvent.Allocation.DesiredStatus) + must.Eq(t, structs.AllocClientStatusFailed, allocEvent.Allocation.ClientStatus) + must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, allocEvent.Allocation.DesiredDescription) } func TestEventsFromChanges_BatchNodeUpdateDrainRequestType(t *testing.T) { From 
5fff8280523953ce4bd6791934d0dda3f2d60300 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:39:41 +0200 Subject: [PATCH 06/37] AllocTimeoutReasonMaxRunDuration --- nomad/batchtimeout/watcher.go | 4 +--- nomad/batchtimeout/watcher_test.go | 4 ++-- nomad/state/state_store_test.go | 2 +- nomad/structs/alloc.go | 4 ++++ nomad/structs/event.go | 8 ++++++++ 5 files changed, 16 insertions(+), 6 deletions(-) diff --git a/nomad/batchtimeout/watcher.go b/nomad/batchtimeout/watcher.go index 8dc907e43ac..69892e26c6f 100644 --- a/nomad/batchtimeout/watcher.go +++ b/nomad/batchtimeout/watcher.go @@ -15,8 +15,6 @@ import ( const ( defaultScanInterval = 5 * time.Second - - timeoutDescription = "allocation exceeded max_run_duration" ) type RaftApplier interface { @@ -134,7 +132,7 @@ func (w *Watcher) scan(now time.Time) { stopped = append(stopped, &structs.AllocationDiff{ ID: alloc.ID, - DesiredDescription: timeoutDescription, + DesiredDescription: structs.AllocTimeoutReasonMaxRunDuration, ClientStatus: structs.AllocClientStatusFailed, }) } diff --git a/nomad/batchtimeout/watcher_test.go b/nomad/batchtimeout/watcher_test.go index c73f410ac90..84cddfb1701 100644 --- a/nomad/batchtimeout/watcher_test.go +++ b/nomad/batchtimeout/watcher_test.go @@ -367,9 +367,9 @@ func TestWatcherScanAppliesTimedOutAllocs(t *testing.T) { must.MapContainsKey(t, got, sysbatchAlloc.ID) must.MapNotContainsKey(t, got, okAlloc.ID) - must.Eq(t, timeoutDescription, got[batchAlloc.ID].DesiredDescription) + must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, got[batchAlloc.ID].DesiredDescription) must.Eq(t, structs.AllocClientStatusFailed, got[batchAlloc.ID].ClientStatus) - must.Eq(t, timeoutDescription, got[sysbatchAlloc.ID].DesiredDescription) + must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, got[sysbatchAlloc.ID].DesiredDescription) must.Eq(t, structs.AllocClientStatusFailed, got[sysbatchAlloc.ID].ClientStatus) } diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 4db517a6e82..ba6ae7a26c1 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -177,7 +177,7 @@ func TestStateStore_UpsertPlanResults_AllocationsDenormalized(t *testing.T) { timeoutStoppedAlloc.JobID = job.ID timeoutStoppedAllocDiff := &structs.AllocationDiff{ ID: timeoutStoppedAlloc.ID, - DesiredDescription: "allocation exceeded max_run_duration", + DesiredDescription: structs.AllocTimeoutReasonMaxRunDuration, ClientStatus: structs.AllocClientStatusFailed, } preemptedAlloc := mock.Alloc() diff --git a/nomad/structs/alloc.go b/nomad/structs/alloc.go index fbc7777e776..56c4df7ae96 100644 --- a/nomad/structs/alloc.go +++ b/nomad/structs/alloc.go @@ -20,6 +20,10 @@ const ( AllocDesiredStatusRun = "run" // Allocation should run AllocDesiredStatusStop = "stop" // Allocation should stop AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted + + // AllocTimeoutReasonMaxRunDuration is the reason used when an allocation is + // stopped because it exceeded its configured max_run_duration. + AllocTimeoutReasonMaxRunDuration = "allocation exceeded max_run_duration" ) const ( diff --git a/nomad/structs/event.go b/nomad/structs/event.go index 2affe4da618..e1aaecc6118 100644 --- a/nomad/structs/event.go +++ b/nomad/structs/event.go @@ -141,6 +141,14 @@ type EvaluationEvent struct { // Allocs embedded Job has been removed to reduce size. 
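// A timeout stop surfaces on the event stream roughly as (a sketch, assuming // the default JSON encoding of this struct): {"Allocation":{...}, // "Timeout":true,"TimeoutReason":"allocation exceeded max_run_duration"}.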
type AllocationEvent struct { Allocation *Allocation + + // Timeout indicates the allocation was stopped because it exceeded its + // configured max_run_duration. + Timeout bool `json:",omitempty"` + + // TimeoutReason is a human-readable explanation for timeout-triggered + // allocation stops. + TimeoutReason string `json:",omitempty"` } // DeploymentEvent holds a newly updated Deployment. From 8fbfff316ae965dd1fea0acd5ad59574dbb4db40 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:44:46 +0200 Subject: [PATCH 07/37] batch timeout watcher improvements --- nomad/batchtimeout/watcher.go | 63 +++++++++++++++++++++--------- nomad/batchtimeout/watcher_test.go | 8 ++-- 2 files changed, 48 insertions(+), 23 deletions(-) diff --git a/nomad/batchtimeout/watcher.go b/nomad/batchtimeout/watcher.go index 69892e26c6f..719a137cc13 100644 --- a/nomad/batchtimeout/watcher.go +++ b/nomad/batchtimeout/watcher.go @@ -21,6 +21,11 @@ type RaftApplier interface { ApplyPlanResults(req *structs.ApplyPlanResultsRequest) (uint64, error) } +// Watcher scans running batch and sysbatch allocations for max_run_duration +// violations and stops expired allocations via plan results. +// +// The watcher is leader-managed. The enabled flag tracks desired state, while +// running indicates whether the background watch loop is currently active. type Watcher struct { logger log.Logger raft RaftApplier @@ -45,6 +50,8 @@ func NewWatcher(logger log.Logger, raft RaftApplier, scanInterval time.Duration) } } +// SetEnabled updates the desired watcher state and starts or stops the +// background loop as needed. func (w *Watcher) SetEnabled(enabled bool, st *state.StateStore) { w.mu.Lock() defer w.mu.Unlock() @@ -54,17 +61,25 @@ func (w *Watcher) SetEnabled(enabled bool, st *state.StateStore) { } if enabled { - w.enabled = true - if w.running || w.state == nil { - return - } + w.enableLocked() + return + } + + w.disableLocked() +} - w.stopCh = make(chan struct{}) - w.running = true - go w.watch(w.stopCh) +func (w *Watcher) enableLocked() { + w.enabled = true + if w.running || w.state == nil { return } + w.stopCh = make(chan struct{}) + w.running = true + go w.watch(w.stopCh) +} + +func (w *Watcher) disableLocked() { w.enabled = false if !w.running { return @@ -81,6 +96,8 @@ func (w *Watcher) watch(stopCh <-chan struct{}) { defer w.markStopped(stopCh) for { + // Scan immediately so leadership changes do not wait a full interval + // before enforcing timeouts. w.scan(time.Now().UTC()) select { @@ -97,16 +114,14 @@ func (w *Watcher) markStopped(stopCh <-chan struct{}) { if w.stopCh == stopCh { w.stopCh = nil + w.running = false } - w.running = false } +// scan evaluates all allocations in state and stops any that have exceeded +// max_run_duration. func (w *Watcher) scan(now time.Time) { - w.mu.Lock() - st := w.state - enabled := w.enabled - w.mu.Unlock() - + st, enabled := w.snapshot() if !enabled || st == nil { return } @@ -154,35 +169,42 @@ func (w *Watcher) scan(now time.Time) { w.logger.Debug("stopped timed out allocations", "count", len(stopped)) } +func (w *Watcher) snapshot() (*state.StateStore, bool) { + w.mu.Lock() + defer w.mu.Unlock() + + return w.state, w.enabled +} + func shouldStopAlloc(now time.Time, alloc *structs.Allocation) bool { + // Allocation must exist and still be actively running. 
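+ // Terminal allocations are skipped below so a stop that is already in + // flight on either the client or the server is not applied a second time.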
if alloc == nil || alloc.Job == nil { return false } - if alloc.DesiredStatus != structs.AllocDesiredStatusRun { return false } - if alloc.ClientStatus != structs.AllocClientStatusRunning { return false } - if alloc.ClientTerminalStatus() || alloc.ServerTerminalStatus() { return false } + // Job and task group must opt into max_run_duration and support timeout + // enforcement. tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) if tg == nil || tg.MaxRunDuration == nil || *tg.MaxRunDuration <= 0 { return false } - switch alloc.Job.Type { case structs.JobTypeBatch, structs.JobTypeSysBatch: default: return false } - startedAt, ok := allocRunningSince(alloc) + // Allocation must be fully running and past its deadline. + startedAt, ok := allocFullyRunningSince(alloc) if !ok { return false } @@ -190,7 +212,10 @@ func shouldStopAlloc(now time.Time, alloc *structs.Allocation) bool { return !startedAt.Add(*tg.MaxRunDuration).After(now) } -func allocRunningSince(alloc *structs.Allocation) (time.Time, bool) { +// allocFullyRunningSince returns the latest StartedAt timestamp across all task +// states, but only when every known task state is running with a non-zero start +// time. +func allocFullyRunningSince(alloc *structs.Allocation) (time.Time, bool) { var latest time.Time for _, ts := range alloc.TaskStates { diff --git a/nomad/batchtimeout/watcher_test.go b/nomad/batchtimeout/watcher_test.go index 84cddfb1701..8eed4c651d6 100644 --- a/nomad/batchtimeout/watcher_test.go +++ b/nomad/batchtimeout/watcher_test.go @@ -235,7 +235,7 @@ func TestAllocRunningSince(t *testing.T) { }, } - startedAt, ok := allocRunningSince(alloc) + startedAt, ok := allocFullyRunningSince(alloc) must.True(t, ok) must.Eq(t, now.Add(-5*time.Minute), startedAt) }) @@ -244,7 +244,7 @@ func TestAllocRunningSince(t *testing.T) { alloc := mock.Alloc() alloc.TaskStates = nil - _, ok := allocRunningSince(alloc) + _, ok := allocFullyRunningSince(alloc) must.False(t, ok) }) @@ -260,7 +260,7 @@ func TestAllocRunningSince(t *testing.T) { }, } - _, ok := allocRunningSince(alloc) + _, ok := allocFullyRunningSince(alloc) must.False(t, ok) }) @@ -272,7 +272,7 @@ func TestAllocRunningSince(t *testing.T) { }, } - _, ok := allocRunningSince(alloc) + _, ok := allocFullyRunningSince(alloc) must.False(t, ok) }) } From 697c5c34d06c13b51efeadb53e986071961c35d8 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:46:09 +0200 Subject: [PATCH 08/37] added batchtimeout package to test-core.json --- ci/test-core.json | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/test-core.json b/ci/test-core.json index 4756a5ed233..8e443e94180 100644 --- a/ci/test-core.json +++ b/ci/test-core.json @@ -34,6 +34,7 @@ "jobspec2/...", "lib/...", "nomad/auth/...", + "nomad/batchtimeout/...", "nomad/deploymentwatcher/...", "nomad/drainer/...", "nomad/reporting/...", From a31525ad2c7f9fff6c4723b445050a9d1ab32ed7 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Wed, 8 Apr 2026 08:46:35 +0200 Subject: [PATCH 09/37] move to allocrunner --- client/allocrunner/alloc_runner.go | 88 +++++- client/allocrunner/alloc_runner_test.go | 44 +++ client/allocrunner/state/state.go | 16 +- nomad/batch_timeout_shims.go | 20 -- nomad/batch_timeout_test.go | 120 -------- nomad/batchtimeout/watcher.go | 235 -------------- nomad/batchtimeout/watcher_test.go | 387 ------------------------ nomad/leader.go | 3 - nomad/server.go | 13 - nomad/state/events.go | 5 +- 
nomad/state/events_test.go | 9 +- nomad/state/state_store.go | 4 +- nomad/structs/alloc.go | 81 +++++ 13 files changed, 233 insertions(+), 792 deletions(-) delete mode 100644 nomad/batch_timeout_shims.go delete mode 100644 nomad/batch_timeout_test.go delete mode 100644 nomad/batchtimeout/watcher.go delete mode 100644 nomad/batchtimeout/watcher_test.go diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index e0cb0d7daa8..8ac89516edf 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -131,6 +131,10 @@ type allocRunner struct { // and serializes Shutdown/Destroy calls. destroyedLock sync.Mutex + // maxRunTimer is used to enforce task group max_run_duration locally. + maxRunTimer *time.Timer + maxRunTimerLock sync.Mutex + // Alloc captures the allocation being run. alloc *structs.Allocation allocLock sync.RWMutex @@ -372,6 +376,7 @@ func (ar *allocRunner) WaitCh() <-chan struct{} { func (ar *allocRunner) Run() { // Close the wait channel on return defer close(ar.waitCh) + defer ar.stopMaxRunTimer() // Start the task state update handler go ar.handleTaskStateUpdates() @@ -472,6 +477,8 @@ func (ar *allocRunner) GetAllocDir() allocdir.Interface { // Restore state from database. Must be called after NewAllocRunner but before // Run. func (ar *allocRunner) Restore() error { + defer ar.resetMaxRunTimer() + // Retrieve deployment status to avoid reseting it across agent // restarts. Once a deployment status is set Nomad no longer monitors // alloc health, so we must persist deployment state across restarts. @@ -689,6 +696,8 @@ func (ar *allocRunner) handleTaskStateUpdates() { // Get the client allocation calloc := ar.clientAlloc(states) + ar.resetMaxRunTimerWithAlloc(calloc) + // Update the server ar.stateUpdater.AllocStateUpdated(calloc) @@ -845,7 +854,10 @@ func (ar *allocRunner) clientAlloc(taskStates map[string]*structs.TaskState) *st } // Compute the ClientStatus - if ar.state.ClientStatus != "" { + if ar.state.MaxRunDurationExceeded { + a.ClientStatus = structs.AllocClientStatusFailed + a.ClientDescription = structs.AllocTimeoutReasonMaxRunDuration + } else if ar.state.ClientStatus != "" { // The client status is being forced a.ClientStatus, a.ClientDescription = ar.state.ClientStatus, ar.state.ClientDescription } else { @@ -1056,6 +1068,7 @@ func (ar *allocRunner) handleAllocUpdate(update *structs.Allocation) { // Update ar.alloc ar.setAlloc(update) + ar.resetMaxRunTimer() // Run update hooks if not stopping or dead if !update.TerminalStatus() { @@ -1081,6 +1094,79 @@ func (ar *allocRunner) Listener() *cstructs.AllocListener { return ar.allocBroadcaster.Listen() } +func (ar *allocRunner) resetMaxRunTimer() { + ar.maxRunTimerLock.Lock() + defer ar.maxRunTimerLock.Unlock() + + if ar.maxRunTimer != nil { + ar.maxRunTimer.Stop() + ar.maxRunTimer = nil + } +} + +func (ar *allocRunner) resetMaxRunTimerWithAlloc(alloc *structs.Allocation) { + ar.resetMaxRunTimer() + + if alloc == nil || alloc.TerminalStatus() || alloc.DesiredStatus != structs.AllocDesiredStatusRun { + return + } + + deadline, ok := alloc.MaxRunDurationDeadline() + if !ok { + return + } + + remaining := time.Until(deadline) + if remaining <= 0 { + go ar.enforceMaxRunDuration() + return + } + + timer := time.NewTimer(remaining) + + ar.maxRunTimerLock.Lock() + ar.maxRunTimer = timer + ar.maxRunTimerLock.Unlock() + + go func(t *time.Timer) { + select { + case <-t.C: + ar.enforceMaxRunDuration() + case <-ar.waitCh: + if !t.Stop() { + select { + case <-t.C: + 
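+ // The timer fired concurrently with Stop; drain the channel so the + // pending value does not linger.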
default: + } + } + } + }(timer) +} + +func (ar *allocRunner) stopMaxRunTimer() { + ar.resetMaxRunTimer() +} + +func (ar *allocRunner) enforceMaxRunDuration() { + if ar.isShuttingDown() { + return + } + + alloc := ar.Alloc() + if alloc == nil || !alloc.MaxRunDurationExpired(time.Now().UTC()) { + return + } + + ar.stateLock.Lock() + ar.state.MaxRunDurationExceeded = true + ar.state.ClientStatus = structs.AllocClientStatusFailed + ar.state.ClientDescription = structs.AllocTimeoutReasonMaxRunDuration + ar.stateLock.Unlock() + + ar.logger.Debug("allocation exceeded max_run_duration, shutting down tasks") + ar.Shutdown() +} + func (ar *allocRunner) destroyImpl() { // Stop any running tasks and persist states in case the client is // shutdown before Destroy finishes. diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index 794923d4819..aa59adc8ccb 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -2676,6 +2676,50 @@ func TestAllocRunner_GetUpdatePriority(t *testing.T) { must.Eq(t, cstructs.AllocUpdatePriorityUrgent, ar.GetUpdatePriority(calloc)) } +func TestAllocRunner_MaxRunDuration_StopsExpiredAlloc(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + task := alloc.Job.TaskGroups[0].Tasks[0] + task.Driver = "mock_driver" + task.Config = map[string]interface{}{ + "run_for": "10s", + } + task.KillTimeout = 10 * time.Millisecond + maxRunDuration := 50 * time.Millisecond + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + conf, cleanup := testAllocRunnerConfig(t, alloc) + defer cleanup() + + arIface, err := NewAllocRunner(conf) + must.NoError(t, err) + ar := arIface.(*allocRunner) + + ar.setAlloc(ar.Alloc().Copy()) + ar.alloc.Job.TaskGroups[0].MaxRunDuration = nil + + go ar.Run() + defer destroy(ar) + + testutil.WaitForResult(func() (bool, error) { + state := ar.AllocState() + if state == nil { + return false, fmt.Errorf("No alloc state") + } + if state.ClientStatus != structs.AllocClientStatusFailed { + return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusFailed) + } + if state.ClientDescription != structs.AllocTimeoutReasonMaxRunDuration { + return false, fmt.Errorf("got description %q; want %q", state.ClientDescription, structs.AllocTimeoutReasonMaxRunDuration) + } + return true, nil + }, func(err error) { + state := ar.AllocState() + t.Fatalf("timed out waiting for alloc runner max_run_duration enforcement: %v; state=%#v", err, state) + }) +} + func TestAllocRunner_setHookStatsHandler(t *testing.T) { ci.Parallel(t) diff --git a/client/allocrunner/state/state.go b/client/allocrunner/state/state.go index 54954711907..89326eddc9b 100644 --- a/client/allocrunner/state/state.go +++ b/client/allocrunner/state/state.go @@ -19,6 +19,11 @@ type State struct { // allocations client state ClientDescription string + // MaxRunDurationExceeded indicates the allocation exceeded its configured + // max_run_duration and should be reported as failed regardless of task exit + // status. 
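+ // This flag is copied along with the rest of the persisted state (see + // Copy below), so the reported failure survives a client restart.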
+ MaxRunDurationExceeded bool + // DeploymentStatus captures the status of the deployment DeploymentStatus *structs.AllocDeploymentStatus @@ -60,11 +65,12 @@ func (s *State) Copy() *State { taskStates[k] = v.Copy() } return &State{ - ClientStatus: s.ClientStatus, - ClientDescription: s.ClientDescription, - DeploymentStatus: s.DeploymentStatus.Copy(), - TaskStates: taskStates, - NetworkStatus: s.NetworkStatus.Copy(), + ClientStatus: s.ClientStatus, + ClientDescription: s.ClientDescription, + MaxRunDurationExceeded: s.MaxRunDurationExceeded, + DeploymentStatus: s.DeploymentStatus.Copy(), + TaskStates: taskStates, + NetworkStatus: s.NetworkStatus.Copy(), } } diff --git a/nomad/batch_timeout_shims.go b/nomad/batch_timeout_shims.go deleted file mode 100644 index 82e7520fdca..00000000000 --- a/nomad/batch_timeout_shims.go +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright IBM Corp. 2015, 2025 -// SPDX-License-Identifier: BUSL-1.1 - -package nomad - -import ( - "github.com/hashicorp/nomad/nomad/batchtimeout" - "github.com/hashicorp/nomad/nomad/structs" -) - -type batchTimeoutRaftShim struct { - s *Server -} - -var _ batchtimeout.RaftApplier = batchTimeoutRaftShim{} - -func (b batchTimeoutRaftShim) ApplyPlanResults(req *structs.ApplyPlanResultsRequest) (uint64, error) { - _, index, err := b.s.raftApply(structs.ApplyPlanResultsRequestType, req) - return index, err -} diff --git a/nomad/batch_timeout_test.go b/nomad/batch_timeout_test.go deleted file mode 100644 index 61e1ef99f1e..00000000000 --- a/nomad/batch_timeout_test.go +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright IBM Corp. 2015, 2025 -// SPDX-License-Identifier: BUSL-1.1 - -package nomad - -import ( - "testing" - "time" - - "github.com/hashicorp/nomad/helper/pointer" - "github.com/hashicorp/nomad/nomad/mock" - "github.com/hashicorp/nomad/nomad/structs" - "github.com/hashicorp/nomad/testutil" - "github.com/shoenig/test/must" -) - -func Test_BatchTimeoutWatcher_StopsExpiredBatchAlloc(t *testing.T) { - t.Parallel() - - srv, cleanup := TestServer(t, nil) - t.Cleanup(cleanup) - - testutil.WaitForLeader(t, srv.RPC) - - job := mock.Job() - job.Type = structs.JobTypeBatch - job.TaskGroups[0].MaxRunDuration = pointer.Of(50 * time.Millisecond) - job.Canonicalize() - - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.Namespace = job.Namespace - alloc.TaskGroup = job.TaskGroups[0].Name - alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.TaskStates = map[string]*structs.TaskState{ - job.TaskGroups[0].Tasks[0].Name: { - State: structs.TaskStateRunning, - StartedAt: time.Now().UTC().Add(-1 * time.Second), - }, - } - - state := srv.fsm.State() - must.NoError(t, state.UpsertJobSummary(998, mock.JobSummary(job.ID))) - must.NoError(t, state.UpsertJob(structs.MsgTypeTestSetup, 999, nil, job)) - must.NoError(t, state.UpsertAllocs(structs.MsgTypeTestSetup, 1000, []*structs.Allocation{alloc})) - - testutil.WaitForResult(func() (bool, error) { - out, err := state.AllocByID(nil, alloc.ID) - if err != nil { - return false, err - } - if out == nil { - return false, nil - } - - return out.DesiredStatus == structs.AllocDesiredStatusStop && - out.DesiredDescription == structs.AllocTimeoutReasonMaxRunDuration && - out.ClientStatus == structs.AllocClientStatusFailed, nil - }, func(err error) { - out, lookupErr := state.AllocByID(nil, alloc.ID) - if lookupErr != nil { - t.Fatalf("failed to lookup alloc after waiting: %v", lookupErr) - } - t.Fatalf("timed out waiting for batch timeout watcher 
to stop alloc: %v; alloc=%#v", err, out) - }) -} - -func Test_BatchTimeoutWatcher_StopsExpiredSysBatchAlloc(t *testing.T) { - t.Parallel() - - srv, cleanup := TestServer(t, nil) - t.Cleanup(cleanup) - - testutil.WaitForLeader(t, srv.RPC) - - job := mock.SystemBatchJob() - job.TaskGroups[0].MaxRunDuration = pointer.Of(50 * time.Millisecond) - job.Canonicalize() - - alloc := mock.SysBatchAlloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.Namespace = job.Namespace - alloc.TaskGroup = job.TaskGroups[0].Name - alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.TaskStates = map[string]*structs.TaskState{ - job.TaskGroups[0].Tasks[0].Name: { - State: structs.TaskStateRunning, - StartedAt: time.Now().UTC().Add(-1 * time.Second), - }, - } - - state := srv.fsm.State() - must.NoError(t, state.UpsertJobSummary(998, mock.JobSummary(job.ID))) - must.NoError(t, state.UpsertJob(structs.MsgTypeTestSetup, 999, nil, job)) - must.NoError(t, state.UpsertAllocs(structs.MsgTypeTestSetup, 1000, []*structs.Allocation{alloc})) - - testutil.WaitForResult(func() (bool, error) { - out, err := state.AllocByID(nil, alloc.ID) - if err != nil { - return false, err - } - if out == nil { - return false, nil - } - - return out.DesiredStatus == structs.AllocDesiredStatusStop && - out.DesiredDescription == structs.AllocTimeoutReasonMaxRunDuration && - out.ClientStatus == structs.AllocClientStatusFailed, nil - }, func(err error) { - out, lookupErr := state.AllocByID(nil, alloc.ID) - if lookupErr != nil { - t.Fatalf("failed to lookup alloc after waiting: %v", lookupErr) - } - t.Fatalf("timed out waiting for batch timeout watcher to stop sysbatch alloc: %v; alloc=%#v", err, out) - }) -} diff --git a/nomad/batchtimeout/watcher.go b/nomad/batchtimeout/watcher.go deleted file mode 100644 index 719a137cc13..00000000000 --- a/nomad/batchtimeout/watcher.go +++ /dev/null @@ -1,235 +0,0 @@ -// Copyright IBM Corp. 2015, 2025 -// SPDX-License-Identifier: BUSL-1.1 - -package batchtimeout - -import ( - "sync" - "time" - - log "github.com/hashicorp/go-hclog" - - "github.com/hashicorp/nomad/nomad/state" - "github.com/hashicorp/nomad/nomad/structs" -) - -const ( - defaultScanInterval = 5 * time.Second -) - -type RaftApplier interface { - ApplyPlanResults(req *structs.ApplyPlanResultsRequest) (uint64, error) -} - -// Watcher scans running batch and sysbatch allocations for max_run_duration -// violations and stops expired allocations via plan results. -// -// The watcher is leader-managed. The enabled flag tracks desired state, while -// running indicates whether the background watch loop is currently active. -type Watcher struct { - logger log.Logger - raft RaftApplier - state *state.StateStore - scanInterval time.Duration - - mu sync.Mutex - enabled bool - stopCh chan struct{} - running bool -} - -func NewWatcher(logger log.Logger, raft RaftApplier, scanInterval time.Duration) *Watcher { - if scanInterval <= 0 { - scanInterval = defaultScanInterval - } - - return &Watcher{ - logger: logger.Named("batch_timeout_watcher"), - raft: raft, - scanInterval: scanInterval, - } -} - -// SetEnabled updates the desired watcher state and starts or stops the -// background loop as needed. 
-func (w *Watcher) SetEnabled(enabled bool, st *state.StateStore) { - w.mu.Lock() - defer w.mu.Unlock() - - if st != nil { - w.state = st - } - - if enabled { - w.enableLocked() - return - } - - w.disableLocked() -} - -func (w *Watcher) enableLocked() { - w.enabled = true - if w.running || w.state == nil { - return - } - - w.stopCh = make(chan struct{}) - w.running = true - go w.watch(w.stopCh) -} - -func (w *Watcher) disableLocked() { - w.enabled = false - if !w.running { - return - } - - close(w.stopCh) - w.stopCh = nil - w.running = false -} - -func (w *Watcher) watch(stopCh <-chan struct{}) { - ticker := time.NewTicker(w.scanInterval) - defer ticker.Stop() - defer w.markStopped(stopCh) - - for { - // Scan immediately so leadership changes do not wait a full interval - // before enforcing timeouts. - w.scan(time.Now().UTC()) - - select { - case <-ticker.C: - case <-stopCh: - return - } - } -} - -func (w *Watcher) markStopped(stopCh <-chan struct{}) { - w.mu.Lock() - defer w.mu.Unlock() - - if w.stopCh == stopCh { - w.stopCh = nil - w.running = false - } -} - -// scan evaluates all allocations in state and stops any that have exceeded -// max_run_duration. -func (w *Watcher) scan(now time.Time) { - st, enabled := w.snapshot() - if !enabled || st == nil { - return - } - - iter, err := st.Allocs(nil, state.SortDefault) - if err != nil { - w.logger.Warn("failed to iterate allocations", "error", err) - return - } - - var stopped []*structs.AllocationDiff - - for { - raw := iter.Next() - if raw == nil { - break - } - - alloc := raw.(*structs.Allocation) - if !shouldStopAlloc(now, alloc) { - continue - } - - stopped = append(stopped, &structs.AllocationDiff{ - ID: alloc.ID, - DesiredDescription: structs.AllocTimeoutReasonMaxRunDuration, - ClientStatus: structs.AllocClientStatusFailed, - }) - } - - if len(stopped) == 0 { - return - } - - req := &structs.ApplyPlanResultsRequest{ - AllocsStopped: stopped, - UpdatedAt: now.UnixNano(), - } - - if _, err := w.raft.ApplyPlanResults(req); err != nil { - w.logger.Warn("failed to stop timed out allocations", "error", err, "count", len(stopped)) - return - } - - w.logger.Debug("stopped timed out allocations", "count", len(stopped)) -} - -func (w *Watcher) snapshot() (*state.StateStore, bool) { - w.mu.Lock() - defer w.mu.Unlock() - - return w.state, w.enabled -} - -func shouldStopAlloc(now time.Time, alloc *structs.Allocation) bool { - // Allocation must exist and still be actively running. - if alloc == nil || alloc.Job == nil { - return false - } - if alloc.DesiredStatus != structs.AllocDesiredStatusRun { - return false - } - if alloc.ClientStatus != structs.AllocClientStatusRunning { - return false - } - if alloc.ClientTerminalStatus() || alloc.ServerTerminalStatus() { - return false - } - - // Job and task group must opt into max_run_duration and support timeout - // enforcement. - tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) - if tg == nil || tg.MaxRunDuration == nil || *tg.MaxRunDuration <= 0 { - return false - } - switch alloc.Job.Type { - case structs.JobTypeBatch, structs.JobTypeSysBatch: - default: - return false - } - - // Allocation must be fully running and past its deadline. - startedAt, ok := allocFullyRunningSince(alloc) - if !ok { - return false - } - - return !startedAt.Add(*tg.MaxRunDuration).After(now) -} - -// allocFullyRunningSince returns the latest StartedAt timestamp across all task -// states, but only when every known task state is running with a non-zero start -// time. 
-func allocFullyRunningSince(alloc *structs.Allocation) (time.Time, bool) { - var latest time.Time - - for _, ts := range alloc.TaskStates { - if ts == nil || ts.State != structs.TaskStateRunning || ts.StartedAt.IsZero() { - return time.Time{}, false - } - if ts.StartedAt.After(latest) { - latest = ts.StartedAt - } - } - - if latest.IsZero() { - return time.Time{}, false - } - - return latest, true -} diff --git a/nomad/batchtimeout/watcher_test.go b/nomad/batchtimeout/watcher_test.go deleted file mode 100644 index 8eed4c651d6..00000000000 --- a/nomad/batchtimeout/watcher_test.go +++ /dev/null @@ -1,387 +0,0 @@ -// Copyright IBM Corp. 2015, 2025 -// SPDX-License-Identifier: BUSL-1.1 - -package batchtimeout - -import ( - "sync" - "testing" - "time" - - "github.com/hashicorp/go-hclog" - "github.com/hashicorp/nomad/helper/pointer" - "github.com/hashicorp/nomad/nomad/mock" - "github.com/hashicorp/nomad/nomad/state" - "github.com/hashicorp/nomad/nomad/structs" - "github.com/shoenig/test/must" -) - -func TestShouldStopAlloc(t *testing.T) { - t.Parallel() - - now := time.Now().UTC() - - cases := []struct { - name string - allocFn func() *structs.Allocation - expected bool - }{ - { - name: "batch alloc times out after max run duration", - allocFn: func() *structs.Allocation { - alloc := mock.Alloc() - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: now.Add(-10 * time.Minute), - }, - } - return alloc - }, - expected: true, - }, - { - name: "sysbatch alloc times out after max run duration", - allocFn: func() *structs.Allocation { - alloc := mock.SysBatchAlloc() - alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.TaskStates = map[string]*structs.TaskState{ - "ping-example": { - State: structs.TaskStateRunning, - StartedAt: now.Add(-10 * time.Minute), - }, - } - return alloc - }, - expected: true, - }, - { - name: "alloc without max run duration does not time out", - allocFn: func() *structs.Allocation { - alloc := mock.Alloc() - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = nil - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: now.Add(-10 * time.Minute), - }, - } - return alloc - }, - expected: false, - }, - { - name: "service alloc does not time out", - allocFn: func() *structs.Allocation { - alloc := mock.Alloc() - alloc.Job.Type = structs.JobTypeService - alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: now.Add(-10 * time.Minute), - }, - } - return alloc - }, - expected: false, - }, - { - name: "pending alloc does not time out", - allocFn: func() *structs.Allocation { - alloc := mock.Alloc() - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) - alloc.ClientStatus = 
structs.AllocClientStatusPending - alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: now.Add(-10 * time.Minute), - }, - } - return alloc - }, - expected: false, - }, - { - name: "stopping alloc does not time out again", - allocFn: func() *structs.Allocation { - alloc := mock.Alloc() - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.DesiredStatus = structs.AllocDesiredStatusStop - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: now.Add(-10 * time.Minute), - }, - } - return alloc - }, - expected: false, - }, - { - name: "alloc within max run duration does not time out", - allocFn: func() *structs.Allocation { - alloc := mock.Alloc() - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(15 * time.Minute) - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: now.Add(-10 * time.Minute), - }, - } - return alloc - }, - expected: false, - }, - { - name: "alloc with no running task state does not time out", - allocFn: func() *structs.Allocation { - alloc := mock.Alloc() - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStatePending, - StartedAt: now.Add(-10 * time.Minute), - }, - } - return alloc - }, - expected: false, - }, - { - name: "alloc with zero start time does not time out", - allocFn: func() *structs.Allocation { - alloc := mock.Alloc() - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - }, - } - return alloc - }, - expected: false, - }, - { - name: "alloc with mixed task states does not time out until all tasks are running", - allocFn: func() *structs.Allocation { - alloc := mock.Alloc() - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, &structs.Task{Name: "sidecar"}) - alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: now.Add(-10 * time.Minute), - }, - "sidecar": { - State: structs.TaskStatePending, - }, - } - return alloc - }, - expected: false, - }, - } - - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - must.Eq(t, tc.expected, shouldStopAlloc(now, tc.allocFn())) - }) - } -} - -func TestAllocRunningSince(t *testing.T) { - t.Parallel() - - now := time.Now().UTC() - - t.Run("returns latest running task start time", func(t *testing.T) { - alloc := mock.Alloc() - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - 
StartedAt: now.Add(-10 * time.Minute), - }, - "sidecar": { - State: structs.TaskStateRunning, - StartedAt: now.Add(-5 * time.Minute), - }, - } - - startedAt, ok := allocFullyRunningSince(alloc) - must.True(t, ok) - must.Eq(t, now.Add(-5*time.Minute), startedAt) - }) - - t.Run("returns false when task states are missing", func(t *testing.T) { - alloc := mock.Alloc() - alloc.TaskStates = nil - - _, ok := allocFullyRunningSince(alloc) - must.False(t, ok) - }) - - t.Run("returns false when any task is not running", func(t *testing.T) { - alloc := mock.Alloc() - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: now.Add(-10 * time.Minute), - }, - "sidecar": { - State: structs.TaskStatePending, - }, - } - - _, ok := allocFullyRunningSince(alloc) - must.False(t, ok) - }) - - t.Run("returns false when any running task has zero start time", func(t *testing.T) { - alloc := mock.Alloc() - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - }, - } - - _, ok := allocFullyRunningSince(alloc) - must.False(t, ok) - }) -} - -func TestWatcherSetEnabled(t *testing.T) { - t.Parallel() - - raft := &mockRaftApplier{} - w := NewWatcher(hclog.NewNullLogger(), raft, time.Hour) - - st := state.TestStateStore(t) - - w.SetEnabled(true, st) - must.True(t, w.enabled) - must.True(t, w.running) - must.NotNil(t, w.stopCh) - - firstStopCh := w.stopCh - - w.SetEnabled(true, st) - must.True(t, w.running) - must.Eq(t, firstStopCh, w.stopCh) - - w.SetEnabled(false, st) - must.False(t, w.enabled) -} - -func TestWatcherScanAppliesTimedOutAllocs(t *testing.T) { - t.Parallel() - - st := state.TestStateStore(t) - raft := &mockRaftApplier{} - w := NewWatcher(hclog.NewNullLogger(), raft, time.Hour) - w.state = st - w.enabled = true - - batchAlloc := mock.Alloc() - batchAlloc.Job.Type = structs.JobTypeBatch - batchAlloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) - batchAlloc.ClientStatus = structs.AllocClientStatusRunning - batchAlloc.DesiredStatus = structs.AllocDesiredStatusRun - batchAlloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: time.Now().UTC().Add(-10 * time.Minute), - }, - } - - sysbatchAlloc := mock.SysBatchAlloc() - sysbatchAlloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) - sysbatchAlloc.ClientStatus = structs.AllocClientStatusRunning - sysbatchAlloc.DesiredStatus = structs.AllocDesiredStatusRun - sysbatchAlloc.TaskStates = map[string]*structs.TaskState{ - "ping-example": { - State: structs.TaskStateRunning, - StartedAt: time.Now().UTC().Add(-10 * time.Minute), - }, - } - - okAlloc := mock.Alloc() - okAlloc.Job.Type = structs.JobTypeBatch - okAlloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(30 * time.Minute) - okAlloc.ClientStatus = structs.AllocClientStatusRunning - okAlloc.DesiredStatus = structs.AllocDesiredStatusRun - okAlloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: time.Now().UTC().Add(-10 * time.Minute), - }, - } - - must.NoError(t, st.UpsertJob(structs.MsgTypeTestSetup, 100, nil, batchAlloc.Job)) - must.NoError(t, st.UpsertJob(structs.MsgTypeTestSetup, 101, nil, sysbatchAlloc.Job)) - must.NoError(t, st.UpsertJob(structs.MsgTypeTestSetup, 102, nil, okAlloc.Job)) - must.NoError(t, st.UpsertAllocs(structs.MsgTypeTestSetup, 103, []*structs.Allocation{ - batchAlloc, - sysbatchAlloc, - okAlloc, - })) - - w.scan(time.Now().UTC()) - - must.NotNil(t, 
raft.lastReq) - must.SliceLen(t, 2, raft.lastReq.AllocsStopped) - - got := map[string]*structs.AllocationDiff{} - for _, diff := range raft.lastReq.AllocsStopped { - got[diff.ID] = diff - } - - must.MapContainsKey(t, got, batchAlloc.ID) - must.MapContainsKey(t, got, sysbatchAlloc.ID) - must.MapNotContainsKey(t, got, okAlloc.ID) - - must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, got[batchAlloc.ID].DesiredDescription) - must.Eq(t, structs.AllocClientStatusFailed, got[batchAlloc.ID].ClientStatus) - must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, got[sysbatchAlloc.ID].DesiredDescription) - must.Eq(t, structs.AllocClientStatusFailed, got[sysbatchAlloc.ID].ClientStatus) -} - -type mockRaftApplier struct { - mu sync.Mutex - lastReq *structs.ApplyPlanResultsRequest -} - -func (m *mockRaftApplier) ApplyPlanResults(req *structs.ApplyPlanResultsRequest) (uint64, error) { - m.mu.Lock() - defer m.mu.Unlock() - - m.lastReq = req - return 1, nil -} diff --git a/nomad/leader.go b/nomad/leader.go index 1855fc2bda4..26a5f1d9521 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -412,9 +412,6 @@ func (s *Server) establishLeadership(stopCh chan struct{}) error { // Enable the deployment watcher, since we are now the leader s.deploymentWatcher.SetEnabled(true, s.State()) - // Enable the batch timeout watcher, since we are now the leader - s.batchTimeoutWatcher.SetEnabled(true, s.State()) - // Enable the NodeDrainer s.nodeDrainer.SetEnabled(true, s.State()) diff --git a/nomad/server.go b/nomad/server.go index ba49483be79..c45047dffcb 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -43,7 +43,6 @@ import ( "github.com/hashicorp/nomad/helper/tlsutil" "github.com/hashicorp/nomad/lib/auth/oidc" "github.com/hashicorp/nomad/nomad/auth" - "github.com/hashicorp/nomad/nomad/batchtimeout" "github.com/hashicorp/nomad/nomad/deploymentwatcher" "github.com/hashicorp/nomad/nomad/drainer" "github.com/hashicorp/nomad/nomad/lock" @@ -230,10 +229,6 @@ type Server struct { // make the required calls to continue to transition the deployment. deploymentWatcher *deploymentwatcher.Watcher - // batchTimeoutWatcher is used to stop batch and sysbatch allocations that - // exceed their configured max_run_duration. - batchTimeoutWatcher *batchtimeout.Watcher - // nodeDrainer is used to drain allocations from nodes. nodeDrainer *drainer.NodeDrainer @@ -532,9 +527,6 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, consulConfigFunc // updates when the processes SetEnabled is triggered. go s.evalBroker.enabledNotifier.Run() - // Setup the batch timeout watcher. - s.setupBatchTimeoutWatcher() - // Setup the node drainer. s.setupNodeDrainer() @@ -1192,11 +1184,6 @@ func (s *Server) setupVolumeWatcher() error { // setupNodeDrainer creates a node drainer which will be enabled when a server // becomes a leader. 
-func (s *Server) setupBatchTimeoutWatcher() { - shim := batchTimeoutRaftShim{s} - s.batchTimeoutWatcher = batchtimeout.NewWatcher(s.logger, shim, 0) -} - func (s *Server) setupNodeDrainer() { // Create a shim around Raft requests shim := drainerShim{s} diff --git a/nomad/state/events.go b/nomad/state/events.go index 0211885165d..3470e06bcf8 100644 --- a/nomad/state/events.go +++ b/nomad/state/events.go @@ -358,10 +358,9 @@ func eventFromChange(change memdb.Change) (structs.Event, bool) { alloc.Job = nil allocEvent := &structs.AllocationEvent{Allocation: alloc} - if alloc.DesiredStatus == structs.AllocDesiredStatusStop && - alloc.DesiredDescription == structs.AllocTimeoutReasonMaxRunDuration { + if alloc.ClientDescription == structs.AllocTimeoutReasonMaxRunDuration { allocEvent.Timeout = true - allocEvent.TimeoutReason = alloc.DesiredDescription + allocEvent.TimeoutReason = alloc.ClientDescription } return structs.Event{ diff --git a/nomad/state/events_test.go b/nomad/state/events_test.go index 73b9d1ed26f..57dad2710b2 100644 --- a/nomad/state/events_test.go +++ b/nomad/state/events_test.go @@ -557,15 +557,16 @@ func TestEventsFromChanges_ApplyPlanResultsRequestType_TimeoutStoppedAlloc(t *te stoppedAlloc := mock.Alloc() stoppedAlloc.Job = mockJob stoppedAlloc.JobID = mockJob.ID + stoppedAlloc.ClientDescription = structs.AllocTimeoutReasonMaxRunDuration must.NoError(t, s.UpsertAllocs(structs.MsgTypeTestSetup, 10, []*structs.Allocation{stoppedAlloc})) msgType := structs.ApplyPlanResultsRequestType req := &structs.ApplyPlanResultsRequest{ AllocsStopped: []*structs.AllocationDiff{{ - ID: stoppedAlloc.ID, - DesiredDescription: structs.AllocTimeoutReasonMaxRunDuration, - ClientStatus: structs.AllocClientStatusFailed, + ID: stoppedAlloc.ID, + ClientStatus: structs.AllocClientStatusFailed, + ClientDescription: structs.AllocTimeoutReasonMaxRunDuration, }}, Job: mockJob, } @@ -588,7 +589,7 @@ func TestEventsFromChanges_ApplyPlanResultsRequestType_TimeoutStoppedAlloc(t *te must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, allocEvent.TimeoutReason) must.Eq(t, structs.AllocDesiredStatusStop, allocEvent.Allocation.DesiredStatus) must.Eq(t, structs.AllocClientStatusFailed, allocEvent.Allocation.ClientStatus) - must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, allocEvent.Allocation.DesiredDescription) + must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, allocEvent.Allocation.ClientDescription) } func TestEventsFromChanges_BatchNodeUpdateDrainRequestType(t *testing.T) { diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 3c452d6f5c5..2232dd6ca82 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -4303,7 +4303,9 @@ func (s *StateStore) upsertAllocsImpl(index uint64, allocs []*structs.Allocation } default: alloc.ClientStatus = exist.ClientStatus - alloc.ClientDescription = exist.ClientDescription + if alloc.ClientDescription == "" { + alloc.ClientDescription = exist.ClientDescription + } } // The job has been denormalized so re-attach the original job diff --git a/nomad/structs/alloc.go b/nomad/structs/alloc.go index 56c4df7ae96..7af3e6683b8 100644 --- a/nomad/structs/alloc.go +++ b/nomad/structs/alloc.go @@ -361,6 +361,87 @@ func (a *Allocation) TerminalStatus() bool { return a.ServerTerminalStatus() || a.ClientTerminalStatus() } +// MaxRunDuration returns the configured max_run_duration for the allocation's +// task group, if any. 
+func (a *Allocation) MaxRunDuration() (time.Duration, bool) { + if a == nil || a.Job == nil { + return 0, false + } + + tg := a.Job.LookupTaskGroup(a.TaskGroup) + if tg == nil || tg.MaxRunDuration == nil || *tg.MaxRunDuration <= 0 { + return 0, false + } + + switch a.Job.Type { + case JobTypeBatch, JobTypeSysBatch: + return *tg.MaxRunDuration, true + default: + return 0, false + } +} + +// FullyRunningSince returns the latest StartedAt timestamp across all task +// states, but only when every known task state is running with a non-zero start +// time. +func (a *Allocation) FullyRunningSince() (time.Time, bool) { + if a == nil { + return time.Time{}, false + } + + var latest time.Time + + for _, ts := range a.TaskStates { + if ts == nil || ts.State != TaskStateRunning || ts.StartedAt.IsZero() { + return time.Time{}, false + } + if ts.StartedAt.After(latest) { + latest = ts.StartedAt + } + } + + if latest.IsZero() { + return time.Time{}, false + } + + return latest, true +} + +// MaxRunDurationDeadline returns the deadline at which the allocation should be +// considered timed out based on max_run_duration. +func (a *Allocation) MaxRunDurationDeadline() (time.Time, bool) { + maxRunDuration, ok := a.MaxRunDuration() + if !ok { + return time.Time{}, false + } + + startedAt, ok := a.FullyRunningSince() + if !ok { + return time.Time{}, false + } + + return startedAt.Add(maxRunDuration), true +} + +// MaxRunDurationExpired returns whether the allocation has exceeded its +// configured max_run_duration. +func (a *Allocation) MaxRunDurationExpired(now time.Time) bool { + if a == nil || a.DesiredStatus != AllocDesiredStatusRun || a.ClientStatus != AllocClientStatusRunning { + return false + } + + if a.ClientTerminalStatus() || a.ServerTerminalStatus() { + return false + } + + deadline, ok := a.MaxRunDurationDeadline() + if !ok { + return false + } + + return !deadline.After(now) +} + // ServerTerminalStatus returns true if the desired state of the allocation is terminal func (a *Allocation) ServerTerminalStatus() bool { switch a.DesiredStatus { From 2a7fc3bdeeab6672c124b39eccfe36279b932733 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Wed, 8 Apr 2026 08:47:02 +0200 Subject: [PATCH 10/37] remove nomad/batchtimeout --- ci/test-core.json | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/test-core.json b/ci/test-core.json index 8e443e94180..4756a5ed233 100644 --- a/ci/test-core.json +++ b/ci/test-core.json @@ -34,7 +34,6 @@ "jobspec2/...", "lib/...", "nomad/auth/...", - "nomad/batchtimeout/...", "nomad/deploymentwatcher/...", "nomad/drainer/...", "nomad/reporting/...", From e9d30e78506fb001a05a2a6aac546b5bb2f5a96c Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Wed, 8 Apr 2026 17:05:23 +0200 Subject: [PATCH 11/37] timer tweaks --- client/allocrunner/alloc_runner.go | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 8ac89516edf..8e973b06a4e 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -1105,7 +1105,12 @@ func (ar *allocRunner) resetMaxRunTimer() { } func (ar *allocRunner) resetMaxRunTimerWithAlloc(alloc *structs.Allocation) { - ar.resetMaxRunTimer() + ar.maxRunTimerLock.Lock() + if ar.maxRunTimer != nil { + ar.maxRunTimer.Stop() + ar.maxRunTimer = nil + } + ar.maxRunTimerLock.Unlock() if alloc == nil || alloc.TerminalStatus() || 
alloc.DesiredStatus != structs.AllocDesiredStatusRun {
 		return
@@ -1131,8 +1136,19 @@ func (ar *allocRunner) resetMaxRunTimerWithAlloc(alloc *structs.Allocation) {
 	go func(t *time.Timer) {
 		select {
 		case <-t.C:
+			ar.maxRunTimerLock.Lock()
+			if ar.maxRunTimer != t {
+				ar.maxRunTimerLock.Unlock()
+				return
+			}
+			ar.maxRunTimer = nil
+			ar.maxRunTimerLock.Unlock()
+
 			ar.enforceMaxRunDuration()
 		case <-ar.waitCh:
+			ar.maxRunTimerLock.Lock()
+			ar.maxRunTimerLock.Unlock()
+
 			if !t.Stop() {
 				select {
 				case <-t.C:
@@ -1159,7 +1175,7 @@ func (ar *allocRunner) enforceMaxRunDuration() {
 
 	ar.stateLock.Lock()
 	ar.state.MaxRunDurationExceeded = true
-	ar.state.ClientStatus = structs.AllocClientStatusFailed
+	ar.state.ClientStatus = structs.AllocClientStatusComplete
 	ar.state.ClientDescription = structs.AllocTimeoutReasonMaxRunDuration
 	ar.stateLock.Unlock()
 
From d560d47b1c3158cbc23f709db4a880595eeea558 Mon Sep 17 00:00:00 2001
From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com>
Date: Wed, 8 Apr 2026 18:43:24 +0200
Subject: [PATCH 12/37] max_run_duration_hook

---
 client/allocrunner/alloc_runner.go          | 135 ++++------
 client/allocrunner/alloc_runner_hooks.go    |   6 +-
 client/allocrunner/alloc_runner_test.go     |  63 ++++-
 .../interfaces/runner_lifecycle.go          |  12 +-
 client/allocrunner/max_run_duration_hook.go | 246 ++++++++++++++++++
 client/allocrunner/state/state.go           |   4 +-
 command/agent/job_endpoint.go               |   4 +
 nomad/structs/structs.go                    |   4 +
 8 files changed, 382 insertions(+), 92 deletions(-)
 create mode 100644 client/allocrunner/max_run_duration_hook.go

diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go
index 8e973b06a4e..f86b11fda39 100644
--- a/client/allocrunner/alloc_runner.go
+++ b/client/allocrunner/alloc_runner.go
@@ -131,10 +131,6 @@ type allocRunner struct {
 	// and serializes Shutdown/Destroy calls.
 	destroyedLock sync.Mutex
 
-	// maxRunTimer is used to enforce task group max_run_duration locally.
-	maxRunTimer     *time.Timer
-	maxRunTimerLock sync.Mutex
-
 	// Alloc captures the allocation being run.
 	alloc     *structs.Allocation
 	allocLock sync.RWMutex
@@ -376,7 +372,6 @@ func (ar *allocRunner) WaitCh() <-chan struct{} {
 func (ar *allocRunner) Run() {
 	// Close the wait channel on return
 	defer close(ar.waitCh)
-	defer ar.stopMaxRunTimer()
 
 	// Start the task state update handler
 	go ar.handleTaskStateUpdates()
@@ -477,8 +472,6 @@ func (ar *allocRunner) GetAllocDir() allocdir.Interface {
 // Restore state from database. Must be called after NewAllocRunner but before
 // Run.
 func (ar *allocRunner) Restore() error {
-	defer ar.resetMaxRunTimer()
-
 	// Retrieve deployment status to avoid resetting it across agent
 	// restarts. Once a deployment status is set Nomad no longer monitors
 	// alloc health, so we must persist deployment state across restarts.
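The timer handling in these patches leans on a standard Go idiom: a timer that may already have fired must be stopped and its channel drained before the reference is dropped, or a later receive can observe a stale tick. A minimal standalone sketch of that idiom, distilled from the goroutines above (the helper name is illustrative, not part of the patch):

    // stopAndDrain stops t and, if it already fired, drains the pending tick
    // so no later receive on t.C sees a stale value. This is only safe while
    // no other goroutine is concurrently receiving from t.C.
    func stopAndDrain(t *time.Timer) {
        if !t.Stop() {
            select {
            case <-t.C:
            default:
            }
        }
    }

The ownership check in the fired branch (comparing the stored timer pointer against the local one under the lock) serves the same purpose from the other side: a goroutine whose timer has been superseded by a reset must not enforce the timeout.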
@@ -696,7 +689,28 @@ func (ar *allocRunner) handleTaskStateUpdates() { // Get the client allocation calloc := ar.clientAlloc(states) - ar.resetMaxRunTimerWithAlloc(calloc) + hookAlloc := calloc + if hookAlloc != nil { + hookAlloc = hookAlloc.Copy() + serverAlloc := ar.Alloc() + hookAlloc.Job = serverAlloc.Job + hookAlloc.TaskGroup = serverAlloc.TaskGroup + hookAlloc.DesiredStatus = serverAlloc.DesiredStatus + } + + req := &interfaces.RunnerUpdateRequest{ + Alloc: hookAlloc, + TaskStates: states, + } + for _, hook := range ar.runnerHooks { + h, ok := hook.(interfaces.RunnerTaskStateHook) + if !ok { + continue + } + if err := h.TaskStateUpdated(req); err != nil { + ar.logger.Error("error running task state hook", "hook", h.Name(), "error", err) + } + } // Update the server ar.stateUpdater.AllocStateUpdated(calloc) @@ -746,8 +760,14 @@ func (ar *allocRunner) killTasks() map[string]*structs.TaskState { return nil } - return structs.NewTaskEvent(structs.TaskKilling). + event := structs.NewTaskEvent(structs.TaskKilling). SetKillTimeout(tr.Task().KillTimeout, ar.clientConfig.MaxKillTimeout) + + if ar.state.MaxRunDurationExceeded { + event.SetDisplayMessage(structs.AllocTimeoutReasonMaxRunDuration) + } + + return event } // Kill leader first, synchronously @@ -855,7 +875,7 @@ func (ar *allocRunner) clientAlloc(taskStates map[string]*structs.TaskState) *st // Compute the ClientStatus if ar.state.MaxRunDurationExceeded { - a.ClientStatus = structs.AllocClientStatusFailed + a.ClientStatus = structs.AllocClientStatusComplete a.ClientDescription = structs.AllocTimeoutReasonMaxRunDuration } else if ar.state.ClientStatus != "" { // The client status is being forced @@ -1063,12 +1083,25 @@ func (ar *allocRunner) handleAllocUpdates() { // If there is already a pending update it will be discarded and replaced by // the latest update. 
func (ar *allocRunner) handleAllocUpdate(update *structs.Allocation) { + current := ar.Alloc() + // Detect Stop updates - stopping := !ar.Alloc().TerminalStatus() && update.TerminalStatus() + stopping := !current.TerminalStatus() && update.TerminalStatus() + + if update.Job == nil && current != nil && current.Job != nil { + update = update.Copy() + update.Job = current.Job.Copy() + if update.TaskGroup == "" { + update.TaskGroup = current.TaskGroup + } + if update.DesiredStatus == "" { + update.DesiredStatus = current.DesiredStatus + } + + } // Update ar.alloc ar.setAlloc(update) - ar.resetMaxRunTimer() // Run update hooks if not stopping or dead if !update.TerminalStatus() { @@ -1094,82 +1127,16 @@ func (ar *allocRunner) Listener() *cstructs.AllocListener { return ar.allocBroadcaster.Listen() } -func (ar *allocRunner) resetMaxRunTimer() { - ar.maxRunTimerLock.Lock() - defer ar.maxRunTimerLock.Unlock() - - if ar.maxRunTimer != nil { - ar.maxRunTimer.Stop() - ar.maxRunTimer = nil - } -} - -func (ar *allocRunner) resetMaxRunTimerWithAlloc(alloc *structs.Allocation) { - ar.maxRunTimerLock.Lock() - if ar.maxRunTimer != nil { - ar.maxRunTimer.Stop() - ar.maxRunTimer = nil - } - ar.maxRunTimerLock.Unlock() - - if alloc == nil || alloc.TerminalStatus() || alloc.DesiredStatus != structs.AllocDesiredStatusRun { - return - } +func (ar *allocRunner) EnforceMaxRunDurationTimeout(deadline time.Time) { + now := time.Now().UTC() - deadline, ok := alloc.MaxRunDurationDeadline() - if !ok { - return - } + if ar.isShuttingDown() { - remaining := time.Until(deadline) - if remaining <= 0 { - go ar.enforceMaxRunDuration() return } - timer := time.NewTimer(remaining) - - ar.maxRunTimerLock.Lock() - ar.maxRunTimer = timer - ar.maxRunTimerLock.Unlock() - - go func(t *time.Timer) { - select { - case <-t.C: - ar.maxRunTimerLock.Lock() - if ar.maxRunTimer != t { - ar.maxRunTimerLock.Unlock() - return - } - ar.maxRunTimer = nil - ar.maxRunTimerLock.Unlock() - - ar.enforceMaxRunDuration() - case <-ar.waitCh: - ar.maxRunTimerLock.Lock() - ar.maxRunTimerLock.Unlock() - - if !t.Stop() { - select { - case <-t.C: - default: - } - } - } - }(timer) -} - -func (ar *allocRunner) stopMaxRunTimer() { - ar.resetMaxRunTimer() -} - -func (ar *allocRunner) enforceMaxRunDuration() { - if ar.isShuttingDown() { - return - } + if now.Before(deadline) { - alloc := ar.Alloc() - if alloc == nil || !alloc.MaxRunDurationExpired(time.Now().UTC()) { return } @@ -1179,8 +1146,8 @@ func (ar *allocRunner) enforceMaxRunDuration() { ar.state.ClientDescription = structs.AllocTimeoutReasonMaxRunDuration ar.stateLock.Unlock() - ar.logger.Debug("allocation exceeded max_run_duration, shutting down tasks") - ar.Shutdown() + ar.logger.Debug("allocation exceeded max_run_duration, killing tasks", "deadline", deadline) + ar.killTasks() } func (ar *allocRunner) destroyImpl() { diff --git a/client/allocrunner/alloc_runner_hooks.go b/client/allocrunner/alloc_runner_hooks.go index abc2898b08b..e18e7fcdda8 100644 --- a/client/allocrunner/alloc_runner_hooks.go +++ b/client/allocrunner/alloc_runner_hooks.go @@ -111,6 +111,7 @@ func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error { ar.runnerHooks = []interfaces.RunnerHook{ newIdentityHook(hookLogger, ar.widmgr), newAllocDirHook(hookLogger, ar.allocDir), + newMaxRunDurationHook(hookLogger, alloc, ar), newConsulHook(consulHookConfig{ alloc: ar.alloc, allocdir: ar.allocDir, @@ -224,8 +225,9 @@ func (ar *allocRunner) update(update *structs.Allocation) error { 
).SetAllocDir(ar.allocDir.AllocDirPath()).Build() req := &interfaces.RunnerUpdateRequest{ - Alloc: update, - AllocEnv: allocEnv, + Alloc: update, + AllocEnv: allocEnv, + TaskStates: ar.AllocState().TaskStates, } var merr multierror.Error diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index aa59adc8ccb..28bc7f64f79 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -2707,8 +2707,8 @@ func TestAllocRunner_MaxRunDuration_StopsExpiredAlloc(t *testing.T) { if state == nil { return false, fmt.Errorf("No alloc state") } - if state.ClientStatus != structs.AllocClientStatusFailed { - return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusFailed) + if state.ClientStatus != structs.AllocClientStatusComplete { + return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusComplete) } if state.ClientDescription != structs.AllocTimeoutReasonMaxRunDuration { return false, fmt.Errorf("got description %q; want %q", state.ClientDescription, structs.AllocTimeoutReasonMaxRunDuration) @@ -2720,6 +2720,65 @@ func TestAllocRunner_MaxRunDuration_StopsExpiredAlloc(t *testing.T) { }) } +func TestAllocRunner_MaxRunDuration_PreservesOriginalJobAcrossAllocUpdatesWithoutJob(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + task := alloc.Job.TaskGroups[0].Tasks[0] + task.Driver = "mock_driver" + task.Config = map[string]interface{}{ + "run_for": "10s", + } + task.KillTimeout = 10 * time.Millisecond + maxRunDuration := 50 * time.Millisecond + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + conf, cleanup := testAllocRunnerConfig(t, alloc) + defer cleanup() + + arIface, err := NewAllocRunner(conf) + must.NoError(t, err) + ar := arIface.(*allocRunner) + + go ar.Run() + defer destroy(ar) + + testutil.WaitForResult(func() (bool, error) { + state := ar.AllocState() + if state == nil { + return false, fmt.Errorf("No alloc state") + } + if state.ClientStatus != structs.AllocClientStatusRunning { + return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusRunning) + } + return true, nil + }, func(err error) { + state := ar.AllocState() + t.Fatalf("timed out waiting for alloc runner to start: %v; state=%#v", err, state) + }) + + update := ar.Alloc().CopySkipJob() + update.AllocModifyIndex++ + ar.Update(update) + + testutil.WaitForResult(func() (bool, error) { + state := ar.AllocState() + if state == nil { + return false, fmt.Errorf("No alloc state") + } + if state.ClientStatus != structs.AllocClientStatusComplete { + return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusComplete) + } + if state.ClientDescription != structs.AllocTimeoutReasonMaxRunDuration { + return false, fmt.Errorf("got description %q; want %q", state.ClientDescription, structs.AllocTimeoutReasonMaxRunDuration) + } + return true, nil + }, func(err error) { + state := ar.AllocState() + t.Fatalf("timed out waiting for alloc runner max_run_duration enforcement after alloc update without job: %v; state=%#v", err, state) + }) +} + func TestAllocRunner_setHookStatsHandler(t *testing.T) { ci.Parallel(t) diff --git a/client/allocrunner/interfaces/runner_lifecycle.go b/client/allocrunner/interfaces/runner_lifecycle.go index 9e1f0e43852..f46c2f3ee1b 100644 --- a/client/allocrunner/interfaces/runner_lifecycle.go +++ b/client/allocrunner/interfaces/runner_lifecycle.go @@ -56,8 +56,16 @@ type 
RunnerUpdateHook interface { } type RunnerUpdateRequest struct { - Alloc *structs.Allocation - AllocEnv *taskenv.TaskEnv + Alloc *structs.Allocation + AllocEnv *taskenv.TaskEnv + TaskStates map[string]*structs.TaskState +} + +// A RunnerTaskStateHook is notified when task state changes are synchronized +// into the alloc runner's client allocation view. +type RunnerTaskStateHook interface { + RunnerHook + TaskStateUpdated(*RunnerUpdateRequest) error } // A RunnerTaskRestartHook is executed just before the allocation runner is diff --git a/client/allocrunner/max_run_duration_hook.go b/client/allocrunner/max_run_duration_hook.go new file mode 100644 index 00000000000..ff0e493e76e --- /dev/null +++ b/client/allocrunner/max_run_duration_hook.go @@ -0,0 +1,246 @@ +// Copyright IBM Corp. 2015, 2025 +// SPDX-License-Identifier: BUSL-1.1 + +package allocrunner + +import ( + "sync" + "time" + + log "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/client/allocrunner/interfaces" + "github.com/hashicorp/nomad/client/taskenv" + "github.com/hashicorp/nomad/nomad/structs" +) + +type maxRunDurationSetter interface { + EnforceMaxRunDurationTimeout(time.Time) +} + +type maxRunDurationHook struct { + logger log.Logger + setter maxRunDurationSetter + + hookLock sync.Mutex + + alloc *structs.Allocation + originalJob *structs.Job + originalTG string + originalWant string + timer *time.Timer +} + +func newMaxRunDurationHook( + logger log.Logger, + alloc *structs.Allocation, + setter maxRunDurationSetter, +) *maxRunDurationHook { + h := &maxRunDurationHook{ + alloc: alloc, + setter: setter, + } + if alloc != nil { + h.originalTG = alloc.TaskGroup + h.originalWant = alloc.DesiredStatus + if alloc.Job != nil { + h.originalJob = alloc.Job.Copy() + } + } + h.logger = logger.Named(h.Name()) + return h +} + +var ( + _ interfaces.RunnerPrerunHook = (*maxRunDurationHook)(nil) + _ interfaces.RunnerUpdateHook = (*maxRunDurationHook)(nil) + _ interfaces.RunnerTaskStateHook = (*maxRunDurationHook)(nil) + _ interfaces.RunnerPostrunHook = (*maxRunDurationHook)(nil) + _ interfaces.ShutdownHook = (*maxRunDurationHook)(nil) +) + +func (h *maxRunDurationHook) Name() string { + return "max_run_duration" +} + +func (h *maxRunDurationHook) Prerun(_ *taskenv.TaskEnv) error { + h.hookLock.Lock() + defer h.hookLock.Unlock() + + h.resetTimerLocked(nil) + return nil +} + +func (h *maxRunDurationHook) Update(req *interfaces.RunnerUpdateRequest) error { + h.hookLock.Lock() + defer h.hookLock.Unlock() + + h.alloc = h.mergeAllocLocked(req.Alloc) + h.resetTimerLocked(req.TaskStates) + return nil +} + +func (h *maxRunDurationHook) TaskStateUpdated(req *interfaces.RunnerUpdateRequest) error { + h.hookLock.Lock() + defer h.hookLock.Unlock() + + if req.Alloc != nil { + h.alloc = h.mergeAllocLocked(req.Alloc) + } + + h.resetTimerLocked(req.TaskStates) + return nil +} + +func (h *maxRunDurationHook) Postrun() error { + h.hookLock.Lock() + defer h.hookLock.Unlock() + + h.stopTimerLocked() + return nil +} + +func (h *maxRunDurationHook) Shutdown() { + _ = h.Postrun() +} + +func (h *maxRunDurationHook) allocWithTaskStatesLocked(taskStates map[string]*structs.TaskState) *structs.Allocation { + if h.alloc == nil { + return nil + } + + alloc := h.alloc.Copy() + if h.originalJob != nil { + alloc.Job = h.originalJob.Copy() + } + if alloc.TaskGroup == "" { + alloc.TaskGroup = h.originalTG + } + if alloc.DesiredStatus == "" { + alloc.DesiredStatus = h.originalWant + } + + alloc.TaskStates = make(map[string]*structs.TaskState, len(taskStates)) + 
for name, ts := range taskStates { + if ts == nil { + alloc.TaskStates[name] = nil + continue + } + alloc.TaskStates[name] = ts.Copy() + } + + return alloc +} + +func (h *maxRunDurationHook) mergeAllocLocked(update *structs.Allocation) *structs.Allocation { + if update == nil { + return h.alloc + } + + merged := update.Copy() + + if merged.TaskGroup != "" { + h.originalTG = merged.TaskGroup + } + if merged.DesiredStatus != "" { + h.originalWant = merged.DesiredStatus + } + + if h.originalJob != nil { + merged.Job = h.originalJob.Copy() + } + if merged.TaskGroup == "" { + merged.TaskGroup = h.originalTG + } + if merged.DesiredStatus == "" { + merged.DesiredStatus = h.originalWant + } + + return merged +} + +func (h *maxRunDurationHook) resetTimerLocked(taskStates map[string]*structs.TaskState) { + h.stopTimerLocked() + + alloc := h.allocWithTaskStatesLocked(taskStates) + if alloc == nil { + return + } + + if alloc.TerminalStatus() { + return + } + + if alloc.DesiredStatus != "" && alloc.DesiredStatus != structs.AllocDesiredStatusRun { + return + } + + maxRunDuration, ok := alloc.MaxRunDuration() + if !ok { + return + } + + startedAt, ok := fullyRunningSince(taskStates) + if !ok { + return + } + + deadline := startedAt.Add(maxRunDuration) + remaining := time.Until(deadline) + if remaining <= 0 { + go h.setter.EnforceMaxRunDurationTimeout(deadline) + return + } + + timer := time.NewTimer(remaining) + h.timer = timer + + go func(t *time.Timer, deadline time.Time) { + <-t.C + + h.hookLock.Lock() + if h.timer != t { + h.hookLock.Unlock() + return + } + h.timer = nil + h.hookLock.Unlock() + + h.setter.EnforceMaxRunDurationTimeout(deadline) + }(timer, deadline) +} + +func (h *maxRunDurationHook) stopTimerLocked() { + if h.timer == nil { + return + } + + if !h.timer.Stop() { + select { + case <-h.timer.C: + default: + } + } + h.timer = nil +} + +func fullyRunningSince(taskStates map[string]*structs.TaskState) (time.Time, bool) { + if len(taskStates) == 0 { + return time.Time{}, false + } + + var latest time.Time + for _, ts := range taskStates { + if ts == nil || ts.State != structs.TaskStateRunning || ts.StartedAt.IsZero() { + return time.Time{}, false + } + if ts.StartedAt.After(latest) { + latest = ts.StartedAt + } + } + + if latest.IsZero() { + return time.Time{}, false + } + + return latest, true +} diff --git a/client/allocrunner/state/state.go b/client/allocrunner/state/state.go index 89326eddc9b..dd1f2327bf9 100644 --- a/client/allocrunner/state/state.go +++ b/client/allocrunner/state/state.go @@ -20,8 +20,8 @@ type State struct { ClientDescription string // MaxRunDurationExceeded indicates the allocation exceeded its configured - // max_run_duration and should be reported as failed regardless of task exit - // status. + // max_run_duration and should be reported as complete with a timeout reason + // regardless of task exit status. 
MaxRunDurationExceeded bool // DeploymentStatus captures the status of the deployment diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index d4baeb9f360..99232856ba8 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -1253,6 +1253,10 @@ func ApiTgToStructsTG(job *structs.Job, taskGroup *api.TaskGroup, tg *structs.Ta tg.ShutdownDelay = taskGroup.ShutdownDelay } + if taskGroup.MaxRunDuration != nil { + tg.MaxRunDuration = taskGroup.MaxRunDuration + } + if taskGroup.ReschedulePolicy != nil { tg.ReschedulePolicy = &structs.ReschedulePolicy{ Attempts: *taskGroup.ReschedulePolicy.Attempts, diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 33ea1b3835d..76576bec5b8 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -6940,6 +6940,10 @@ func (tg *TaskGroup) Copy() *TaskGroup { ntg.ShutdownDelay = tg.ShutdownDelay } + if tg.MaxRunDuration != nil { + ntg.MaxRunDuration = tg.MaxRunDuration + } + return ntg } From 737f23d757f41175943c6090e7e58f890aaccc70 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Wed, 8 Apr 2026 19:00:14 +0200 Subject: [PATCH 13/37] TestJobs_ApiJobToStructsJob --- command/agent/job_endpoint_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/command/agent/job_endpoint_test.go b/command/agent/job_endpoint_test.go index 740126988c1..0abef0b8f70 100644 --- a/command/agent/job_endpoint_test.go +++ b/command/agent/job_endpoint_test.go @@ -2834,6 +2834,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { ProgressDeadline: pointer.Of(5 * time.Minute), AutoRevert: pointer.Of(true), }, + MaxRunDuration: pointer.Of(10 * time.Second), Meta: map[string]string{ "key": "value", }, @@ -3275,6 +3276,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { AutoPromote: false, Canary: 1, }, + MaxRunDuration: pointer.Of(10 * time.Second), Meta: map[string]string{ "key": "value", }, From 48a96521314ad51c313d2c192f91ccd56466b930 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Wed, 8 Apr 2026 19:07:51 +0200 Subject: [PATCH 14/37] don't need that interface --- client/allocrunner/alloc_runner.go | 13 ++++++------- client/allocrunner/alloc_runner_hooks.go | 3 ++- client/allocrunner/interfaces/runner_lifecycle.go | 7 ------- client/allocrunner/max_run_duration_hook.go | 9 ++++----- 4 files changed, 12 insertions(+), 20 deletions(-) diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index f86b11fda39..e86961f36f9 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -152,6 +152,9 @@ type allocRunner struct { // transitions. runnerHooks []interfaces.RunnerHook + // maxRunDurationHook handles max_run_duration updates from task state changes. 
+ maxRunDurationHook *maxRunDurationHook + // hookResources holds the output from allocrunner hooks so that later // allocrunner hooks or task runner hooks can read them hookResources *cstructs.AllocHookResources @@ -702,13 +705,9 @@ func (ar *allocRunner) handleTaskStateUpdates() { Alloc: hookAlloc, TaskStates: states, } - for _, hook := range ar.runnerHooks { - h, ok := hook.(interfaces.RunnerTaskStateHook) - if !ok { - continue - } - if err := h.TaskStateUpdated(req); err != nil { - ar.logger.Error("error running task state hook", "hook", h.Name(), "error", err) + if ar.maxRunDurationHook != nil { + if err := ar.maxRunDurationHook.TaskStateUpdated(req); err != nil { + ar.logger.Error("error running max run duration hook", "hook", ar.maxRunDurationHook.Name(), "error", err) } } diff --git a/client/allocrunner/alloc_runner_hooks.go b/client/allocrunner/alloc_runner_hooks.go index e18e7fcdda8..66ff12e4876 100644 --- a/client/allocrunner/alloc_runner_hooks.go +++ b/client/allocrunner/alloc_runner_hooks.go @@ -107,11 +107,12 @@ func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error { // Create the alloc directory hook. This is run first to ensure the // directory path exists for other hooks. alloc := ar.Alloc() + ar.maxRunDurationHook = newMaxRunDurationHook(hookLogger, alloc, ar) ar.runnerHooks = []interfaces.RunnerHook{ newIdentityHook(hookLogger, ar.widmgr), newAllocDirHook(hookLogger, ar.allocDir), - newMaxRunDurationHook(hookLogger, alloc, ar), + ar.maxRunDurationHook, newConsulHook(consulHookConfig{ alloc: ar.alloc, allocdir: ar.allocDir, diff --git a/client/allocrunner/interfaces/runner_lifecycle.go b/client/allocrunner/interfaces/runner_lifecycle.go index f46c2f3ee1b..6267c978e00 100644 --- a/client/allocrunner/interfaces/runner_lifecycle.go +++ b/client/allocrunner/interfaces/runner_lifecycle.go @@ -61,13 +61,6 @@ type RunnerUpdateRequest struct { TaskStates map[string]*structs.TaskState } -// A RunnerTaskStateHook is notified when task state changes are synchronized -// into the alloc runner's client allocation view. -type RunnerTaskStateHook interface { - RunnerHook - TaskStateUpdated(*RunnerUpdateRequest) error -} - // A RunnerTaskRestartHook is executed just before the allocation runner is // going to restart all tasks. 
type RunnerTaskRestartHook interface { diff --git a/client/allocrunner/max_run_duration_hook.go b/client/allocrunner/max_run_duration_hook.go index ff0e493e76e..c276afac361 100644 --- a/client/allocrunner/max_run_duration_hook.go +++ b/client/allocrunner/max_run_duration_hook.go @@ -51,11 +51,10 @@ func newMaxRunDurationHook( } var ( - _ interfaces.RunnerPrerunHook = (*maxRunDurationHook)(nil) - _ interfaces.RunnerUpdateHook = (*maxRunDurationHook)(nil) - _ interfaces.RunnerTaskStateHook = (*maxRunDurationHook)(nil) - _ interfaces.RunnerPostrunHook = (*maxRunDurationHook)(nil) - _ interfaces.ShutdownHook = (*maxRunDurationHook)(nil) + _ interfaces.RunnerPrerunHook = (*maxRunDurationHook)(nil) + _ interfaces.RunnerUpdateHook = (*maxRunDurationHook)(nil) + _ interfaces.RunnerPostrunHook = (*maxRunDurationHook)(nil) + _ interfaces.ShutdownHook = (*maxRunDurationHook)(nil) ) func (h *maxRunDurationHook) Name() string { From eda5a4f4e07c1a28755d9d80948d88eafba45ec6 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Thu, 9 Apr 2026 17:27:05 +0200 Subject: [PATCH 15/37] it's actually not a hook --- client/allocrunner/alloc_runner.go | 41 ++- client/allocrunner/alloc_runner_hooks.go | 2 - client/allocrunner/alloc_runner_test.go | 3 - client/allocrunner/max_run_duration_hook.go | 245 -------------- .../tasklifecycle/max_run_duration.go | 161 ++++++++++ .../tasklifecycle/max_run_duration_test.go | 300 ++++++++++++++++++ 6 files changed, 490 insertions(+), 262 deletions(-) delete mode 100644 client/allocrunner/max_run_duration_hook.go create mode 100644 client/allocrunner/tasklifecycle/max_run_duration.go create mode 100644 client/allocrunner/tasklifecycle/max_run_duration_test.go diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index e86961f36f9..6a00b97efb9 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -152,8 +152,9 @@ type allocRunner struct { // transitions. runnerHooks []interfaces.RunnerHook - // maxRunDurationHook handles max_run_duration updates from task state changes. - maxRunDurationHook *maxRunDurationHook + // maxRunDuration coordinates alloc-level max_run_duration enforcement from + // authoritative alloc and task state updates. 
+ maxRunDuration *tasklifecycle.MaxRunDuration // hookResources holds the output from allocrunner hooks so that later // allocrunner hooks or task runner hooks can read them @@ -295,6 +296,7 @@ func NewAllocRunner(config *config.AllocRunnerConfig) (interfaces.AllocRunner, e ) ar.taskCoordinator = tasklifecycle.NewCoordinator(ar.logger, tg.Tasks, ar.waitCh) + ar.maxRunDuration = tasklifecycle.NewMaxRunDuration(ar.logger, ar.alloc, ar) shutdownDelayCtx, shutdownDelayCancel := context.WithCancel(context.Background()) ar.shutdownDelayCtx = shutdownDelayCtx @@ -411,6 +413,10 @@ func (ar *allocRunner) Run() { return } + if ar.maxRunDuration != nil { + ar.maxRunDuration.Stop() + } + // Run the postrun hooks if err := ar.postrun(); err != nil { ar.logger.Error("postrun failed", "error", err) @@ -694,21 +700,27 @@ func (ar *allocRunner) handleTaskStateUpdates() { hookAlloc := calloc if hookAlloc != nil { - hookAlloc = hookAlloc.Copy() serverAlloc := ar.Alloc() - hookAlloc.Job = serverAlloc.Job - hookAlloc.TaskGroup = serverAlloc.TaskGroup - hookAlloc.DesiredStatus = serverAlloc.DesiredStatus + hookAlloc = &structs.Allocation{ + ID: calloc.ID, + TaskStates: calloc.TaskStates, + ClientStatus: calloc.ClientStatus, + ClientDescription: calloc.ClientDescription, + DeploymentStatus: calloc.DeploymentStatus, + NetworkStatus: calloc.NetworkStatus, + Job: serverAlloc.Job, + TaskGroup: serverAlloc.TaskGroup, + DesiredStatus: serverAlloc.DesiredStatus, + } } req := &interfaces.RunnerUpdateRequest{ Alloc: hookAlloc, TaskStates: states, } - if ar.maxRunDurationHook != nil { - if err := ar.maxRunDurationHook.TaskStateUpdated(req); err != nil { - ar.logger.Error("error running max run duration hook", "hook", ar.maxRunDurationHook.Name(), "error", err) - } + if ar.maxRunDuration != nil { + ar.maxRunDuration.SetAlloc(req.Alloc) + ar.maxRunDuration.TaskStateUpdated(req.TaskStates) } // Update the server @@ -1014,7 +1026,7 @@ func (ar *allocRunner) AllocState() *state.State { // If TaskStateUpdated has not been called yet, ar.state.TaskStates // won't be set as it is not the canonical source of TaskStates. if len(state.TaskStates) == 0 { - ar.state.TaskStates = make(map[string]*structs.TaskState, len(ar.tasks)) + state.TaskStates = make(map[string]*structs.TaskState, len(ar.tasks)) for k, tr := range ar.tasks { state.TaskStates[k] = tr.TaskState() } @@ -1324,7 +1336,12 @@ func (ar *allocRunner) Shutdown() { ar.logger.Trace("shutting down") // Shutdown tasks gracefully if they were run - wg := sync.WaitGroup{} + if ar.maxRunDuration != nil { + ar.maxRunDuration.Stop() + } + + // Shutdown task runners + var wg sync.WaitGroup for _, tr := range ar.tasks { wg.Add(1) go func(tr *taskrunner.TaskRunner) { diff --git a/client/allocrunner/alloc_runner_hooks.go b/client/allocrunner/alloc_runner_hooks.go index 66ff12e4876..d57d94167c4 100644 --- a/client/allocrunner/alloc_runner_hooks.go +++ b/client/allocrunner/alloc_runner_hooks.go @@ -107,12 +107,10 @@ func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error { // Create the alloc directory hook. This is run first to ensure the // directory path exists for other hooks. 
alloc := ar.Alloc() - ar.maxRunDurationHook = newMaxRunDurationHook(hookLogger, alloc, ar) ar.runnerHooks = []interfaces.RunnerHook{ newIdentityHook(hookLogger, ar.widmgr), newAllocDirHook(hookLogger, ar.allocDir), - ar.maxRunDurationHook, newConsulHook(consulHookConfig{ alloc: ar.alloc, allocdir: ar.allocDir, diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index 28bc7f64f79..0977e9cff1e 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -2696,9 +2696,6 @@ func TestAllocRunner_MaxRunDuration_StopsExpiredAlloc(t *testing.T) { must.NoError(t, err) ar := arIface.(*allocRunner) - ar.setAlloc(ar.Alloc().Copy()) - ar.alloc.Job.TaskGroups[0].MaxRunDuration = nil - go ar.Run() defer destroy(ar) diff --git a/client/allocrunner/max_run_duration_hook.go b/client/allocrunner/max_run_duration_hook.go deleted file mode 100644 index c276afac361..00000000000 --- a/client/allocrunner/max_run_duration_hook.go +++ /dev/null @@ -1,245 +0,0 @@ -// Copyright IBM Corp. 2015, 2025 -// SPDX-License-Identifier: BUSL-1.1 - -package allocrunner - -import ( - "sync" - "time" - - log "github.com/hashicorp/go-hclog" - "github.com/hashicorp/nomad/client/allocrunner/interfaces" - "github.com/hashicorp/nomad/client/taskenv" - "github.com/hashicorp/nomad/nomad/structs" -) - -type maxRunDurationSetter interface { - EnforceMaxRunDurationTimeout(time.Time) -} - -type maxRunDurationHook struct { - logger log.Logger - setter maxRunDurationSetter - - hookLock sync.Mutex - - alloc *structs.Allocation - originalJob *structs.Job - originalTG string - originalWant string - timer *time.Timer -} - -func newMaxRunDurationHook( - logger log.Logger, - alloc *structs.Allocation, - setter maxRunDurationSetter, -) *maxRunDurationHook { - h := &maxRunDurationHook{ - alloc: alloc, - setter: setter, - } - if alloc != nil { - h.originalTG = alloc.TaskGroup - h.originalWant = alloc.DesiredStatus - if alloc.Job != nil { - h.originalJob = alloc.Job.Copy() - } - } - h.logger = logger.Named(h.Name()) - return h -} - -var ( - _ interfaces.RunnerPrerunHook = (*maxRunDurationHook)(nil) - _ interfaces.RunnerUpdateHook = (*maxRunDurationHook)(nil) - _ interfaces.RunnerPostrunHook = (*maxRunDurationHook)(nil) - _ interfaces.ShutdownHook = (*maxRunDurationHook)(nil) -) - -func (h *maxRunDurationHook) Name() string { - return "max_run_duration" -} - -func (h *maxRunDurationHook) Prerun(_ *taskenv.TaskEnv) error { - h.hookLock.Lock() - defer h.hookLock.Unlock() - - h.resetTimerLocked(nil) - return nil -} - -func (h *maxRunDurationHook) Update(req *interfaces.RunnerUpdateRequest) error { - h.hookLock.Lock() - defer h.hookLock.Unlock() - - h.alloc = h.mergeAllocLocked(req.Alloc) - h.resetTimerLocked(req.TaskStates) - return nil -} - -func (h *maxRunDurationHook) TaskStateUpdated(req *interfaces.RunnerUpdateRequest) error { - h.hookLock.Lock() - defer h.hookLock.Unlock() - - if req.Alloc != nil { - h.alloc = h.mergeAllocLocked(req.Alloc) - } - - h.resetTimerLocked(req.TaskStates) - return nil -} - -func (h *maxRunDurationHook) Postrun() error { - h.hookLock.Lock() - defer h.hookLock.Unlock() - - h.stopTimerLocked() - return nil -} - -func (h *maxRunDurationHook) Shutdown() { - _ = h.Postrun() -} - -func (h *maxRunDurationHook) allocWithTaskStatesLocked(taskStates map[string]*structs.TaskState) *structs.Allocation { - if h.alloc == nil { - return nil - } - - alloc := h.alloc.Copy() - if h.originalJob != nil { - alloc.Job = h.originalJob.Copy() - } - if 
alloc.TaskGroup == "" { - alloc.TaskGroup = h.originalTG - } - if alloc.DesiredStatus == "" { - alloc.DesiredStatus = h.originalWant - } - - alloc.TaskStates = make(map[string]*structs.TaskState, len(taskStates)) - for name, ts := range taskStates { - if ts == nil { - alloc.TaskStates[name] = nil - continue - } - alloc.TaskStates[name] = ts.Copy() - } - - return alloc -} - -func (h *maxRunDurationHook) mergeAllocLocked(update *structs.Allocation) *structs.Allocation { - if update == nil { - return h.alloc - } - - merged := update.Copy() - - if merged.TaskGroup != "" { - h.originalTG = merged.TaskGroup - } - if merged.DesiredStatus != "" { - h.originalWant = merged.DesiredStatus - } - - if h.originalJob != nil { - merged.Job = h.originalJob.Copy() - } - if merged.TaskGroup == "" { - merged.TaskGroup = h.originalTG - } - if merged.DesiredStatus == "" { - merged.DesiredStatus = h.originalWant - } - - return merged -} - -func (h *maxRunDurationHook) resetTimerLocked(taskStates map[string]*structs.TaskState) { - h.stopTimerLocked() - - alloc := h.allocWithTaskStatesLocked(taskStates) - if alloc == nil { - return - } - - if alloc.TerminalStatus() { - return - } - - if alloc.DesiredStatus != "" && alloc.DesiredStatus != structs.AllocDesiredStatusRun { - return - } - - maxRunDuration, ok := alloc.MaxRunDuration() - if !ok { - return - } - - startedAt, ok := fullyRunningSince(taskStates) - if !ok { - return - } - - deadline := startedAt.Add(maxRunDuration) - remaining := time.Until(deadline) - if remaining <= 0 { - go h.setter.EnforceMaxRunDurationTimeout(deadline) - return - } - - timer := time.NewTimer(remaining) - h.timer = timer - - go func(t *time.Timer, deadline time.Time) { - <-t.C - - h.hookLock.Lock() - if h.timer != t { - h.hookLock.Unlock() - return - } - h.timer = nil - h.hookLock.Unlock() - - h.setter.EnforceMaxRunDurationTimeout(deadline) - }(timer, deadline) -} - -func (h *maxRunDurationHook) stopTimerLocked() { - if h.timer == nil { - return - } - - if !h.timer.Stop() { - select { - case <-h.timer.C: - default: - } - } - h.timer = nil -} - -func fullyRunningSince(taskStates map[string]*structs.TaskState) (time.Time, bool) { - if len(taskStates) == 0 { - return time.Time{}, false - } - - var latest time.Time - for _, ts := range taskStates { - if ts == nil || ts.State != structs.TaskStateRunning || ts.StartedAt.IsZero() { - return time.Time{}, false - } - if ts.StartedAt.After(latest) { - latest = ts.StartedAt - } - } - - if latest.IsZero() { - return time.Time{}, false - } - - return latest, true -} diff --git a/client/allocrunner/tasklifecycle/max_run_duration.go b/client/allocrunner/tasklifecycle/max_run_duration.go new file mode 100644 index 00000000000..9390233452d --- /dev/null +++ b/client/allocrunner/tasklifecycle/max_run_duration.go @@ -0,0 +1,161 @@ +// Copyright IBM Corp. 2015, 2025 +// SPDX-License-Identifier: BUSL-1.1 + +package tasklifecycle + +import ( + "sync" + "time" + + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/nomad/structs" +) + +type MaxRunDurationSetter interface { + EnforceMaxRunDurationTimeout(time.Time) +} + +// MaxRunDuration coordinates local enforcement of task group +// `max_run_duration`. +// +// This component is alloc-scoped, not task-scoped. It observes the current +// authoritative allocation plus task state snapshots and arms a timer once the +// allocation is fully running. The timer is canceled and recomputed whenever +// alloc or task state changes are observed. 
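+//
+// An illustrative wiring, as of this patch, assuming a setter that
+// implements MaxRunDurationSetter (here, the alloc runner):
+//
+//	m := NewMaxRunDuration(logger, alloc, setter)
+//	m.TaskStateUpdated(taskStates) // arms, re-arms, or fires immediately
+//	defer m.Stop()                 // cancels any pending timer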
+type MaxRunDuration struct { + mu sync.Mutex + + alloc *structs.Allocation + timer *time.Timer + setter MaxRunDurationSetter + logger hclog.Logger +} + +func NewMaxRunDuration( + logger hclog.Logger, + alloc *structs.Allocation, + setter MaxRunDurationSetter, +) *MaxRunDuration { + return &MaxRunDuration{ + alloc: alloc, + setter: setter, + logger: logger.Named("max_run_duration"), + } +} + +// SetAlloc updates the authoritative allocation used for +// `max_run_duration` evaluation. +func (m *MaxRunDuration) SetAlloc(alloc *structs.Allocation) { + m.mu.Lock() + defer m.mu.Unlock() + + m.alloc = alloc +} + +// TaskStateUpdated recomputes the `max_run_duration` timer from the latest +// alloc and task state snapshot. +func (m *MaxRunDuration) TaskStateUpdated(taskStates map[string]*structs.TaskState) { + m.mu.Lock() + defer m.mu.Unlock() + + m.resetTimerLocked(taskStates) +} + +// Stop cancels any active timer. +func (m *MaxRunDuration) Stop() { + m.mu.Lock() + defer m.mu.Unlock() + + m.stopTimerLocked() +} + +func (m *MaxRunDuration) resetTimerLocked(taskStates map[string]*structs.TaskState) { + m.stopTimerLocked() + + if m.alloc == nil { + return + } + + // Only running allocations with a valid max_run_duration and a fully + // running task set should have an active timer. + if m.alloc.TerminalStatus() { + return + } + + if m.alloc.DesiredStatus != "" && m.alloc.DesiredStatus != structs.AllocDesiredStatusRun { + return + } + + maxRunDuration, ok := m.alloc.MaxRunDuration() + if !ok { + return + } + + startedAt, ok := FullyRunningSince(taskStates) + if !ok { + return + } + + deadline := startedAt.Add(maxRunDuration) + remaining := time.Until(deadline) + if remaining <= 0 { + go m.setter.EnforceMaxRunDurationTimeout(deadline) + return + } + + timer := time.NewTimer(remaining) + m.timer = timer + + go func(t *time.Timer, deadline time.Time) { + <-t.C + + m.mu.Lock() + if m.timer != t { + m.mu.Unlock() + return + } + m.timer = nil + m.mu.Unlock() + + m.setter.EnforceMaxRunDurationTimeout(deadline) + }(timer, deadline) +} + +func (m *MaxRunDuration) stopTimerLocked() { + if m.timer == nil { + return + } + + if !m.timer.Stop() { + select { + case <-m.timer.C: + default: + } + } + m.timer = nil +} + +// FullyRunningSince returns the latest StartedAt timestamp across all task +// states, but only when every known task state is running with a non-zero +// start time. +func FullyRunningSince(taskStates map[string]*structs.TaskState) (time.Time, bool) { + if len(taskStates) == 0 { + return time.Time{}, false + } + + var latest time.Time + for _, ts := range taskStates { + if ts == nil || ts.State != structs.TaskStateRunning || ts.StartedAt.IsZero() { + return time.Time{}, false + } + if ts.StartedAt.After(latest) { + latest = ts.StartedAt + } + } + + if latest.IsZero() { + return time.Time{}, false + } + + return latest, true +} diff --git a/client/allocrunner/tasklifecycle/max_run_duration_test.go b/client/allocrunner/tasklifecycle/max_run_duration_test.go new file mode 100644 index 00000000000..8d488a81dc1 --- /dev/null +++ b/client/allocrunner/tasklifecycle/max_run_duration_test.go @@ -0,0 +1,300 @@ +// Copyright IBM Corp. 
2015, 2025 +// SPDX-License-Identifier: BUSL-1.1 + +package tasklifecycle + +import ( + "testing" + "time" + + log "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" +) + +type testMaxRunDurationSetter struct { + deadlines chan time.Time +} + +func newTestMaxRunDurationSetter() *testMaxRunDurationSetter { + return &testMaxRunDurationSetter{ + deadlines: make(chan time.Time, 8), + } +} + +func (s *testMaxRunDurationSetter) EnforceMaxRunDurationTimeout(deadline time.Time) { + s.deadlines <- deadline +} + +func TestMaxRunDuration_FullyRunningSince(t *testing.T) { + t.Parallel() + + now := time.Now().UTC() + earlier := now.Add(-2 * time.Second) + later := now.Add(-1 * time.Second) + + got, ok := FullyRunningSince(map[string]*structs.TaskState{ + "a": { + State: structs.TaskStateRunning, + StartedAt: earlier, + }, + "b": { + State: structs.TaskStateRunning, + StartedAt: later, + }, + }) + + must.True(t, ok) + must.Eq(t, later, got) +} + +func TestMaxRunDuration_FullyRunningSince_FalseWhenEmpty(t *testing.T) { + t.Parallel() + + got, ok := FullyRunningSince(nil) + must.False(t, ok) + must.Eq(t, time.Time{}, got) +} + +func TestMaxRunDuration_FullyRunningSince_FalseWhenTaskNotRunning(t *testing.T) { + t.Parallel() + + _, ok := FullyRunningSince(map[string]*structs.TaskState{ + "a": { + State: structs.TaskStateRunning, + StartedAt: time.Now().Add(-time.Second).UTC(), + }, + "b": { + State: structs.TaskStatePending, + }, + }) + + must.False(t, ok) +} + +func TestMaxRunDuration_FullyRunningSince_FalseWhenStartedAtMissing(t *testing.T) { + t.Parallel() + + _, ok := FullyRunningSince(map[string]*structs.TaskState{ + "a": { + State: structs.TaskStateRunning, + }, + }) + + must.False(t, ok) +} + +func TestMaxRunDuration_TaskStateUpdated_ArmsTimerAndFires(t *testing.T) { + t.Parallel() + + alloc := mock.BatchAlloc() + maxRunDuration := 50 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + setter := newTestMaxRunDurationSetter() + m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + + startedAt := time.Now().UTC() + m.TaskStateUpdated(map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: startedAt, + }, + }) + + select { + case deadline := <-setter.deadlines: + must.Eq(t, startedAt.Add(maxRunDuration), deadline) + case <-time.After(500 * time.Millisecond): + t.Fatal("timed out waiting for max_run_duration deadline") + } +} + +func TestMaxRunDuration_TaskStateUpdated_ImmediateEnforcementWhenExpired(t *testing.T) { + t.Parallel() + + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + setter := newTestMaxRunDurationSetter() + m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + + startedAt := time.Now().Add(-2 * maxRunDuration).UTC() + m.TaskStateUpdated(map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: startedAt, + }, + }) + + select { + case deadline := <-setter.deadlines: + must.Eq(t, startedAt.Add(maxRunDuration), deadline) + case <-time.After(250 * time.Millisecond): + t.Fatal("timed out waiting for immediate max_run_duration enforcement") + } +} + +func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenNotFullyRunning(t *testing.T) { + t.Parallel() + + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + 
alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + setter := newTestMaxRunDurationSetter() + m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + + m.TaskStateUpdated(map[string]*structs.TaskState{ + "web": { + State: structs.TaskStatePending, + }, + }) + + select { + case deadline := <-setter.deadlines: + t.Fatalf("unexpected deadline fired: %v", deadline) + case <-time.After(100 * time.Millisecond): + } +} + +func TestMaxRunDuration_TaskStateUpdated_DoesNotFireForNonBatchJobs(t *testing.T) { + t.Parallel() + + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + alloc.Job.Type = structs.JobTypeService + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + setter := newTestMaxRunDurationSetter() + m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + + m.TaskStateUpdated(map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: time.Now().UTC(), + }, + }) + + select { + case deadline := <-setter.deadlines: + t.Fatalf("unexpected deadline fired: %v", deadline) + case <-time.After(100 * time.Millisecond): + } +} + +func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenDesiredStatusNotRun(t *testing.T) { + t.Parallel() + + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + alloc.DesiredStatus = structs.AllocDesiredStatusStop + + setter := newTestMaxRunDurationSetter() + m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + + m.TaskStateUpdated(map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: time.Now().UTC(), + }, + }) + + select { + case deadline := <-setter.deadlines: + t.Fatalf("unexpected deadline fired: %v", deadline) + case <-time.After(100 * time.Millisecond): + } +} + +func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenAllocTerminal(t *testing.T) { + t.Parallel() + + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + alloc.ClientStatus = structs.AllocClientStatusComplete + + setter := newTestMaxRunDurationSetter() + m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + + m.TaskStateUpdated(map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: time.Now().UTC(), + }, + }) + + select { + case deadline := <-setter.deadlines: + t.Fatalf("unexpected deadline fired: %v", deadline) + case <-time.After(100 * time.Millisecond): + } +} + +func TestMaxRunDuration_SetAlloc_UsesLatestAllocConfig(t *testing.T) { + t.Parallel() + + alloc := mock.BatchAlloc() + initial := 200 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &initial + + setter := newTestMaxRunDurationSetter() + m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + + updated := alloc.Copy() + latest := 40 * time.Millisecond + updated.Job.TaskGroups[0].MaxRunDuration = &latest + m.SetAlloc(updated) + + startedAt := time.Now().UTC() + m.TaskStateUpdated(map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: startedAt, + }, + }) + + select { + case deadline := <-setter.deadlines: + must.Eq(t, startedAt.Add(latest), deadline) + case <-time.After(500 * time.Millisecond): + t.Fatal("timed out waiting for updated max_run_duration deadline") + } +} + +func TestMaxRunDuration_Stop_CancelsTimer(t 
*testing.T) { + t.Parallel() + + alloc := mock.BatchAlloc() + maxRunDuration := 150 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + setter := newTestMaxRunDurationSetter() + m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + + m.TaskStateUpdated(map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: time.Now().UTC(), + }, + }) + + m.Stop() + + select { + case deadline := <-setter.deadlines: + t.Fatalf("unexpected deadline fired after stop: %v", deadline) + case <-time.After(250 * time.Millisecond): + } +} From ab7565d163e48576c85d06636e17feda230756af Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Thu, 9 Apr 2026 17:31:10 +0200 Subject: [PATCH 16/37] refinements --- client/allocrunner/alloc_runner.go | 2 +- client/allocrunner/alloc_runner_test.go | 2 +- .../tasklifecycle/max_run_duration.go | 4 ---- .../tasklifecycle/max_run_duration_test.go | 17 ++++++++--------- 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 6a00b97efb9..392e1b5f43c 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -296,7 +296,7 @@ func NewAllocRunner(config *config.AllocRunnerConfig) (interfaces.AllocRunner, e ) ar.taskCoordinator = tasklifecycle.NewCoordinator(ar.logger, tg.Tasks, ar.waitCh) - ar.maxRunDuration = tasklifecycle.NewMaxRunDuration(ar.logger, ar.alloc, ar) + ar.maxRunDuration = tasklifecycle.NewMaxRunDuration(ar.alloc, ar) shutdownDelayCtx, shutdownDelayCancel := context.WithCancel(context.Background()) ar.shutdownDelayCtx = shutdownDelayCtx diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index 0977e9cff1e..94446849ada 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -2717,7 +2717,7 @@ func TestAllocRunner_MaxRunDuration_StopsExpiredAlloc(t *testing.T) { }) } -func TestAllocRunner_MaxRunDuration_PreservesOriginalJobAcrossAllocUpdatesWithoutJob(t *testing.T) { +func TestAllocRunner_MaxRunDuration_PreservesConfigAcrossAllocUpdatesWithoutJob(t *testing.T) { ci.Parallel(t) alloc := mock.BatchAlloc() diff --git a/client/allocrunner/tasklifecycle/max_run_duration.go b/client/allocrunner/tasklifecycle/max_run_duration.go index 9390233452d..144720da702 100644 --- a/client/allocrunner/tasklifecycle/max_run_duration.go +++ b/client/allocrunner/tasklifecycle/max_run_duration.go @@ -7,7 +7,6 @@ import ( "sync" "time" - "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/nomad/structs" ) @@ -28,18 +27,15 @@ type MaxRunDuration struct { alloc *structs.Allocation timer *time.Timer setter MaxRunDurationSetter - logger hclog.Logger } func NewMaxRunDuration( - logger hclog.Logger, alloc *structs.Allocation, setter MaxRunDurationSetter, ) *MaxRunDuration { return &MaxRunDuration{ alloc: alloc, setter: setter, - logger: logger.Named("max_run_duration"), } } diff --git a/client/allocrunner/tasklifecycle/max_run_duration_test.go b/client/allocrunner/tasklifecycle/max_run_duration_test.go index 8d488a81dc1..7d70c1a92c7 100644 --- a/client/allocrunner/tasklifecycle/max_run_duration_test.go +++ b/client/allocrunner/tasklifecycle/max_run_duration_test.go @@ -7,7 +7,6 @@ import ( "testing" "time" - log "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" 
"github.com/shoenig/test/must" @@ -94,7 +93,7 @@ func TestMaxRunDuration_TaskStateUpdated_ArmsTimerAndFires(t *testing.T) { alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + m := NewMaxRunDuration(alloc, setter) startedAt := time.Now().UTC() m.TaskStateUpdated(map[string]*structs.TaskState{ @@ -121,7 +120,7 @@ func TestMaxRunDuration_TaskStateUpdated_ImmediateEnforcementWhenExpired(t *test alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + m := NewMaxRunDuration(alloc, setter) startedAt := time.Now().Add(-2 * maxRunDuration).UTC() m.TaskStateUpdated(map[string]*structs.TaskState{ @@ -148,7 +147,7 @@ func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenNotFullyRunning(t *testi alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + m := NewMaxRunDuration(alloc, setter) m.TaskStateUpdated(map[string]*structs.TaskState{ "web": { @@ -172,7 +171,7 @@ func TestMaxRunDuration_TaskStateUpdated_DoesNotFireForNonBatchJobs(t *testing.T alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + m := NewMaxRunDuration(alloc, setter) m.TaskStateUpdated(map[string]*structs.TaskState{ "web": { @@ -198,7 +197,7 @@ func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenDesiredStatusNotRun(t *t alloc.DesiredStatus = structs.AllocDesiredStatusStop setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + m := NewMaxRunDuration(alloc, setter) m.TaskStateUpdated(map[string]*structs.TaskState{ "web": { @@ -224,7 +223,7 @@ func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenAllocTerminal(t *testing alloc.ClientStatus = structs.AllocClientStatusComplete setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + m := NewMaxRunDuration(alloc, setter) m.TaskStateUpdated(map[string]*structs.TaskState{ "web": { @@ -249,7 +248,7 @@ func TestMaxRunDuration_SetAlloc_UsesLatestAllocConfig(t *testing.T) { alloc.Job.TaskGroups[0].MaxRunDuration = &initial setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + m := NewMaxRunDuration(alloc, setter) updated := alloc.Copy() latest := 40 * time.Millisecond @@ -281,7 +280,7 @@ func TestMaxRunDuration_Stop_CancelsTimer(t *testing.T) { alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + m := NewMaxRunDuration(alloc, setter) m.TaskStateUpdated(map[string]*structs.TaskState{ "web": { From c44ba70361c22b3f904b7d2e186bfc2f048dfa4b Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Thu, 9 Apr 2026 17:35:21 +0200 Subject: [PATCH 17/37] corrections --- client/allocrunner/alloc_runner_test.go | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index 94446849ada..4b2c42735b2 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -2702,7 +2702,7 @@ func TestAllocRunner_MaxRunDuration_StopsExpiredAlloc(t *testing.T) { 
	testutil.WaitForResult(func() (bool, error) {
 		state := ar.AllocState()
 		if state == nil {
-			return false, fmt.Errorf("No alloc state")
+			return false, fmt.Errorf("no alloc state")
 		}
 		if state.ClientStatus != structs.AllocClientStatusComplete {
 			return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusComplete)
@@ -2710,6 +2710,9 @@ func TestAllocRunner_MaxRunDuration_StopsExpiredAlloc(t *testing.T) {
 		if state.ClientDescription != structs.AllocTimeoutReasonMaxRunDuration {
 			return false, fmt.Errorf("got description %q; want %q", state.ClientDescription, structs.AllocTimeoutReasonMaxRunDuration)
 		}
+		if !state.MaxRunDurationExceeded {
+			return false, fmt.Errorf("max run duration was not marked exceeded")
+		}
 		return true, nil
 	}, func(err error) {
 		state := ar.AllocState()
@@ -2717,7 +2720,7 @@ func TestAllocRunner_MaxRunDuration_StopsExpiredAlloc(t *testing.T) {
 	})
 }
 
-func TestAllocRunner_MaxRunDuration_PreservesConfigAcrossAllocUpdatesWithoutJob(t *testing.T) {
+func TestAllocRunner_MaxRunDuration_PreservesConfigAcrossPartialAllocUpdate(t *testing.T) {
 	ci.Parallel(t)
 
 	alloc := mock.BatchAlloc()
@@ -2743,7 +2746,7 @@ func TestAllocRunner_MaxRunDuration_PreservesConfigAcrossAllocUpdatesWithoutJob(
 	testutil.WaitForResult(func() (bool, error) {
 		state := ar.AllocState()
 		if state == nil {
-			return false, fmt.Errorf("No alloc state")
+			return false, fmt.Errorf("no alloc state")
 		}
 		if state.ClientStatus != structs.AllocClientStatusRunning {
 			return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusRunning)
@@ -2761,7 +2764,7 @@
 	testutil.WaitForResult(func() (bool, error) {
 		state := ar.AllocState()
 		if state == nil {
-			return false, fmt.Errorf("No alloc state")
+			return false, fmt.Errorf("no alloc state")
 		}
 		if state.ClientStatus != structs.AllocClientStatusComplete {
 			return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusComplete)
@@ -2769,10 +2772,13 @@
 		if state.ClientDescription != structs.AllocTimeoutReasonMaxRunDuration {
 			return false, fmt.Errorf("got description %q; want %q", state.ClientDescription, structs.AllocTimeoutReasonMaxRunDuration)
 		}
+		if !state.MaxRunDurationExceeded {
+			return false, fmt.Errorf("max run duration was not marked exceeded")
+		}
 		return true, nil
 	}, func(err error) {
 		state := ar.AllocState()
-		t.Fatalf("timed out waiting for alloc runner max_run_duration enforcement after alloc update without job: %v; state=%#v", err, state)
+		t.Fatalf("timed out waiting for alloc runner max_run_duration enforcement after partial alloc update: %v; state=%#v", err, state)
 	})
 }
 
From 9abba4f26aca9cee5ec25bbf7218a389399a3197 Mon Sep 17 00:00:00 2001
From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com>
Date: Thu, 9 Apr 2026 17:38:37 +0200
Subject: [PATCH 18/37] test refactor

---
 .../tasklifecycle/max_run_duration_test.go    | 207 ++++++------------
 1 file changed, 69 insertions(+), 138 deletions(-)

diff --git a/client/allocrunner/tasklifecycle/max_run_duration_test.go b/client/allocrunner/tasklifecycle/max_run_duration_test.go
index 7d70c1a92c7..5050677f358 100644
--- a/client/allocrunner/tasklifecycle/max_run_duration_test.go
+++ b/client/allocrunner/tasklifecycle/max_run_duration_test.go
@@ -48,15 +48,7 @@ func TestMaxRunDuration_FullyRunningSince(t *testing.T) {
 	must.Eq(t, later, got)
 }
 
-func
TestMaxRunDuration_FullyRunningSince_FalseWhenEmpty(t *testing.T) { - t.Parallel() - - got, ok := FullyRunningSince(nil) - must.False(t, ok) - must.Eq(t, time.Time{}, got) -} - -func TestMaxRunDuration_FullyRunningSince_FalseWhenTaskNotRunning(t *testing.T) { +func TestMaxRunDuration_FullyRunningSince_FalseWhenNotFullyRunning(t *testing.T) { t.Parallel() _, ok := FullyRunningSince(map[string]*structs.TaskState{ @@ -72,18 +64,6 @@ func TestMaxRunDuration_FullyRunningSince_FalseWhenTaskNotRunning(t *testing.T) must.False(t, ok) } -func TestMaxRunDuration_FullyRunningSince_FalseWhenStartedAtMissing(t *testing.T) { - t.Parallel() - - _, ok := FullyRunningSince(map[string]*structs.TaskState{ - "a": { - State: structs.TaskStateRunning, - }, - }) - - must.False(t, ok) -} - func TestMaxRunDuration_TaskStateUpdated_ArmsTimerAndFires(t *testing.T) { t.Parallel() @@ -111,131 +91,82 @@ func TestMaxRunDuration_TaskStateUpdated_ArmsTimerAndFires(t *testing.T) { } } -func TestMaxRunDuration_TaskStateUpdated_ImmediateEnforcementWhenExpired(t *testing.T) { +func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenAllocNotEligible(t *testing.T) { t.Parallel() - alloc := mock.BatchAlloc() - maxRunDuration := 25 * time.Millisecond - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - - setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(alloc, setter) - - startedAt := time.Now().Add(-2 * maxRunDuration).UTC() - m.TaskStateUpdated(map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: startedAt, + cases := []struct { + name string + alloc *structs.Allocation + }{ + { + name: "not fully running", + alloc: func() *structs.Allocation { + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + return alloc + }(), }, - }) - - select { - case deadline := <-setter.deadlines: - must.Eq(t, startedAt.Add(maxRunDuration), deadline) - case <-time.After(250 * time.Millisecond): - t.Fatal("timed out waiting for immediate max_run_duration enforcement") - } -} - -func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenNotFullyRunning(t *testing.T) { - t.Parallel() - - alloc := mock.BatchAlloc() - maxRunDuration := 25 * time.Millisecond - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - - setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(alloc, setter) - - m.TaskStateUpdated(map[string]*structs.TaskState{ - "web": { - State: structs.TaskStatePending, + { + name: "non-batch job", + alloc: func() *structs.Allocation { + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + alloc.Job.Type = structs.JobTypeService + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + return alloc + }(), }, - }) - - select { - case deadline := <-setter.deadlines: - t.Fatalf("unexpected deadline fired: %v", deadline) - case <-time.After(100 * time.Millisecond): - } -} - -func TestMaxRunDuration_TaskStateUpdated_DoesNotFireForNonBatchJobs(t *testing.T) { - t.Parallel() - - alloc := mock.BatchAlloc() - maxRunDuration := 25 * time.Millisecond - alloc.Job.Type = structs.JobTypeService - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - - setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(alloc, setter) - - m.TaskStateUpdated(map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: time.Now().UTC(), + { + 
name: "desired status not run", + alloc: func() *structs.Allocation { + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + alloc.DesiredStatus = structs.AllocDesiredStatusStop + return alloc + }(), }, - }) - - select { - case deadline := <-setter.deadlines: - t.Fatalf("unexpected deadline fired: %v", deadline) - case <-time.After(100 * time.Millisecond): - } -} - -func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenDesiredStatusNotRun(t *testing.T) { - t.Parallel() - - alloc := mock.BatchAlloc() - maxRunDuration := 25 * time.Millisecond - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - alloc.DesiredStatus = structs.AllocDesiredStatusStop - - setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(alloc, setter) - - m.TaskStateUpdated(map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: time.Now().UTC(), + { + name: "terminal alloc", + alloc: func() *structs.Allocation { + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + alloc.ClientStatus = structs.AllocClientStatusComplete + return alloc + }(), }, - }) - - select { - case deadline := <-setter.deadlines: - t.Fatalf("unexpected deadline fired: %v", deadline) - case <-time.After(100 * time.Millisecond): } -} - -func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenAllocTerminal(t *testing.T) { - t.Parallel() - alloc := mock.BatchAlloc() - maxRunDuration := 25 * time.Millisecond - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - alloc.ClientStatus = structs.AllocClientStatusComplete - - setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(alloc, setter) - - m.TaskStateUpdated(map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: time.Now().UTC(), - }, - }) - - select { - case deadline := <-setter.deadlines: - t.Fatalf("unexpected deadline fired: %v", deadline) - case <-time.After(100 * time.Millisecond): + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + setter := newTestMaxRunDurationSetter() + m := NewMaxRunDuration(tc.alloc, setter) + + taskStates := map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: time.Now().UTC(), + }, + } + if tc.name == "not fully running" { + taskStates["web"] = &structs.TaskState{State: structs.TaskStatePending} + } + + m.TaskStateUpdated(taskStates) + + select { + case deadline := <-setter.deadlines: + t.Fatalf("unexpected deadline fired: %v", deadline) + case <-time.After(100 * time.Millisecond): + } + }) } } From 0aecff5175fefb183e851fbe87fe1f5dbbaef24a Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Thu, 9 Apr 2026 17:57:01 +0200 Subject: [PATCH 19/37] tidying up --- client/allocrunner/alloc_runner.go | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 392e1b5f43c..1e16ded0f34 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -1094,22 +1094,8 @@ func (ar *allocRunner) handleAllocUpdates() { // If there is already a pending update it will be discarded and replaced by // the latest update. 
func (ar *allocRunner) handleAllocUpdate(update *structs.Allocation) { - current := ar.Alloc() - // Detect Stop updates - stopping := !current.TerminalStatus() && update.TerminalStatus() - - if update.Job == nil && current != nil && current.Job != nil { - update = update.Copy() - update.Job = current.Job.Copy() - if update.TaskGroup == "" { - update.TaskGroup = current.TaskGroup - } - if update.DesiredStatus == "" { - update.DesiredStatus = current.DesiredStatus - } - - } + stopping := !ar.Alloc().TerminalStatus() && update.TerminalStatus() // Update ar.alloc ar.setAlloc(update) @@ -1142,12 +1128,10 @@ func (ar *allocRunner) EnforceMaxRunDurationTimeout(deadline time.Time) { now := time.Now().UTC() if ar.isShuttingDown() { - return } if now.Before(deadline) { - return } From f4545261a6ff232803b53067dffc43616b3d0464 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Thu, 9 Apr 2026 17:57:22 +0200 Subject: [PATCH 20/37] more tidying up --- client/allocrunner/alloc_runner_test.go | 62 ------------------------- 1 file changed, 62 deletions(-) diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index 4b2c42735b2..01dce5c61d3 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -2720,68 +2720,6 @@ func TestAllocRunner_MaxRunDuration_StopsExpiredAlloc(t *testing.T) { }) } -func TestAllocRunner_MaxRunDuration_PreservesConfigAcrossPartialAllocUpdate(t *testing.T) { - ci.Parallel(t) - - alloc := mock.BatchAlloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "run_for": "10s", - } - task.KillTimeout = 10 * time.Millisecond - maxRunDuration := 50 * time.Millisecond - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - - conf, cleanup := testAllocRunnerConfig(t, alloc) - defer cleanup() - - arIface, err := NewAllocRunner(conf) - must.NoError(t, err) - ar := arIface.(*allocRunner) - - go ar.Run() - defer destroy(ar) - - testutil.WaitForResult(func() (bool, error) { - state := ar.AllocState() - if state == nil { - return false, fmt.Errorf("no alloc state") - } - if state.ClientStatus != structs.AllocClientStatusRunning { - return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusRunning) - } - return true, nil - }, func(err error) { - state := ar.AllocState() - t.Fatalf("timed out waiting for alloc runner to start: %v; state=%#v", err, state) - }) - - update := ar.Alloc().CopySkipJob() - update.AllocModifyIndex++ - ar.Update(update) - - testutil.WaitForResult(func() (bool, error) { - state := ar.AllocState() - if state == nil { - return false, fmt.Errorf("no alloc state") - } - if state.ClientStatus != structs.AllocClientStatusComplete { - return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusComplete) - } - if state.ClientDescription != structs.AllocTimeoutReasonMaxRunDuration { - return false, fmt.Errorf("got description %q; want %q", state.ClientDescription, structs.AllocTimeoutReasonMaxRunDuration) - } - if !state.MaxRunDurationExceeded { - return false, fmt.Errorf("max run duration was not marked exceeded") - } - return true, nil - }, func(err error) { - state := ar.AllocState() - t.Fatalf("timed out waiting for alloc runner max_run_duration enforcement after partial alloc update: %v; state=%#v", err, state) - }) -} - func TestAllocRunner_setHookStatsHandler(t *testing.T) { ci.Parallel(t) From 
9b735a022a4c143b61ac2565a7d73a172e2729cd Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Thu, 9 Apr 2026 18:24:19 +0200 Subject: [PATCH 21/37] bugfix --- .../tasklifecycle/max_run_duration.go | 42 +++++++++++++---- .../tasklifecycle/max_run_duration_test.go | 47 +++++++++++++++++++ 2 files changed, 80 insertions(+), 9 deletions(-) diff --git a/client/allocrunner/tasklifecycle/max_run_duration.go b/client/allocrunner/tasklifecycle/max_run_duration.go index 144720da702..5a450cf54f6 100644 --- a/client/allocrunner/tasklifecycle/max_run_duration.go +++ b/client/allocrunner/tasklifecycle/max_run_duration.go @@ -24,9 +24,11 @@ type MaxRunDurationSetter interface { type MaxRunDuration struct { mu sync.Mutex - alloc *structs.Allocation - timer *time.Timer - setter MaxRunDurationSetter + alloc *structs.Allocation + deadline time.Time + hasDeadline bool + timer *time.Timer + setter MaxRunDurationSetter } func NewMaxRunDuration( @@ -54,7 +56,8 @@ func (m *MaxRunDuration) TaskStateUpdated(taskStates map[string]*structs.TaskSta m.mu.Lock() defer m.mu.Unlock() - m.resetTimerLocked(taskStates) + m.updateDeadlineLocked(taskStates) + m.resetTimerLocked() } // Stop cancels any active timer. @@ -65,25 +68,35 @@ func (m *MaxRunDuration) Stop() { m.stopTimerLocked() } -func (m *MaxRunDuration) resetTimerLocked(taskStates map[string]*structs.TaskState) { - m.stopTimerLocked() - +func (m *MaxRunDuration) updateDeadlineLocked(taskStates map[string]*structs.TaskState) { if m.alloc == nil { + m.deadline = time.Time{} + m.hasDeadline = false return } // Only running allocations with a valid max_run_duration and a fully - // running task set should have an active timer. + // running task set should establish a deadline. if m.alloc.TerminalStatus() { + m.deadline = time.Time{} + m.hasDeadline = false return } if m.alloc.DesiredStatus != "" && m.alloc.DesiredStatus != structs.AllocDesiredStatusRun { + m.deadline = time.Time{} + m.hasDeadline = false return } maxRunDuration, ok := m.alloc.MaxRunDuration() if !ok { + m.deadline = time.Time{} + m.hasDeadline = false + return + } + + if m.hasDeadline { return } @@ -92,7 +105,18 @@ func (m *MaxRunDuration) resetTimerLocked(taskStates map[string]*structs.TaskSta return } - deadline := startedAt.Add(maxRunDuration) + m.deadline = startedAt.Add(maxRunDuration) + m.hasDeadline = true +} + +func (m *MaxRunDuration) resetTimerLocked() { + m.stopTimerLocked() + + if !m.hasDeadline { + return + } + + deadline := m.deadline remaining := time.Until(deadline) if remaining <= 0 { go m.setter.EnforceMaxRunDurationTimeout(deadline) diff --git a/client/allocrunner/tasklifecycle/max_run_duration_test.go b/client/allocrunner/tasklifecycle/max_run_duration_test.go index 5050677f358..649c5bd88a2 100644 --- a/client/allocrunner/tasklifecycle/max_run_duration_test.go +++ b/client/allocrunner/tasklifecycle/max_run_duration_test.go @@ -91,6 +91,53 @@ func TestMaxRunDuration_TaskStateUpdated_ArmsTimerAndFires(t *testing.T) { } } +func TestMaxRunDuration_TaskStateUpdated_PreservesDeadlineWhenOneTaskFinishesEarly(t *testing.T) { + t.Parallel() + + alloc := mock.BatchAlloc() + maxRunDuration := 50 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + task2 := alloc.Job.TaskGroups[0].Tasks[0].Copy() + task2.Name = "web2" + alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2) + + setter := newTestMaxRunDurationSetter() + m := NewMaxRunDuration(alloc, setter) 
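+	// Arm the deadline with both tasks running, then report web2 as dead;
+	// the original deadline must survive the second update.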
+ + startedAt := time.Now().UTC() + m.TaskStateUpdated(map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: startedAt, + }, + "web2": { + State: structs.TaskStateRunning, + StartedAt: startedAt, + }, + }) + + m.TaskStateUpdated(map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: startedAt, + }, + "web2": { + State: structs.TaskStateDead, + StartedAt: startedAt, + FinishedAt: startedAt.Add(2 * time.Millisecond), + }, + }) + + select { + case deadline := <-setter.deadlines: + must.Eq(t, startedAt.Add(maxRunDuration), deadline) + case <-time.After(500 * time.Millisecond): + t.Fatal("timed out waiting for max_run_duration deadline after one task finished early") + } +} + func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenAllocNotEligible(t *testing.T) { t.Parallel() From 7e0509a7c80a2e817eaef6540ab0e993d9a671e6 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Mon, 13 Apr 2026 10:53:52 +0200 Subject: [PATCH 22/37] unify fully running since --- .../tasklifecycle/max_run_duration.go | 31 ++----------------- .../tasklifecycle/max_run_duration_test.go | 4 +-- nomad/state/state_store.go | 24 ++++---------- nomad/state/state_store_test.go | 21 ++----------- nomad/structs/alloc.go | 16 +++++++--- 5 files changed, 25 insertions(+), 71 deletions(-) diff --git a/client/allocrunner/tasklifecycle/max_run_duration.go b/client/allocrunner/tasklifecycle/max_run_duration.go index 5a450cf54f6..127a443b82c 100644 --- a/client/allocrunner/tasklifecycle/max_run_duration.go +++ b/client/allocrunner/tasklifecycle/max_run_duration.go @@ -56,7 +56,7 @@ func (m *MaxRunDuration) TaskStateUpdated(taskStates map[string]*structs.TaskSta m.mu.Lock() defer m.mu.Unlock() - m.updateDeadlineLocked(taskStates) + m.setDeadline(taskStates) m.resetTimerLocked() } @@ -68,7 +68,7 @@ func (m *MaxRunDuration) Stop() { m.stopTimerLocked() } -func (m *MaxRunDuration) updateDeadlineLocked(taskStates map[string]*structs.TaskState) { +func (m *MaxRunDuration) setDeadline(taskStates map[string]*structs.TaskState) { if m.alloc == nil { m.deadline = time.Time{} m.hasDeadline = false @@ -100,7 +100,7 @@ func (m *MaxRunDuration) updateDeadlineLocked(taskStates map[string]*structs.Tas return } - startedAt, ok := FullyRunningSince(taskStates) + startedAt, ok := structs.FullyRunningSince(taskStates) if !ok { return } @@ -154,28 +154,3 @@ func (m *MaxRunDuration) stopTimerLocked() { } m.timer = nil } - -// FullyRunningSince returns the latest StartedAt timestamp across all task -// states, but only when every known task state is running with a non-zero -// start time. 
-func FullyRunningSince(taskStates map[string]*structs.TaskState) (time.Time, bool) { - if len(taskStates) == 0 { - return time.Time{}, false - } - - var latest time.Time - for _, ts := range taskStates { - if ts == nil || ts.State != structs.TaskStateRunning || ts.StartedAt.IsZero() { - return time.Time{}, false - } - if ts.StartedAt.After(latest) { - latest = ts.StartedAt - } - } - - if latest.IsZero() { - return time.Time{}, false - } - - return latest, true -} diff --git a/client/allocrunner/tasklifecycle/max_run_duration_test.go b/client/allocrunner/tasklifecycle/max_run_duration_test.go index 649c5bd88a2..c9c9d8b6821 100644 --- a/client/allocrunner/tasklifecycle/max_run_duration_test.go +++ b/client/allocrunner/tasklifecycle/max_run_duration_test.go @@ -33,7 +33,7 @@ func TestMaxRunDuration_FullyRunningSince(t *testing.T) { earlier := now.Add(-2 * time.Second) later := now.Add(-1 * time.Second) - got, ok := FullyRunningSince(map[string]*structs.TaskState{ + got, ok := structs.FullyRunningSince(map[string]*structs.TaskState{ "a": { State: structs.TaskStateRunning, StartedAt: earlier, @@ -51,7 +51,7 @@ func TestMaxRunDuration_FullyRunningSince(t *testing.T) { func TestMaxRunDuration_FullyRunningSince_FalseWhenNotFullyRunning(t *testing.T) { t.Parallel() - _, ok := FullyRunningSince(map[string]*structs.TaskState{ + _, ok := structs.FullyRunningSince(map[string]*structs.TaskState{ "a": { State: structs.TaskStateRunning, StartedAt: time.Now().Add(-time.Second).UTC(), diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 2232dd6ca82..f660d9ad94d 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -4054,9 +4054,7 @@ func (s *StateStore) nestedUpdateAllocFromClient(txn *txn, index uint64, alloc * // Pull in anything the client is the authority on copyAlloc.ClientStatus = alloc.ClientStatus - if alloc.ClientDescription != "" { - copyAlloc.ClientDescription = alloc.ClientDescription - } + copyAlloc.ClientDescription = alloc.ClientDescription copyAlloc.TaskStates = alloc.TaskStates copyAlloc.NetworkStatus = alloc.NetworkStatus @@ -4290,22 +4288,12 @@ func (s *StateStore) upsertAllocsImpl(index uint64, allocs []*structs.Allocation // Keep the clients task states alloc.TaskStates = exist.TaskStates - // If the scheduler is marking this allocation as lost, unknown, or another - // explicit terminal client status, preserve the scheduler-provided status - // and description instead of reusing the existing allocation values. - switch alloc.ClientStatus { - case structs.AllocClientStatusLost, - structs.AllocClientStatusUnknown, - structs.AllocClientStatusFailed, - structs.AllocClientStatusComplete: - if alloc.ClientDescription == "" { - alloc.ClientDescription = exist.ClientDescription - } - default: + // If the scheduler is marking this allocation as lost or unknown we do not + // want to reuse the status of the existing allocation. 
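+	// Every other status stays client-authoritative, so the existing values
+	// are carried forward below.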
+ if alloc.ClientStatus != structs.AllocClientStatusLost && + alloc.ClientStatus != structs.AllocClientStatusUnknown { alloc.ClientStatus = exist.ClientStatus - if alloc.ClientDescription == "" { - alloc.ClientDescription = exist.ClientDescription - } + alloc.ClientDescription = exist.ClientDescription } // The job has been denormalized so re-attach the original job diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index ba6ae7a26c1..a04e1262119 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -171,15 +171,6 @@ func TestStateStore_UpsertPlanResults_AllocationsDenormalized(t *testing.T) { DesiredDescription: "desired desc", ClientStatus: structs.AllocClientStatusLost, } - - timeoutStoppedAlloc := mock.Alloc() - timeoutStoppedAlloc.Job = job - timeoutStoppedAlloc.JobID = job.ID - timeoutStoppedAllocDiff := &structs.AllocationDiff{ - ID: timeoutStoppedAlloc.ID, - DesiredDescription: structs.AllocTimeoutReasonMaxRunDuration, - ClientStatus: structs.AllocClientStatusFailed, - } preemptedAlloc := mock.Alloc() preemptedAlloc.Job = job preemptedAlloc.JobID = job.ID @@ -190,7 +181,7 @@ func TestStateStore_UpsertPlanResults_AllocationsDenormalized(t *testing.T) { must.NoError(t, state.UpsertJob(structs.MsgTypeTestSetup, 900, nil, job)) must.NoError(t, state.UpsertAllocs( - structs.MsgTypeTestSetup, 999, []*structs.Allocation{stoppedAlloc, timeoutStoppedAlloc, preemptedAlloc})) + structs.MsgTypeTestSetup, 999, []*structs.Allocation{stoppedAlloc, preemptedAlloc})) // modify job and ensure that stopped and preempted alloc point to original Job mJob := job.Copy() @@ -207,7 +198,7 @@ func TestStateStore_UpsertPlanResults_AllocationsDenormalized(t *testing.T) { // Create a plan result res := structs.ApplyPlanResultsRequest{ AllocsUpdated: []*structs.Allocation{alloc}, - AllocsStopped: []*structs.AllocationDiff{stoppedAllocDiff, timeoutStoppedAllocDiff}, + AllocsStopped: []*structs.AllocationDiff{stoppedAllocDiff}, Job: mJob, EvalID: eval.ID, AllocsPreempted: []*structs.AllocationDiff{preemptedAllocDiff}, @@ -236,14 +227,6 @@ func TestStateStore_UpsertPlanResults_AllocationsDenormalized(t *testing.T) { test.Eq(t, planModifyIndex, updatedStoppedAlloc.AllocModifyIndex) test.Eq(t, job.TaskGroups, updatedStoppedAlloc.Job.TaskGroups) - updatedTimeoutStoppedAlloc, err := state.AllocByID(ws, timeoutStoppedAlloc.ID) - must.NoError(t, err) - test.Eq(t, timeoutStoppedAllocDiff.DesiredDescription, updatedTimeoutStoppedAlloc.DesiredDescription) - test.Eq(t, structs.AllocDesiredStatusStop, updatedTimeoutStoppedAlloc.DesiredStatus) - test.Eq(t, timeoutStoppedAllocDiff.ClientStatus, updatedTimeoutStoppedAlloc.ClientStatus) - test.Eq(t, planModifyIndex, updatedTimeoutStoppedAlloc.AllocModifyIndex) - test.Eq(t, job.TaskGroups, updatedTimeoutStoppedAlloc.Job.TaskGroups) - updatedPreemptedAlloc, err := state.AllocByID(ws, preemptedAlloc.ID) must.NoError(t, err) test.Eq(t, structs.AllocDesiredStatusEvict, updatedPreemptedAlloc.DesiredStatus) diff --git a/nomad/structs/alloc.go b/nomad/structs/alloc.go index 7af3e6683b8..e662b634175 100644 --- a/nomad/structs/alloc.go +++ b/nomad/structs/alloc.go @@ -384,14 +384,14 @@ func (a *Allocation) MaxRunDuration() (time.Duration, bool) { // FullyRunningSince returns the latest StartedAt timestamp across all task // states, but only when every known task state is running with a non-zero start // time. 
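+// It is shared by the client-side max_run_duration timer and by the
+// MaxRunDurationDeadline computation below.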
-func (a *Allocation) FullyRunningSince() (time.Time, bool) {
-	if a == nil {
+func FullyRunningSince(taskStates map[string]*TaskState) (time.Time, bool) {
+	if len(taskStates) == 0 {
 		return time.Time{}, false
 	}
 
 	var latest time.Time
-	for _, ts := range a.TaskStates {
+	for _, ts := range taskStates {
 		if ts == nil || ts.State != TaskStateRunning || ts.StartedAt.IsZero() {
 			return time.Time{}, false
 		}
@@ -407,6 +407,14 @@
 	return latest, true
 }
 
+func (a *Allocation) fullyRunningSince() (time.Time, bool) {
+	if a == nil {
+		return time.Time{}, false
+	}
+
+	return FullyRunningSince(a.TaskStates)
+}
+
 // MaxRunDurationDeadline returns the deadline at which the allocation should be
 // considered timed out based on max_run_duration.
 func (a *Allocation) MaxRunDurationDeadline() (time.Time, bool) {
@@ -415,7 +423,7 @@
 		return time.Time{}, false
 	}
 
-	startedAt, ok := a.FullyRunningSince()
+	startedAt, ok := a.fullyRunningSince()
 	if !ok {
 		return time.Time{}, false
 	}
 
From ea2f1ad4c259275723e106a33f509042e46f7ad0 Mon Sep 17 00:00:00 2001
From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com>
Date: Mon, 13 Apr 2026 10:54:01 +0200
Subject: [PATCH 23/37] hook alloc can never be nil

---
 client/allocrunner/alloc_runner.go | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go
index 1e16ded0f34..076c30a369d 100644
--- a/client/allocrunner/alloc_runner.go
+++ b/client/allocrunner/alloc_runner.go
@@ -699,19 +699,17 @@ func (ar *allocRunner) handleTaskStateUpdates() {
 		calloc := ar.clientAlloc(states)
 
 		hookAlloc := calloc
-		if hookAlloc != nil {
-			serverAlloc := ar.Alloc()
-			hookAlloc = &structs.Allocation{
-				ID:                calloc.ID,
-				TaskStates:        calloc.TaskStates,
-				ClientStatus:      calloc.ClientStatus,
-				ClientDescription: calloc.ClientDescription,
-				DeploymentStatus:  calloc.DeploymentStatus,
-				NetworkStatus:     calloc.NetworkStatus,
-				Job:               serverAlloc.Job,
-				TaskGroup:         serverAlloc.TaskGroup,
-				DesiredStatus:     serverAlloc.DesiredStatus,
-			}
+		serverAlloc := ar.Alloc()
+		hookAlloc = &structs.Allocation{
+			ID:                calloc.ID,
+			TaskStates:        calloc.TaskStates,
+			ClientStatus:      calloc.ClientStatus,
+			ClientDescription: calloc.ClientDescription,
+			DeploymentStatus:  calloc.DeploymentStatus,
+			NetworkStatus:     calloc.NetworkStatus,
+			Job:               serverAlloc.Job,
+			TaskGroup:         serverAlloc.TaskGroup,
+			DesiredStatus:     serverAlloc.DesiredStatus,
 		}
 
 		req := &interfaces.RunnerUpdateRequest{
From f99673109f7fddc0b4bd9bcc293c637f944b2eeb Mon Sep 17 00:00:00 2001
From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com>
Date: Mon, 13 Apr 2026 10:55:29 +0200
Subject: [PATCH 24/37] event stream corrections

---
 nomad/state/events.go      |  3 +-
 nomad/state/events_test.go | 70 ++++++++++++++++++--------------------
 2 files changed, 35 insertions(+), 38 deletions(-)

diff --git a/nomad/state/events.go b/nomad/state/events.go
index 3470e06bcf8..396d4d42fd0 100644
--- a/nomad/state/events.go
+++ b/nomad/state/events.go
@@ -358,7 +358,8 @@ func eventFromChange(change memdb.Change) (structs.Event, bool) {
 		alloc.Job = nil
 
 		allocEvent := &structs.AllocationEvent{Allocation: alloc}
-		if alloc.ClientDescription == structs.AllocTimeoutReasonMaxRunDuration {
+		if alloc.ClientStatus == structs.AllocClientStatusComplete &&
+			alloc.ClientDescription == structs.AllocTimeoutReasonMaxRunDuration {
 			allocEvent.Timeout = true
allocEvent.TimeoutReason = alloc.ClientDescription } diff --git a/nomad/state/events_test.go b/nomad/state/events_test.go index 57dad2710b2..4f885e6db20 100644 --- a/nomad/state/events_test.go +++ b/nomad/state/events_test.go @@ -546,50 +546,46 @@ func TestEventsFromChanges_ApplyPlanResultsRequestType(t *testing.T) { } } -func TestEventsFromChanges_ApplyPlanResultsRequestType_TimeoutStoppedAlloc(t *testing.T) { +func TestEventFromChange_AllocationTimeoutFields(t *testing.T) { ci.Parallel(t) s := TestStateStoreCfg(t, TestStateStorePublisher(t)) defer s.StopEventBroker() - mockJob := mock.Job() - must.NoError(t, s.UpsertJob(structs.MsgTypeTestSetup, 9, nil, mockJob)) - - stoppedAlloc := mock.Alloc() - stoppedAlloc.Job = mockJob - stoppedAlloc.JobID = mockJob.ID - stoppedAlloc.ClientDescription = structs.AllocTimeoutReasonMaxRunDuration + timeoutAlloc := mock.Alloc() + timeoutAlloc.ClientStatus = structs.AllocClientStatusComplete + timeoutAlloc.ClientDescription = structs.AllocTimeoutReasonMaxRunDuration - must.NoError(t, s.UpsertAllocs(structs.MsgTypeTestSetup, 10, []*structs.Allocation{stoppedAlloc})) - - msgType := structs.ApplyPlanResultsRequestType - req := &structs.ApplyPlanResultsRequest{ - AllocsStopped: []*structs.AllocationDiff{{ - ID: stoppedAlloc.ID, - ClientStatus: structs.AllocClientStatusFailed, - ClientDescription: structs.AllocTimeoutReasonMaxRunDuration, - }}, - Job: mockJob, - } - - must.NoError(t, s.UpsertPlanResults(msgType, 100, req)) - - events := WaitForEvents(t, s, 100, 1, 1*time.Second) - must.Len(t, 2, events) + timeoutEvent, ok := eventFromChange(memdb.Change{ + Table: "allocs", + Before: nil, + After: timeoutAlloc, + }) + must.True(t, ok) - var allocEvent *structs.AllocationEvent - for _, e := range events { - must.Eq(t, structs.TypePlanResult, e.Type) - if e.Topic == structs.TopicAllocation { - allocEvent = e.Payload.(*structs.AllocationEvent) - } - } + timeoutPayload, ok := timeoutEvent.Payload.(*structs.AllocationEvent) + must.True(t, ok) + must.True(t, timeoutPayload.Timeout) + must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, timeoutPayload.TimeoutReason) + must.Eq(t, structs.AllocClientStatusComplete, timeoutPayload.Allocation.ClientStatus) + must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, timeoutPayload.Allocation.ClientDescription) + + nonTimeoutAlloc := mock.Alloc() + nonTimeoutAlloc.ClientStatus = structs.AllocClientStatusFailed + nonTimeoutAlloc.ClientDescription = structs.AllocTimeoutReasonMaxRunDuration + + nonTimeoutEvent, ok := eventFromChange(memdb.Change{ + Table: "allocs", + Before: nil, + After: nonTimeoutAlloc, + }) + must.True(t, ok) - must.NotNil(t, allocEvent) - must.True(t, allocEvent.Timeout) - must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, allocEvent.TimeoutReason) - must.Eq(t, structs.AllocDesiredStatusStop, allocEvent.Allocation.DesiredStatus) - must.Eq(t, structs.AllocClientStatusFailed, allocEvent.Allocation.ClientStatus) - must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, allocEvent.Allocation.ClientDescription) + nonTimeoutPayload, ok := nonTimeoutEvent.Payload.(*structs.AllocationEvent) + must.True(t, ok) + must.False(t, nonTimeoutPayload.Timeout) + must.Eq(t, "", nonTimeoutPayload.TimeoutReason) + must.Eq(t, structs.AllocClientStatusFailed, nonTimeoutPayload.Allocation.ClientStatus) + must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, nonTimeoutPayload.Allocation.ClientDescription) } func TestEventsFromChanges_BatchNodeUpdateDrainRequestType(t *testing.T) { From a5212394e65abc46c771a0027a59544851d3eddc Mon Sep 
17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 14 Apr 2026 17:58:12 +0200 Subject: [PATCH 25/37] desired status comment --- client/allocrunner/tasklifecycle/max_run_duration.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/client/allocrunner/tasklifecycle/max_run_duration.go b/client/allocrunner/tasklifecycle/max_run_duration.go index 127a443b82c..71a607032f2 100644 --- a/client/allocrunner/tasklifecycle/max_run_duration.go +++ b/client/allocrunner/tasklifecycle/max_run_duration.go @@ -83,6 +83,9 @@ func (m *MaxRunDuration) setDeadline(taskStates map[string]*structs.TaskState) { return } + // DesiredStatus defaults to the zero value for normal running allocations. + // Treat an empty status the same as "run" and only clear the deadline when + // the alloc is explicitly marked with a non-run desired status. if m.alloc.DesiredStatus != "" && m.alloc.DesiredStatus != structs.AllocDesiredStatusRun { m.deadline = time.Time{} m.hasDeadline = false From 2f420ac0426be138b79410695234f6e5038bc15e Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 14 Apr 2026 18:00:15 +0200 Subject: [PATCH 26/37] allocrunner unnecessary copy fix --- client/allocrunner/alloc_runner.go | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 076c30a369d..5ab5469864b 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -698,18 +698,16 @@ func (ar *allocRunner) handleTaskStateUpdates() { // Get the client allocation calloc := ar.clientAlloc(states) - hookAlloc := calloc - serverAlloc := ar.Alloc() - hookAlloc = &structs.Allocation{ + hookAlloc := &structs.Allocation{ ID: calloc.ID, TaskStates: calloc.TaskStates, ClientStatus: calloc.ClientStatus, ClientDescription: calloc.ClientDescription, DeploymentStatus: calloc.DeploymentStatus, NetworkStatus: calloc.NetworkStatus, - Job: serverAlloc.Job, - TaskGroup: serverAlloc.TaskGroup, - DesiredStatus: serverAlloc.DesiredStatus, + Job: ar.Alloc().Job, + TaskGroup: ar.Alloc().TaskGroup, + DesiredStatus: ar.Alloc().DesiredStatus, } req := &interfaces.RunnerUpdateRequest{ From ae2d08a4abe925b098e67071d02e4fbcb9fcb8ae Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 14 Apr 2026 18:13:54 +0200 Subject: [PATCH 27/37] fix UTC and interface --- client/allocrunner/alloc_runner.go | 10 +++------- client/allocrunner/alloc_runner_hooks.go | 5 ++--- client/allocrunner/interfaces/runner_lifecycle.go | 5 ++--- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 5ab5469864b..83363d316f2 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -710,13 +710,9 @@ func (ar *allocRunner) handleTaskStateUpdates() { DesiredStatus: ar.Alloc().DesiredStatus, } - req := &interfaces.RunnerUpdateRequest{ - Alloc: hookAlloc, - TaskStates: states, - } if ar.maxRunDuration != nil { - ar.maxRunDuration.SetAlloc(req.Alloc) - ar.maxRunDuration.TaskStateUpdated(req.TaskStates) + ar.maxRunDuration.SetAlloc(hookAlloc) + ar.maxRunDuration.TaskStateUpdated(states) } // Update the server @@ -1121,7 +1117,7 @@ func (ar *allocRunner) Listener() *cstructs.AllocListener { } func (ar *allocRunner) EnforceMaxRunDurationTimeout(deadline time.Time) { - now := time.Now().UTC() + now := time.Now() if 
ar.isShuttingDown() { return diff --git a/client/allocrunner/alloc_runner_hooks.go b/client/allocrunner/alloc_runner_hooks.go index d57d94167c4..abc2898b08b 100644 --- a/client/allocrunner/alloc_runner_hooks.go +++ b/client/allocrunner/alloc_runner_hooks.go @@ -224,9 +224,8 @@ func (ar *allocRunner) update(update *structs.Allocation) error { ).SetAllocDir(ar.allocDir.AllocDirPath()).Build() req := &interfaces.RunnerUpdateRequest{ - Alloc: update, - AllocEnv: allocEnv, - TaskStates: ar.AllocState().TaskStates, + Alloc: update, + AllocEnv: allocEnv, } var merr multierror.Error diff --git a/client/allocrunner/interfaces/runner_lifecycle.go b/client/allocrunner/interfaces/runner_lifecycle.go index 6267c978e00..9e1f0e43852 100644 --- a/client/allocrunner/interfaces/runner_lifecycle.go +++ b/client/allocrunner/interfaces/runner_lifecycle.go @@ -56,9 +56,8 @@ type RunnerUpdateHook interface { } type RunnerUpdateRequest struct { - Alloc *structs.Allocation - AllocEnv *taskenv.TaskEnv - TaskStates map[string]*structs.TaskState + Alloc *structs.Allocation + AllocEnv *taskenv.TaskEnv } // A RunnerTaskRestartHook is executed just before the allocation runner is From bc6344c3176901b9277ef896080d575a864d848c Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 14 Apr 2026 18:29:03 +0200 Subject: [PATCH 28/37] max_run_duration cleanups (thanks for the comments @mismithhisler) --- client/allocrunner/alloc_runner.go | 2 +- .../tasklifecycle/max_run_duration.go | 36 ++++--------- .../tasklifecycle/max_run_duration_test.go | 52 +++++++++---------- 3 files changed, 37 insertions(+), 53 deletions(-) diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 83363d316f2..cbb01a08792 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -296,7 +296,7 @@ func NewAllocRunner(config *config.AllocRunnerConfig) (interfaces.AllocRunner, e ) ar.taskCoordinator = tasklifecycle.NewCoordinator(ar.logger, tg.Tasks, ar.waitCh) - ar.maxRunDuration = tasklifecycle.NewMaxRunDuration(ar.alloc, ar) + ar.maxRunDuration = tasklifecycle.NewMaxRunDuration(ar.EnforceMaxRunDurationTimeout) shutdownDelayCtx, shutdownDelayCancel := context.WithCancel(context.Background()) ar.shutdownDelayCtx = shutdownDelayCtx diff --git a/client/allocrunner/tasklifecycle/max_run_duration.go b/client/allocrunner/tasklifecycle/max_run_duration.go index 71a607032f2..40d2cd44a86 100644 --- a/client/allocrunner/tasklifecycle/max_run_duration.go +++ b/client/allocrunner/tasklifecycle/max_run_duration.go @@ -10,9 +10,7 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) -type MaxRunDurationSetter interface { - EnforceMaxRunDurationTimeout(time.Time) -} +type MaxRunDurationTimeoutFn func(time.Time) // MaxRunDuration coordinates local enforcement of task group // `max_run_duration`. 
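+//
+// A group opts in from its jobspec. A minimal sketch (the HCL shape is
+// assumed here from the group-level MaxRunDuration field exercised in the
+// tests below):
+//
+//	group "batch" {
+//	  max_run_duration = "2h"
+//	}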
@@ -24,21 +22,14 @@ type MaxRunDurationSetter interface { type MaxRunDuration struct { mu sync.Mutex - alloc *structs.Allocation - deadline time.Time - hasDeadline bool - timer *time.Timer - setter MaxRunDurationSetter + alloc *structs.Allocation + deadline time.Time + timer *time.Timer + onTimeout MaxRunDurationTimeoutFn } -func NewMaxRunDuration( - alloc *structs.Allocation, - setter MaxRunDurationSetter, -) *MaxRunDuration { - return &MaxRunDuration{ - alloc: alloc, - setter: setter, - } +func NewMaxRunDuration(onTimeout MaxRunDurationTimeoutFn) *MaxRunDuration { + return &MaxRunDuration{onTimeout: onTimeout} } // SetAlloc updates the authoritative allocation used for @@ -71,7 +62,6 @@ func (m *MaxRunDuration) Stop() { func (m *MaxRunDuration) setDeadline(taskStates map[string]*structs.TaskState) { if m.alloc == nil { m.deadline = time.Time{} - m.hasDeadline = false return } @@ -79,7 +69,6 @@ func (m *MaxRunDuration) setDeadline(taskStates map[string]*structs.TaskState) { // running task set should establish a deadline. if m.alloc.TerminalStatus() { m.deadline = time.Time{} - m.hasDeadline = false return } @@ -88,18 +77,16 @@ func (m *MaxRunDuration) setDeadline(taskStates map[string]*structs.TaskState) { // the alloc is explicitly marked with a non-run desired status. if m.alloc.DesiredStatus != "" && m.alloc.DesiredStatus != structs.AllocDesiredStatusRun { m.deadline = time.Time{} - m.hasDeadline = false return } maxRunDuration, ok := m.alloc.MaxRunDuration() if !ok { m.deadline = time.Time{} - m.hasDeadline = false return } - if m.hasDeadline { + if !m.deadline.IsZero() { return } @@ -109,20 +96,19 @@ func (m *MaxRunDuration) setDeadline(taskStates map[string]*structs.TaskState) { } m.deadline = startedAt.Add(maxRunDuration) - m.hasDeadline = true } func (m *MaxRunDuration) resetTimerLocked() { m.stopTimerLocked() - if !m.hasDeadline { + if m.deadline.IsZero() { return } deadline := m.deadline remaining := time.Until(deadline) if remaining <= 0 { - go m.setter.EnforceMaxRunDurationTimeout(deadline) + go m.onTimeout(deadline) return } @@ -140,7 +126,7 @@ func (m *MaxRunDuration) resetTimerLocked() { m.timer = nil m.mu.Unlock() - m.setter.EnforceMaxRunDurationTimeout(deadline) + m.onTimeout(deadline) }(timer, deadline) } diff --git a/client/allocrunner/tasklifecycle/max_run_duration_test.go b/client/allocrunner/tasklifecycle/max_run_duration_test.go index c9c9d8b6821..eea7f38326d 100644 --- a/client/allocrunner/tasklifecycle/max_run_duration_test.go +++ b/client/allocrunner/tasklifecycle/max_run_duration_test.go @@ -12,18 +12,11 @@ import ( "github.com/shoenig/test/must" ) -type testMaxRunDurationSetter struct { - deadlines chan time.Time -} - -func newTestMaxRunDurationSetter() *testMaxRunDurationSetter { - return &testMaxRunDurationSetter{ - deadlines: make(chan time.Time, 8), - } -} - -func (s *testMaxRunDurationSetter) EnforceMaxRunDurationTimeout(deadline time.Time) { - s.deadlines <- deadline +func newTestMaxRunDurationCallback() (func(time.Time), chan time.Time) { + deadlines := make(chan time.Time, 8) + return func(deadline time.Time) { + deadlines <- deadline + }, deadlines } func TestMaxRunDuration_FullyRunningSince(t *testing.T) { @@ -72,8 +65,9 @@ func TestMaxRunDuration_TaskStateUpdated_ArmsTimerAndFires(t *testing.T) { alloc.Job.Type = structs.JobTypeBatch alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(alloc, setter) + onTimeout, deadlines := newTestMaxRunDurationCallback() + m := 
NewMaxRunDuration(onTimeout) + m.SetAlloc(alloc) startedAt := time.Now().UTC() m.TaskStateUpdated(map[string]*structs.TaskState{ @@ -84,7 +78,7 @@ func TestMaxRunDuration_TaskStateUpdated_ArmsTimerAndFires(t *testing.T) { }) select { - case deadline := <-setter.deadlines: + case deadline := <-deadlines: must.Eq(t, startedAt.Add(maxRunDuration), deadline) case <-time.After(500 * time.Millisecond): t.Fatal("timed out waiting for max_run_duration deadline") @@ -103,8 +97,9 @@ func TestMaxRunDuration_TaskStateUpdated_PreservesDeadlineWhenOneTaskFinishesEar task2.Name = "web2" alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2) - setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(alloc, setter) + onTimeout, deadlines := newTestMaxRunDurationCallback() + m := NewMaxRunDuration(onTimeout) + m.SetAlloc(alloc) startedAt := time.Now().UTC() m.TaskStateUpdated(map[string]*structs.TaskState{ @@ -131,7 +126,7 @@ func TestMaxRunDuration_TaskStateUpdated_PreservesDeadlineWhenOneTaskFinishesEar }) select { - case deadline := <-setter.deadlines: + case deadline := <-deadlines: must.Eq(t, startedAt.Add(maxRunDuration), deadline) case <-time.After(500 * time.Millisecond): t.Fatal("timed out waiting for max_run_duration deadline after one task finished early") @@ -193,8 +188,9 @@ func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenAllocNotEligible(t *test t.Run(tc.name, func(t *testing.T) { t.Parallel() - setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(tc.alloc, setter) + onTimeout, deadlines := newTestMaxRunDurationCallback() + m := NewMaxRunDuration(onTimeout) + m.SetAlloc(tc.alloc) taskStates := map[string]*structs.TaskState{ "web": { @@ -209,7 +205,7 @@ func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenAllocNotEligible(t *test m.TaskStateUpdated(taskStates) select { - case deadline := <-setter.deadlines: + case deadline := <-deadlines: t.Fatalf("unexpected deadline fired: %v", deadline) case <-time.After(100 * time.Millisecond): } @@ -225,8 +221,9 @@ func TestMaxRunDuration_SetAlloc_UsesLatestAllocConfig(t *testing.T) { alloc.Job.Type = structs.JobTypeBatch alloc.Job.TaskGroups[0].MaxRunDuration = &initial - setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(alloc, setter) + onTimeout, deadlines := newTestMaxRunDurationCallback() + m := NewMaxRunDuration(onTimeout) + m.SetAlloc(alloc) updated := alloc.Copy() latest := 40 * time.Millisecond @@ -242,7 +239,7 @@ func TestMaxRunDuration_SetAlloc_UsesLatestAllocConfig(t *testing.T) { }) select { - case deadline := <-setter.deadlines: + case deadline := <-deadlines: must.Eq(t, startedAt.Add(latest), deadline) case <-time.After(500 * time.Millisecond): t.Fatal("timed out waiting for updated max_run_duration deadline") @@ -257,8 +254,9 @@ func TestMaxRunDuration_Stop_CancelsTimer(t *testing.T) { alloc.Job.Type = structs.JobTypeBatch alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(alloc, setter) + onTimeout, deadlines := newTestMaxRunDurationCallback() + m := NewMaxRunDuration(onTimeout) + m.SetAlloc(alloc) m.TaskStateUpdated(map[string]*structs.TaskState{ "web": { @@ -270,7 +268,7 @@ func TestMaxRunDuration_Stop_CancelsTimer(t *testing.T) { m.Stop() select { - case deadline := <-setter.deadlines: + case deadline := <-deadlines: t.Fatalf("unexpected deadline fired after stop: %v", deadline) case <-time.After(250 * time.Millisecond): } From f246b59acc8401dbcc79962b15e7798435358039 Mon Sep 17 00:00:00 2001 From: 
Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Fri, 17 Apr 2026 13:55:38 +0200 Subject: [PATCH 29/37] client: enforce max_run_duration regardless of task state (#27827) --- client/allocrunner/alloc_runner.go | 31 -- client/allocrunner/alloc_runner_hooks.go | 1 + client/allocrunner/max_run_duration_hook.go | 159 ++++++++++ .../allocrunner/max_run_duration_hook_test.go | 234 +++++++++++++++ .../tasklifecycle/max_run_duration.go | 145 --------- .../tasklifecycle/max_run_duration_test.go | 275 ------------------ 6 files changed, 394 insertions(+), 451 deletions(-) create mode 100644 client/allocrunner/max_run_duration_hook.go create mode 100644 client/allocrunner/max_run_duration_hook_test.go delete mode 100644 client/allocrunner/tasklifecycle/max_run_duration.go delete mode 100644 client/allocrunner/tasklifecycle/max_run_duration_test.go diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index cbb01a08792..eaae5d9de0b 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -152,10 +152,6 @@ type allocRunner struct { // transitions. runnerHooks []interfaces.RunnerHook - // maxRunDuration coordinates alloc-level max_run_duration enforcement from - // authoritative alloc and task state updates. - maxRunDuration *tasklifecycle.MaxRunDuration - // hookResources holds the output from allocrunner hooks so that later // allocrunner hooks or task runner hooks can read them hookResources *cstructs.AllocHookResources @@ -296,7 +292,6 @@ func NewAllocRunner(config *config.AllocRunnerConfig) (interfaces.AllocRunner, e ) ar.taskCoordinator = tasklifecycle.NewCoordinator(ar.logger, tg.Tasks, ar.waitCh) - ar.maxRunDuration = tasklifecycle.NewMaxRunDuration(ar.EnforceMaxRunDurationTimeout) shutdownDelayCtx, shutdownDelayCancel := context.WithCancel(context.Background()) ar.shutdownDelayCtx = shutdownDelayCtx @@ -413,10 +408,6 @@ func (ar *allocRunner) Run() { return } - if ar.maxRunDuration != nil { - ar.maxRunDuration.Stop() - } - // Run the postrun hooks if err := ar.postrun(); err != nil { ar.logger.Error("postrun failed", "error", err) @@ -698,23 +689,6 @@ func (ar *allocRunner) handleTaskStateUpdates() { // Get the client allocation calloc := ar.clientAlloc(states) - hookAlloc := &structs.Allocation{ - ID: calloc.ID, - TaskStates: calloc.TaskStates, - ClientStatus: calloc.ClientStatus, - ClientDescription: calloc.ClientDescription, - DeploymentStatus: calloc.DeploymentStatus, - NetworkStatus: calloc.NetworkStatus, - Job: ar.Alloc().Job, - TaskGroup: ar.Alloc().TaskGroup, - DesiredStatus: ar.Alloc().DesiredStatus, - } - - if ar.maxRunDuration != nil { - ar.maxRunDuration.SetAlloc(hookAlloc) - ar.maxRunDuration.TaskStateUpdated(states) - } - // Update the server ar.stateUpdater.AllocStateUpdated(calloc) @@ -1311,11 +1285,6 @@ func (ar *allocRunner) Shutdown() { go func() { ar.logger.Trace("shutting down") - // Shutdown tasks gracefully if they were run - if ar.maxRunDuration != nil { - ar.maxRunDuration.Stop() - } - // Shutdown task runners var wg sync.WaitGroup for _, tr := range ar.tasks { diff --git a/client/allocrunner/alloc_runner_hooks.go b/client/allocrunner/alloc_runner_hooks.go index abc2898b08b..51a14368599 100644 --- a/client/allocrunner/alloc_runner_hooks.go +++ b/client/allocrunner/alloc_runner_hooks.go @@ -111,6 +111,7 @@ func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error { ar.runnerHooks = []interfaces.RunnerHook{ newIdentityHook(hookLogger, ar.widmgr), 
newAllocDirHook(hookLogger, ar.allocDir), + newMaxRunDurationHook(hookLogger, alloc, ar.EnforceMaxRunDurationTimeout), newConsulHook(consulHookConfig{ alloc: ar.alloc, allocdir: ar.allocDir, diff --git a/client/allocrunner/max_run_duration_hook.go b/client/allocrunner/max_run_duration_hook.go new file mode 100644 index 00000000000..bf71e20d9f8 --- /dev/null +++ b/client/allocrunner/max_run_duration_hook.go @@ -0,0 +1,159 @@ +// Copyright IBM Corp. 2015, 2025 +// SPDX-License-Identifier: BUSL-1.1 + +package allocrunner + +import ( + "sync" + "time" + + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/client/allocrunner/interfaces" + "github.com/hashicorp/nomad/client/taskenv" + "github.com/hashicorp/nomad/nomad/structs" +) + +var ( + _ interfaces.RunnerPrerunHook = (*maxRunDurationHook)(nil) + _ interfaces.RunnerPostrunHook = (*maxRunDurationHook)(nil) + _ interfaces.RunnerUpdateHook = (*maxRunDurationHook)(nil) + _ interfaces.ShutdownHook = (*maxRunDurationHook)(nil) +) + +type maxRunDurationHook struct { + mu sync.Mutex + + alloc *structs.Allocation + + timer *time.Timer + deadline time.Time + maxRunDuration time.Duration + hasMaxRunDuration bool + + onTimeout func(time.Time) + logger hclog.Logger +} + +func newMaxRunDurationHook( + logger hclog.Logger, + alloc *structs.Allocation, + onTimeout func(time.Time), +) interfaces.RunnerHook { + return &maxRunDurationHook{ + alloc: alloc, + onTimeout: onTimeout, + logger: logger.Named("max_run_duration"), + } +} + +func (h *maxRunDurationHook) Name() string { + return "max_run_duration" +} + +func (h *maxRunDurationHook) Prerun(*taskenv.TaskEnv) error { + h.mu.Lock() + defer h.mu.Unlock() + + h.resetTimer() + return nil +} + +func (h *maxRunDurationHook) Update(req *interfaces.RunnerUpdateRequest) error { + h.mu.Lock() + defer h.mu.Unlock() + + h.alloc = req.Alloc + h.resetTimer() + return nil +} + +func (h *maxRunDurationHook) Postrun() error { + h.mu.Lock() + defer h.mu.Unlock() + + h.stopTimer() + return nil +} + +func (h *maxRunDurationHook) Shutdown() { + h.mu.Lock() + defer h.mu.Unlock() + + h.stopTimer() +} + +func (h *maxRunDurationHook) resetTimer() { + maxRunDuration, ok := h.currentMaxRunDuration() + if !ok { + h.stopTimer() + h.deadline = time.Time{} + h.maxRunDuration = 0 + h.hasMaxRunDuration = false + return + } + + if h.hasMaxRunDuration && h.maxRunDuration == maxRunDuration && !h.deadline.IsZero() { + return + } + + h.stopTimer() + + h.maxRunDuration = maxRunDuration + h.hasMaxRunDuration = true + h.deadline = time.Now().Add(maxRunDuration) + + deadline := h.deadline + remaining := time.Until(deadline) + + if remaining <= 0 { + h.logger.Debug("allocation exceeded max_run_duration, enforcing immediately", "deadline", deadline) + go h.onTimeout(deadline) + return + } + + timer := time.NewTimer(remaining) + h.timer = timer + + h.logger.Trace("armed max_run_duration timer", "deadline", deadline, "remaining", remaining) + + go func(t *time.Timer, deadline time.Time) { + <-t.C + + h.mu.Lock() + if h.timer != t { + h.mu.Unlock() + return + } + h.timer = nil + h.mu.Unlock() + + h.onTimeout(deadline) + }(timer, deadline) +} + +func (h *maxRunDurationHook) stopTimer() { + if h.timer == nil { + return + } + + if !h.timer.Stop() { + select { + case <-h.timer.C: + default: + } + } + + h.timer = nil +} + +func (h *maxRunDurationHook) currentMaxRunDuration() (time.Duration, bool) { + if h.alloc.TerminalStatus() { + return 0, false + } + + if h.alloc.DesiredStatus != "" && h.alloc.DesiredStatus != structs.AllocDesiredStatusRun { + 
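+ // A non-empty, non-run desired status means the server already wants this
+ // alloc stopped or evicted, so no client-side timeout timer is needed.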
return 0, false + } + + return h.alloc.MaxRunDuration() +} diff --git a/client/allocrunner/max_run_duration_hook_test.go b/client/allocrunner/max_run_duration_hook_test.go new file mode 100644 index 00000000000..0002b3ccd3c --- /dev/null +++ b/client/allocrunner/max_run_duration_hook_test.go @@ -0,0 +1,234 @@ +// Copyright IBM Corp. 2015, 2025 +// SPDX-License-Identifier: BUSL-1.1 + +package allocrunner + +import ( + "testing" + "time" + + log "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/client/allocrunner/interfaces" + "github.com/hashicorp/nomad/client/taskenv" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" +) + +func newTestMaxRunDurationHookCallback() (func(time.Time), chan time.Time) { + deadlines := make(chan time.Time, 8) + return func(deadline time.Time) { + deadlines <- deadline + }, deadlines +} + +func newTestMaxRunDurationHook( + alloc *structs.Allocation, + onTimeout func(time.Time), +) *maxRunDurationHook { + hook := newMaxRunDurationHook(log.NewNullLogger(), alloc, onTimeout) + + h, ok := hook.(*maxRunDurationHook) + if !ok { + panic("newMaxRunDurationHook returned unexpected hook type") + } + + return h +} + +func TestMaxRunDurationHook_Prerun_ArmsTimerImmediately(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + maxRunDuration := 50 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + onTimeout, deadlines := newTestMaxRunDurationHookCallback() + hook := newTestMaxRunDurationHook(alloc, onTimeout) + + err := hook.Prerun((*taskenv.TaskEnv)(nil)) + must.NoError(t, err) + + select { + case deadline := <-deadlines: + must.False(t, deadline.IsZero()) + case <-time.After(500 * time.Millisecond): + t.Fatal("timed out waiting for max_run_duration deadline") + } +} + +func TestMaxRunDurationHook_Update_DoesNotExtendDeadlineOnUnrelatedAllocChange(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + maxRunDuration := 50 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + onTimeout, deadlines := newTestMaxRunDurationHookCallback() + hook := newTestMaxRunDurationHook(alloc, onTimeout) + + err := hook.Prerun((*taskenv.TaskEnv)(nil)) + must.NoError(t, err) + + updated := alloc.Copy() + updated.ClientDescription = "unrelated alloc update" + + time.Sleep(20 * time.Millisecond) + + err = hook.Update(&interfaces.RunnerUpdateRequest{Alloc: updated}) + must.NoError(t, err) + + select { + case <-deadlines: + case <-time.After(200 * time.Millisecond): + t.Fatal("timed out waiting for original max_run_duration deadline after unrelated update") + } +} + +func TestMaxRunDurationHook_Update_RearmsOnDurationChange(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + initial := 200 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &initial + + onTimeout, deadlines := newTestMaxRunDurationHookCallback() + hook := newTestMaxRunDurationHook(alloc, onTimeout) + + err := hook.Prerun((*taskenv.TaskEnv)(nil)) + must.NoError(t, err) + + updated := alloc.Copy() + latest := 40 * time.Millisecond + updated.Job.TaskGroups[0].MaxRunDuration = &latest + + err = hook.Update(&interfaces.RunnerUpdateRequest{Alloc: updated}) + must.NoError(t, err) + + select { + case <-deadlines: + case <-time.After(500 * time.Millisecond): + t.Fatal("timed out waiting for updated 
max_run_duration deadline") + } +} + +func TestMaxRunDurationHook_DoesNotFireWhenAllocNotEligible(t *testing.T) { + ci.Parallel(t) + + cases := []struct { + name string + alloc *structs.Allocation + }{ + { + name: "non-batch job", + alloc: func() *structs.Allocation { + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + alloc.Job.Type = structs.JobTypeService + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + return alloc + }(), + }, + { + name: "desired status not run", + alloc: func() *structs.Allocation { + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + alloc.DesiredStatus = structs.AllocDesiredStatusStop + return alloc + }(), + }, + { + name: "terminal alloc", + alloc: func() *structs.Allocation { + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + alloc.ClientStatus = structs.AllocClientStatusComplete + return alloc + }(), + }, + { + name: "no max run duration", + alloc: func() *structs.Allocation { + alloc := mock.BatchAlloc() + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = nil + return alloc + }(), + }, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + onTimeout, deadlines := newTestMaxRunDurationHookCallback() + hook := newTestMaxRunDurationHook(tc.alloc, onTimeout) + + err := hook.Prerun((*taskenv.TaskEnv)(nil)) + must.NoError(t, err) + + select { + case deadline := <-deadlines: + t.Fatalf("unexpected deadline fired: %v", deadline) + case <-time.After(100 * time.Millisecond): + } + }) + } +} + +func TestMaxRunDurationHook_Postrun_CancelsTimer(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + maxRunDuration := 150 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + onTimeout, deadlines := newTestMaxRunDurationHookCallback() + hook := newTestMaxRunDurationHook(alloc, onTimeout) + + err := hook.Prerun((*taskenv.TaskEnv)(nil)) + must.NoError(t, err) + + err = hook.Postrun() + must.NoError(t, err) + + select { + case deadline := <-deadlines: + t.Fatalf("unexpected deadline fired after postrun: %v", deadline) + case <-time.After(250 * time.Millisecond): + } +} + +func TestMaxRunDurationHook_Shutdown_CancelsTimer(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + maxRunDuration := 150 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + onTimeout, deadlines := newTestMaxRunDurationHookCallback() + hook := newTestMaxRunDurationHook(alloc, onTimeout) + + err := hook.Prerun((*taskenv.TaskEnv)(nil)) + must.NoError(t, err) + + hook.Shutdown() + + select { + case deadline := <-deadlines: + t.Fatalf("unexpected deadline fired after shutdown: %v", deadline) + case <-time.After(250 * time.Millisecond): + } +} diff --git a/client/allocrunner/tasklifecycle/max_run_duration.go b/client/allocrunner/tasklifecycle/max_run_duration.go deleted file mode 100644 index 40d2cd44a86..00000000000 --- a/client/allocrunner/tasklifecycle/max_run_duration.go +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright IBM Corp. 
2015, 2025 -// SPDX-License-Identifier: BUSL-1.1 - -package tasklifecycle - -import ( - "sync" - "time" - - "github.com/hashicorp/nomad/nomad/structs" -) - -type MaxRunDurationTimeoutFn func(time.Time) - -// MaxRunDuration coordinates local enforcement of task group -// `max_run_duration`. -// -// This component is alloc-scoped, not task-scoped. It observes the current -// authoritative allocation plus task state snapshots and arms a timer once the -// allocation is fully running. The timer is canceled and recomputed whenever -// alloc or task state changes are observed. -type MaxRunDuration struct { - mu sync.Mutex - - alloc *structs.Allocation - deadline time.Time - timer *time.Timer - onTimeout MaxRunDurationTimeoutFn -} - -func NewMaxRunDuration(onTimeout MaxRunDurationTimeoutFn) *MaxRunDuration { - return &MaxRunDuration{onTimeout: onTimeout} -} - -// SetAlloc updates the authoritative allocation used for -// `max_run_duration` evaluation. -func (m *MaxRunDuration) SetAlloc(alloc *structs.Allocation) { - m.mu.Lock() - defer m.mu.Unlock() - - m.alloc = alloc -} - -// TaskStateUpdated recomputes the `max_run_duration` timer from the latest -// alloc and task state snapshot. -func (m *MaxRunDuration) TaskStateUpdated(taskStates map[string]*structs.TaskState) { - m.mu.Lock() - defer m.mu.Unlock() - - m.setDeadline(taskStates) - m.resetTimerLocked() -} - -// Stop cancels any active timer. -func (m *MaxRunDuration) Stop() { - m.mu.Lock() - defer m.mu.Unlock() - - m.stopTimerLocked() -} - -func (m *MaxRunDuration) setDeadline(taskStates map[string]*structs.TaskState) { - if m.alloc == nil { - m.deadline = time.Time{} - return - } - - // Only running allocations with a valid max_run_duration and a fully - // running task set should establish a deadline. - if m.alloc.TerminalStatus() { - m.deadline = time.Time{} - return - } - - // DesiredStatus defaults to the zero value for normal running allocations. - // Treat an empty status the same as "run" and only clear the deadline when - // the alloc is explicitly marked with a non-run desired status. - if m.alloc.DesiredStatus != "" && m.alloc.DesiredStatus != structs.AllocDesiredStatusRun { - m.deadline = time.Time{} - return - } - - maxRunDuration, ok := m.alloc.MaxRunDuration() - if !ok { - m.deadline = time.Time{} - return - } - - if !m.deadline.IsZero() { - return - } - - startedAt, ok := structs.FullyRunningSince(taskStates) - if !ok { - return - } - - m.deadline = startedAt.Add(maxRunDuration) -} - -func (m *MaxRunDuration) resetTimerLocked() { - m.stopTimerLocked() - - if m.deadline.IsZero() { - return - } - - deadline := m.deadline - remaining := time.Until(deadline) - if remaining <= 0 { - go m.onTimeout(deadline) - return - } - - timer := time.NewTimer(remaining) - m.timer = timer - - go func(t *time.Timer, deadline time.Time) { - <-t.C - - m.mu.Lock() - if m.timer != t { - m.mu.Unlock() - return - } - m.timer = nil - m.mu.Unlock() - - m.onTimeout(deadline) - }(timer, deadline) -} - -func (m *MaxRunDuration) stopTimerLocked() { - if m.timer == nil { - return - } - - if !m.timer.Stop() { - select { - case <-m.timer.C: - default: - } - } - m.timer = nil -} diff --git a/client/allocrunner/tasklifecycle/max_run_duration_test.go b/client/allocrunner/tasklifecycle/max_run_duration_test.go deleted file mode 100644 index eea7f38326d..00000000000 --- a/client/allocrunner/tasklifecycle/max_run_duration_test.go +++ /dev/null @@ -1,275 +0,0 @@ -// Copyright IBM Corp. 
2015, 2025 -// SPDX-License-Identifier: BUSL-1.1 - -package tasklifecycle - -import ( - "testing" - "time" - - "github.com/hashicorp/nomad/nomad/mock" - "github.com/hashicorp/nomad/nomad/structs" - "github.com/shoenig/test/must" -) - -func newTestMaxRunDurationCallback() (func(time.Time), chan time.Time) { - deadlines := make(chan time.Time, 8) - return func(deadline time.Time) { - deadlines <- deadline - }, deadlines -} - -func TestMaxRunDuration_FullyRunningSince(t *testing.T) { - t.Parallel() - - now := time.Now().UTC() - earlier := now.Add(-2 * time.Second) - later := now.Add(-1 * time.Second) - - got, ok := structs.FullyRunningSince(map[string]*structs.TaskState{ - "a": { - State: structs.TaskStateRunning, - StartedAt: earlier, - }, - "b": { - State: structs.TaskStateRunning, - StartedAt: later, - }, - }) - - must.True(t, ok) - must.Eq(t, later, got) -} - -func TestMaxRunDuration_FullyRunningSince_FalseWhenNotFullyRunning(t *testing.T) { - t.Parallel() - - _, ok := structs.FullyRunningSince(map[string]*structs.TaskState{ - "a": { - State: structs.TaskStateRunning, - StartedAt: time.Now().Add(-time.Second).UTC(), - }, - "b": { - State: structs.TaskStatePending, - }, - }) - - must.False(t, ok) -} - -func TestMaxRunDuration_TaskStateUpdated_ArmsTimerAndFires(t *testing.T) { - t.Parallel() - - alloc := mock.BatchAlloc() - maxRunDuration := 50 * time.Millisecond - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - - onTimeout, deadlines := newTestMaxRunDurationCallback() - m := NewMaxRunDuration(onTimeout) - m.SetAlloc(alloc) - - startedAt := time.Now().UTC() - m.TaskStateUpdated(map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: startedAt, - }, - }) - - select { - case deadline := <-deadlines: - must.Eq(t, startedAt.Add(maxRunDuration), deadline) - case <-time.After(500 * time.Millisecond): - t.Fatal("timed out waiting for max_run_duration deadline") - } -} - -func TestMaxRunDuration_TaskStateUpdated_PreservesDeadlineWhenOneTaskFinishesEarly(t *testing.T) { - t.Parallel() - - alloc := mock.BatchAlloc() - maxRunDuration := 50 * time.Millisecond - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - - task2 := alloc.Job.TaskGroups[0].Tasks[0].Copy() - task2.Name = "web2" - alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2) - - onTimeout, deadlines := newTestMaxRunDurationCallback() - m := NewMaxRunDuration(onTimeout) - m.SetAlloc(alloc) - - startedAt := time.Now().UTC() - m.TaskStateUpdated(map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: startedAt, - }, - "web2": { - State: structs.TaskStateRunning, - StartedAt: startedAt, - }, - }) - - m.TaskStateUpdated(map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: startedAt, - }, - "web2": { - State: structs.TaskStateDead, - StartedAt: startedAt, - FinishedAt: startedAt.Add(2 * time.Millisecond), - }, - }) - - select { - case deadline := <-deadlines: - must.Eq(t, startedAt.Add(maxRunDuration), deadline) - case <-time.After(500 * time.Millisecond): - t.Fatal("timed out waiting for max_run_duration deadline after one task finished early") - } -} - -func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenAllocNotEligible(t *testing.T) { - t.Parallel() - - cases := []struct { - name string - alloc *structs.Allocation - }{ - { - name: "not fully running", - alloc: func() *structs.Allocation { - alloc := 
mock.BatchAlloc() - maxRunDuration := 25 * time.Millisecond - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - return alloc - }(), - }, - { - name: "non-batch job", - alloc: func() *structs.Allocation { - alloc := mock.BatchAlloc() - maxRunDuration := 25 * time.Millisecond - alloc.Job.Type = structs.JobTypeService - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - return alloc - }(), - }, - { - name: "desired status not run", - alloc: func() *structs.Allocation { - alloc := mock.BatchAlloc() - maxRunDuration := 25 * time.Millisecond - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - alloc.DesiredStatus = structs.AllocDesiredStatusStop - return alloc - }(), - }, - { - name: "terminal alloc", - alloc: func() *structs.Allocation { - alloc := mock.BatchAlloc() - maxRunDuration := 25 * time.Millisecond - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - alloc.ClientStatus = structs.AllocClientStatusComplete - return alloc - }(), - }, - } - - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - - onTimeout, deadlines := newTestMaxRunDurationCallback() - m := NewMaxRunDuration(onTimeout) - m.SetAlloc(tc.alloc) - - taskStates := map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: time.Now().UTC(), - }, - } - if tc.name == "not fully running" { - taskStates["web"] = &structs.TaskState{State: structs.TaskStatePending} - } - - m.TaskStateUpdated(taskStates) - - select { - case deadline := <-deadlines: - t.Fatalf("unexpected deadline fired: %v", deadline) - case <-time.After(100 * time.Millisecond): - } - }) - } -} - -func TestMaxRunDuration_SetAlloc_UsesLatestAllocConfig(t *testing.T) { - t.Parallel() - - alloc := mock.BatchAlloc() - initial := 200 * time.Millisecond - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = &initial - - onTimeout, deadlines := newTestMaxRunDurationCallback() - m := NewMaxRunDuration(onTimeout) - m.SetAlloc(alloc) - - updated := alloc.Copy() - latest := 40 * time.Millisecond - updated.Job.TaskGroups[0].MaxRunDuration = &latest - m.SetAlloc(updated) - - startedAt := time.Now().UTC() - m.TaskStateUpdated(map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: startedAt, - }, - }) - - select { - case deadline := <-deadlines: - must.Eq(t, startedAt.Add(latest), deadline) - case <-time.After(500 * time.Millisecond): - t.Fatal("timed out waiting for updated max_run_duration deadline") - } -} - -func TestMaxRunDuration_Stop_CancelsTimer(t *testing.T) { - t.Parallel() - - alloc := mock.BatchAlloc() - maxRunDuration := 150 * time.Millisecond - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - - onTimeout, deadlines := newTestMaxRunDurationCallback() - m := NewMaxRunDuration(onTimeout) - m.SetAlloc(alloc) - - m.TaskStateUpdated(map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: time.Now().UTC(), - }, - }) - - m.Stop() - - select { - case deadline := <-deadlines: - t.Fatalf("unexpected deadline fired after stop: %v", deadline) - case <-time.After(250 * time.Millisecond): - } -} From 3b6f4df95ab812f8785a1161230091f86f5266b6 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Fri, 17 Apr 2026 15:46:26 +0200 Subject: [PATCH 30/37] allow for updates to 
max_run_duration during job run --- client/allocrunner/alloc_runner_test.go | 74 ++++++++++++++++++ client/allocrunner/max_run_duration_hook.go | 25 ++++-- scheduler/generic_sched_test.go | 85 +++++++++++++++++++++ scheduler/util.go | 16 ++++ 4 files changed, 192 insertions(+), 8 deletions(-) diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index 01dce5c61d3..ee965ab2853 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -2720,6 +2720,80 @@ func TestAllocRunner_MaxRunDuration_StopsExpiredAlloc(t *testing.T) { }) } +func TestAllocRunner_MaxRunDuration_UpdateExtendsRunningAlloc(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + task := alloc.Job.TaskGroups[0].Tasks[0] + task.Driver = "mock_driver" + task.Config = map[string]interface{}{ + "run_for": "10s", + } + task.KillTimeout = 10 * time.Millisecond + + initialMaxRunDuration := 75 * time.Millisecond + alloc.Job.TaskGroups[0].MaxRunDuration = &initialMaxRunDuration + + conf, cleanup := testAllocRunnerConfig(t, alloc) + defer cleanup() + + arIface, err := NewAllocRunner(conf) + must.NoError(t, err) + ar := arIface.(*allocRunner) + + go ar.Run() + defer destroy(ar) + + testutil.WaitForResult(func() (bool, error) { + state := ar.AllocState() + if state == nil { + return false, fmt.Errorf("no alloc state") + } + if state.ClientStatus != structs.AllocClientStatusRunning { + return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusRunning) + } + return true, nil + }, func(err error) { + state := ar.AllocState() + t.Fatalf("timed out waiting for alloc runner to start: %v; state=%#v", err, state) + }) + + time.Sleep(40 * time.Millisecond) + + updatedAlloc := ar.Alloc().Copy() + updatedAlloc.AllocModifyIndex++ + updatedMaxRunDuration := 200 * time.Millisecond + updatedAlloc.Job.TaskGroups[0].MaxRunDuration = &updatedMaxRunDuration + ar.Update(updatedAlloc) + + time.Sleep(60 * time.Millisecond) + + state := ar.AllocState() + must.NotNil(t, state) + must.False(t, state.MaxRunDurationExceeded) + must.Eq(t, structs.AllocClientStatusRunning, state.ClientStatus) + + testutil.WaitForResult(func() (bool, error) { + state := ar.AllocState() + if state == nil { + return false, fmt.Errorf("no alloc state") + } + if state.ClientStatus != structs.AllocClientStatusComplete { + return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusComplete) + } + if state.ClientDescription != structs.AllocTimeoutReasonMaxRunDuration { + return false, fmt.Errorf("got description %q; want %q", state.ClientDescription, structs.AllocTimeoutReasonMaxRunDuration) + } + if !state.MaxRunDurationExceeded { + return false, fmt.Errorf("max run duration was not marked exceeded") + } + return true, nil + }, func(err error) { + state := ar.AllocState() + t.Fatalf("timed out waiting for alloc runner max_run_duration enforcement after update: %v; state=%#v", err, state) + }) +} + func TestAllocRunner_setHookStatsHandler(t *testing.T) { ci.Parallel(t) diff --git a/client/allocrunner/max_run_duration_hook.go b/client/allocrunner/max_run_duration_hook.go index bf71e20d9f8..827231b92f4 100644 --- a/client/allocrunner/max_run_duration_hook.go +++ b/client/allocrunner/max_run_duration_hook.go @@ -83,7 +83,7 @@ func (h *maxRunDurationHook) Shutdown() { } func (h *maxRunDurationHook) resetTimer() { - maxRunDuration, ok := h.currentMaxRunDuration() + deadline, maxRunDuration, ok := h.currentDeadline() if !ok { 
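// No enforceable deadline: drop any armed timer and clear the cached
// duration so a later alloc update can re-arm from scratch.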
h.stopTimer() h.deadline = time.Time{} @@ -92,7 +92,7 @@ func (h *maxRunDurationHook) resetTimer() { return } - if h.hasMaxRunDuration && h.maxRunDuration == maxRunDuration && !h.deadline.IsZero() { + if h.hasMaxRunDuration && h.maxRunDuration == maxRunDuration && h.deadline.Equal(deadline) { return } @@ -100,9 +100,8 @@ func (h *maxRunDurationHook) resetTimer() { h.maxRunDuration = maxRunDuration h.hasMaxRunDuration = true - h.deadline = time.Now().Add(maxRunDuration) + h.deadline = deadline - deadline := h.deadline remaining := time.Until(deadline) if remaining <= 0 { @@ -146,14 +145,24 @@ func (h *maxRunDurationHook) stopTimer() { h.timer = nil } -func (h *maxRunDurationHook) currentMaxRunDuration() (time.Duration, bool) { +func (h *maxRunDurationHook) currentDeadline() (time.Time, time.Duration, bool) { if h.alloc.TerminalStatus() { - return 0, false + return time.Time{}, 0, false } if h.alloc.DesiredStatus != "" && h.alloc.DesiredStatus != structs.AllocDesiredStatusRun { - return 0, false + return time.Time{}, 0, false } - return h.alloc.MaxRunDuration() + maxRunDuration, ok := h.alloc.MaxRunDuration() + if !ok { + return time.Time{}, 0, false + } + + deadline, ok := h.alloc.MaxRunDurationDeadline() + if !ok { + return time.Time{}, 0, false + } + + return deadline, maxRunDuration, true } diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index ff008e91258..3b4c6211e46 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -3380,6 +3380,91 @@ func TestServiceSched_JobModify_InPlace08(t *testing.T) { must.NotNil(t, newAlloc.AllocatedResources) } +func TestServiceSched_JobModify_MaxRunDuration_InPlace(t *testing.T) { + ci.Parallel(t) + + h := tests.NewHarness(t) + + // Create a node + node := mock.Node() + must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)) + + // Create a batch job with a running allocation + job := mock.BatchJob() + job.TaskGroups[0].Count = 1 + initialMaxRunDuration := 1 * time.Hour + job.TaskGroups[0].MaxRunDuration = &initialMaxRunDuration + must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job)) + + alloc := mock.BatchAlloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = node.ID + alloc.Name = fmt.Sprintf("%s.%s[%d]", job.ID, job.TaskGroups[0].Name, 0) + must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{alloc})) + + // Update only max_run_duration + job2 := job.Copy() + updatedMaxRunDuration := 4 * time.Hour + job2.TaskGroups[0].MaxRunDuration = &updatedMaxRunDuration + must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2)) + + // Create a mock evaluation + eval := &structs.Evaluation{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Priority: 50, + TriggeredBy: structs.EvalTriggerJobRegister, + JobID: job.ID, + Status: structs.EvalStatusPending, + } + must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval})) + + // Process the evaluation + err := h.Process(NewBatchScheduler, eval) + must.NoError(t, err) + + // Ensure a single plan + must.SliceLen(t, 1, h.Plans) + plan := h.Plans[0] + + // Ensure the plan did not evict any allocs + var update []*structs.Allocation + for _, updateList := range plan.NodeUpdate { + update = append(update, updateList...) 
+ } + must.SliceLen(t, 0, update) + + // Ensure the plan updated the existing alloc in place + var planned []*structs.Allocation + for _, allocList := range plan.NodeAllocation { + planned = append(planned, allocList...) + } + must.SliceLen(t, 1, planned) + must.Nil(t, planned[0].Job) + must.NotNil(t, plan.Annotations) + must.NotNil(t, plan.Annotations.DesiredTGUpdates) + must.NotNil(t, plan.Annotations.DesiredTGUpdates[job.TaskGroups[0].Name]) + must.Eq(t, uint64(1), plan.Annotations.DesiredTGUpdates[job.TaskGroups[0].Name].InPlaceUpdate) + must.Eq(t, uint64(0), plan.Annotations.DesiredTGUpdates[job.TaskGroups[0].Name].DestructiveUpdate) + must.Eq(t, uint64(0), plan.Annotations.DesiredTGUpdates[job.TaskGroups[0].Name].Place) + must.Eq(t, uint64(0), plan.Annotations.DesiredTGUpdates[job.TaskGroups[0].Name].Stop) + + // Lookup the allocations by JobID + ws := memdb.NewWatchSet() + out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) + must.NoError(t, err) + must.Len(t, 1, out) + + updatedAlloc := out[0] + maxRunDuration, ok := updatedAlloc.MaxRunDuration() + must.True(t, ok) + must.Eq(t, updatedMaxRunDuration, maxRunDuration) + must.Greater(t, updatedAlloc.AllocModifyIndex, alloc.AllocModifyIndex) + + h.AssertEvalStatus(t, structs.EvalStatusComplete) +} + func TestServiceSched_JobModify_DistinctProperty(t *testing.T) { ci.Parallel(t) diff --git a/scheduler/util.go b/scheduler/util.go index 0af7dd72de9..b8b972ede73 100644 --- a/scheduler/util.go +++ b/scheduler/util.go @@ -844,6 +844,22 @@ func genericAllocUpdateFn(ctx feasible.Context, stack feasible.Stack, evalID str return false, true, nil } + // max_run_duration-only updates. This field does not affect placement + // or allocated resources, so we can update the alloc in place without + // re-running feasibility. 
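+ // When the old and new values differ, the stripped copy below is returned
+ // as an in-place update instead of falling through to placement checks.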
+ if existingTG := existing.Job.LookupTaskGroup(newTG.Name); existingTG != nil { + oldMax, oldOK := existing.MaxRunDuration() + newAlloc := existing.Copy() + newAlloc.EvalID = evalID + newAlloc.Job = nil + newAlloc.Resources = nil + + newMax, newOK := newAlloc.MaxRunDuration() + if oldOK != newOK || oldMax != newMax { + return false, false, newAlloc + } + } + // Set the existing node as the base set stack.SetNodes([]*structs.Node{node}) From 53f621064971686ea00d1a1fd260f2167c2e9616 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Fri, 17 Apr 2026 16:03:58 +0200 Subject: [PATCH 31/37] metrics --- client/allocrunner/alloc_runner_hooks.go | 2 +- client/allocrunner/max_run_duration_hook.go | 31 +++++-- .../allocrunner/max_run_duration_hook_test.go | 82 +++++++++++++++++-- 3 files changed, 102 insertions(+), 13 deletions(-) diff --git a/client/allocrunner/alloc_runner_hooks.go b/client/allocrunner/alloc_runner_hooks.go index 51a14368599..f17d9c03c43 100644 --- a/client/allocrunner/alloc_runner_hooks.go +++ b/client/allocrunner/alloc_runner_hooks.go @@ -111,7 +111,7 @@ func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error { ar.runnerHooks = []interfaces.RunnerHook{ newIdentityHook(hookLogger, ar.widmgr), newAllocDirHook(hookLogger, ar.allocDir), - newMaxRunDurationHook(hookLogger, alloc, ar.EnforceMaxRunDurationTimeout), + newMaxRunDurationHook(hookLogger, alloc, ar.clientBaseLabels, ar.EnforceMaxRunDurationTimeout), newConsulHook(consulHookConfig{ alloc: ar.alloc, allocdir: ar.allocDir, diff --git a/client/allocrunner/max_run_duration_hook.go b/client/allocrunner/max_run_duration_hook.go index 827231b92f4..2726dbfea4e 100644 --- a/client/allocrunner/max_run_duration_hook.go +++ b/client/allocrunner/max_run_duration_hook.go @@ -8,6 +8,7 @@ import ( "time" "github.com/hashicorp/go-hclog" + metrics "github.com/hashicorp/go-metrics/compat" "github.com/hashicorp/nomad/client/allocrunner/interfaces" "github.com/hashicorp/nomad/client/taskenv" "github.com/hashicorp/nomad/nomad/structs" @@ -30,19 +31,22 @@ type maxRunDurationHook struct { maxRunDuration time.Duration hasMaxRunDuration bool - onTimeout func(time.Time) - logger hclog.Logger + onTimeout func(time.Time) + logger hclog.Logger + baseLabels []metrics.Label } func newMaxRunDurationHook( logger hclog.Logger, alloc *structs.Allocation, + baseLabels []metrics.Label, onTimeout func(time.Time), ) interfaces.RunnerHook { return &maxRunDurationHook{ - alloc: alloc, - onTimeout: onTimeout, - logger: logger.Named("max_run_duration"), + alloc: alloc, + onTimeout: onTimeout, + logger: logger.Named("max_run_duration"), + baseLabels: baseLabels, } } @@ -101,6 +105,7 @@ func (h *maxRunDurationHook) resetTimer() { h.maxRunDuration = maxRunDuration h.hasMaxRunDuration = true h.deadline = deadline + h.emitMetrics(maxRunDuration, deadline) remaining := time.Until(deadline) @@ -145,6 +150,22 @@ func (h *maxRunDurationHook) stopTimer() { h.timer = nil } +func (h *maxRunDurationHook) emitMetrics(maxRunDuration time.Duration, deadline time.Time) { + labels := h.baseLabels + labels = append(labels, metrics.Label{Name: "task_group", Value: h.alloc.TaskGroup}) + + metrics.SetGaugeWithLabels( + []string{"client", "allocs", "max_run_duration", "configured_seconds"}, + float32(maxRunDuration.Seconds()), + labels, + ) + metrics.SetGaugeWithLabels( + []string{"client", "allocs", "max_run_duration", "remaining_seconds"}, + float32(time.Until(deadline).Seconds()), + labels, + ) +} + func (h 
*maxRunDurationHook) currentDeadline() (time.Time, time.Duration, bool) { if h.alloc.TerminalStatus() { return time.Time{}, 0, false diff --git a/client/allocrunner/max_run_duration_hook_test.go b/client/allocrunner/max_run_duration_hook_test.go index 0002b3ccd3c..57606255224 100644 --- a/client/allocrunner/max_run_duration_hook_test.go +++ b/client/allocrunner/max_run_duration_hook_test.go @@ -8,6 +8,7 @@ import ( "time" log "github.com/hashicorp/go-hclog" + metrics "github.com/hashicorp/go-metrics/compat" "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/client/allocrunner/interfaces" "github.com/hashicorp/nomad/client/taskenv" @@ -25,9 +26,10 @@ func newTestMaxRunDurationHookCallback() (func(time.Time), chan time.Time) { func newTestMaxRunDurationHook( alloc *structs.Allocation, + baseLabels []metrics.Label, onTimeout func(time.Time), ) *maxRunDurationHook { - hook := newMaxRunDurationHook(log.NewNullLogger(), alloc, onTimeout) + hook := newMaxRunDurationHook(log.NewNullLogger(), alloc, baseLabels, onTimeout) h, ok := hook.(*maxRunDurationHook) if !ok { @@ -46,7 +48,7 @@ func TestMaxRunDurationHook_Prerun_ArmsTimerImmediately(t *testing.T) { alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration onTimeout, deadlines := newTestMaxRunDurationHookCallback() - hook := newTestMaxRunDurationHook(alloc, onTimeout) + hook := newTestMaxRunDurationHook(alloc, nil, onTimeout) err := hook.Prerun((*taskenv.TaskEnv)(nil)) must.NoError(t, err) @@ -68,7 +70,7 @@ func TestMaxRunDurationHook_Update_DoesNotExtendDeadlineOnUnrelatedAllocChange(t alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration onTimeout, deadlines := newTestMaxRunDurationHookCallback() - hook := newTestMaxRunDurationHook(alloc, onTimeout) + hook := newTestMaxRunDurationHook(alloc, nil, onTimeout) err := hook.Prerun((*taskenv.TaskEnv)(nil)) must.NoError(t, err) @@ -97,7 +99,7 @@ func TestMaxRunDurationHook_Update_RearmsOnDurationChange(t *testing.T) { alloc.Job.TaskGroups[0].MaxRunDuration = &initial onTimeout, deadlines := newTestMaxRunDurationHookCallback() - hook := newTestMaxRunDurationHook(alloc, onTimeout) + hook := newTestMaxRunDurationHook(alloc, nil, onTimeout) err := hook.Prerun((*taskenv.TaskEnv)(nil)) must.NoError(t, err) @@ -172,7 +174,7 @@ func TestMaxRunDurationHook_DoesNotFireWhenAllocNotEligible(t *testing.T) { t.Parallel() onTimeout, deadlines := newTestMaxRunDurationHookCallback() - hook := newTestMaxRunDurationHook(tc.alloc, onTimeout) + hook := newTestMaxRunDurationHook(tc.alloc, nil, onTimeout) err := hook.Prerun((*taskenv.TaskEnv)(nil)) must.NoError(t, err) @@ -195,7 +197,7 @@ func TestMaxRunDurationHook_Postrun_CancelsTimer(t *testing.T) { alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration onTimeout, deadlines := newTestMaxRunDurationHookCallback() - hook := newTestMaxRunDurationHook(alloc, onTimeout) + hook := newTestMaxRunDurationHook(alloc, nil, onTimeout) err := hook.Prerun((*taskenv.TaskEnv)(nil)) must.NoError(t, err) @@ -219,7 +221,7 @@ func TestMaxRunDurationHook_Shutdown_CancelsTimer(t *testing.T) { alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration onTimeout, deadlines := newTestMaxRunDurationHookCallback() - hook := newTestMaxRunDurationHook(alloc, onTimeout) + hook := newTestMaxRunDurationHook(alloc, nil, onTimeout) err := hook.Prerun((*taskenv.TaskEnv)(nil)) must.NoError(t, err) @@ -232,3 +234,69 @@ func TestMaxRunDurationHook_Shutdown_CancelsTimer(t *testing.T) { case <-time.After(250 * time.Millisecond): } } + +func TestMaxRunDurationHook_EmitMetrics(t *testing.T) { + 
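+ // Route gauges into an in-memory sink so the configured and remaining
+ // values can be asserted without a real telemetry backend.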
ci.Parallel(t) + + inMemorySink := metrics.NewInmemSink(10*time.Millisecond, 50*time.Millisecond) + _, err := metrics.NewGlobal(metrics.DefaultConfig("nomad_test"), inMemorySink) + must.NoError(t, err) + + alloc := mock.BatchAlloc() + maxRunDuration := 2 * time.Minute + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + baseLabels := []metrics.Label{ + {Name: "node_id", Value: "node-123"}, + } + + onTimeout, _ := newTestMaxRunDurationHookCallback() + hook := newTestMaxRunDurationHook(alloc, baseLabels, onTimeout) + + err = hook.Prerun((*taskenv.TaskEnv)(nil)) + must.NoError(t, err) + + data := inMemorySink.Data() + + var configuredFound bool + for _, interval := range data { + for _, gauge := range interval.Gauges { + if gauge.Name != "nomad_test.client.allocs.max_run_duration.configured_seconds" { + continue + } + + labels := make(map[string]string, len(gauge.Labels)) + for _, label := range gauge.Labels { + labels[label.Name] = label.Value + } + + if labels["node_id"] == "node-123" && labels["task_group"] == alloc.TaskGroup { + must.Eq(t, float32(maxRunDuration.Seconds()), gauge.Value) + configuredFound = true + } + } + } + must.True(t, configuredFound) + + var remainingFound bool + for _, interval := range data { + for _, gauge := range interval.Gauges { + if gauge.Name != "nomad_test.client.allocs.max_run_duration.remaining_seconds" { + continue + } + + labels := make(map[string]string, len(gauge.Labels)) + for _, label := range gauge.Labels { + labels[label.Name] = label.Value + } + + if labels["node_id"] == "node-123" && labels["task_group"] == alloc.TaskGroup { + must.Positive(t, gauge.Value) + must.LessEq(t, gauge.Value, float32(maxRunDuration.Seconds())) + remainingFound = true + } + } + } + must.True(t, remainingFound) +} From 7841eda5c06917b293f8e2d374336116def3bbe5 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Fri, 17 Apr 2026 16:04:52 +0200 Subject: [PATCH 32/37] better logs --- client/allocrunner/max_run_duration_hook.go | 29 +++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/client/allocrunner/max_run_duration_hook.go b/client/allocrunner/max_run_duration_hook.go index 2726dbfea4e..64357156026 100644 --- a/client/allocrunner/max_run_duration_hook.go +++ b/client/allocrunner/max_run_duration_hook.go @@ -100,6 +100,10 @@ func (h *maxRunDurationHook) resetTimer() { return } + prevMaxRunDuration := h.maxRunDuration + prevDeadline := h.deadline + hadMaxRunDuration := h.hasMaxRunDuration + h.stopTimer() h.maxRunDuration = maxRunDuration @@ -109,8 +113,24 @@ func (h *maxRunDurationHook) resetTimer() { remaining := time.Until(deadline) + if hadMaxRunDuration { + h.logger.Debug("updated max_run_duration", + "task_group", h.alloc.TaskGroup, + "old_configured", prevMaxRunDuration, + "new_configured", maxRunDuration, + "old_deadline", prevDeadline, + "new_deadline", deadline, + "remaining", remaining, + ) + } + if remaining <= 0 { - h.logger.Debug("allocation exceeded max_run_duration, enforcing immediately", "deadline", deadline) + h.logger.Debug("allocation exceeded max_run_duration, enforcing immediately", + "task_group", h.alloc.TaskGroup, + "configured", maxRunDuration, + "remaining", remaining, + "deadline", deadline, + ) go h.onTimeout(deadline) return } @@ -118,7 +138,12 @@ func (h *maxRunDurationHook) resetTimer() { timer := time.NewTimer(remaining) h.timer = timer - h.logger.Trace("armed max_run_duration timer", "deadline", deadline, 
"remaining", remaining) + h.logger.Trace("armed max_run_duration timer", + "task_group", h.alloc.TaskGroup, + "configured", maxRunDuration, + "remaining", remaining, + "deadline", deadline, + ) go func(t *time.Timer, deadline time.Time) { <-t.C From ce45ae80436547284042f78cd993b99696e32b8d Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Fri, 17 Apr 2026 16:17:53 +0200 Subject: [PATCH 33/37] fix clock arming bug --- client/allocrunner/max_run_duration_hook.go | 7 ++--- .../allocrunner/max_run_duration_hook_test.go | 30 +++++++++++++++++++ 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/client/allocrunner/max_run_duration_hook.go b/client/allocrunner/max_run_duration_hook.go index 64357156026..7369e01571e 100644 --- a/client/allocrunner/max_run_duration_hook.go +++ b/client/allocrunner/max_run_duration_hook.go @@ -205,10 +205,9 @@ func (h *maxRunDurationHook) currentDeadline() (time.Time, time.Duration, bool) return time.Time{}, 0, false } - deadline, ok := h.alloc.MaxRunDurationDeadline() - if !ok { - return time.Time{}, 0, false + if deadline, ok := h.alloc.MaxRunDurationDeadline(); ok { + return deadline, maxRunDuration, true } - return deadline, maxRunDuration, true + return time.Now().Add(maxRunDuration), maxRunDuration, true } diff --git a/client/allocrunner/max_run_duration_hook_test.go b/client/allocrunner/max_run_duration_hook_test.go index 57606255224..daa4ae0e298 100644 --- a/client/allocrunner/max_run_duration_hook_test.go +++ b/client/allocrunner/max_run_duration_hook_test.go @@ -235,6 +235,36 @@ func TestMaxRunDurationHook_Shutdown_CancelsTimer(t *testing.T) { } } +func TestMaxRunDurationHook_Prerun_ArmsTimerBeforeTasksAreFullyRunning(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + maxRunDuration := 50 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + // Simulate the initial prerun state where tasks have not yet started and + // therefore no StartedAt timestamps are available to compute a fully-running + // deadline from task state. 
+	for _, ts := range alloc.TaskStates {
+		ts.State = structs.TaskStatePending
+		ts.StartedAt = time.Time{}
+	}
+
+	onTimeout, deadlines := newTestMaxRunDurationHookCallback()
+	hook := newTestMaxRunDurationHook(alloc, nil, onTimeout)
+
+	err := hook.Prerun((*taskenv.TaskEnv)(nil))
+	must.NoError(t, err)
+
+	select {
+	case deadline := <-deadlines:
+		must.False(t, deadline.IsZero())
+	case <-time.After(500 * time.Millisecond):
+		t.Fatal("timed out waiting for max_run_duration deadline before tasks were fully running")
+	}
+}
+
 func TestMaxRunDurationHook_EmitMetrics(t *testing.T) {
 	ci.Parallel(t)

From 638056127e329af7cbb41715b339998651836992 Mon Sep 17 00:00:00 2001
From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com>
Date: Fri, 17 Apr 2026 16:28:06 +0200
Subject: [PATCH 34/37] don't start poststop tasks if the timeout has passed

---
 client/allocrunner/alloc_runner.go      | 17 +++++
 client/allocrunner/alloc_runner_test.go | 89 +++++++++++++++++++++++++
 2 files changed, 106 insertions(+)

diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go
index eaae5d9de0b..e289ea27c65 100644
--- a/client/allocrunner/alloc_runner.go
+++ b/client/allocrunner/alloc_runner.go
@@ -815,6 +815,23 @@ func (ar *allocRunner) killTasks() map[string]*structs.TaskState {
 	}
 	wg.Wait()
 
+	// Skip poststop tasks entirely when max_run_duration has been exceeded so
+	// they are not started after the allocation has timed out.
+	if ar.state.MaxRunDurationExceeded {
+		for name, tr := range ar.tasks {
+			if !tr.IsPoststopTask() {
+				continue
+			}
+
+			state := tr.TaskState()
+			if state != nil {
+				states[name] = state
+			}
+		}
+
+		return states
+	}
+
 	// Perform no action on post stop tasks, but retain their states if they exist. This
 	// commonly happens at the time of alloc GC from the client node.
 	for name, tr := range ar.tasks {

diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go
index ee965ab2853..01401358998 100644
--- a/client/allocrunner/alloc_runner_test.go
+++ b/client/allocrunner/alloc_runner_test.go
@@ -497,6 +497,95 @@ func TestAllocRunner_Lifecycle_Poststop(t *testing.T) {
 
 }
 
+func TestAllocRunner_MaxRunDuration_SkipsPoststopTasks(t *testing.T) {
+	ci.Parallel(t)
+
+	alloc := mock.LifecycleAlloc()
+	tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name]
+
+	alloc.Job.Type = structs.JobTypeBatch
+	maxRunDuration := 50 * time.Millisecond
+	alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration
+
+	mainTask := alloc.Job.TaskGroups[0].Tasks[0]
+	mainTask.Config["run_for"] = "100s"
+	mainTask.KillTimeout = 10 * time.Millisecond
+
+	poststopTask := alloc.Job.TaskGroups[0].Tasks[1]
+	poststopTask.Name = "poststop"
+	poststopTask.Lifecycle.Hook = structs.TaskLifecycleHookPoststop
+	poststopTask.Config["run_for"] = "10s"
+
+	alloc.Job.TaskGroups[0].Tasks = []*structs.Task{mainTask, poststopTask}
+	alloc.AllocatedResources.Tasks = map[string]*structs.AllocatedTaskResources{
+		mainTask.Name:     tr,
+		poststopTask.Name: tr,
+	}
+
+	conf, cleanup := testAllocRunnerConfig(t, alloc)
+	defer cleanup()
+
+	arIface, err := NewAllocRunner(conf)
+	must.NoError(t, err)
+	ar := arIface.(*allocRunner)
+
+	go ar.Run()
+	defer destroy(ar)
+
+	upd := conf.StateUpdater.(*MockStateUpdater)
+
+	testutil.WaitForResult(func() (bool, error) {
+		last := upd.Last()
+		if last == nil {
+			return false, fmt.Errorf("no updates")
+		}
+
+		if last.ClientStatus != structs.AllocClientStatusRunning {
+			return false, fmt.Errorf("expected alloc to be running not %s", last.ClientStatus)
+		}
+
+		if s := last.TaskStates[mainTask.Name].State; s != structs.TaskStateRunning {
+			return false, fmt.Errorf("expected main task to be running not %s", s)
+		}
+
+		if s := last.TaskStates[poststopTask.Name].State; s != structs.TaskStatePending {
+			return false, fmt.Errorf("expected poststop task to be pending not %s", s)
+		}
+
+		return true, nil
+	}, func(err error) {
+		t.Fatalf("error waiting for initial state:\n%v", err)
+	})
+
+	testutil.WaitForResult(func() (bool, error) {
+		last := upd.Last()
+		if last == nil {
+			return false, fmt.Errorf("no updates")
+		}
+
+		if last.ClientStatus != structs.AllocClientStatusComplete {
+			return false, fmt.Errorf("expected alloc to be complete not %s", last.ClientStatus)
+		}
+
+		if last.ClientDescription != structs.AllocTimeoutReasonMaxRunDuration {
+			return false, fmt.Errorf("expected alloc description %q not %q", structs.AllocTimeoutReasonMaxRunDuration, last.ClientDescription)
+		}
+
+		if s := last.TaskStates[mainTask.Name].State; s != structs.TaskStateDead {
+			return false, fmt.Errorf("expected main task to be dead not %s", s)
+		}
+
+		if s := last.TaskStates[poststopTask.Name].State; s != structs.TaskStatePending {
+			return false, fmt.Errorf("expected poststop task to remain pending not %s", s)
+		}
+
+		return true, nil
+	}, func(err error) {
+		last := upd.Last()
+		t.Fatalf("error waiting for max_run_duration state:\n%v\nlast=%#v", err, last)
+	})
+}
+
 func TestAllocRunner_Lifecycle_Restart(t *testing.T) {
 	ci.Parallel(t)

From 879219a6559352ff501437f0a795e81aa9a93766 Mon Sep 17 00:00:00 2001
From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com>
Date: Fri, 17 Apr 2026 16:48:08 +0200
Subject: [PATCH 35/37] TestMaxRunDurationHook_EmitMetrics correction

---
 .../allocrunner/max_run_duration_hook_test.go | 54 ++++++-------------
 1 file changed, 16 insertions(+), 38 deletions(-)

diff --git a/client/allocrunner/max_run_duration_hook_test.go b/client/allocrunner/max_run_duration_hook_test.go
index daa4ae0e298..18930fb9019 100644
--- a/client/allocrunner/max_run_duration_hook_test.go
+++ b/client/allocrunner/max_run_duration_hook_test.go
@@ -287,46 +287,24 @@ func TestMaxRunDurationHook_EmitMetrics(t *testing.T) {
 	err = hook.Prerun((*taskenv.TaskEnv)(nil))
 	must.NoError(t, err)
 
-	data := inMemorySink.Data()
-
-	var configuredFound bool
-	for _, interval := range data {
-		for _, gauge := range interval.Gauges {
-			if gauge.Name != "nomad_test.client.allocs.max_run_duration.configured_seconds" {
-				continue
-			}
-
-			labels := make(map[string]string, len(gauge.Labels))
-			for _, label := range gauge.Labels {
-				labels[label.Name] = label.Value
-			}
-
-			if labels["node_id"] == "node-123" && labels["task_group"] == alloc.TaskGroup {
-				must.Eq(t, float32(maxRunDuration.Seconds()), gauge.Value)
-				configuredFound = true
-			}
-		}
+	var metricKeySuffix string
+	for _, label := range baseLabels {
+		metricKeySuffix += ";" + label.Name + "=" + label.Value
 	}
-	must.True(t, configuredFound)
+	metricKeySuffix += ";task_group=" + alloc.TaskGroup
 
-	var remainingFound bool
-	for _, interval := range data {
-		for _, gauge := range interval.Gauges {
-			if gauge.Name != "nomad_test.client.allocs.max_run_duration.remaining_seconds" {
-				continue
-			}
+	configuredName := "nomad_test.client.allocs.max_run_duration.configured_seconds" + metricKeySuffix
+	remainingName := "nomad_test.client.allocs.max_run_duration.remaining_seconds" + metricKeySuffix
 
-			labels := make(map[string]string, len(gauge.Labels))
-			for _, label := range gauge.Labels {
-				labels[label.Name] = label.Value
-			}
+	data := inMemorySink.Data()
+	must.Len(t, 1, data)
+	must.MapContainsKey(t, data[0].Gauges, configuredName)
+	must.MapContainsKey(t, data[0].Gauges, remainingName)
 
-			if labels["node_id"] == "node-123" && labels["task_group"] == alloc.TaskGroup {
-				must.Positive(t, gauge.Value)
-				must.LessEq(t, gauge.Value, float32(maxRunDuration.Seconds()))
-				remainingFound = true
-			}
-		}
-	}
-	must.True(t, remainingFound)
+	configuredGauge := data[0].Gauges[configuredName]
+	must.Eq(t, float32(maxRunDuration.Seconds()), configuredGauge.Value)
+
+	remainingGauge := data[0].Gauges[remainingName]
+	must.Positive(t, remainingGauge.Value)
+	must.LessEq(t, remainingGauge.Value, float32(maxRunDuration.Seconds()))
 }

From 6a6e6a6d80b50583aa6c6d4221c76917a9a8fa87 Mon Sep 17 00:00:00 2001
From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com>
Date: Fri, 17 Apr 2026 16:57:17 +0200
Subject: [PATCH 36/37] TestServiceSched_JobModify_MaxRunDuration_InPlace fix

---
 scheduler/generic_sched_test.go | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go
index 3b4c6211e46..5c2351b6aff 100644
--- a/scheduler/generic_sched_test.go
+++ b/scheduler/generic_sched_test.go
@@ -3411,12 +3411,13 @@ func TestServiceSched_JobModify_MaxRunDuration_InPlace(t *testing.T) {
 
 	// Create a mock evaluation
 	eval := &structs.Evaluation{
-		Namespace:   structs.DefaultNamespace,
-		ID:          uuid.Generate(),
-		Priority:    50,
-		TriggeredBy: structs.EvalTriggerJobRegister,
-		JobID:       job.ID,
-		Status:      structs.EvalStatusPending,
+		Namespace:    structs.DefaultNamespace,
+		ID:           uuid.Generate(),
+		Priority:     50,
+		TriggeredBy:  structs.EvalTriggerJobRegister,
+		JobID:        job.ID,
+		Status:       structs.EvalStatusPending,
+		AnnotatePlan: true,
 	}
 	must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
@@ -3441,7 +3442,7 @@ func TestServiceSched_JobModify_MaxRunDuration_InPlace(t *testing.T) {
 		planned = append(planned, allocList...)
 	}
 	must.SliceLen(t, 1, planned)
-	must.Nil(t, planned[0].Job)
+	must.Eq(t, job2, planned[0].Job)
 	must.NotNil(t, plan.Annotations)
 	must.NotNil(t, plan.Annotations.DesiredTGUpdates)
 	must.NotNil(t, plan.Annotations.DesiredTGUpdates[job.TaskGroups[0].Name])
@@ -3460,7 +3461,8 @@
 	maxRunDuration, ok := updatedAlloc.MaxRunDuration()
 	must.True(t, ok)
 	must.Eq(t, updatedMaxRunDuration, maxRunDuration)
-	must.Greater(t, updatedAlloc.AllocModifyIndex, alloc.AllocModifyIndex)
+	must.Eq(t, alloc.ID, planned[0].ID)
+	must.Eq(t, updatedAlloc.ID, planned[0].ID)
 
 	h.AssertEvalStatus(t, structs.EvalStatusComplete)
 }

From e34dbc9dbbd292aaeeb5e3020acc74b60d529e03 Mon Sep 17 00:00:00 2001
From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com>
Date: Fri, 17 Apr 2026 17:22:11 +0200
Subject: [PATCH 37/37] yet another fix to TestMaxRunDurationHook_EmitMetrics

---
 .../allocrunner/max_run_duration_hook_test.go | 37 +++++++++++--------
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/client/allocrunner/max_run_duration_hook_test.go b/client/allocrunner/max_run_duration_hook_test.go
index 18930fb9019..e24a2489179 100644
--- a/client/allocrunner/max_run_duration_hook_test.go
+++ b/client/allocrunner/max_run_duration_hook_test.go
@@ -4,6 +4,7 @@
 package allocrunner
 
 import (
+	"strings"
 	"testing"
 	"time"
 
@@ -287,24 +288,28 @@ func TestMaxRunDurationHook_EmitMetrics(t *testing.T) {
 	err = hook.Prerun((*taskenv.TaskEnv)(nil))
 	must.NoError(t, err)
 
-	var metricKeySuffix string
-	for _, label := range baseLabels {
-		metricKeySuffix += ";" + label.Name + "=" + label.Value
-	}
-	metricKeySuffix += ";task_group=" + alloc.TaskGroup
-
-	configuredName := "nomad_test.client.allocs.max_run_duration.configured_seconds" + metricKeySuffix
-	remainingName := "nomad_test.client.allocs.max_run_duration.remaining_seconds" + metricKeySuffix
-
 	data := inMemorySink.Data()
 	must.Len(t, 1, data)
-	must.MapContainsKey(t, data[0].Gauges, configuredName)
-	must.MapContainsKey(t, data[0].Gauges, remainingName)
 
-	configuredGauge := data[0].Gauges[configuredName]
-	must.Eq(t, float32(maxRunDuration.Seconds()), configuredGauge.Value)
+	configuredSuffix := "client.allocs.max_run_duration.configured_seconds;node_id=node-123;task_group=" + alloc.TaskGroup
+	remainingSuffix := "client.allocs.max_run_duration.remaining_seconds;node_id=node-123;task_group=" + alloc.TaskGroup
+
+	var configuredFound bool
+	var remainingFound bool
+
+	for key, gauge := range data[0].Gauges {
+		if strings.HasSuffix(key, configuredSuffix) {
+			must.Eq(t, float32(maxRunDuration.Seconds()), gauge.Value)
+			configuredFound = true
+		}
+
+		if strings.HasSuffix(key, remainingSuffix) {
+			must.Positive(t, gauge.Value)
+			must.True(t, gauge.Value <= float32(maxRunDuration.Seconds()))
+			remainingFound = true
+		}
+	}
 
-	remainingGauge := data[0].Gauges[remainingName]
-	must.Positive(t, remainingGauge.Value)
-	must.LessEq(t, remainingGauge.Value, float32(maxRunDuration.Seconds()))
+	must.True(t, configuredFound)
+	must.True(t, remainingFound)
 }
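
A note on the "fix clock arming bug" change (PATCH 33): the rewritten currentDeadline prefers a deadline derived from the allocation's recorded task start times, but when no task has a StartedAt yet (the Prerun case exercised by TestMaxRunDurationHook_Prerun_ArmsTimerBeforeTasksAreFullyRunning), it falls back to the current wall clock so an enforcement timer is always armed. A minimal standalone sketch of that decision, using a hypothetical helper name (deadlineFor) that does not appear in the patch:

package main

import (
	"fmt"
	"time"
)

// deadlineFor sketches the fallback in currentDeadline: use the task-derived
// deadline when one exists; otherwise anchor max_run_duration at "now" so the
// timer is still armed before any task reports a StartedAt timestamp.
func deadlineFor(taskDeadline time.Time, ok bool, maxRun time.Duration) time.Time {
	if ok {
		return taskDeadline
	}
	return time.Now().Add(maxRun)
}

func main() {
	// No task has started yet: the deadline falls back to now + max_run_duration.
	fmt.Println(deadlineFor(time.Time{}, false, 50*time.Millisecond))
}

The trade-off is that the pre-start deadline is re-derived once tasks actually start, which is exactly what the boundary test above pins down.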
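A note on the two TestMaxRunDurationHook_EmitMetrics revisions (PATCH 35 and PATCH 37): with stock go-metrics, DefaultConfig resolves the local hostname and, because EnableHostname defaults to true, prepends it to every metric key, while the inmem sink flattens labels into the gauge-map key as ";name=value" pairs. An exact-key lookup therefore only works when the host prefix is predictable, which is why the final revision matches keys with strings.HasSuffix. The standalone sketch below illustrates the key shape; it calls the upstream hashicorp/go-metrics package directly (the tests go through the go-metrics/compat shim, which mirrors this API), and the node-123/web values are illustrative:

package main

import (
	"fmt"
	"strings"
	"time"

	metrics "github.com/hashicorp/go-metrics"
)

func main() {
	sink := metrics.NewInmemSink(10*time.Millisecond, 50*time.Millisecond)

	// DefaultConfig fills HostName from os.Hostname() and leaves
	// EnableHostname true, so keys come out host-prefixed, e.g.
	// "nomad_test.<hostname>.client.allocs...;node_id=node-123;task_group=web".
	if _, err := metrics.NewGlobal(metrics.DefaultConfig("nomad_test"), sink); err != nil {
		panic(err)
	}

	metrics.SetGaugeWithLabels(
		[]string{"client", "allocs", "max_run_duration", "configured_seconds"},
		120, // e.g. a 2m max_run_duration expressed in seconds
		[]metrics.Label{
			{Name: "node_id", Value: "node-123"},
			{Name: "task_group", Value: "web"},
		},
	)

	suffix := "client.allocs.max_run_duration.configured_seconds;node_id=node-123;task_group=web"
	for _, interval := range sink.Data() {
		for key, gauge := range interval.Gauges {
			// The label set is flattened into the map key, and the hostname
			// prefix varies per machine, hence suffix matching in the test.
			fmt.Println(key, strings.HasSuffix(key, suffix), gauge.Value)
		}
	}
}

Suffix matching keeps the assertion stable across developer machines and CI runners whose hostnames differ, at the cost of a linear scan over the interval's gauges.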