From 605395dee953f30233953a88d35f03005628f319 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:26:09 +0200 Subject: [PATCH 01/37] core: support batch job timeouts via new field max_run_duration --- api/tasks.go | 1 + nomad/batch_timeout_shims.go | 20 +++ nomad/batchtimeout/watcher.go | 170 ++++++++++++++++++ nomad/batchtimeout/watcher_test.go | 275 +++++++++++++++++++++++++++++ nomad/leader.go | 3 + nomad/server.go | 13 ++ nomad/structs/structs.go | 17 ++ nomad/structs/structs_test.go | 75 ++++++++ 8 files changed, 574 insertions(+) create mode 100644 nomad/batch_timeout_shims.go create mode 100644 nomad/batchtimeout/watcher.go create mode 100644 nomad/batchtimeout/watcher_test.go diff --git a/api/tasks.go b/api/tasks.go index a0624df90dc..a2aebd8a76c 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -507,6 +507,7 @@ type TaskGroup struct { Meta map[string]string `hcl:"meta,block"` Services []*Service `hcl:"service,block"` ShutdownDelay *time.Duration `mapstructure:"shutdown_delay" hcl:"shutdown_delay,optional"` + MaxRunDuration *time.Duration `mapstructure:"max_run_duration" hcl:"max_run_duration,optional"` // Deprecated: StopAfterClientDisconnect is deprecated in Nomad 1.8 and ignored in Nomad 1.10. Use Disconnect.StopOnClientAfter. StopAfterClientDisconnect *time.Duration `mapstructure:"stop_after_client_disconnect" hcl:"stop_after_client_disconnect,optional"` // Deprecated: MaxClientDisconnect is deprecated in Nomad 1.8.0 and ignored in Nomad 1.10. Use Disconnect.LostAfter. diff --git a/nomad/batch_timeout_shims.go b/nomad/batch_timeout_shims.go new file mode 100644 index 00000000000..57f1709a32c --- /dev/null +++ b/nomad/batch_timeout_shims.go @@ -0,0 +1,20 @@ +// Copyright IBM Corp. 2015, 2025 +// SPDX-License-Identifier: BUSL-1.1 + +package nomad + +import ( + "github.com/hashicorp/nomad/nomad/batchtimeout" + "github.com/hashicorp/nomad/nomad/structs" +) + +type batchTimeoutRaftShim struct { + s *Server +} + +var _ batchtimeout.RaftApplier = batchTimeoutRaftShim{} + +func (b batchTimeoutRaftShim) UpdateAllocDesiredTransition(req *structs.AllocUpdateDesiredTransitionRequest) (uint64, error) { + _, index, err := b.s.raftApply(structs.AllocUpdateDesiredTransitionRequestType, req) + return index, err +} diff --git a/nomad/batchtimeout/watcher.go b/nomad/batchtimeout/watcher.go new file mode 100644 index 00000000000..e6aa7ac7c7d --- /dev/null +++ b/nomad/batchtimeout/watcher.go @@ -0,0 +1,170 @@ +// Copyright IBM Corp. 
2015, 2025 +// SPDX-License-Identifier: BUSL-1.1 + +package batchtimeout + +import ( + "time" + + log "github.com/hashicorp/go-hclog" + + "github.com/hashicorp/nomad/helper/pointer" + "github.com/hashicorp/nomad/nomad/state" + "github.com/hashicorp/nomad/nomad/structs" +) + +const ( + defaultScanInterval = 5 * time.Second + + timeoutDescription = "allocation exceeded max_run_duration" +) + +type RaftApplier interface { + UpdateAllocDesiredTransition(req *structs.AllocUpdateDesiredTransitionRequest) (uint64, error) +} + +type Watcher struct { + logger log.Logger + raft RaftApplier + state *state.StateStore + scanInterval time.Duration + enabled bool +} + +func NewWatcher(logger log.Logger, raft RaftApplier, scanInterval time.Duration) *Watcher { + if scanInterval <= 0 { + scanInterval = defaultScanInterval + } + + return &Watcher{ + logger: logger.Named("batch_timeout_watcher"), + raft: raft, + scanInterval: scanInterval, + } +} + +func (w *Watcher) SetEnabled(enabled bool, st *state.StateStore) { + w.enabled = enabled + if st != nil { + w.state = st + } + + if enabled && w.state != nil { + go w.watch() + } +} + +func (w *Watcher) watch() { + ticker := time.NewTicker(w.scanInterval) + defer ticker.Stop() + + for { + if !w.enabled { + return + } + + w.scan(time.Now().UTC()) + + <-ticker.C + } +} + +func (w *Watcher) scan(now time.Time) { + if w.state == nil { + return + } + + iter, err := w.state.Allocs(nil, state.SortDefault) + if err != nil { + w.logger.Warn("failed to iterate allocations", "error", err) + return + } + + transitions := make(map[string]*structs.DesiredTransition) + + for { + raw := iter.Next() + if raw == nil { + break + } + + alloc := raw.(*structs.Allocation) + if !shouldStopAlloc(now, alloc) { + continue + } + + transitions[alloc.ID] = &structs.DesiredTransition{ + NoShutdownDelay: pointer.Of(false), + } + } + + if len(transitions) == 0 { + return + } + + req := &structs.AllocUpdateDesiredTransitionRequest{ + Allocs: transitions, + } + + if _, err := w.raft.UpdateAllocDesiredTransition(req); err != nil { + w.logger.Warn("failed to stop timed out allocations", "error", err, "count", len(transitions)) + return + } + + w.logger.Debug("stopped timed out allocations", "count", len(transitions)) +} + +func shouldStopAlloc(now time.Time, alloc *structs.Allocation) bool { + if alloc == nil || alloc.Job == nil { + return false + } + + if alloc.DesiredStatus != structs.AllocDesiredStatusRun { + return false + } + + if alloc.ClientStatus != structs.AllocClientStatusRunning { + return false + } + + if alloc.ClientTerminalStatus() || alloc.ServerTerminalStatus() { + return false + } + + tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) + if tg == nil || tg.MaxRunDuration == nil || *tg.MaxRunDuration <= 0 { + return false + } + + switch alloc.Job.Type { + case structs.JobTypeBatch, structs.JobTypeSysBatch: + default: + return false + } + + startedAt, ok := allocRunningSince(alloc) + if !ok { + return false + } + + return !startedAt.Add(*tg.MaxRunDuration).After(now) +} + +func allocRunningSince(alloc *structs.Allocation) (time.Time, bool) { + var latest time.Time + + for _, ts := range alloc.TaskStates { + if ts == nil || ts.State != structs.TaskStateRunning || ts.StartedAt.IsZero() { + return time.Time{}, false + } + if ts.StartedAt.After(latest) { + latest = ts.StartedAt + } + } + + if latest.IsZero() { + return time.Time{}, false + } + + return latest, true +} diff --git a/nomad/batchtimeout/watcher_test.go b/nomad/batchtimeout/watcher_test.go new file mode 100644 index 
00000000000..673e6898210 --- /dev/null +++ b/nomad/batchtimeout/watcher_test.go @@ -0,0 +1,275 @@ +// Copyright IBM Corp. 2015, 2025 +// SPDX-License-Identifier: BUSL-1.1 + +package batchtimeout + +import ( + "testing" + "time" + + "github.com/hashicorp/nomad/helper/pointer" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" +) + +func TestShouldStopAlloc(t *testing.T) { + t.Parallel() + + now := time.Now().UTC() + + cases := []struct { + name string + allocFn func() *structs.Allocation + expected bool + }{ + { + name: "batch alloc times out after max run duration", + allocFn: func() *structs.Allocation { + alloc := mock.Alloc() + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: now.Add(-10 * time.Minute), + }, + } + return alloc + }, + expected: true, + }, + { + name: "sysbatch alloc times out after max run duration", + allocFn: func() *structs.Allocation { + alloc := mock.SysBatchAlloc() + alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.TaskStates = map[string]*structs.TaskState{ + "ping-example": { + State: structs.TaskStateRunning, + StartedAt: now.Add(-10 * time.Minute), + }, + } + return alloc + }, + expected: true, + }, + { + name: "alloc without max run duration does not time out", + allocFn: func() *structs.Allocation { + alloc := mock.Alloc() + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = nil + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: now.Add(-10 * time.Minute), + }, + } + return alloc + }, + expected: false, + }, + { + name: "service alloc does not time out", + allocFn: func() *structs.Allocation { + alloc := mock.Alloc() + alloc.Job.Type = structs.JobTypeService + alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: now.Add(-10 * time.Minute), + }, + } + return alloc + }, + expected: false, + }, + { + name: "pending alloc does not time out", + allocFn: func() *structs.Allocation { + alloc := mock.Alloc() + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) + alloc.ClientStatus = structs.AllocClientStatusPending + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: now.Add(-10 * time.Minute), + }, + } + return alloc + }, + expected: false, + }, + { + name: "stopping alloc does not time out again", + allocFn: func() *structs.Allocation { + alloc := mock.Alloc() + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DesiredStatus = structs.AllocDesiredStatusStop + 
alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: now.Add(-10 * time.Minute), + }, + } + return alloc + }, + expected: false, + }, + { + name: "alloc within max run duration does not time out", + allocFn: func() *structs.Allocation { + alloc := mock.Alloc() + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(15 * time.Minute) + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: now.Add(-10 * time.Minute), + }, + } + return alloc + }, + expected: false, + }, + { + name: "alloc with no running task state does not time out", + allocFn: func() *structs.Allocation { + alloc := mock.Alloc() + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStatePending, + StartedAt: now.Add(-10 * time.Minute), + }, + } + return alloc + }, + expected: false, + }, + { + name: "alloc with zero start time does not time out", + allocFn: func() *structs.Allocation { + alloc := mock.Alloc() + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + }, + } + return alloc + }, + expected: false, + }, + { + name: "alloc with mixed task states does not time out until all tasks are running", + allocFn: func() *structs.Allocation { + alloc := mock.Alloc() + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, &structs.Task{Name: "sidecar"}) + alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: now.Add(-10 * time.Minute), + }, + "sidecar": { + State: structs.TaskStatePending, + }, + } + return alloc + }, + expected: false, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + must.Eq(t, tc.expected, shouldStopAlloc(now, tc.allocFn())) + }) + } +} + +func TestAllocRunningSince(t *testing.T) { + t.Parallel() + + now := time.Now().UTC() + + t.Run("returns latest running task start time", func(t *testing.T) { + alloc := mock.Alloc() + alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: now.Add(-10 * time.Minute), + }, + "sidecar": { + State: structs.TaskStateRunning, + StartedAt: now.Add(-5 * time.Minute), + }, + } + + startedAt, ok := allocRunningSince(alloc) + must.True(t, ok) + must.Eq(t, now.Add(-5*time.Minute), startedAt) + }) + + t.Run("returns false when task states are missing", func(t *testing.T) { + alloc := mock.Alloc() + alloc.TaskStates = nil + + _, ok := allocRunningSince(alloc) + must.False(t, ok) + }) + + t.Run("returns false when any task is not running", func(t *testing.T) { + alloc := mock.Alloc() + alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: 
structs.TaskStateRunning, + StartedAt: now.Add(-10 * time.Minute), + }, + "sidecar": { + State: structs.TaskStatePending, + }, + } + + _, ok := allocRunningSince(alloc) + must.False(t, ok) + }) + + t.Run("returns false when any running task has zero start time", func(t *testing.T) { + alloc := mock.Alloc() + alloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + }, + } + + _, ok := allocRunningSince(alloc) + must.False(t, ok) + }) +} diff --git a/nomad/leader.go b/nomad/leader.go index 26a5f1d9521..1855fc2bda4 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -412,6 +412,9 @@ func (s *Server) establishLeadership(stopCh chan struct{}) error { // Enable the deployment watcher, since we are now the leader s.deploymentWatcher.SetEnabled(true, s.State()) + // Enable the batch timeout watcher, since we are now the leader + s.batchTimeoutWatcher.SetEnabled(true, s.State()) + // Enable the NodeDrainer s.nodeDrainer.SetEnabled(true, s.State()) diff --git a/nomad/server.go b/nomad/server.go index c45047dffcb..ba49483be79 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -43,6 +43,7 @@ import ( "github.com/hashicorp/nomad/helper/tlsutil" "github.com/hashicorp/nomad/lib/auth/oidc" "github.com/hashicorp/nomad/nomad/auth" + "github.com/hashicorp/nomad/nomad/batchtimeout" "github.com/hashicorp/nomad/nomad/deploymentwatcher" "github.com/hashicorp/nomad/nomad/drainer" "github.com/hashicorp/nomad/nomad/lock" @@ -229,6 +230,10 @@ type Server struct { // make the required calls to continue to transition the deployment. deploymentWatcher *deploymentwatcher.Watcher + // batchTimeoutWatcher is used to stop batch and sysbatch allocations that + // exceed their configured max_run_duration. + batchTimeoutWatcher *batchtimeout.Watcher + // nodeDrainer is used to drain allocations from nodes. nodeDrainer *drainer.NodeDrainer @@ -527,6 +532,9 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, consulConfigFunc // updates when the processes SetEnabled is triggered. go s.evalBroker.enabledNotifier.Run() + // Setup the batch timeout watcher. + s.setupBatchTimeoutWatcher() + // Setup the node drainer. s.setupNodeDrainer() @@ -1184,6 +1192,11 @@ func (s *Server) setupVolumeWatcher() error { // setupNodeDrainer creates a node drainer which will be enabled when a server // becomes a leader. +func (s *Server) setupBatchTimeoutWatcher() { + shim := batchTimeoutRaftShim{s} + s.batchTimeoutWatcher = batchtimeout.NewWatcher(s.logger, shim, 0) +} + func (s *Server) setupNodeDrainer() { // Create a shim around Raft requests shim := drainerShim{s} diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index df142bab0dc..33ea1b3835d 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -6867,6 +6867,11 @@ type TaskGroup struct { // group services in consul and stopping tasks. ShutdownDelay *time.Duration + // MaxRunDuration is the maximum amount of time a batch or sysbatch task + // group allocation may run after entering the running state before Nomad + // stops it. 
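+ // For example (an illustrative jobspec snippet, not part of this change), + // a batch job's group stanza could set max_run_duration = "30m" so Nomad + // stops the allocation once it has been running for 30 minutes.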
+ MaxRunDuration *time.Duration + // StopAfterClientDisconnect, if set, configures the client to stop the task group // after this duration since the last known good heartbeat // To be deprecated after 1.8.0 infavor of Disconnect.StopOnClientAfter @@ -7054,6 +7059,18 @@ func (tg *TaskGroup) Validate(j *Job) error { } } + if tg.MaxRunDuration != nil { + if *tg.MaxRunDuration <= 0 { + mErr = multierror.Append(mErr, errors.New("MaxRunDuration must be greater than zero")) + } + + switch j.Type { + case JobTypeBatch, JobTypeSysBatch: + default: + mErr = multierror.Append(mErr, fmt.Errorf("Job type %q does not allow max_run_duration", j.Type)) + } + } + for idx, constr := range tg.Constraints { if err := constr.Validate(); err != nil { outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index 6f77ff76b76..e49f6d19d99 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -1543,6 +1543,81 @@ func TestTaskGroup_Validate(t *testing.T) { }, jobType: JobTypeSystem, }, + { + name: "invalid max run duration for service job", + tg: &TaskGroup{ + Name: "web", + Count: 1, + MaxRunDuration: pointer.Of(5 * time.Minute), + Tasks: []*Task{ + {Name: "web"}, + }, + }, + expErr: []string{ + `Job type "service" does not allow max_run_duration`, + }, + jobType: JobTypeService, + }, + { + name: "invalid non-positive max run duration for batch job", + tg: &TaskGroup{ + Name: "web", + Count: 1, + MaxRunDuration: pointer.Of(time.Duration(0)), + Tasks: []*Task{ + {Name: "web"}, + }, + }, + expErr: []string{ + "MaxRunDuration must be greater than zero", + }, + jobType: JobTypeBatch, + }, + { + name: "valid max run duration for batch job", + tg: &TaskGroup{ + Name: "web", + Count: 1, + MaxRunDuration: pointer.Of(5 * time.Minute), + Tasks: []*Task{ + { + Name: "web", + Driver: "mock_driver", + Resources: DefaultResources(), + LogConfig: DefaultLogConfig(), + }, + }, + RestartPolicy: NewRestartPolicy(JobTypeBatch), + ReschedulePolicy: NewReschedulePolicy(JobTypeBatch), + EphemeralDisk: DefaultEphemeralDisk(), + }, + jobType: JobTypeBatch, + }, + { + name: "valid max run duration for sysbatch job", + tg: &TaskGroup{ + Name: "web", + Count: 1, + MaxRunDuration: pointer.Of(5 * time.Minute), + Tasks: []*Task{ + { + Name: "web", + Driver: "mock_driver", + Resources: DefaultResources(), + LogConfig: DefaultLogConfig(), + }, + }, + RestartPolicy: &RestartPolicy{ + Attempts: 0, + Interval: 5 * time.Second, + Delay: 5 * time.Second, + Mode: RestartPolicyModeFail, + RenderTemplates: false, + }, + EphemeralDisk: DefaultEphemeralDisk(), + }, + jobType: JobTypeSysBatch, + }, { name: "duplicated por label", tg: &TaskGroup{ From 2388417e8e56e5da07b93cbc5ac89e4ba844426e Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:30:14 +0200 Subject: [PATCH 02/37] refine batch timeout watcher --- nomad/batch_timeout_shims.go | 4 +- nomad/batchtimeout/watcher.go | 86 ++++++++++++++++------ nomad/batchtimeout/watcher_test.go | 112 +++++++++++++++++++++++++++++ nomad/state/state_store_test.go | 21 +++++- 4 files changed, 197 insertions(+), 26 deletions(-) diff --git a/nomad/batch_timeout_shims.go b/nomad/batch_timeout_shims.go index 57f1709a32c..82e7520fdca 100644 --- a/nomad/batch_timeout_shims.go +++ b/nomad/batch_timeout_shims.go @@ -14,7 +14,7 @@ type batchTimeoutRaftShim struct { var _ batchtimeout.RaftApplier = batchTimeoutRaftShim{} -func (b 
batchTimeoutRaftShim) UpdateAllocDesiredTransition(req *structs.AllocUpdateDesiredTransitionRequest) (uint64, error) { - _, index, err := b.s.raftApply(structs.AllocUpdateDesiredTransitionRequestType, req) +func (b batchTimeoutRaftShim) ApplyPlanResults(req *structs.ApplyPlanResultsRequest) (uint64, error) { + _, index, err := b.s.raftApply(structs.ApplyPlanResultsRequestType, req) return index, err } diff --git a/nomad/batchtimeout/watcher.go b/nomad/batchtimeout/watcher.go index e6aa7ac7c7d..8dc907e43ac 100644 --- a/nomad/batchtimeout/watcher.go +++ b/nomad/batchtimeout/watcher.go @@ -4,11 +4,11 @@ package batchtimeout import ( + "sync" "time" log "github.com/hashicorp/go-hclog" - "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" ) @@ -20,7 +20,7 @@ const ( ) type RaftApplier interface { - UpdateAllocDesiredTransition(req *structs.AllocUpdateDesiredTransitionRequest) (uint64, error) + ApplyPlanResults(req *structs.ApplyPlanResultsRequest) (uint64, error) } type Watcher struct { @@ -28,7 +28,11 @@ type Watcher struct { raft RaftApplier state *state.StateStore scanInterval time.Duration - enabled bool + + mu sync.Mutex + enabled bool + stopCh chan struct{} + running bool } func NewWatcher(logger log.Logger, raft RaftApplier, scanInterval time.Duration) *Watcher { @@ -44,43 +48,78 @@ func NewWatcher(logger log.Logger, raft RaftApplier, scanInterval time.Duration) } func (w *Watcher) SetEnabled(enabled bool, st *state.StateStore) { - w.enabled = enabled + w.mu.Lock() + defer w.mu.Unlock() + if st != nil { w.state = st } - if enabled && w.state != nil { - go w.watch() + if enabled { + w.enabled = true + if w.running || w.state == nil { + return + } + + w.stopCh = make(chan struct{}) + w.running = true + go w.watch(w.stopCh) + return + } + + w.enabled = false + if !w.running { + return } + + close(w.stopCh) + w.stopCh = nil + w.running = false } -func (w *Watcher) watch() { +func (w *Watcher) watch(stopCh <-chan struct{}) { ticker := time.NewTicker(w.scanInterval) defer ticker.Stop() + defer w.markStopped(stopCh) for { - if !w.enabled { + w.scan(time.Now().UTC()) + + select { + case <-ticker.C: + case <-stopCh: return } + } +} - w.scan(time.Now().UTC()) +func (w *Watcher) markStopped(stopCh <-chan struct{}) { + w.mu.Lock() + defer w.mu.Unlock() - <-ticker.C + if w.stopCh == stopCh { + w.stopCh = nil } + w.running = false } func (w *Watcher) scan(now time.Time) { - if w.state == nil { + w.mu.Lock() + st := w.state + enabled := w.enabled + w.mu.Unlock() + + if !enabled || st == nil { return } - iter, err := w.state.Allocs(nil, state.SortDefault) + iter, err := st.Allocs(nil, state.SortDefault) if err != nil { w.logger.Warn("failed to iterate allocations", "error", err) return } - transitions := make(map[string]*structs.DesiredTransition) + var stopped []*structs.AllocationDiff for { raw := iter.Next() @@ -93,25 +132,28 @@ func (w *Watcher) scan(now time.Time) { continue } - transitions[alloc.ID] = &structs.DesiredTransition{ - NoShutdownDelay: pointer.Of(false), - } + stopped = append(stopped, &structs.AllocationDiff{ + ID: alloc.ID, + DesiredDescription: timeoutDescription, + ClientStatus: structs.AllocClientStatusFailed, + }) } - if len(transitions) == 0 { + if len(stopped) == 0 { return } - req := &structs.AllocUpdateDesiredTransitionRequest{ - Allocs: transitions, + req := &structs.ApplyPlanResultsRequest{ + AllocsStopped: stopped, + UpdatedAt: now.UnixNano(), } - if _, err := 
w.raft.UpdateAllocDesiredTransition(req); err != nil { - w.logger.Warn("failed to stop timed out allocations", "error", err, "count", len(transitions)) + if _, err := w.raft.ApplyPlanResults(req); err != nil { + w.logger.Warn("failed to stop timed out allocations", "error", err, "count", len(stopped)) return } - w.logger.Debug("stopped timed out allocations", "count", len(transitions)) + w.logger.Debug("stopped timed out allocations", "count", len(stopped)) } func shouldStopAlloc(now time.Time, alloc *structs.Allocation) bool { diff --git a/nomad/batchtimeout/watcher_test.go b/nomad/batchtimeout/watcher_test.go index 673e6898210..c73f410ac90 100644 --- a/nomad/batchtimeout/watcher_test.go +++ b/nomad/batchtimeout/watcher_test.go @@ -4,11 +4,14 @@ package batchtimeout import ( + "sync" "testing" "time" + "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" "github.com/shoenig/test/must" ) @@ -273,3 +276,112 @@ func TestAllocRunningSince(t *testing.T) { must.False(t, ok) }) } + +func TestWatcherSetEnabled(t *testing.T) { + t.Parallel() + + raft := &mockRaftApplier{} + w := NewWatcher(hclog.NewNullLogger(), raft, time.Hour) + + st := state.TestStateStore(t) + + w.SetEnabled(true, st) + must.True(t, w.enabled) + must.True(t, w.running) + must.NotNil(t, w.stopCh) + + firstStopCh := w.stopCh + + w.SetEnabled(true, st) + must.True(t, w.running) + must.Eq(t, firstStopCh, w.stopCh) + + w.SetEnabled(false, st) + must.False(t, w.enabled) +} + +func TestWatcherScanAppliesTimedOutAllocs(t *testing.T) { + t.Parallel() + + st := state.TestStateStore(t) + raft := &mockRaftApplier{} + w := NewWatcher(hclog.NewNullLogger(), raft, time.Hour) + w.state = st + w.enabled = true + + batchAlloc := mock.Alloc() + batchAlloc.Job.Type = structs.JobTypeBatch + batchAlloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) + batchAlloc.ClientStatus = structs.AllocClientStatusRunning + batchAlloc.DesiredStatus = structs.AllocDesiredStatusRun + batchAlloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: time.Now().UTC().Add(-10 * time.Minute), + }, + } + + sysbatchAlloc := mock.SysBatchAlloc() + sysbatchAlloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) + sysbatchAlloc.ClientStatus = structs.AllocClientStatusRunning + sysbatchAlloc.DesiredStatus = structs.AllocDesiredStatusRun + sysbatchAlloc.TaskStates = map[string]*structs.TaskState{ + "ping-example": { + State: structs.TaskStateRunning, + StartedAt: time.Now().UTC().Add(-10 * time.Minute), + }, + } + + okAlloc := mock.Alloc() + okAlloc.Job.Type = structs.JobTypeBatch + okAlloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(30 * time.Minute) + okAlloc.ClientStatus = structs.AllocClientStatusRunning + okAlloc.DesiredStatus = structs.AllocDesiredStatusRun + okAlloc.TaskStates = map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: time.Now().UTC().Add(-10 * time.Minute), + }, + } + + must.NoError(t, st.UpsertJob(structs.MsgTypeTestSetup, 100, nil, batchAlloc.Job)) + must.NoError(t, st.UpsertJob(structs.MsgTypeTestSetup, 101, nil, sysbatchAlloc.Job)) + must.NoError(t, st.UpsertJob(structs.MsgTypeTestSetup, 102, nil, okAlloc.Job)) + must.NoError(t, st.UpsertAllocs(structs.MsgTypeTestSetup, 103, []*structs.Allocation{ + batchAlloc, + sysbatchAlloc, + okAlloc, + })) + + w.scan(time.Now().UTC()) + + 
must.NotNil(t, raft.lastReq) + must.SliceLen(t, 2, raft.lastReq.AllocsStopped) + + got := map[string]*structs.AllocationDiff{} + for _, diff := range raft.lastReq.AllocsStopped { + got[diff.ID] = diff + } + + must.MapContainsKey(t, got, batchAlloc.ID) + must.MapContainsKey(t, got, sysbatchAlloc.ID) + must.MapNotContainsKey(t, got, okAlloc.ID) + + must.Eq(t, timeoutDescription, got[batchAlloc.ID].DesiredDescription) + must.Eq(t, structs.AllocClientStatusFailed, got[batchAlloc.ID].ClientStatus) + must.Eq(t, timeoutDescription, got[sysbatchAlloc.ID].DesiredDescription) + must.Eq(t, structs.AllocClientStatusFailed, got[sysbatchAlloc.ID].ClientStatus) +} + +type mockRaftApplier struct { + mu sync.Mutex + lastReq *structs.ApplyPlanResultsRequest +} + +func (m *mockRaftApplier) ApplyPlanResults(req *structs.ApplyPlanResultsRequest) (uint64, error) { + m.mu.Lock() + defer m.mu.Unlock() + + m.lastReq = req + return 1, nil +} diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index a04e1262119..b069523dd0a 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -171,6 +171,15 @@ func TestStateStore_UpsertPlanResults_AllocationsDenormalized(t *testing.T) { DesiredDescription: "desired desc", ClientStatus: structs.AllocClientStatusLost, } + + timeoutStoppedAlloc := mock.Alloc() + timeoutStoppedAlloc.Job = job + timeoutStoppedAlloc.JobID = job.ID + timeoutStoppedAllocDiff := &structs.AllocationDiff{ + ID: timeoutStoppedAlloc.ID, + DesiredDescription: "allocation exceeded max_run_duration", + ClientStatus: structs.AllocClientStatusFailed, + } preemptedAlloc := mock.Alloc() preemptedAlloc.Job = job preemptedAlloc.JobID = job.ID @@ -181,7 +190,7 @@ func TestStateStore_UpsertPlanResults_AllocationsDenormalized(t *testing.T) { must.NoError(t, state.UpsertJob(structs.MsgTypeTestSetup, 900, nil, job)) must.NoError(t, state.UpsertAllocs( - structs.MsgTypeTestSetup, 999, []*structs.Allocation{stoppedAlloc, preemptedAlloc})) + structs.MsgTypeTestSetup, 999, []*structs.Allocation{stoppedAlloc, timeoutStoppedAlloc, preemptedAlloc})) // modify job and ensure that stopped and preempted alloc point to original Job mJob := job.Copy() @@ -198,7 +207,7 @@ func TestStateStore_UpsertPlanResults_AllocationsDenormalized(t *testing.T) { // Create a plan result res := structs.ApplyPlanResultsRequest{ AllocsUpdated: []*structs.Allocation{alloc}, - AllocsStopped: []*structs.AllocationDiff{stoppedAllocDiff}, + AllocsStopped: []*structs.AllocationDiff{stoppedAllocDiff, timeoutStoppedAllocDiff}, Job: mJob, EvalID: eval.ID, AllocsPreempted: []*structs.AllocationDiff{preemptedAllocDiff}, @@ -227,6 +236,14 @@ func TestStateStore_UpsertPlanResults_AllocationsDenormalized(t *testing.T) { test.Eq(t, planModifyIndex, updatedStoppedAlloc.AllocModifyIndex) test.Eq(t, job.TaskGroups, updatedStoppedAlloc.Job.TaskGroups) + updatedTimeoutStoppedAlloc, err := state.AllocByID(ws, timeoutStoppedAlloc.ID) + must.NoError(t, err) + test.Eq(t, timeoutStoppedAllocDiff.DesiredDescription, updatedTimeoutStoppedAlloc.DesiredDescription) + test.Eq(t, structs.AllocDesiredStatusStop, updatedTimeoutStoppedAlloc.DesiredStatus) + test.Eq(t, timeoutStoppedAlloc.ClientStatus, updatedTimeoutStoppedAlloc.ClientStatus) + test.Eq(t, planModifyIndex, updatedTimeoutStoppedAlloc.AllocModifyIndex) + test.Eq(t, job.TaskGroups, updatedTimeoutStoppedAlloc.Job.TaskGroups) + updatedPreemptedAlloc, err := state.AllocByID(ws, preemptedAlloc.ID) must.NoError(t, err) test.Eq(t, structs.AllocDesiredStatusEvict, 
updatedPreemptedAlloc.DesiredStatus) From 617fe2b0419ecca327d07de3807115e366dfe5a8 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:31:25 +0200 Subject: [PATCH 03/37] client status --- nomad/state/state_store.go | 20 +++++++++++++++----- nomad/state/state_store_test.go | 2 +- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index f660d9ad94d..3c452d6f5c5 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -4054,7 +4054,9 @@ func (s *StateStore) nestedUpdateAllocFromClient(txn *txn, index uint64, alloc * // Pull in anything the client is the authority on copyAlloc.ClientStatus = alloc.ClientStatus - copyAlloc.ClientDescription = alloc.ClientDescription + if alloc.ClientDescription != "" { + copyAlloc.ClientDescription = alloc.ClientDescription + } copyAlloc.TaskStates = alloc.TaskStates copyAlloc.NetworkStatus = alloc.NetworkStatus @@ -4288,10 +4290,18 @@ func (s *StateStore) upsertAllocsImpl(index uint64, allocs []*structs.Allocation // Keep the clients task states alloc.TaskStates = exist.TaskStates - // If the scheduler is marking this allocation as lost or unknown we do not - // want to reuse the status of the existing allocation. - if alloc.ClientStatus != structs.AllocClientStatusLost && - alloc.ClientStatus != structs.AllocClientStatusUnknown { + // If the scheduler is marking this allocation as lost, unknown, or another + // explicit terminal client status, preserve the scheduler-provided status + // and description instead of reusing the existing allocation values. + switch alloc.ClientStatus { + case structs.AllocClientStatusLost, + structs.AllocClientStatusUnknown, + structs.AllocClientStatusFailed, + structs.AllocClientStatusComplete: + if alloc.ClientDescription == "" { + alloc.ClientDescription = exist.ClientDescription + } + default: alloc.ClientStatus = exist.ClientStatus alloc.ClientDescription = exist.ClientDescription } diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index b069523dd0a..4db517a6e82 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -240,7 +240,7 @@ func TestStateStore_UpsertPlanResults_AllocationsDenormalized(t *testing.T) { must.NoError(t, err) test.Eq(t, timeoutStoppedAllocDiff.DesiredDescription, updatedTimeoutStoppedAlloc.DesiredDescription) test.Eq(t, structs.AllocDesiredStatusStop, updatedTimeoutStoppedAlloc.DesiredStatus) - test.Eq(t, timeoutStoppedAlloc.ClientStatus, updatedTimeoutStoppedAlloc.ClientStatus) + test.Eq(t, timeoutStoppedAllocDiff.ClientStatus, updatedTimeoutStoppedAlloc.ClientStatus) test.Eq(t, planModifyIndex, updatedTimeoutStoppedAlloc.AllocModifyIndex) test.Eq(t, job.TaskGroups, updatedTimeoutStoppedAlloc.Job.TaskGroups) From 1f4c4dba8d8cf222a8615874162c9f013d96a2a3 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:39:13 +0200 Subject: [PATCH 04/37] integration-style test --- nomad/batch_timeout_test.go | 120 ++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 nomad/batch_timeout_test.go diff --git a/nomad/batch_timeout_test.go b/nomad/batch_timeout_test.go new file mode 100644 index 00000000000..61e1ef99f1e --- /dev/null +++ b/nomad/batch_timeout_test.go @@ -0,0 +1,120 @@ +// Copyright IBM Corp. 
2015, 2025 +// SPDX-License-Identifier: BUSL-1.1 + +package nomad + +import ( + "testing" + "time" + + "github.com/hashicorp/nomad/helper/pointer" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/testutil" + "github.com/shoenig/test/must" +) + +func Test_BatchTimeoutWatcher_StopsExpiredBatchAlloc(t *testing.T) { + t.Parallel() + + srv, cleanup := TestServer(t, nil) + t.Cleanup(cleanup) + + testutil.WaitForLeader(t, srv.RPC) + + job := mock.Job() + job.Type = structs.JobTypeBatch + job.TaskGroups[0].MaxRunDuration = pointer.Of(50 * time.Millisecond) + job.Canonicalize() + + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.Namespace = job.Namespace + alloc.TaskGroup = job.TaskGroups[0].Name + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.TaskStates = map[string]*structs.TaskState{ + job.TaskGroups[0].Tasks[0].Name: { + State: structs.TaskStateRunning, + StartedAt: time.Now().UTC().Add(-1 * time.Second), + }, + } + + state := srv.fsm.State() + must.NoError(t, state.UpsertJobSummary(998, mock.JobSummary(job.ID))) + must.NoError(t, state.UpsertJob(structs.MsgTypeTestSetup, 999, nil, job)) + must.NoError(t, state.UpsertAllocs(structs.MsgTypeTestSetup, 1000, []*structs.Allocation{alloc})) + + testutil.WaitForResult(func() (bool, error) { + out, err := state.AllocByID(nil, alloc.ID) + if err != nil { + return false, err + } + if out == nil { + return false, nil + } + + return out.DesiredStatus == structs.AllocDesiredStatusStop && + out.DesiredDescription == structs.AllocTimeoutReasonMaxRunDuration && + out.ClientStatus == structs.AllocClientStatusFailed, nil + }, func(err error) { + out, lookupErr := state.AllocByID(nil, alloc.ID) + if lookupErr != nil { + t.Fatalf("failed to lookup alloc after waiting: %v", lookupErr) + } + t.Fatalf("timed out waiting for batch timeout watcher to stop alloc: %v; alloc=%#v", err, out) + }) +} + +func Test_BatchTimeoutWatcher_StopsExpiredSysBatchAlloc(t *testing.T) { + t.Parallel() + + srv, cleanup := TestServer(t, nil) + t.Cleanup(cleanup) + + testutil.WaitForLeader(t, srv.RPC) + + job := mock.SystemBatchJob() + job.TaskGroups[0].MaxRunDuration = pointer.Of(50 * time.Millisecond) + job.Canonicalize() + + alloc := mock.SysBatchAlloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.Namespace = job.Namespace + alloc.TaskGroup = job.TaskGroups[0].Name + alloc.DesiredStatus = structs.AllocDesiredStatusRun + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.TaskStates = map[string]*structs.TaskState{ + job.TaskGroups[0].Tasks[0].Name: { + State: structs.TaskStateRunning, + StartedAt: time.Now().UTC().Add(-1 * time.Second), + }, + } + + state := srv.fsm.State() + must.NoError(t, state.UpsertJobSummary(998, mock.JobSummary(job.ID))) + must.NoError(t, state.UpsertJob(structs.MsgTypeTestSetup, 999, nil, job)) + must.NoError(t, state.UpsertAllocs(structs.MsgTypeTestSetup, 1000, []*structs.Allocation{alloc})) + + testutil.WaitForResult(func() (bool, error) { + out, err := state.AllocByID(nil, alloc.ID) + if err != nil { + return false, err + } + if out == nil { + return false, nil + } + + return out.DesiredStatus == structs.AllocDesiredStatusStop && + out.DesiredDescription == structs.AllocTimeoutReasonMaxRunDuration && + out.ClientStatus == structs.AllocClientStatusFailed, nil + }, func(err error) { + out, lookupErr := state.AllocByID(nil, alloc.ID) + if lookupErr != nil { + t.Fatalf("failed to lookup 
alloc after waiting: %v", lookupErr) + } + t.Fatalf("timed out waiting for batch timeout watcher to stop sysbatch alloc: %v; alloc=%#v", err, out) + }) +} From 180aea3eba98d90967964fa643be9c1d88c485da Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:39:23 +0200 Subject: [PATCH 05/37] event stream --- nomad/state/events.go | 9 ++++++- nomad/state/events_test.go | 51 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/nomad/state/events.go b/nomad/state/events.go index 07ea50c3acf..0211885165d 100644 --- a/nomad/state/events.go +++ b/nomad/state/events.go @@ -357,12 +357,19 @@ func eventFromChange(change memdb.Change) (structs.Event, bool) { // remove job info to help keep size of alloc event down alloc.Job = nil + allocEvent := &structs.AllocationEvent{Allocation: alloc} + if alloc.DesiredStatus == structs.AllocDesiredStatusStop && + alloc.DesiredDescription == structs.AllocTimeoutReasonMaxRunDuration { + allocEvent.Timeout = true + allocEvent.TimeoutReason = alloc.DesiredDescription + } + return structs.Event{ Topic: structs.TopicAllocation, Key: after.ID, FilterKeys: filterKeys, Namespace: after.Namespace, - Payload: &structs.AllocationEvent{Allocation: alloc}, + Payload: allocEvent, }, true case "jobs": after, ok := change.After.(*structs.Job) diff --git a/nomad/state/events_test.go b/nomad/state/events_test.go index 8cbab8f26ea..73b9d1ed26f 100644 --- a/nomad/state/events_test.go +++ b/nomad/state/events_test.go @@ -538,6 +538,57 @@ func TestEventsFromChanges_ApplyPlanResultsRequestType(t *testing.T) { must.Len(t, 1, evals) must.Len(t, 1, jobs) must.Len(t, 1, deploys) + + for _, e := range allocs { + allocEvent := e.Payload.(*structs.AllocationEvent) + must.False(t, allocEvent.Timeout) + must.Eq(t, "", allocEvent.TimeoutReason) + } +} + +func TestEventsFromChanges_ApplyPlanResultsRequestType_TimeoutStoppedAlloc(t *testing.T) { + ci.Parallel(t) + s := TestStateStoreCfg(t, TestStateStorePublisher(t)) + defer s.StopEventBroker() + + mockJob := mock.Job() + must.NoError(t, s.UpsertJob(structs.MsgTypeTestSetup, 9, nil, mockJob)) + + stoppedAlloc := mock.Alloc() + stoppedAlloc.Job = mockJob + stoppedAlloc.JobID = mockJob.ID + + must.NoError(t, s.UpsertAllocs(structs.MsgTypeTestSetup, 10, []*structs.Allocation{stoppedAlloc})) + + msgType := structs.ApplyPlanResultsRequestType + req := &structs.ApplyPlanResultsRequest{ + AllocsStopped: []*structs.AllocationDiff{{ + ID: stoppedAlloc.ID, + DesiredDescription: structs.AllocTimeoutReasonMaxRunDuration, + ClientStatus: structs.AllocClientStatusFailed, + }}, + Job: mockJob, + } + + must.NoError(t, s.UpsertPlanResults(msgType, 100, req)) + + events := WaitForEvents(t, s, 100, 1, 1*time.Second) + must.Len(t, 2, events) + + var allocEvent *structs.AllocationEvent + for _, e := range events { + must.Eq(t, structs.TypePlanResult, e.Type) + if e.Topic == structs.TopicAllocation { + allocEvent = e.Payload.(*structs.AllocationEvent) + } + } + + must.NotNil(t, allocEvent) + must.True(t, allocEvent.Timeout) + must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, allocEvent.TimeoutReason) + must.Eq(t, structs.AllocDesiredStatusStop, allocEvent.Allocation.DesiredStatus) + must.Eq(t, structs.AllocClientStatusFailed, allocEvent.Allocation.ClientStatus) + must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, allocEvent.Allocation.DesiredDescription) } func TestEventsFromChanges_BatchNodeUpdateDrainRequestType(t *testing.T) { From 
5fff8280523953ce4bd6791934d0dda3f2d60300 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:39:41 +0200 Subject: [PATCH 06/37] AllocTimeoutReasonMaxRunDuration --- nomad/batchtimeout/watcher.go | 4 +--- nomad/batchtimeout/watcher_test.go | 4 ++-- nomad/state/state_store_test.go | 2 +- nomad/structs/alloc.go | 4 ++++ nomad/structs/event.go | 8 ++++++++ 5 files changed, 16 insertions(+), 6 deletions(-) diff --git a/nomad/batchtimeout/watcher.go b/nomad/batchtimeout/watcher.go index 8dc907e43ac..69892e26c6f 100644 --- a/nomad/batchtimeout/watcher.go +++ b/nomad/batchtimeout/watcher.go @@ -15,8 +15,6 @@ import ( const ( defaultScanInterval = 5 * time.Second - - timeoutDescription = "allocation exceeded max_run_duration" ) type RaftApplier interface { @@ -134,7 +132,7 @@ func (w *Watcher) scan(now time.Time) { stopped = append(stopped, &structs.AllocationDiff{ ID: alloc.ID, - DesiredDescription: timeoutDescription, + DesiredDescription: structs.AllocTimeoutReasonMaxRunDuration, ClientStatus: structs.AllocClientStatusFailed, }) } diff --git a/nomad/batchtimeout/watcher_test.go b/nomad/batchtimeout/watcher_test.go index c73f410ac90..84cddfb1701 100644 --- a/nomad/batchtimeout/watcher_test.go +++ b/nomad/batchtimeout/watcher_test.go @@ -367,9 +367,9 @@ func TestWatcherScanAppliesTimedOutAllocs(t *testing.T) { must.MapContainsKey(t, got, sysbatchAlloc.ID) must.MapNotContainsKey(t, got, okAlloc.ID) - must.Eq(t, timeoutDescription, got[batchAlloc.ID].DesiredDescription) + must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, got[batchAlloc.ID].DesiredDescription) must.Eq(t, structs.AllocClientStatusFailed, got[batchAlloc.ID].ClientStatus) - must.Eq(t, timeoutDescription, got[sysbatchAlloc.ID].DesiredDescription) + must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, got[sysbatchAlloc.ID].DesiredDescription) must.Eq(t, structs.AllocClientStatusFailed, got[sysbatchAlloc.ID].ClientStatus) } diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 4db517a6e82..ba6ae7a26c1 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -177,7 +177,7 @@ func TestStateStore_UpsertPlanResults_AllocationsDenormalized(t *testing.T) { timeoutStoppedAlloc.JobID = job.ID timeoutStoppedAllocDiff := &structs.AllocationDiff{ ID: timeoutStoppedAlloc.ID, - DesiredDescription: "allocation exceeded max_run_duration", + DesiredDescription: structs.AllocTimeoutReasonMaxRunDuration, ClientStatus: structs.AllocClientStatusFailed, } preemptedAlloc := mock.Alloc() diff --git a/nomad/structs/alloc.go b/nomad/structs/alloc.go index fbc7777e776..56c4df7ae96 100644 --- a/nomad/structs/alloc.go +++ b/nomad/structs/alloc.go @@ -20,6 +20,10 @@ const ( AllocDesiredStatusRun = "run" // Allocation should run AllocDesiredStatusStop = "stop" // Allocation should stop AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted + + // AllocTimeoutReasonMaxRunDuration is the reason used when an allocation is + // stopped because it exceeded its configured max_run_duration. + AllocTimeoutReasonMaxRunDuration = "allocation exceeded max_run_duration" ) const ( diff --git a/nomad/structs/event.go b/nomad/structs/event.go index 2affe4da618..e1aaecc6118 100644 --- a/nomad/structs/event.go +++ b/nomad/structs/event.go @@ -141,6 +141,14 @@ type EvaluationEvent struct { // Allocs embedded Job has been removed to reduce size. 
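// A timeout stop surfaces on the event stream roughly as (a sketch, assuming // the default JSON encoding of this struct): {"Allocation":{...}, // "Timeout":true,"TimeoutReason":"allocation exceeded max_run_duration"}.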
type AllocationEvent struct { Allocation *Allocation + + // Timeout indicates the allocation was stopped because it exceeded its + // configured max_run_duration. + Timeout bool `json:",omitempty"` + + // TimeoutReason is a human-readable explanation for timeout-triggered + // allocation stops. + TimeoutReason string `json:",omitempty"` } // DeploymentEvent holds a newly updated Deployment. From 8fbfff316ae965dd1fea0acd5ad59574dbb4db40 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:44:46 +0200 Subject: [PATCH 07/37] batch timeout watcher improvements --- nomad/batchtimeout/watcher.go | 63 +++++++++++++++++++++--------- nomad/batchtimeout/watcher_test.go | 8 ++-- 2 files changed, 48 insertions(+), 23 deletions(-) diff --git a/nomad/batchtimeout/watcher.go b/nomad/batchtimeout/watcher.go index 69892e26c6f..719a137cc13 100644 --- a/nomad/batchtimeout/watcher.go +++ b/nomad/batchtimeout/watcher.go @@ -21,6 +21,11 @@ type RaftApplier interface { ApplyPlanResults(req *structs.ApplyPlanResultsRequest) (uint64, error) } +// Watcher scans running batch and sysbatch allocations for max_run_duration +// violations and stops expired allocations via plan results. +// +// The watcher is leader-managed. The enabled flag tracks desired state, while +// running indicates whether the background watch loop is currently active. type Watcher struct { logger log.Logger raft RaftApplier @@ -45,6 +50,8 @@ func NewWatcher(logger log.Logger, raft RaftApplier, scanInterval time.Duration) } } +// SetEnabled updates the desired watcher state and starts or stops the +// background loop as needed. func (w *Watcher) SetEnabled(enabled bool, st *state.StateStore) { w.mu.Lock() defer w.mu.Unlock() @@ -54,17 +61,25 @@ func (w *Watcher) SetEnabled(enabled bool, st *state.StateStore) { } if enabled { - w.enabled = true - if w.running || w.state == nil { - return - } + w.enableLocked() + return + } + + w.disableLocked() +} - w.stopCh = make(chan struct{}) - w.running = true - go w.watch(w.stopCh) +func (w *Watcher) enableLocked() { + w.enabled = true + if w.running || w.state == nil { return } + w.stopCh = make(chan struct{}) + w.running = true + go w.watch(w.stopCh) +} + +func (w *Watcher) disableLocked() { w.enabled = false if !w.running { return @@ -81,6 +96,8 @@ func (w *Watcher) watch(stopCh <-chan struct{}) { defer w.markStopped(stopCh) for { + // Scan immediately so leadership changes do not wait a full interval + // before enforcing timeouts. w.scan(time.Now().UTC()) select { @@ -97,16 +114,14 @@ func (w *Watcher) markStopped(stopCh <-chan struct{}) { if w.stopCh == stopCh { w.stopCh = nil + w.running = false } - w.running = false } +// scan evaluates all allocations in state and stops any that have exceeded +// max_run_duration. func (w *Watcher) scan(now time.Time) { - w.mu.Lock() - st := w.state - enabled := w.enabled - w.mu.Unlock() - + st, enabled := w.snapshot() if !enabled || st == nil { return } @@ -154,35 +169,42 @@ func (w *Watcher) scan(now time.Time) { w.logger.Debug("stopped timed out allocations", "count", len(stopped)) } +func (w *Watcher) snapshot() (*state.StateStore, bool) { + w.mu.Lock() + defer w.mu.Unlock() + + return w.state, w.enabled +} + func shouldStopAlloc(now time.Time, alloc *structs.Allocation) bool { + // Allocation must exist and still be actively running. 
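+ // Terminal allocations are skipped below so a stop that is already in + // flight on either the client or the server is not applied a second time.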
if alloc == nil || alloc.Job == nil { return false } - if alloc.DesiredStatus != structs.AllocDesiredStatusRun { return false } - if alloc.ClientStatus != structs.AllocClientStatusRunning { return false } - if alloc.ClientTerminalStatus() || alloc.ServerTerminalStatus() { return false } + // Job and task group must opt into max_run_duration and support timeout + // enforcement. tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) if tg == nil || tg.MaxRunDuration == nil || *tg.MaxRunDuration <= 0 { return false } - switch alloc.Job.Type { case structs.JobTypeBatch, structs.JobTypeSysBatch: default: return false } - startedAt, ok := allocRunningSince(alloc) + // Allocation must be fully running and past its deadline. + startedAt, ok := allocFullyRunningSince(alloc) if !ok { return false } @@ -190,7 +212,10 @@ func shouldStopAlloc(now time.Time, alloc *structs.Allocation) bool { return !startedAt.Add(*tg.MaxRunDuration).After(now) } -func allocRunningSince(alloc *structs.Allocation) (time.Time, bool) { +// allocFullyRunningSince returns the latest StartedAt timestamp across all task +// states, but only when every known task state is running with a non-zero start +// time. +func allocFullyRunningSince(alloc *structs.Allocation) (time.Time, bool) { var latest time.Time for _, ts := range alloc.TaskStates { diff --git a/nomad/batchtimeout/watcher_test.go b/nomad/batchtimeout/watcher_test.go index 84cddfb1701..8eed4c651d6 100644 --- a/nomad/batchtimeout/watcher_test.go +++ b/nomad/batchtimeout/watcher_test.go @@ -235,7 +235,7 @@ func TestAllocRunningSince(t *testing.T) { }, } - startedAt, ok := allocRunningSince(alloc) + startedAt, ok := allocFullyRunningSince(alloc) must.True(t, ok) must.Eq(t, now.Add(-5*time.Minute), startedAt) }) @@ -244,7 +244,7 @@ func TestAllocRunningSince(t *testing.T) { alloc := mock.Alloc() alloc.TaskStates = nil - _, ok := allocRunningSince(alloc) + _, ok := allocFullyRunningSince(alloc) must.False(t, ok) }) @@ -260,7 +260,7 @@ func TestAllocRunningSince(t *testing.T) { }, } - _, ok := allocRunningSince(alloc) + _, ok := allocFullyRunningSince(alloc) must.False(t, ok) }) @@ -272,7 +272,7 @@ func TestAllocRunningSince(t *testing.T) { }, } - _, ok := allocRunningSince(alloc) + _, ok := allocFullyRunningSince(alloc) must.False(t, ok) }) } From 697c5c34d06c13b51efeadb53e986071961c35d8 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:46:09 +0200 Subject: [PATCH 08/37] added batchtimeout package to test-core.json --- ci/test-core.json | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/test-core.json b/ci/test-core.json index 4756a5ed233..8e443e94180 100644 --- a/ci/test-core.json +++ b/ci/test-core.json @@ -34,6 +34,7 @@ "jobspec2/...", "lib/...", "nomad/auth/...", + "nomad/batchtimeout/...", "nomad/deploymentwatcher/...", "nomad/drainer/...", "nomad/reporting/...", From a31525ad2c7f9fff6c4723b445050a9d1ab32ed7 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Wed, 8 Apr 2026 08:46:35 +0200 Subject: [PATCH 09/37] move to allocrunner --- client/allocrunner/alloc_runner.go | 88 +++++- client/allocrunner/alloc_runner_test.go | 44 +++ client/allocrunner/state/state.go | 16 +- nomad/batch_timeout_shims.go | 20 -- nomad/batch_timeout_test.go | 120 -------- nomad/batchtimeout/watcher.go | 235 -------------- nomad/batchtimeout/watcher_test.go | 387 ------------------------ nomad/leader.go | 3 - nomad/server.go | 13 - nomad/state/events.go | 5 +- 
nomad/state/events_test.go | 9 +- nomad/state/state_store.go | 4 +- nomad/structs/alloc.go | 81 +++++ 13 files changed, 233 insertions(+), 792 deletions(-) delete mode 100644 nomad/batch_timeout_shims.go delete mode 100644 nomad/batch_timeout_test.go delete mode 100644 nomad/batchtimeout/watcher.go delete mode 100644 nomad/batchtimeout/watcher_test.go diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index e0cb0d7daa8..8ac89516edf 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -131,6 +131,10 @@ type allocRunner struct { // and serializes Shutdown/Destroy calls. destroyedLock sync.Mutex + // maxRunTimer is used to enforce task group max_run_duration locally. + maxRunTimer *time.Timer + maxRunTimerLock sync.Mutex + // Alloc captures the allocation being run. alloc *structs.Allocation allocLock sync.RWMutex @@ -372,6 +376,7 @@ func (ar *allocRunner) WaitCh() <-chan struct{} { func (ar *allocRunner) Run() { // Close the wait channel on return defer close(ar.waitCh) + defer ar.stopMaxRunTimer() // Start the task state update handler go ar.handleTaskStateUpdates() @@ -472,6 +477,8 @@ func (ar *allocRunner) GetAllocDir() allocdir.Interface { // Restore state from database. Must be called after NewAllocRunner but before // Run. func (ar *allocRunner) Restore() error { + defer ar.resetMaxRunTimer() + // Retrieve deployment status to avoid reseting it across agent // restarts. Once a deployment status is set Nomad no longer monitors // alloc health, so we must persist deployment state across restarts. @@ -689,6 +696,8 @@ func (ar *allocRunner) handleTaskStateUpdates() { // Get the client allocation calloc := ar.clientAlloc(states) + ar.resetMaxRunTimerWithAlloc(calloc) + // Update the server ar.stateUpdater.AllocStateUpdated(calloc) @@ -845,7 +854,10 @@ func (ar *allocRunner) clientAlloc(taskStates map[string]*structs.TaskState) *st } // Compute the ClientStatus - if ar.state.ClientStatus != "" { + if ar.state.MaxRunDurationExceeded { + a.ClientStatus = structs.AllocClientStatusFailed + a.ClientDescription = structs.AllocTimeoutReasonMaxRunDuration + } else if ar.state.ClientStatus != "" { // The client status is being forced a.ClientStatus, a.ClientDescription = ar.state.ClientStatus, ar.state.ClientDescription } else { @@ -1056,6 +1068,7 @@ func (ar *allocRunner) handleAllocUpdate(update *structs.Allocation) { // Update ar.alloc ar.setAlloc(update) + ar.resetMaxRunTimer() // Run update hooks if not stopping or dead if !update.TerminalStatus() { @@ -1081,6 +1094,79 @@ func (ar *allocRunner) Listener() *cstructs.AllocListener { return ar.allocBroadcaster.Listen() } +func (ar *allocRunner) resetMaxRunTimer() { + ar.maxRunTimerLock.Lock() + defer ar.maxRunTimerLock.Unlock() + + if ar.maxRunTimer != nil { + ar.maxRunTimer.Stop() + ar.maxRunTimer = nil + } +} + +func (ar *allocRunner) resetMaxRunTimerWithAlloc(alloc *structs.Allocation) { + ar.resetMaxRunTimer() + + if alloc == nil || alloc.TerminalStatus() || alloc.DesiredStatus != structs.AllocDesiredStatusRun { + return + } + + deadline, ok := alloc.MaxRunDurationDeadline() + if !ok { + return + } + + remaining := time.Until(deadline) + if remaining <= 0 { + go ar.enforceMaxRunDuration() + return + } + + timer := time.NewTimer(remaining) + + ar.maxRunTimerLock.Lock() + ar.maxRunTimer = timer + ar.maxRunTimerLock.Unlock() + + go func(t *time.Timer) { + select { + case <-t.C: + ar.enforceMaxRunDuration() + case <-ar.waitCh: + if !t.Stop() { + select { + case <-t.C: + 
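+ // The timer fired concurrently with Stop; drain the channel so the + // pending value does not linger.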
default: + } + } + } + }(timer) +} + +func (ar *allocRunner) stopMaxRunTimer() { + ar.resetMaxRunTimer() +} + +func (ar *allocRunner) enforceMaxRunDuration() { + if ar.isShuttingDown() { + return + } + + alloc := ar.Alloc() + if alloc == nil || !alloc.MaxRunDurationExpired(time.Now().UTC()) { + return + } + + ar.stateLock.Lock() + ar.state.MaxRunDurationExceeded = true + ar.state.ClientStatus = structs.AllocClientStatusFailed + ar.state.ClientDescription = structs.AllocTimeoutReasonMaxRunDuration + ar.stateLock.Unlock() + + ar.logger.Debug("allocation exceeded max_run_duration, shutting down tasks") + ar.Shutdown() +} + func (ar *allocRunner) destroyImpl() { // Stop any running tasks and persist states in case the client is // shutdown before Destroy finishes. diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index 794923d4819..aa59adc8ccb 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -2676,6 +2676,50 @@ func TestAllocRunner_GetUpdatePriority(t *testing.T) { must.Eq(t, cstructs.AllocUpdatePriorityUrgent, ar.GetUpdatePriority(calloc)) } +func TestAllocRunner_MaxRunDuration_StopsExpiredAlloc(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + task := alloc.Job.TaskGroups[0].Tasks[0] + task.Driver = "mock_driver" + task.Config = map[string]interface{}{ + "run_for": "10s", + } + task.KillTimeout = 10 * time.Millisecond + maxRunDuration := 50 * time.Millisecond + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + conf, cleanup := testAllocRunnerConfig(t, alloc) + defer cleanup() + + arIface, err := NewAllocRunner(conf) + must.NoError(t, err) + ar := arIface.(*allocRunner) + + ar.setAlloc(ar.Alloc().Copy()) + ar.alloc.Job.TaskGroups[0].MaxRunDuration = nil + + go ar.Run() + defer destroy(ar) + + testutil.WaitForResult(func() (bool, error) { + state := ar.AllocState() + if state == nil { + return false, fmt.Errorf("No alloc state") + } + if state.ClientStatus != structs.AllocClientStatusFailed { + return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusFailed) + } + if state.ClientDescription != structs.AllocTimeoutReasonMaxRunDuration { + return false, fmt.Errorf("got description %q; want %q", state.ClientDescription, structs.AllocTimeoutReasonMaxRunDuration) + } + return true, nil + }, func(err error) { + state := ar.AllocState() + t.Fatalf("timed out waiting for alloc runner max_run_duration enforcement: %v; state=%#v", err, state) + }) +} + func TestAllocRunner_setHookStatsHandler(t *testing.T) { ci.Parallel(t) diff --git a/client/allocrunner/state/state.go b/client/allocrunner/state/state.go index 54954711907..89326eddc9b 100644 --- a/client/allocrunner/state/state.go +++ b/client/allocrunner/state/state.go @@ -19,6 +19,11 @@ type State struct { // allocations client state ClientDescription string + // MaxRunDurationExceeded indicates the allocation exceeded its configured + // max_run_duration and should be reported as failed regardless of task exit + // status. 
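+ // This flag is copied along with the rest of the persisted state (see + // Copy below), so the reported failure survives a client restart.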
+ MaxRunDurationExceeded bool + // DeploymentStatus captures the status of the deployment DeploymentStatus *structs.AllocDeploymentStatus @@ -60,11 +65,12 @@ func (s *State) Copy() *State { taskStates[k] = v.Copy() } return &State{ - ClientStatus: s.ClientStatus, - ClientDescription: s.ClientDescription, - DeploymentStatus: s.DeploymentStatus.Copy(), - TaskStates: taskStates, - NetworkStatus: s.NetworkStatus.Copy(), + ClientStatus: s.ClientStatus, + ClientDescription: s.ClientDescription, + MaxRunDurationExceeded: s.MaxRunDurationExceeded, + DeploymentStatus: s.DeploymentStatus.Copy(), + TaskStates: taskStates, + NetworkStatus: s.NetworkStatus.Copy(), } } diff --git a/nomad/batch_timeout_shims.go b/nomad/batch_timeout_shims.go deleted file mode 100644 index 82e7520fdca..00000000000 --- a/nomad/batch_timeout_shims.go +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright IBM Corp. 2015, 2025 -// SPDX-License-Identifier: BUSL-1.1 - -package nomad - -import ( - "github.com/hashicorp/nomad/nomad/batchtimeout" - "github.com/hashicorp/nomad/nomad/structs" -) - -type batchTimeoutRaftShim struct { - s *Server -} - -var _ batchtimeout.RaftApplier = batchTimeoutRaftShim{} - -func (b batchTimeoutRaftShim) ApplyPlanResults(req *structs.ApplyPlanResultsRequest) (uint64, error) { - _, index, err := b.s.raftApply(structs.ApplyPlanResultsRequestType, req) - return index, err -} diff --git a/nomad/batch_timeout_test.go b/nomad/batch_timeout_test.go deleted file mode 100644 index 61e1ef99f1e..00000000000 --- a/nomad/batch_timeout_test.go +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright IBM Corp. 2015, 2025 -// SPDX-License-Identifier: BUSL-1.1 - -package nomad - -import ( - "testing" - "time" - - "github.com/hashicorp/nomad/helper/pointer" - "github.com/hashicorp/nomad/nomad/mock" - "github.com/hashicorp/nomad/nomad/structs" - "github.com/hashicorp/nomad/testutil" - "github.com/shoenig/test/must" -) - -func Test_BatchTimeoutWatcher_StopsExpiredBatchAlloc(t *testing.T) { - t.Parallel() - - srv, cleanup := TestServer(t, nil) - t.Cleanup(cleanup) - - testutil.WaitForLeader(t, srv.RPC) - - job := mock.Job() - job.Type = structs.JobTypeBatch - job.TaskGroups[0].MaxRunDuration = pointer.Of(50 * time.Millisecond) - job.Canonicalize() - - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.Namespace = job.Namespace - alloc.TaskGroup = job.TaskGroups[0].Name - alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.TaskStates = map[string]*structs.TaskState{ - job.TaskGroups[0].Tasks[0].Name: { - State: structs.TaskStateRunning, - StartedAt: time.Now().UTC().Add(-1 * time.Second), - }, - } - - state := srv.fsm.State() - must.NoError(t, state.UpsertJobSummary(998, mock.JobSummary(job.ID))) - must.NoError(t, state.UpsertJob(structs.MsgTypeTestSetup, 999, nil, job)) - must.NoError(t, state.UpsertAllocs(structs.MsgTypeTestSetup, 1000, []*structs.Allocation{alloc})) - - testutil.WaitForResult(func() (bool, error) { - out, err := state.AllocByID(nil, alloc.ID) - if err != nil { - return false, err - } - if out == nil { - return false, nil - } - - return out.DesiredStatus == structs.AllocDesiredStatusStop && - out.DesiredDescription == structs.AllocTimeoutReasonMaxRunDuration && - out.ClientStatus == structs.AllocClientStatusFailed, nil - }, func(err error) { - out, lookupErr := state.AllocByID(nil, alloc.ID) - if lookupErr != nil { - t.Fatalf("failed to lookup alloc after waiting: %v", lookupErr) - } - t.Fatalf("timed out waiting for batch timeout watcher 
to stop alloc: %v; alloc=%#v", err, out) - }) -} - -func Test_BatchTimeoutWatcher_StopsExpiredSysBatchAlloc(t *testing.T) { - t.Parallel() - - srv, cleanup := TestServer(t, nil) - t.Cleanup(cleanup) - - testutil.WaitForLeader(t, srv.RPC) - - job := mock.SystemBatchJob() - job.TaskGroups[0].MaxRunDuration = pointer.Of(50 * time.Millisecond) - job.Canonicalize() - - alloc := mock.SysBatchAlloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.Namespace = job.Namespace - alloc.TaskGroup = job.TaskGroups[0].Name - alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.TaskStates = map[string]*structs.TaskState{ - job.TaskGroups[0].Tasks[0].Name: { - State: structs.TaskStateRunning, - StartedAt: time.Now().UTC().Add(-1 * time.Second), - }, - } - - state := srv.fsm.State() - must.NoError(t, state.UpsertJobSummary(998, mock.JobSummary(job.ID))) - must.NoError(t, state.UpsertJob(structs.MsgTypeTestSetup, 999, nil, job)) - must.NoError(t, state.UpsertAllocs(structs.MsgTypeTestSetup, 1000, []*structs.Allocation{alloc})) - - testutil.WaitForResult(func() (bool, error) { - out, err := state.AllocByID(nil, alloc.ID) - if err != nil { - return false, err - } - if out == nil { - return false, nil - } - - return out.DesiredStatus == structs.AllocDesiredStatusStop && - out.DesiredDescription == structs.AllocTimeoutReasonMaxRunDuration && - out.ClientStatus == structs.AllocClientStatusFailed, nil - }, func(err error) { - out, lookupErr := state.AllocByID(nil, alloc.ID) - if lookupErr != nil { - t.Fatalf("failed to lookup alloc after waiting: %v", lookupErr) - } - t.Fatalf("timed out waiting for batch timeout watcher to stop sysbatch alloc: %v; alloc=%#v", err, out) - }) -} diff --git a/nomad/batchtimeout/watcher.go b/nomad/batchtimeout/watcher.go deleted file mode 100644 index 719a137cc13..00000000000 --- a/nomad/batchtimeout/watcher.go +++ /dev/null @@ -1,235 +0,0 @@ -// Copyright IBM Corp. 2015, 2025 -// SPDX-License-Identifier: BUSL-1.1 - -package batchtimeout - -import ( - "sync" - "time" - - log "github.com/hashicorp/go-hclog" - - "github.com/hashicorp/nomad/nomad/state" - "github.com/hashicorp/nomad/nomad/structs" -) - -const ( - defaultScanInterval = 5 * time.Second -) - -type RaftApplier interface { - ApplyPlanResults(req *structs.ApplyPlanResultsRequest) (uint64, error) -} - -// Watcher scans running batch and sysbatch allocations for max_run_duration -// violations and stops expired allocations via plan results. -// -// The watcher is leader-managed. The enabled flag tracks desired state, while -// running indicates whether the background watch loop is currently active. -type Watcher struct { - logger log.Logger - raft RaftApplier - state *state.StateStore - scanInterval time.Duration - - mu sync.Mutex - enabled bool - stopCh chan struct{} - running bool -} - -func NewWatcher(logger log.Logger, raft RaftApplier, scanInterval time.Duration) *Watcher { - if scanInterval <= 0 { - scanInterval = defaultScanInterval - } - - return &Watcher{ - logger: logger.Named("batch_timeout_watcher"), - raft: raft, - scanInterval: scanInterval, - } -} - -// SetEnabled updates the desired watcher state and starts or stops the -// background loop as needed. 
-func (w *Watcher) SetEnabled(enabled bool, st *state.StateStore) { - w.mu.Lock() - defer w.mu.Unlock() - - if st != nil { - w.state = st - } - - if enabled { - w.enableLocked() - return - } - - w.disableLocked() -} - -func (w *Watcher) enableLocked() { - w.enabled = true - if w.running || w.state == nil { - return - } - - w.stopCh = make(chan struct{}) - w.running = true - go w.watch(w.stopCh) -} - -func (w *Watcher) disableLocked() { - w.enabled = false - if !w.running { - return - } - - close(w.stopCh) - w.stopCh = nil - w.running = false -} - -func (w *Watcher) watch(stopCh <-chan struct{}) { - ticker := time.NewTicker(w.scanInterval) - defer ticker.Stop() - defer w.markStopped(stopCh) - - for { - // Scan immediately so leadership changes do not wait a full interval - // before enforcing timeouts. - w.scan(time.Now().UTC()) - - select { - case <-ticker.C: - case <-stopCh: - return - } - } -} - -func (w *Watcher) markStopped(stopCh <-chan struct{}) { - w.mu.Lock() - defer w.mu.Unlock() - - if w.stopCh == stopCh { - w.stopCh = nil - w.running = false - } -} - -// scan evaluates all allocations in state and stops any that have exceeded -// max_run_duration. -func (w *Watcher) scan(now time.Time) { - st, enabled := w.snapshot() - if !enabled || st == nil { - return - } - - iter, err := st.Allocs(nil, state.SortDefault) - if err != nil { - w.logger.Warn("failed to iterate allocations", "error", err) - return - } - - var stopped []*structs.AllocationDiff - - for { - raw := iter.Next() - if raw == nil { - break - } - - alloc := raw.(*structs.Allocation) - if !shouldStopAlloc(now, alloc) { - continue - } - - stopped = append(stopped, &structs.AllocationDiff{ - ID: alloc.ID, - DesiredDescription: structs.AllocTimeoutReasonMaxRunDuration, - ClientStatus: structs.AllocClientStatusFailed, - }) - } - - if len(stopped) == 0 { - return - } - - req := &structs.ApplyPlanResultsRequest{ - AllocsStopped: stopped, - UpdatedAt: now.UnixNano(), - } - - if _, err := w.raft.ApplyPlanResults(req); err != nil { - w.logger.Warn("failed to stop timed out allocations", "error", err, "count", len(stopped)) - return - } - - w.logger.Debug("stopped timed out allocations", "count", len(stopped)) -} - -func (w *Watcher) snapshot() (*state.StateStore, bool) { - w.mu.Lock() - defer w.mu.Unlock() - - return w.state, w.enabled -} - -func shouldStopAlloc(now time.Time, alloc *structs.Allocation) bool { - // Allocation must exist and still be actively running. - if alloc == nil || alloc.Job == nil { - return false - } - if alloc.DesiredStatus != structs.AllocDesiredStatusRun { - return false - } - if alloc.ClientStatus != structs.AllocClientStatusRunning { - return false - } - if alloc.ClientTerminalStatus() || alloc.ServerTerminalStatus() { - return false - } - - // Job and task group must opt into max_run_duration and support timeout - // enforcement. - tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) - if tg == nil || tg.MaxRunDuration == nil || *tg.MaxRunDuration <= 0 { - return false - } - switch alloc.Job.Type { - case structs.JobTypeBatch, structs.JobTypeSysBatch: - default: - return false - } - - // Allocation must be fully running and past its deadline. - startedAt, ok := allocFullyRunningSince(alloc) - if !ok { - return false - } - - return !startedAt.Add(*tg.MaxRunDuration).After(now) -} - -// allocFullyRunningSince returns the latest StartedAt timestamp across all task -// states, but only when every known task state is running with a non-zero start -// time. 
-func allocFullyRunningSince(alloc *structs.Allocation) (time.Time, bool) { - var latest time.Time - - for _, ts := range alloc.TaskStates { - if ts == nil || ts.State != structs.TaskStateRunning || ts.StartedAt.IsZero() { - return time.Time{}, false - } - if ts.StartedAt.After(latest) { - latest = ts.StartedAt - } - } - - if latest.IsZero() { - return time.Time{}, false - } - - return latest, true -} diff --git a/nomad/batchtimeout/watcher_test.go b/nomad/batchtimeout/watcher_test.go deleted file mode 100644 index 8eed4c651d6..00000000000 --- a/nomad/batchtimeout/watcher_test.go +++ /dev/null @@ -1,387 +0,0 @@ -// Copyright IBM Corp. 2015, 2025 -// SPDX-License-Identifier: BUSL-1.1 - -package batchtimeout - -import ( - "sync" - "testing" - "time" - - "github.com/hashicorp/go-hclog" - "github.com/hashicorp/nomad/helper/pointer" - "github.com/hashicorp/nomad/nomad/mock" - "github.com/hashicorp/nomad/nomad/state" - "github.com/hashicorp/nomad/nomad/structs" - "github.com/shoenig/test/must" -) - -func TestShouldStopAlloc(t *testing.T) { - t.Parallel() - - now := time.Now().UTC() - - cases := []struct { - name string - allocFn func() *structs.Allocation - expected bool - }{ - { - name: "batch alloc times out after max run duration", - allocFn: func() *structs.Allocation { - alloc := mock.Alloc() - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: now.Add(-10 * time.Minute), - }, - } - return alloc - }, - expected: true, - }, - { - name: "sysbatch alloc times out after max run duration", - allocFn: func() *structs.Allocation { - alloc := mock.SysBatchAlloc() - alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.TaskStates = map[string]*structs.TaskState{ - "ping-example": { - State: structs.TaskStateRunning, - StartedAt: now.Add(-10 * time.Minute), - }, - } - return alloc - }, - expected: true, - }, - { - name: "alloc without max run duration does not time out", - allocFn: func() *structs.Allocation { - alloc := mock.Alloc() - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = nil - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: now.Add(-10 * time.Minute), - }, - } - return alloc - }, - expected: false, - }, - { - name: "service alloc does not time out", - allocFn: func() *structs.Allocation { - alloc := mock.Alloc() - alloc.Job.Type = structs.JobTypeService - alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: now.Add(-10 * time.Minute), - }, - } - return alloc - }, - expected: false, - }, - { - name: "pending alloc does not time out", - allocFn: func() *structs.Allocation { - alloc := mock.Alloc() - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) - alloc.ClientStatus = 
structs.AllocClientStatusPending - alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: now.Add(-10 * time.Minute), - }, - } - return alloc - }, - expected: false, - }, - { - name: "stopping alloc does not time out again", - allocFn: func() *structs.Allocation { - alloc := mock.Alloc() - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.DesiredStatus = structs.AllocDesiredStatusStop - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: now.Add(-10 * time.Minute), - }, - } - return alloc - }, - expected: false, - }, - { - name: "alloc within max run duration does not time out", - allocFn: func() *structs.Allocation { - alloc := mock.Alloc() - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(15 * time.Minute) - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: now.Add(-10 * time.Minute), - }, - } - return alloc - }, - expected: false, - }, - { - name: "alloc with no running task state does not time out", - allocFn: func() *structs.Allocation { - alloc := mock.Alloc() - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStatePending, - StartedAt: now.Add(-10 * time.Minute), - }, - } - return alloc - }, - expected: false, - }, - { - name: "alloc with zero start time does not time out", - allocFn: func() *structs.Allocation { - alloc := mock.Alloc() - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - }, - } - return alloc - }, - expected: false, - }, - { - name: "alloc with mixed task states does not time out until all tasks are running", - allocFn: func() *structs.Allocation { - alloc := mock.Alloc() - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, &structs.Task{Name: "sidecar"}) - alloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.DesiredStatus = structs.AllocDesiredStatusRun - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: now.Add(-10 * time.Minute), - }, - "sidecar": { - State: structs.TaskStatePending, - }, - } - return alloc - }, - expected: false, - }, - } - - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - must.Eq(t, tc.expected, shouldStopAlloc(now, tc.allocFn())) - }) - } -} - -func TestAllocRunningSince(t *testing.T) { - t.Parallel() - - now := time.Now().UTC() - - t.Run("returns latest running task start time", func(t *testing.T) { - alloc := mock.Alloc() - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - 
StartedAt: now.Add(-10 * time.Minute), - }, - "sidecar": { - State: structs.TaskStateRunning, - StartedAt: now.Add(-5 * time.Minute), - }, - } - - startedAt, ok := allocFullyRunningSince(alloc) - must.True(t, ok) - must.Eq(t, now.Add(-5*time.Minute), startedAt) - }) - - t.Run("returns false when task states are missing", func(t *testing.T) { - alloc := mock.Alloc() - alloc.TaskStates = nil - - _, ok := allocFullyRunningSince(alloc) - must.False(t, ok) - }) - - t.Run("returns false when any task is not running", func(t *testing.T) { - alloc := mock.Alloc() - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: now.Add(-10 * time.Minute), - }, - "sidecar": { - State: structs.TaskStatePending, - }, - } - - _, ok := allocFullyRunningSince(alloc) - must.False(t, ok) - }) - - t.Run("returns false when any running task has zero start time", func(t *testing.T) { - alloc := mock.Alloc() - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - }, - } - - _, ok := allocFullyRunningSince(alloc) - must.False(t, ok) - }) -} - -func TestWatcherSetEnabled(t *testing.T) { - t.Parallel() - - raft := &mockRaftApplier{} - w := NewWatcher(hclog.NewNullLogger(), raft, time.Hour) - - st := state.TestStateStore(t) - - w.SetEnabled(true, st) - must.True(t, w.enabled) - must.True(t, w.running) - must.NotNil(t, w.stopCh) - - firstStopCh := w.stopCh - - w.SetEnabled(true, st) - must.True(t, w.running) - must.Eq(t, firstStopCh, w.stopCh) - - w.SetEnabled(false, st) - must.False(t, w.enabled) -} - -func TestWatcherScanAppliesTimedOutAllocs(t *testing.T) { - t.Parallel() - - st := state.TestStateStore(t) - raft := &mockRaftApplier{} - w := NewWatcher(hclog.NewNullLogger(), raft, time.Hour) - w.state = st - w.enabled = true - - batchAlloc := mock.Alloc() - batchAlloc.Job.Type = structs.JobTypeBatch - batchAlloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) - batchAlloc.ClientStatus = structs.AllocClientStatusRunning - batchAlloc.DesiredStatus = structs.AllocDesiredStatusRun - batchAlloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: time.Now().UTC().Add(-10 * time.Minute), - }, - } - - sysbatchAlloc := mock.SysBatchAlloc() - sysbatchAlloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(5 * time.Minute) - sysbatchAlloc.ClientStatus = structs.AllocClientStatusRunning - sysbatchAlloc.DesiredStatus = structs.AllocDesiredStatusRun - sysbatchAlloc.TaskStates = map[string]*structs.TaskState{ - "ping-example": { - State: structs.TaskStateRunning, - StartedAt: time.Now().UTC().Add(-10 * time.Minute), - }, - } - - okAlloc := mock.Alloc() - okAlloc.Job.Type = structs.JobTypeBatch - okAlloc.Job.TaskGroups[0].MaxRunDuration = pointer.Of(30 * time.Minute) - okAlloc.ClientStatus = structs.AllocClientStatusRunning - okAlloc.DesiredStatus = structs.AllocDesiredStatusRun - okAlloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: time.Now().UTC().Add(-10 * time.Minute), - }, - } - - must.NoError(t, st.UpsertJob(structs.MsgTypeTestSetup, 100, nil, batchAlloc.Job)) - must.NoError(t, st.UpsertJob(structs.MsgTypeTestSetup, 101, nil, sysbatchAlloc.Job)) - must.NoError(t, st.UpsertJob(structs.MsgTypeTestSetup, 102, nil, okAlloc.Job)) - must.NoError(t, st.UpsertAllocs(structs.MsgTypeTestSetup, 103, []*structs.Allocation{ - batchAlloc, - sysbatchAlloc, - okAlloc, - })) - - w.scan(time.Now().UTC()) - - must.NotNil(t, 
raft.lastReq) - must.SliceLen(t, 2, raft.lastReq.AllocsStopped) - - got := map[string]*structs.AllocationDiff{} - for _, diff := range raft.lastReq.AllocsStopped { - got[diff.ID] = diff - } - - must.MapContainsKey(t, got, batchAlloc.ID) - must.MapContainsKey(t, got, sysbatchAlloc.ID) - must.MapNotContainsKey(t, got, okAlloc.ID) - - must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, got[batchAlloc.ID].DesiredDescription) - must.Eq(t, structs.AllocClientStatusFailed, got[batchAlloc.ID].ClientStatus) - must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, got[sysbatchAlloc.ID].DesiredDescription) - must.Eq(t, structs.AllocClientStatusFailed, got[sysbatchAlloc.ID].ClientStatus) -} - -type mockRaftApplier struct { - mu sync.Mutex - lastReq *structs.ApplyPlanResultsRequest -} - -func (m *mockRaftApplier) ApplyPlanResults(req *structs.ApplyPlanResultsRequest) (uint64, error) { - m.mu.Lock() - defer m.mu.Unlock() - - m.lastReq = req - return 1, nil -} diff --git a/nomad/leader.go b/nomad/leader.go index 1855fc2bda4..26a5f1d9521 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -412,9 +412,6 @@ func (s *Server) establishLeadership(stopCh chan struct{}) error { // Enable the deployment watcher, since we are now the leader s.deploymentWatcher.SetEnabled(true, s.State()) - // Enable the batch timeout watcher, since we are now the leader - s.batchTimeoutWatcher.SetEnabled(true, s.State()) - // Enable the NodeDrainer s.nodeDrainer.SetEnabled(true, s.State()) diff --git a/nomad/server.go b/nomad/server.go index ba49483be79..c45047dffcb 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -43,7 +43,6 @@ import ( "github.com/hashicorp/nomad/helper/tlsutil" "github.com/hashicorp/nomad/lib/auth/oidc" "github.com/hashicorp/nomad/nomad/auth" - "github.com/hashicorp/nomad/nomad/batchtimeout" "github.com/hashicorp/nomad/nomad/deploymentwatcher" "github.com/hashicorp/nomad/nomad/drainer" "github.com/hashicorp/nomad/nomad/lock" @@ -230,10 +229,6 @@ type Server struct { // make the required calls to continue to transition the deployment. deploymentWatcher *deploymentwatcher.Watcher - // batchTimeoutWatcher is used to stop batch and sysbatch allocations that - // exceed their configured max_run_duration. - batchTimeoutWatcher *batchtimeout.Watcher - // nodeDrainer is used to drain allocations from nodes. nodeDrainer *drainer.NodeDrainer @@ -532,9 +527,6 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, consulConfigFunc // updates when the processes SetEnabled is triggered. go s.evalBroker.enabledNotifier.Run() - // Setup the batch timeout watcher. - s.setupBatchTimeoutWatcher() - // Setup the node drainer. s.setupNodeDrainer() @@ -1192,11 +1184,6 @@ func (s *Server) setupVolumeWatcher() error { // setupNodeDrainer creates a node drainer which will be enabled when a server // becomes a leader. 
-func (s *Server) setupBatchTimeoutWatcher() { - shim := batchTimeoutRaftShim{s} - s.batchTimeoutWatcher = batchtimeout.NewWatcher(s.logger, shim, 0) -} - func (s *Server) setupNodeDrainer() { // Create a shim around Raft requests shim := drainerShim{s} diff --git a/nomad/state/events.go b/nomad/state/events.go index 0211885165d..3470e06bcf8 100644 --- a/nomad/state/events.go +++ b/nomad/state/events.go @@ -358,10 +358,9 @@ func eventFromChange(change memdb.Change) (structs.Event, bool) { alloc.Job = nil allocEvent := &structs.AllocationEvent{Allocation: alloc} - if alloc.DesiredStatus == structs.AllocDesiredStatusStop && - alloc.DesiredDescription == structs.AllocTimeoutReasonMaxRunDuration { + if alloc.ClientDescription == structs.AllocTimeoutReasonMaxRunDuration { allocEvent.Timeout = true - allocEvent.TimeoutReason = alloc.DesiredDescription + allocEvent.TimeoutReason = alloc.ClientDescription } return structs.Event{ diff --git a/nomad/state/events_test.go b/nomad/state/events_test.go index 73b9d1ed26f..57dad2710b2 100644 --- a/nomad/state/events_test.go +++ b/nomad/state/events_test.go @@ -557,15 +557,16 @@ func TestEventsFromChanges_ApplyPlanResultsRequestType_TimeoutStoppedAlloc(t *te stoppedAlloc := mock.Alloc() stoppedAlloc.Job = mockJob stoppedAlloc.JobID = mockJob.ID + stoppedAlloc.ClientDescription = structs.AllocTimeoutReasonMaxRunDuration must.NoError(t, s.UpsertAllocs(structs.MsgTypeTestSetup, 10, []*structs.Allocation{stoppedAlloc})) msgType := structs.ApplyPlanResultsRequestType req := &structs.ApplyPlanResultsRequest{ AllocsStopped: []*structs.AllocationDiff{{ - ID: stoppedAlloc.ID, - DesiredDescription: structs.AllocTimeoutReasonMaxRunDuration, - ClientStatus: structs.AllocClientStatusFailed, + ID: stoppedAlloc.ID, + ClientStatus: structs.AllocClientStatusFailed, + ClientDescription: structs.AllocTimeoutReasonMaxRunDuration, }}, Job: mockJob, } @@ -588,7 +589,7 @@ func TestEventsFromChanges_ApplyPlanResultsRequestType_TimeoutStoppedAlloc(t *te must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, allocEvent.TimeoutReason) must.Eq(t, structs.AllocDesiredStatusStop, allocEvent.Allocation.DesiredStatus) must.Eq(t, structs.AllocClientStatusFailed, allocEvent.Allocation.ClientStatus) - must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, allocEvent.Allocation.DesiredDescription) + must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, allocEvent.Allocation.ClientDescription) } func TestEventsFromChanges_BatchNodeUpdateDrainRequestType(t *testing.T) { diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 3c452d6f5c5..2232dd6ca82 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -4303,7 +4303,9 @@ func (s *StateStore) upsertAllocsImpl(index uint64, allocs []*structs.Allocation } default: alloc.ClientStatus = exist.ClientStatus - alloc.ClientDescription = exist.ClientDescription + if alloc.ClientDescription == "" { + alloc.ClientDescription = exist.ClientDescription + } } // The job has been denormalized so re-attach the original job diff --git a/nomad/structs/alloc.go b/nomad/structs/alloc.go index 56c4df7ae96..7af3e6683b8 100644 --- a/nomad/structs/alloc.go +++ b/nomad/structs/alloc.go @@ -361,6 +361,87 @@ func (a *Allocation) TerminalStatus() bool { return a.ServerTerminalStatus() || a.ClientTerminalStatus() } +// MaxRunDuration returns the configured max_run_duration for the allocation's +// task group, if any. 
+func (a *Allocation) MaxRunDuration() (time.Duration, bool) { + if a == nil || a.Job == nil { + return 0, false + } + + tg := a.Job.LookupTaskGroup(a.TaskGroup) + if tg == nil || tg.MaxRunDuration == nil || *tg.MaxRunDuration <= 0 { + return 0, false + } + + switch a.Job.Type { + case JobTypeBatch, JobTypeSysBatch: + return *tg.MaxRunDuration, true + default: + return 0, false + } +} + +// FullyRunningSince returns the latest StartedAt timestamp across all task +// states, but only when every known task state is running with a non-zero start +// time. +func (a *Allocation) FullyRunningSince() (time.Time, bool) { + if a == nil { + return time.Time{}, false + } + + var latest time.Time + + for _, ts := range a.TaskStates { + if ts == nil || ts.State != TaskStateRunning || ts.StartedAt.IsZero() { + return time.Time{}, false + } + if ts.StartedAt.After(latest) { + latest = ts.StartedAt + } + } + + if latest.IsZero() { + return time.Time{}, false + } + + return latest, true +} + +// MaxRunDurationDeadline returns the deadline at which the allocation should be +// considered timed out based on max_run_duration. +func (a *Allocation) MaxRunDurationDeadline() (time.Time, bool) { + maxRunDuration, ok := a.MaxRunDuration() + if !ok { + return time.Time{}, false + } + + startedAt, ok := a.FullyRunningSince() + if !ok { + return time.Time{}, false + } + + return startedAt.Add(maxRunDuration), true +} + +// MaxRunDurationExpired returns whether the allocation has exceeded its +// configured max_run_duration. +func (a *Allocation) MaxRunDurationExpired(now time.Time) bool { + if a == nil || a.DesiredStatus != AllocDesiredStatusRun || a.ClientStatus != AllocClientStatusRunning { + return false + } + + if a.ClientTerminalStatus() || a.ServerTerminalStatus() { + return false + } + + deadline, ok := a.MaxRunDurationDeadline() + if !ok { + return false + } + + return !deadline.After(now) +} + // ServerTerminalStatus returns true if the desired state of the allocation is terminal func (a *Allocation) ServerTerminalStatus() bool { switch a.DesiredStatus { From 2a7fc3bdeeab6672c124b39eccfe36279b932733 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Wed, 8 Apr 2026 08:47:02 +0200 Subject: [PATCH 10/37] remove nomad/batchtimeout --- ci/test-core.json | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/test-core.json b/ci/test-core.json index 8e443e94180..4756a5ed233 100644 --- a/ci/test-core.json +++ b/ci/test-core.json @@ -34,7 +34,6 @@ "jobspec2/...", "lib/...", "nomad/auth/...", - "nomad/batchtimeout/...", "nomad/deploymentwatcher/...", "nomad/drainer/...", "nomad/reporting/...", From e9d30e78506fb001a05a2a6aac546b5bb2f5a96c Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Wed, 8 Apr 2026 17:05:23 +0200 Subject: [PATCH 11/37] timer tweaks --- client/allocrunner/alloc_runner.go | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 8ac89516edf..8e973b06a4e 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -1105,7 +1105,12 @@ func (ar *allocRunner) resetMaxRunTimer() { } func (ar *allocRunner) resetMaxRunTimerWithAlloc(alloc *structs.Allocation) { - ar.resetMaxRunTimer() + ar.maxRunTimerLock.Lock() + if ar.maxRunTimer != nil { + ar.maxRunTimer.Stop() + ar.maxRunTimer = nil + } + ar.maxRunTimerLock.Unlock() if alloc == nil || alloc.TerminalStatus() || 
alloc.DesiredStatus != structs.AllocDesiredStatusRun {
 		return
@@ -1131,8 +1136,19 @@ func (ar *allocRunner) resetMaxRunTimerWithAlloc(alloc *structs.Allocation) {
 	go func(t *time.Timer) {
 		select {
 		case <-t.C:
+			ar.maxRunTimerLock.Lock()
+			if ar.maxRunTimer != t {
+				ar.maxRunTimerLock.Unlock()
+				return
+			}
+			ar.maxRunTimer = nil
+			ar.maxRunTimerLock.Unlock()
+
 			ar.enforceMaxRunDuration()
 		case <-ar.waitCh:
+			ar.maxRunTimerLock.Lock()
+			ar.maxRunTimerLock.Unlock()
+
 			if !t.Stop() {
 				select {
 				case <-t.C:
@@ -1159,7 +1175,7 @@ func (ar *allocRunner) enforceMaxRunDuration() {
 
 	ar.stateLock.Lock()
 	ar.state.MaxRunDurationExceeded = true
-	ar.state.ClientStatus = structs.AllocClientStatusFailed
+	ar.state.ClientStatus = structs.AllocClientStatusComplete
 	ar.state.ClientDescription = structs.AllocTimeoutReasonMaxRunDuration
 	ar.stateLock.Unlock()
 
From d560d47b1c3158cbc23f709db4a880595eeea558 Mon Sep 17 00:00:00 2001
From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com>
Date: Wed, 8 Apr 2026 18:43:24 +0200
Subject: [PATCH 12/37] max_run_duration_hook

---
 client/allocrunner/alloc_runner.go          | 135 ++++------
 client/allocrunner/alloc_runner_hooks.go    |   6 +-
 client/allocrunner/alloc_runner_test.go     |  63 ++++-
 .../interfaces/runner_lifecycle.go          |  12 +-
 client/allocrunner/max_run_duration_hook.go | 246 ++++++++++++++++++
 client/allocrunner/state/state.go           |   4 +-
 command/agent/job_endpoint.go               |   4 +
 nomad/structs/structs.go                    |   4 +
 8 files changed, 382 insertions(+), 92 deletions(-)
 create mode 100644 client/allocrunner/max_run_duration_hook.go

diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go
index 8e973b06a4e..f86b11fda39 100644
--- a/client/allocrunner/alloc_runner.go
+++ b/client/allocrunner/alloc_runner.go
@@ -131,10 +131,6 @@ type allocRunner struct {
 	// and serializes Shutdown/Destroy calls.
 	destroyedLock sync.Mutex
 
-	// maxRunTimer is used to enforce task group max_run_duration locally.
-	maxRunTimer     *time.Timer
-	maxRunTimerLock sync.Mutex
-
 	// Alloc captures the allocation being run.
 	alloc     *structs.Allocation
 	allocLock sync.RWMutex
@@ -376,7 +372,6 @@ func (ar *allocRunner) WaitCh() <-chan struct{} {
 func (ar *allocRunner) Run() {
 	// Close the wait channel on return
 	defer close(ar.waitCh)
-	defer ar.stopMaxRunTimer()
 
 	// Start the task state update handler
 	go ar.handleTaskStateUpdates()
@@ -477,8 +472,6 @@ func (ar *allocRunner) GetAllocDir() allocdir.Interface {
 // Restore state from database. Must be called after NewAllocRunner but before
 // Run.
 func (ar *allocRunner) Restore() error {
-	defer ar.resetMaxRunTimer()
-
 	// Retrieve deployment status to avoid resetting it across agent
 	// restarts. Once a deployment status is set Nomad no longer monitors
 	// alloc health, so we must persist deployment state across restarts.
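The timer handling in these patches leans on a standard Go idiom: a timer that may already have fired must be stopped and its channel drained before the reference is dropped, or a later receive can observe a stale tick. A minimal standalone sketch of that idiom, distilled from the goroutines above (the helper name is illustrative, not part of the patch):

    // stopAndDrain stops t and, if it already fired, drains the pending tick
    // so no later receive on t.C sees a stale value. This is only safe while
    // no other goroutine is concurrently receiving from t.C.
    func stopAndDrain(t *time.Timer) {
        if !t.Stop() {
            select {
            case <-t.C:
            default:
            }
        }
    }

The ownership check in the fired branch (comparing the stored timer pointer against the local one under the lock) serves the same purpose from the other side: a goroutine whose timer has been superseded by a reset must not enforce the timeout.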
@@ -696,7 +689,28 @@ func (ar *allocRunner) handleTaskStateUpdates() { // Get the client allocation calloc := ar.clientAlloc(states) - ar.resetMaxRunTimerWithAlloc(calloc) + hookAlloc := calloc + if hookAlloc != nil { + hookAlloc = hookAlloc.Copy() + serverAlloc := ar.Alloc() + hookAlloc.Job = serverAlloc.Job + hookAlloc.TaskGroup = serverAlloc.TaskGroup + hookAlloc.DesiredStatus = serverAlloc.DesiredStatus + } + + req := &interfaces.RunnerUpdateRequest{ + Alloc: hookAlloc, + TaskStates: states, + } + for _, hook := range ar.runnerHooks { + h, ok := hook.(interfaces.RunnerTaskStateHook) + if !ok { + continue + } + if err := h.TaskStateUpdated(req); err != nil { + ar.logger.Error("error running task state hook", "hook", h.Name(), "error", err) + } + } // Update the server ar.stateUpdater.AllocStateUpdated(calloc) @@ -746,8 +760,14 @@ func (ar *allocRunner) killTasks() map[string]*structs.TaskState { return nil } - return structs.NewTaskEvent(structs.TaskKilling). + event := structs.NewTaskEvent(structs.TaskKilling). SetKillTimeout(tr.Task().KillTimeout, ar.clientConfig.MaxKillTimeout) + + if ar.state.MaxRunDurationExceeded { + event.SetDisplayMessage(structs.AllocTimeoutReasonMaxRunDuration) + } + + return event } // Kill leader first, synchronously @@ -855,7 +875,7 @@ func (ar *allocRunner) clientAlloc(taskStates map[string]*structs.TaskState) *st // Compute the ClientStatus if ar.state.MaxRunDurationExceeded { - a.ClientStatus = structs.AllocClientStatusFailed + a.ClientStatus = structs.AllocClientStatusComplete a.ClientDescription = structs.AllocTimeoutReasonMaxRunDuration } else if ar.state.ClientStatus != "" { // The client status is being forced @@ -1063,12 +1083,25 @@ func (ar *allocRunner) handleAllocUpdates() { // If there is already a pending update it will be discarded and replaced by // the latest update. 
func (ar *allocRunner) handleAllocUpdate(update *structs.Allocation) { + current := ar.Alloc() + // Detect Stop updates - stopping := !ar.Alloc().TerminalStatus() && update.TerminalStatus() + stopping := !current.TerminalStatus() && update.TerminalStatus() + + if update.Job == nil && current != nil && current.Job != nil { + update = update.Copy() + update.Job = current.Job.Copy() + if update.TaskGroup == "" { + update.TaskGroup = current.TaskGroup + } + if update.DesiredStatus == "" { + update.DesiredStatus = current.DesiredStatus + } + + } // Update ar.alloc ar.setAlloc(update) - ar.resetMaxRunTimer() // Run update hooks if not stopping or dead if !update.TerminalStatus() { @@ -1094,82 +1127,16 @@ func (ar *allocRunner) Listener() *cstructs.AllocListener { return ar.allocBroadcaster.Listen() } -func (ar *allocRunner) resetMaxRunTimer() { - ar.maxRunTimerLock.Lock() - defer ar.maxRunTimerLock.Unlock() - - if ar.maxRunTimer != nil { - ar.maxRunTimer.Stop() - ar.maxRunTimer = nil - } -} - -func (ar *allocRunner) resetMaxRunTimerWithAlloc(alloc *structs.Allocation) { - ar.maxRunTimerLock.Lock() - if ar.maxRunTimer != nil { - ar.maxRunTimer.Stop() - ar.maxRunTimer = nil - } - ar.maxRunTimerLock.Unlock() - - if alloc == nil || alloc.TerminalStatus() || alloc.DesiredStatus != structs.AllocDesiredStatusRun { - return - } +func (ar *allocRunner) EnforceMaxRunDurationTimeout(deadline time.Time) { + now := time.Now().UTC() - deadline, ok := alloc.MaxRunDurationDeadline() - if !ok { - return - } + if ar.isShuttingDown() { - remaining := time.Until(deadline) - if remaining <= 0 { - go ar.enforceMaxRunDuration() return } - timer := time.NewTimer(remaining) - - ar.maxRunTimerLock.Lock() - ar.maxRunTimer = timer - ar.maxRunTimerLock.Unlock() - - go func(t *time.Timer) { - select { - case <-t.C: - ar.maxRunTimerLock.Lock() - if ar.maxRunTimer != t { - ar.maxRunTimerLock.Unlock() - return - } - ar.maxRunTimer = nil - ar.maxRunTimerLock.Unlock() - - ar.enforceMaxRunDuration() - case <-ar.waitCh: - ar.maxRunTimerLock.Lock() - ar.maxRunTimerLock.Unlock() - - if !t.Stop() { - select { - case <-t.C: - default: - } - } - } - }(timer) -} - -func (ar *allocRunner) stopMaxRunTimer() { - ar.resetMaxRunTimer() -} - -func (ar *allocRunner) enforceMaxRunDuration() { - if ar.isShuttingDown() { - return - } + if now.Before(deadline) { - alloc := ar.Alloc() - if alloc == nil || !alloc.MaxRunDurationExpired(time.Now().UTC()) { return } @@ -1179,8 +1146,8 @@ func (ar *allocRunner) enforceMaxRunDuration() { ar.state.ClientDescription = structs.AllocTimeoutReasonMaxRunDuration ar.stateLock.Unlock() - ar.logger.Debug("allocation exceeded max_run_duration, shutting down tasks") - ar.Shutdown() + ar.logger.Debug("allocation exceeded max_run_duration, killing tasks", "deadline", deadline) + ar.killTasks() } func (ar *allocRunner) destroyImpl() { diff --git a/client/allocrunner/alloc_runner_hooks.go b/client/allocrunner/alloc_runner_hooks.go index abc2898b08b..e18e7fcdda8 100644 --- a/client/allocrunner/alloc_runner_hooks.go +++ b/client/allocrunner/alloc_runner_hooks.go @@ -111,6 +111,7 @@ func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error { ar.runnerHooks = []interfaces.RunnerHook{ newIdentityHook(hookLogger, ar.widmgr), newAllocDirHook(hookLogger, ar.allocDir), + newMaxRunDurationHook(hookLogger, alloc, ar), newConsulHook(consulHookConfig{ alloc: ar.alloc, allocdir: ar.allocDir, @@ -224,8 +225,9 @@ func (ar *allocRunner) update(update *structs.Allocation) error { 
).SetAllocDir(ar.allocDir.AllocDirPath()).Build() req := &interfaces.RunnerUpdateRequest{ - Alloc: update, - AllocEnv: allocEnv, + Alloc: update, + AllocEnv: allocEnv, + TaskStates: ar.AllocState().TaskStates, } var merr multierror.Error diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index aa59adc8ccb..28bc7f64f79 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -2707,8 +2707,8 @@ func TestAllocRunner_MaxRunDuration_StopsExpiredAlloc(t *testing.T) { if state == nil { return false, fmt.Errorf("No alloc state") } - if state.ClientStatus != structs.AllocClientStatusFailed { - return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusFailed) + if state.ClientStatus != structs.AllocClientStatusComplete { + return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusComplete) } if state.ClientDescription != structs.AllocTimeoutReasonMaxRunDuration { return false, fmt.Errorf("got description %q; want %q", state.ClientDescription, structs.AllocTimeoutReasonMaxRunDuration) @@ -2720,6 +2720,65 @@ func TestAllocRunner_MaxRunDuration_StopsExpiredAlloc(t *testing.T) { }) } +func TestAllocRunner_MaxRunDuration_PreservesOriginalJobAcrossAllocUpdatesWithoutJob(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + task := alloc.Job.TaskGroups[0].Tasks[0] + task.Driver = "mock_driver" + task.Config = map[string]interface{}{ + "run_for": "10s", + } + task.KillTimeout = 10 * time.Millisecond + maxRunDuration := 50 * time.Millisecond + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + conf, cleanup := testAllocRunnerConfig(t, alloc) + defer cleanup() + + arIface, err := NewAllocRunner(conf) + must.NoError(t, err) + ar := arIface.(*allocRunner) + + go ar.Run() + defer destroy(ar) + + testutil.WaitForResult(func() (bool, error) { + state := ar.AllocState() + if state == nil { + return false, fmt.Errorf("No alloc state") + } + if state.ClientStatus != structs.AllocClientStatusRunning { + return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusRunning) + } + return true, nil + }, func(err error) { + state := ar.AllocState() + t.Fatalf("timed out waiting for alloc runner to start: %v; state=%#v", err, state) + }) + + update := ar.Alloc().CopySkipJob() + update.AllocModifyIndex++ + ar.Update(update) + + testutil.WaitForResult(func() (bool, error) { + state := ar.AllocState() + if state == nil { + return false, fmt.Errorf("No alloc state") + } + if state.ClientStatus != structs.AllocClientStatusComplete { + return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusComplete) + } + if state.ClientDescription != structs.AllocTimeoutReasonMaxRunDuration { + return false, fmt.Errorf("got description %q; want %q", state.ClientDescription, structs.AllocTimeoutReasonMaxRunDuration) + } + return true, nil + }, func(err error) { + state := ar.AllocState() + t.Fatalf("timed out waiting for alloc runner max_run_duration enforcement after alloc update without job: %v; state=%#v", err, state) + }) +} + func TestAllocRunner_setHookStatsHandler(t *testing.T) { ci.Parallel(t) diff --git a/client/allocrunner/interfaces/runner_lifecycle.go b/client/allocrunner/interfaces/runner_lifecycle.go index 9e1f0e43852..f46c2f3ee1b 100644 --- a/client/allocrunner/interfaces/runner_lifecycle.go +++ b/client/allocrunner/interfaces/runner_lifecycle.go @@ -56,8 +56,16 @@ type 
RunnerUpdateHook interface { } type RunnerUpdateRequest struct { - Alloc *structs.Allocation - AllocEnv *taskenv.TaskEnv + Alloc *structs.Allocation + AllocEnv *taskenv.TaskEnv + TaskStates map[string]*structs.TaskState +} + +// A RunnerTaskStateHook is notified when task state changes are synchronized +// into the alloc runner's client allocation view. +type RunnerTaskStateHook interface { + RunnerHook + TaskStateUpdated(*RunnerUpdateRequest) error } // A RunnerTaskRestartHook is executed just before the allocation runner is diff --git a/client/allocrunner/max_run_duration_hook.go b/client/allocrunner/max_run_duration_hook.go new file mode 100644 index 00000000000..ff0e493e76e --- /dev/null +++ b/client/allocrunner/max_run_duration_hook.go @@ -0,0 +1,246 @@ +// Copyright IBM Corp. 2015, 2025 +// SPDX-License-Identifier: BUSL-1.1 + +package allocrunner + +import ( + "sync" + "time" + + log "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/client/allocrunner/interfaces" + "github.com/hashicorp/nomad/client/taskenv" + "github.com/hashicorp/nomad/nomad/structs" +) + +type maxRunDurationSetter interface { + EnforceMaxRunDurationTimeout(time.Time) +} + +type maxRunDurationHook struct { + logger log.Logger + setter maxRunDurationSetter + + hookLock sync.Mutex + + alloc *structs.Allocation + originalJob *structs.Job + originalTG string + originalWant string + timer *time.Timer +} + +func newMaxRunDurationHook( + logger log.Logger, + alloc *structs.Allocation, + setter maxRunDurationSetter, +) *maxRunDurationHook { + h := &maxRunDurationHook{ + alloc: alloc, + setter: setter, + } + if alloc != nil { + h.originalTG = alloc.TaskGroup + h.originalWant = alloc.DesiredStatus + if alloc.Job != nil { + h.originalJob = alloc.Job.Copy() + } + } + h.logger = logger.Named(h.Name()) + return h +} + +var ( + _ interfaces.RunnerPrerunHook = (*maxRunDurationHook)(nil) + _ interfaces.RunnerUpdateHook = (*maxRunDurationHook)(nil) + _ interfaces.RunnerTaskStateHook = (*maxRunDurationHook)(nil) + _ interfaces.RunnerPostrunHook = (*maxRunDurationHook)(nil) + _ interfaces.ShutdownHook = (*maxRunDurationHook)(nil) +) + +func (h *maxRunDurationHook) Name() string { + return "max_run_duration" +} + +func (h *maxRunDurationHook) Prerun(_ *taskenv.TaskEnv) error { + h.hookLock.Lock() + defer h.hookLock.Unlock() + + h.resetTimerLocked(nil) + return nil +} + +func (h *maxRunDurationHook) Update(req *interfaces.RunnerUpdateRequest) error { + h.hookLock.Lock() + defer h.hookLock.Unlock() + + h.alloc = h.mergeAllocLocked(req.Alloc) + h.resetTimerLocked(req.TaskStates) + return nil +} + +func (h *maxRunDurationHook) TaskStateUpdated(req *interfaces.RunnerUpdateRequest) error { + h.hookLock.Lock() + defer h.hookLock.Unlock() + + if req.Alloc != nil { + h.alloc = h.mergeAllocLocked(req.Alloc) + } + + h.resetTimerLocked(req.TaskStates) + return nil +} + +func (h *maxRunDurationHook) Postrun() error { + h.hookLock.Lock() + defer h.hookLock.Unlock() + + h.stopTimerLocked() + return nil +} + +func (h *maxRunDurationHook) Shutdown() { + _ = h.Postrun() +} + +func (h *maxRunDurationHook) allocWithTaskStatesLocked(taskStates map[string]*structs.TaskState) *structs.Allocation { + if h.alloc == nil { + return nil + } + + alloc := h.alloc.Copy() + if h.originalJob != nil { + alloc.Job = h.originalJob.Copy() + } + if alloc.TaskGroup == "" { + alloc.TaskGroup = h.originalTG + } + if alloc.DesiredStatus == "" { + alloc.DesiredStatus = h.originalWant + } + + alloc.TaskStates = make(map[string]*structs.TaskState, len(taskStates)) + 
for name, ts := range taskStates { + if ts == nil { + alloc.TaskStates[name] = nil + continue + } + alloc.TaskStates[name] = ts.Copy() + } + + return alloc +} + +func (h *maxRunDurationHook) mergeAllocLocked(update *structs.Allocation) *structs.Allocation { + if update == nil { + return h.alloc + } + + merged := update.Copy() + + if merged.TaskGroup != "" { + h.originalTG = merged.TaskGroup + } + if merged.DesiredStatus != "" { + h.originalWant = merged.DesiredStatus + } + + if h.originalJob != nil { + merged.Job = h.originalJob.Copy() + } + if merged.TaskGroup == "" { + merged.TaskGroup = h.originalTG + } + if merged.DesiredStatus == "" { + merged.DesiredStatus = h.originalWant + } + + return merged +} + +func (h *maxRunDurationHook) resetTimerLocked(taskStates map[string]*structs.TaskState) { + h.stopTimerLocked() + + alloc := h.allocWithTaskStatesLocked(taskStates) + if alloc == nil { + return + } + + if alloc.TerminalStatus() { + return + } + + if alloc.DesiredStatus != "" && alloc.DesiredStatus != structs.AllocDesiredStatusRun { + return + } + + maxRunDuration, ok := alloc.MaxRunDuration() + if !ok { + return + } + + startedAt, ok := fullyRunningSince(taskStates) + if !ok { + return + } + + deadline := startedAt.Add(maxRunDuration) + remaining := time.Until(deadline) + if remaining <= 0 { + go h.setter.EnforceMaxRunDurationTimeout(deadline) + return + } + + timer := time.NewTimer(remaining) + h.timer = timer + + go func(t *time.Timer, deadline time.Time) { + <-t.C + + h.hookLock.Lock() + if h.timer != t { + h.hookLock.Unlock() + return + } + h.timer = nil + h.hookLock.Unlock() + + h.setter.EnforceMaxRunDurationTimeout(deadline) + }(timer, deadline) +} + +func (h *maxRunDurationHook) stopTimerLocked() { + if h.timer == nil { + return + } + + if !h.timer.Stop() { + select { + case <-h.timer.C: + default: + } + } + h.timer = nil +} + +func fullyRunningSince(taskStates map[string]*structs.TaskState) (time.Time, bool) { + if len(taskStates) == 0 { + return time.Time{}, false + } + + var latest time.Time + for _, ts := range taskStates { + if ts == nil || ts.State != structs.TaskStateRunning || ts.StartedAt.IsZero() { + return time.Time{}, false + } + if ts.StartedAt.After(latest) { + latest = ts.StartedAt + } + } + + if latest.IsZero() { + return time.Time{}, false + } + + return latest, true +} diff --git a/client/allocrunner/state/state.go b/client/allocrunner/state/state.go index 89326eddc9b..dd1f2327bf9 100644 --- a/client/allocrunner/state/state.go +++ b/client/allocrunner/state/state.go @@ -20,8 +20,8 @@ type State struct { ClientDescription string // MaxRunDurationExceeded indicates the allocation exceeded its configured - // max_run_duration and should be reported as failed regardless of task exit - // status. + // max_run_duration and should be reported as complete with a timeout reason + // regardless of task exit status. 
MaxRunDurationExceeded bool // DeploymentStatus captures the status of the deployment diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index d4baeb9f360..99232856ba8 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -1253,6 +1253,10 @@ func ApiTgToStructsTG(job *structs.Job, taskGroup *api.TaskGroup, tg *structs.Ta tg.ShutdownDelay = taskGroup.ShutdownDelay } + if taskGroup.MaxRunDuration != nil { + tg.MaxRunDuration = taskGroup.MaxRunDuration + } + if taskGroup.ReschedulePolicy != nil { tg.ReschedulePolicy = &structs.ReschedulePolicy{ Attempts: *taskGroup.ReschedulePolicy.Attempts, diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 33ea1b3835d..76576bec5b8 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -6940,6 +6940,10 @@ func (tg *TaskGroup) Copy() *TaskGroup { ntg.ShutdownDelay = tg.ShutdownDelay } + if tg.MaxRunDuration != nil { + ntg.MaxRunDuration = tg.MaxRunDuration + } + return ntg } From 737f23d757f41175943c6090e7e58f890aaccc70 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Wed, 8 Apr 2026 19:00:14 +0200 Subject: [PATCH 13/37] TestJobs_ApiJobToStructsJob --- command/agent/job_endpoint_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/command/agent/job_endpoint_test.go b/command/agent/job_endpoint_test.go index 740126988c1..0abef0b8f70 100644 --- a/command/agent/job_endpoint_test.go +++ b/command/agent/job_endpoint_test.go @@ -2834,6 +2834,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { ProgressDeadline: pointer.Of(5 * time.Minute), AutoRevert: pointer.Of(true), }, + MaxRunDuration: pointer.Of(10 * time.Second), Meta: map[string]string{ "key": "value", }, @@ -3275,6 +3276,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { AutoPromote: false, Canary: 1, }, + MaxRunDuration: pointer.Of(10 * time.Second), Meta: map[string]string{ "key": "value", }, From 48a96521314ad51c313d2c192f91ccd56466b930 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Wed, 8 Apr 2026 19:07:51 +0200 Subject: [PATCH 14/37] don't need that interface --- client/allocrunner/alloc_runner.go | 13 ++++++------- client/allocrunner/alloc_runner_hooks.go | 3 ++- client/allocrunner/interfaces/runner_lifecycle.go | 7 ------- client/allocrunner/max_run_duration_hook.go | 9 ++++----- 4 files changed, 12 insertions(+), 20 deletions(-) diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index f86b11fda39..e86961f36f9 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -152,6 +152,9 @@ type allocRunner struct { // transitions. runnerHooks []interfaces.RunnerHook + // maxRunDurationHook handles max_run_duration updates from task state changes. 
+ maxRunDurationHook *maxRunDurationHook + // hookResources holds the output from allocrunner hooks so that later // allocrunner hooks or task runner hooks can read them hookResources *cstructs.AllocHookResources @@ -702,13 +705,9 @@ func (ar *allocRunner) handleTaskStateUpdates() { Alloc: hookAlloc, TaskStates: states, } - for _, hook := range ar.runnerHooks { - h, ok := hook.(interfaces.RunnerTaskStateHook) - if !ok { - continue - } - if err := h.TaskStateUpdated(req); err != nil { - ar.logger.Error("error running task state hook", "hook", h.Name(), "error", err) + if ar.maxRunDurationHook != nil { + if err := ar.maxRunDurationHook.TaskStateUpdated(req); err != nil { + ar.logger.Error("error running max run duration hook", "hook", ar.maxRunDurationHook.Name(), "error", err) } } diff --git a/client/allocrunner/alloc_runner_hooks.go b/client/allocrunner/alloc_runner_hooks.go index e18e7fcdda8..66ff12e4876 100644 --- a/client/allocrunner/alloc_runner_hooks.go +++ b/client/allocrunner/alloc_runner_hooks.go @@ -107,11 +107,12 @@ func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error { // Create the alloc directory hook. This is run first to ensure the // directory path exists for other hooks. alloc := ar.Alloc() + ar.maxRunDurationHook = newMaxRunDurationHook(hookLogger, alloc, ar) ar.runnerHooks = []interfaces.RunnerHook{ newIdentityHook(hookLogger, ar.widmgr), newAllocDirHook(hookLogger, ar.allocDir), - newMaxRunDurationHook(hookLogger, alloc, ar), + ar.maxRunDurationHook, newConsulHook(consulHookConfig{ alloc: ar.alloc, allocdir: ar.allocDir, diff --git a/client/allocrunner/interfaces/runner_lifecycle.go b/client/allocrunner/interfaces/runner_lifecycle.go index f46c2f3ee1b..6267c978e00 100644 --- a/client/allocrunner/interfaces/runner_lifecycle.go +++ b/client/allocrunner/interfaces/runner_lifecycle.go @@ -61,13 +61,6 @@ type RunnerUpdateRequest struct { TaskStates map[string]*structs.TaskState } -// A RunnerTaskStateHook is notified when task state changes are synchronized -// into the alloc runner's client allocation view. -type RunnerTaskStateHook interface { - RunnerHook - TaskStateUpdated(*RunnerUpdateRequest) error -} - // A RunnerTaskRestartHook is executed just before the allocation runner is // going to restart all tasks. 
type RunnerTaskRestartHook interface { diff --git a/client/allocrunner/max_run_duration_hook.go b/client/allocrunner/max_run_duration_hook.go index ff0e493e76e..c276afac361 100644 --- a/client/allocrunner/max_run_duration_hook.go +++ b/client/allocrunner/max_run_duration_hook.go @@ -51,11 +51,10 @@ func newMaxRunDurationHook( } var ( - _ interfaces.RunnerPrerunHook = (*maxRunDurationHook)(nil) - _ interfaces.RunnerUpdateHook = (*maxRunDurationHook)(nil) - _ interfaces.RunnerTaskStateHook = (*maxRunDurationHook)(nil) - _ interfaces.RunnerPostrunHook = (*maxRunDurationHook)(nil) - _ interfaces.ShutdownHook = (*maxRunDurationHook)(nil) + _ interfaces.RunnerPrerunHook = (*maxRunDurationHook)(nil) + _ interfaces.RunnerUpdateHook = (*maxRunDurationHook)(nil) + _ interfaces.RunnerPostrunHook = (*maxRunDurationHook)(nil) + _ interfaces.ShutdownHook = (*maxRunDurationHook)(nil) ) func (h *maxRunDurationHook) Name() string { From eda5a4f4e07c1a28755d9d80948d88eafba45ec6 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Thu, 9 Apr 2026 17:27:05 +0200 Subject: [PATCH 15/37] it's actually not a hook --- client/allocrunner/alloc_runner.go | 41 ++- client/allocrunner/alloc_runner_hooks.go | 2 - client/allocrunner/alloc_runner_test.go | 3 - client/allocrunner/max_run_duration_hook.go | 245 -------------- .../tasklifecycle/max_run_duration.go | 161 ++++++++++ .../tasklifecycle/max_run_duration_test.go | 300 ++++++++++++++++++ 6 files changed, 490 insertions(+), 262 deletions(-) delete mode 100644 client/allocrunner/max_run_duration_hook.go create mode 100644 client/allocrunner/tasklifecycle/max_run_duration.go create mode 100644 client/allocrunner/tasklifecycle/max_run_duration_test.go diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index e86961f36f9..6a00b97efb9 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -152,8 +152,9 @@ type allocRunner struct { // transitions. runnerHooks []interfaces.RunnerHook - // maxRunDurationHook handles max_run_duration updates from task state changes. - maxRunDurationHook *maxRunDurationHook + // maxRunDuration coordinates alloc-level max_run_duration enforcement from + // authoritative alloc and task state updates. 
+ maxRunDuration *tasklifecycle.MaxRunDuration // hookResources holds the output from allocrunner hooks so that later // allocrunner hooks or task runner hooks can read them @@ -295,6 +296,7 @@ func NewAllocRunner(config *config.AllocRunnerConfig) (interfaces.AllocRunner, e ) ar.taskCoordinator = tasklifecycle.NewCoordinator(ar.logger, tg.Tasks, ar.waitCh) + ar.maxRunDuration = tasklifecycle.NewMaxRunDuration(ar.logger, ar.alloc, ar) shutdownDelayCtx, shutdownDelayCancel := context.WithCancel(context.Background()) ar.shutdownDelayCtx = shutdownDelayCtx @@ -411,6 +413,10 @@ func (ar *allocRunner) Run() { return } + if ar.maxRunDuration != nil { + ar.maxRunDuration.Stop() + } + // Run the postrun hooks if err := ar.postrun(); err != nil { ar.logger.Error("postrun failed", "error", err) @@ -694,21 +700,27 @@ func (ar *allocRunner) handleTaskStateUpdates() { hookAlloc := calloc if hookAlloc != nil { - hookAlloc = hookAlloc.Copy() serverAlloc := ar.Alloc() - hookAlloc.Job = serverAlloc.Job - hookAlloc.TaskGroup = serverAlloc.TaskGroup - hookAlloc.DesiredStatus = serverAlloc.DesiredStatus + hookAlloc = &structs.Allocation{ + ID: calloc.ID, + TaskStates: calloc.TaskStates, + ClientStatus: calloc.ClientStatus, + ClientDescription: calloc.ClientDescription, + DeploymentStatus: calloc.DeploymentStatus, + NetworkStatus: calloc.NetworkStatus, + Job: serverAlloc.Job, + TaskGroup: serverAlloc.TaskGroup, + DesiredStatus: serverAlloc.DesiredStatus, + } } req := &interfaces.RunnerUpdateRequest{ Alloc: hookAlloc, TaskStates: states, } - if ar.maxRunDurationHook != nil { - if err := ar.maxRunDurationHook.TaskStateUpdated(req); err != nil { - ar.logger.Error("error running max run duration hook", "hook", ar.maxRunDurationHook.Name(), "error", err) - } + if ar.maxRunDuration != nil { + ar.maxRunDuration.SetAlloc(req.Alloc) + ar.maxRunDuration.TaskStateUpdated(req.TaskStates) } // Update the server @@ -1014,7 +1026,7 @@ func (ar *allocRunner) AllocState() *state.State { // If TaskStateUpdated has not been called yet, ar.state.TaskStates // won't be set as it is not the canonical source of TaskStates. if len(state.TaskStates) == 0 { - ar.state.TaskStates = make(map[string]*structs.TaskState, len(ar.tasks)) + state.TaskStates = make(map[string]*structs.TaskState, len(ar.tasks)) for k, tr := range ar.tasks { state.TaskStates[k] = tr.TaskState() } @@ -1324,7 +1336,12 @@ func (ar *allocRunner) Shutdown() { ar.logger.Trace("shutting down") // Shutdown tasks gracefully if they were run - wg := sync.WaitGroup{} + if ar.maxRunDuration != nil { + ar.maxRunDuration.Stop() + } + + // Shutdown task runners + var wg sync.WaitGroup for _, tr := range ar.tasks { wg.Add(1) go func(tr *taskrunner.TaskRunner) { diff --git a/client/allocrunner/alloc_runner_hooks.go b/client/allocrunner/alloc_runner_hooks.go index 66ff12e4876..d57d94167c4 100644 --- a/client/allocrunner/alloc_runner_hooks.go +++ b/client/allocrunner/alloc_runner_hooks.go @@ -107,12 +107,10 @@ func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error { // Create the alloc directory hook. This is run first to ensure the // directory path exists for other hooks. 
alloc := ar.Alloc() - ar.maxRunDurationHook = newMaxRunDurationHook(hookLogger, alloc, ar) ar.runnerHooks = []interfaces.RunnerHook{ newIdentityHook(hookLogger, ar.widmgr), newAllocDirHook(hookLogger, ar.allocDir), - ar.maxRunDurationHook, newConsulHook(consulHookConfig{ alloc: ar.alloc, allocdir: ar.allocDir, diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index 28bc7f64f79..0977e9cff1e 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -2696,9 +2696,6 @@ func TestAllocRunner_MaxRunDuration_StopsExpiredAlloc(t *testing.T) { must.NoError(t, err) ar := arIface.(*allocRunner) - ar.setAlloc(ar.Alloc().Copy()) - ar.alloc.Job.TaskGroups[0].MaxRunDuration = nil - go ar.Run() defer destroy(ar) diff --git a/client/allocrunner/max_run_duration_hook.go b/client/allocrunner/max_run_duration_hook.go deleted file mode 100644 index c276afac361..00000000000 --- a/client/allocrunner/max_run_duration_hook.go +++ /dev/null @@ -1,245 +0,0 @@ -// Copyright IBM Corp. 2015, 2025 -// SPDX-License-Identifier: BUSL-1.1 - -package allocrunner - -import ( - "sync" - "time" - - log "github.com/hashicorp/go-hclog" - "github.com/hashicorp/nomad/client/allocrunner/interfaces" - "github.com/hashicorp/nomad/client/taskenv" - "github.com/hashicorp/nomad/nomad/structs" -) - -type maxRunDurationSetter interface { - EnforceMaxRunDurationTimeout(time.Time) -} - -type maxRunDurationHook struct { - logger log.Logger - setter maxRunDurationSetter - - hookLock sync.Mutex - - alloc *structs.Allocation - originalJob *structs.Job - originalTG string - originalWant string - timer *time.Timer -} - -func newMaxRunDurationHook( - logger log.Logger, - alloc *structs.Allocation, - setter maxRunDurationSetter, -) *maxRunDurationHook { - h := &maxRunDurationHook{ - alloc: alloc, - setter: setter, - } - if alloc != nil { - h.originalTG = alloc.TaskGroup - h.originalWant = alloc.DesiredStatus - if alloc.Job != nil { - h.originalJob = alloc.Job.Copy() - } - } - h.logger = logger.Named(h.Name()) - return h -} - -var ( - _ interfaces.RunnerPrerunHook = (*maxRunDurationHook)(nil) - _ interfaces.RunnerUpdateHook = (*maxRunDurationHook)(nil) - _ interfaces.RunnerPostrunHook = (*maxRunDurationHook)(nil) - _ interfaces.ShutdownHook = (*maxRunDurationHook)(nil) -) - -func (h *maxRunDurationHook) Name() string { - return "max_run_duration" -} - -func (h *maxRunDurationHook) Prerun(_ *taskenv.TaskEnv) error { - h.hookLock.Lock() - defer h.hookLock.Unlock() - - h.resetTimerLocked(nil) - return nil -} - -func (h *maxRunDurationHook) Update(req *interfaces.RunnerUpdateRequest) error { - h.hookLock.Lock() - defer h.hookLock.Unlock() - - h.alloc = h.mergeAllocLocked(req.Alloc) - h.resetTimerLocked(req.TaskStates) - return nil -} - -func (h *maxRunDurationHook) TaskStateUpdated(req *interfaces.RunnerUpdateRequest) error { - h.hookLock.Lock() - defer h.hookLock.Unlock() - - if req.Alloc != nil { - h.alloc = h.mergeAllocLocked(req.Alloc) - } - - h.resetTimerLocked(req.TaskStates) - return nil -} - -func (h *maxRunDurationHook) Postrun() error { - h.hookLock.Lock() - defer h.hookLock.Unlock() - - h.stopTimerLocked() - return nil -} - -func (h *maxRunDurationHook) Shutdown() { - _ = h.Postrun() -} - -func (h *maxRunDurationHook) allocWithTaskStatesLocked(taskStates map[string]*structs.TaskState) *structs.Allocation { - if h.alloc == nil { - return nil - } - - alloc := h.alloc.Copy() - if h.originalJob != nil { - alloc.Job = h.originalJob.Copy() - } - if 
alloc.TaskGroup == "" { - alloc.TaskGroup = h.originalTG - } - if alloc.DesiredStatus == "" { - alloc.DesiredStatus = h.originalWant - } - - alloc.TaskStates = make(map[string]*structs.TaskState, len(taskStates)) - for name, ts := range taskStates { - if ts == nil { - alloc.TaskStates[name] = nil - continue - } - alloc.TaskStates[name] = ts.Copy() - } - - return alloc -} - -func (h *maxRunDurationHook) mergeAllocLocked(update *structs.Allocation) *structs.Allocation { - if update == nil { - return h.alloc - } - - merged := update.Copy() - - if merged.TaskGroup != "" { - h.originalTG = merged.TaskGroup - } - if merged.DesiredStatus != "" { - h.originalWant = merged.DesiredStatus - } - - if h.originalJob != nil { - merged.Job = h.originalJob.Copy() - } - if merged.TaskGroup == "" { - merged.TaskGroup = h.originalTG - } - if merged.DesiredStatus == "" { - merged.DesiredStatus = h.originalWant - } - - return merged -} - -func (h *maxRunDurationHook) resetTimerLocked(taskStates map[string]*structs.TaskState) { - h.stopTimerLocked() - - alloc := h.allocWithTaskStatesLocked(taskStates) - if alloc == nil { - return - } - - if alloc.TerminalStatus() { - return - } - - if alloc.DesiredStatus != "" && alloc.DesiredStatus != structs.AllocDesiredStatusRun { - return - } - - maxRunDuration, ok := alloc.MaxRunDuration() - if !ok { - return - } - - startedAt, ok := fullyRunningSince(taskStates) - if !ok { - return - } - - deadline := startedAt.Add(maxRunDuration) - remaining := time.Until(deadline) - if remaining <= 0 { - go h.setter.EnforceMaxRunDurationTimeout(deadline) - return - } - - timer := time.NewTimer(remaining) - h.timer = timer - - go func(t *time.Timer, deadline time.Time) { - <-t.C - - h.hookLock.Lock() - if h.timer != t { - h.hookLock.Unlock() - return - } - h.timer = nil - h.hookLock.Unlock() - - h.setter.EnforceMaxRunDurationTimeout(deadline) - }(timer, deadline) -} - -func (h *maxRunDurationHook) stopTimerLocked() { - if h.timer == nil { - return - } - - if !h.timer.Stop() { - select { - case <-h.timer.C: - default: - } - } - h.timer = nil -} - -func fullyRunningSince(taskStates map[string]*structs.TaskState) (time.Time, bool) { - if len(taskStates) == 0 { - return time.Time{}, false - } - - var latest time.Time - for _, ts := range taskStates { - if ts == nil || ts.State != structs.TaskStateRunning || ts.StartedAt.IsZero() { - return time.Time{}, false - } - if ts.StartedAt.After(latest) { - latest = ts.StartedAt - } - } - - if latest.IsZero() { - return time.Time{}, false - } - - return latest, true -} diff --git a/client/allocrunner/tasklifecycle/max_run_duration.go b/client/allocrunner/tasklifecycle/max_run_duration.go new file mode 100644 index 00000000000..9390233452d --- /dev/null +++ b/client/allocrunner/tasklifecycle/max_run_duration.go @@ -0,0 +1,161 @@ +// Copyright IBM Corp. 2015, 2025 +// SPDX-License-Identifier: BUSL-1.1 + +package tasklifecycle + +import ( + "sync" + "time" + + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/nomad/structs" +) + +type MaxRunDurationSetter interface { + EnforceMaxRunDurationTimeout(time.Time) +} + +// MaxRunDuration coordinates local enforcement of task group +// `max_run_duration`. +// +// This component is alloc-scoped, not task-scoped. It observes the current +// authoritative allocation plus task state snapshots and arms a timer once the +// allocation is fully running. The timer is canceled and recomputed whenever +// alloc or task state changes are observed. 
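+//
+// An illustrative wiring, as of this patch, assuming a setter that
+// implements MaxRunDurationSetter (here, the alloc runner):
+//
+//	m := NewMaxRunDuration(logger, alloc, setter)
+//	m.TaskStateUpdated(taskStates) // arms, re-arms, or fires immediately
+//	defer m.Stop()                 // cancels any pending timer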
+type MaxRunDuration struct { + mu sync.Mutex + + alloc *structs.Allocation + timer *time.Timer + setter MaxRunDurationSetter + logger hclog.Logger +} + +func NewMaxRunDuration( + logger hclog.Logger, + alloc *structs.Allocation, + setter MaxRunDurationSetter, +) *MaxRunDuration { + return &MaxRunDuration{ + alloc: alloc, + setter: setter, + logger: logger.Named("max_run_duration"), + } +} + +// SetAlloc updates the authoritative allocation used for +// `max_run_duration` evaluation. +func (m *MaxRunDuration) SetAlloc(alloc *structs.Allocation) { + m.mu.Lock() + defer m.mu.Unlock() + + m.alloc = alloc +} + +// TaskStateUpdated recomputes the `max_run_duration` timer from the latest +// alloc and task state snapshot. +func (m *MaxRunDuration) TaskStateUpdated(taskStates map[string]*structs.TaskState) { + m.mu.Lock() + defer m.mu.Unlock() + + m.resetTimerLocked(taskStates) +} + +// Stop cancels any active timer. +func (m *MaxRunDuration) Stop() { + m.mu.Lock() + defer m.mu.Unlock() + + m.stopTimerLocked() +} + +func (m *MaxRunDuration) resetTimerLocked(taskStates map[string]*structs.TaskState) { + m.stopTimerLocked() + + if m.alloc == nil { + return + } + + // Only running allocations with a valid max_run_duration and a fully + // running task set should have an active timer. + if m.alloc.TerminalStatus() { + return + } + + if m.alloc.DesiredStatus != "" && m.alloc.DesiredStatus != structs.AllocDesiredStatusRun { + return + } + + maxRunDuration, ok := m.alloc.MaxRunDuration() + if !ok { + return + } + + startedAt, ok := FullyRunningSince(taskStates) + if !ok { + return + } + + deadline := startedAt.Add(maxRunDuration) + remaining := time.Until(deadline) + if remaining <= 0 { + go m.setter.EnforceMaxRunDurationTimeout(deadline) + return + } + + timer := time.NewTimer(remaining) + m.timer = timer + + go func(t *time.Timer, deadline time.Time) { + <-t.C + + m.mu.Lock() + if m.timer != t { + m.mu.Unlock() + return + } + m.timer = nil + m.mu.Unlock() + + m.setter.EnforceMaxRunDurationTimeout(deadline) + }(timer, deadline) +} + +func (m *MaxRunDuration) stopTimerLocked() { + if m.timer == nil { + return + } + + if !m.timer.Stop() { + select { + case <-m.timer.C: + default: + } + } + m.timer = nil +} + +// FullyRunningSince returns the latest StartedAt timestamp across all task +// states, but only when every known task state is running with a non-zero +// start time. +func FullyRunningSince(taskStates map[string]*structs.TaskState) (time.Time, bool) { + if len(taskStates) == 0 { + return time.Time{}, false + } + + var latest time.Time + for _, ts := range taskStates { + if ts == nil || ts.State != structs.TaskStateRunning || ts.StartedAt.IsZero() { + return time.Time{}, false + } + if ts.StartedAt.After(latest) { + latest = ts.StartedAt + } + } + + if latest.IsZero() { + return time.Time{}, false + } + + return latest, true +} diff --git a/client/allocrunner/tasklifecycle/max_run_duration_test.go b/client/allocrunner/tasklifecycle/max_run_duration_test.go new file mode 100644 index 00000000000..8d488a81dc1 --- /dev/null +++ b/client/allocrunner/tasklifecycle/max_run_duration_test.go @@ -0,0 +1,300 @@ +// Copyright IBM Corp. 
2015, 2025 +// SPDX-License-Identifier: BUSL-1.1 + +package tasklifecycle + +import ( + "testing" + "time" + + log "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" +) + +type testMaxRunDurationSetter struct { + deadlines chan time.Time +} + +func newTestMaxRunDurationSetter() *testMaxRunDurationSetter { + return &testMaxRunDurationSetter{ + deadlines: make(chan time.Time, 8), + } +} + +func (s *testMaxRunDurationSetter) EnforceMaxRunDurationTimeout(deadline time.Time) { + s.deadlines <- deadline +} + +func TestMaxRunDuration_FullyRunningSince(t *testing.T) { + t.Parallel() + + now := time.Now().UTC() + earlier := now.Add(-2 * time.Second) + later := now.Add(-1 * time.Second) + + got, ok := FullyRunningSince(map[string]*structs.TaskState{ + "a": { + State: structs.TaskStateRunning, + StartedAt: earlier, + }, + "b": { + State: structs.TaskStateRunning, + StartedAt: later, + }, + }) + + must.True(t, ok) + must.Eq(t, later, got) +} + +func TestMaxRunDuration_FullyRunningSince_FalseWhenEmpty(t *testing.T) { + t.Parallel() + + got, ok := FullyRunningSince(nil) + must.False(t, ok) + must.Eq(t, time.Time{}, got) +} + +func TestMaxRunDuration_FullyRunningSince_FalseWhenTaskNotRunning(t *testing.T) { + t.Parallel() + + _, ok := FullyRunningSince(map[string]*structs.TaskState{ + "a": { + State: structs.TaskStateRunning, + StartedAt: time.Now().Add(-time.Second).UTC(), + }, + "b": { + State: structs.TaskStatePending, + }, + }) + + must.False(t, ok) +} + +func TestMaxRunDuration_FullyRunningSince_FalseWhenStartedAtMissing(t *testing.T) { + t.Parallel() + + _, ok := FullyRunningSince(map[string]*structs.TaskState{ + "a": { + State: structs.TaskStateRunning, + }, + }) + + must.False(t, ok) +} + +func TestMaxRunDuration_TaskStateUpdated_ArmsTimerAndFires(t *testing.T) { + t.Parallel() + + alloc := mock.BatchAlloc() + maxRunDuration := 50 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + setter := newTestMaxRunDurationSetter() + m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + + startedAt := time.Now().UTC() + m.TaskStateUpdated(map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: startedAt, + }, + }) + + select { + case deadline := <-setter.deadlines: + must.Eq(t, startedAt.Add(maxRunDuration), deadline) + case <-time.After(500 * time.Millisecond): + t.Fatal("timed out waiting for max_run_duration deadline") + } +} + +func TestMaxRunDuration_TaskStateUpdated_ImmediateEnforcementWhenExpired(t *testing.T) { + t.Parallel() + + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + setter := newTestMaxRunDurationSetter() + m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + + startedAt := time.Now().Add(-2 * maxRunDuration).UTC() + m.TaskStateUpdated(map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: startedAt, + }, + }) + + select { + case deadline := <-setter.deadlines: + must.Eq(t, startedAt.Add(maxRunDuration), deadline) + case <-time.After(250 * time.Millisecond): + t.Fatal("timed out waiting for immediate max_run_duration enforcement") + } +} + +func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenNotFullyRunning(t *testing.T) { + t.Parallel() + + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + 
alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + setter := newTestMaxRunDurationSetter() + m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + + m.TaskStateUpdated(map[string]*structs.TaskState{ + "web": { + State: structs.TaskStatePending, + }, + }) + + select { + case deadline := <-setter.deadlines: + t.Fatalf("unexpected deadline fired: %v", deadline) + case <-time.After(100 * time.Millisecond): + } +} + +func TestMaxRunDuration_TaskStateUpdated_DoesNotFireForNonBatchJobs(t *testing.T) { + t.Parallel() + + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + alloc.Job.Type = structs.JobTypeService + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + setter := newTestMaxRunDurationSetter() + m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + + m.TaskStateUpdated(map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: time.Now().UTC(), + }, + }) + + select { + case deadline := <-setter.deadlines: + t.Fatalf("unexpected deadline fired: %v", deadline) + case <-time.After(100 * time.Millisecond): + } +} + +func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenDesiredStatusNotRun(t *testing.T) { + t.Parallel() + + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + alloc.DesiredStatus = structs.AllocDesiredStatusStop + + setter := newTestMaxRunDurationSetter() + m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + + m.TaskStateUpdated(map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: time.Now().UTC(), + }, + }) + + select { + case deadline := <-setter.deadlines: + t.Fatalf("unexpected deadline fired: %v", deadline) + case <-time.After(100 * time.Millisecond): + } +} + +func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenAllocTerminal(t *testing.T) { + t.Parallel() + + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + alloc.ClientStatus = structs.AllocClientStatusComplete + + setter := newTestMaxRunDurationSetter() + m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + + m.TaskStateUpdated(map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: time.Now().UTC(), + }, + }) + + select { + case deadline := <-setter.deadlines: + t.Fatalf("unexpected deadline fired: %v", deadline) + case <-time.After(100 * time.Millisecond): + } +} + +func TestMaxRunDuration_SetAlloc_UsesLatestAllocConfig(t *testing.T) { + t.Parallel() + + alloc := mock.BatchAlloc() + initial := 200 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &initial + + setter := newTestMaxRunDurationSetter() + m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + + updated := alloc.Copy() + latest := 40 * time.Millisecond + updated.Job.TaskGroups[0].MaxRunDuration = &latest + m.SetAlloc(updated) + + startedAt := time.Now().UTC() + m.TaskStateUpdated(map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: startedAt, + }, + }) + + select { + case deadline := <-setter.deadlines: + must.Eq(t, startedAt.Add(latest), deadline) + case <-time.After(500 * time.Millisecond): + t.Fatal("timed out waiting for updated max_run_duration deadline") + } +} + +func TestMaxRunDuration_Stop_CancelsTimer(t 
*testing.T) { + t.Parallel() + + alloc := mock.BatchAlloc() + maxRunDuration := 150 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + setter := newTestMaxRunDurationSetter() + m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + + m.TaskStateUpdated(map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: time.Now().UTC(), + }, + }) + + m.Stop() + + select { + case deadline := <-setter.deadlines: + t.Fatalf("unexpected deadline fired after stop: %v", deadline) + case <-time.After(250 * time.Millisecond): + } +} From ab7565d163e48576c85d06636e17feda230756af Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Thu, 9 Apr 2026 17:31:10 +0200 Subject: [PATCH 16/37] refinements --- client/allocrunner/alloc_runner.go | 2 +- client/allocrunner/alloc_runner_test.go | 2 +- .../tasklifecycle/max_run_duration.go | 4 ---- .../tasklifecycle/max_run_duration_test.go | 17 ++++++++--------- 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 6a00b97efb9..392e1b5f43c 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -296,7 +296,7 @@ func NewAllocRunner(config *config.AllocRunnerConfig) (interfaces.AllocRunner, e ) ar.taskCoordinator = tasklifecycle.NewCoordinator(ar.logger, tg.Tasks, ar.waitCh) - ar.maxRunDuration = tasklifecycle.NewMaxRunDuration(ar.logger, ar.alloc, ar) + ar.maxRunDuration = tasklifecycle.NewMaxRunDuration(ar.alloc, ar) shutdownDelayCtx, shutdownDelayCancel := context.WithCancel(context.Background()) ar.shutdownDelayCtx = shutdownDelayCtx diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index 0977e9cff1e..94446849ada 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -2717,7 +2717,7 @@ func TestAllocRunner_MaxRunDuration_StopsExpiredAlloc(t *testing.T) { }) } -func TestAllocRunner_MaxRunDuration_PreservesOriginalJobAcrossAllocUpdatesWithoutJob(t *testing.T) { +func TestAllocRunner_MaxRunDuration_PreservesConfigAcrossAllocUpdatesWithoutJob(t *testing.T) { ci.Parallel(t) alloc := mock.BatchAlloc() diff --git a/client/allocrunner/tasklifecycle/max_run_duration.go b/client/allocrunner/tasklifecycle/max_run_duration.go index 9390233452d..144720da702 100644 --- a/client/allocrunner/tasklifecycle/max_run_duration.go +++ b/client/allocrunner/tasklifecycle/max_run_duration.go @@ -7,7 +7,6 @@ import ( "sync" "time" - "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/nomad/structs" ) @@ -28,18 +27,15 @@ type MaxRunDuration struct { alloc *structs.Allocation timer *time.Timer setter MaxRunDurationSetter - logger hclog.Logger } func NewMaxRunDuration( - logger hclog.Logger, alloc *structs.Allocation, setter MaxRunDurationSetter, ) *MaxRunDuration { return &MaxRunDuration{ alloc: alloc, setter: setter, - logger: logger.Named("max_run_duration"), } } diff --git a/client/allocrunner/tasklifecycle/max_run_duration_test.go b/client/allocrunner/tasklifecycle/max_run_duration_test.go index 8d488a81dc1..7d70c1a92c7 100644 --- a/client/allocrunner/tasklifecycle/max_run_duration_test.go +++ b/client/allocrunner/tasklifecycle/max_run_duration_test.go @@ -7,7 +7,6 @@ import ( "testing" "time" - log "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" 
"github.com/shoenig/test/must" @@ -94,7 +93,7 @@ func TestMaxRunDuration_TaskStateUpdated_ArmsTimerAndFires(t *testing.T) { alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + m := NewMaxRunDuration(alloc, setter) startedAt := time.Now().UTC() m.TaskStateUpdated(map[string]*structs.TaskState{ @@ -121,7 +120,7 @@ func TestMaxRunDuration_TaskStateUpdated_ImmediateEnforcementWhenExpired(t *test alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + m := NewMaxRunDuration(alloc, setter) startedAt := time.Now().Add(-2 * maxRunDuration).UTC() m.TaskStateUpdated(map[string]*structs.TaskState{ @@ -148,7 +147,7 @@ func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenNotFullyRunning(t *testi alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + m := NewMaxRunDuration(alloc, setter) m.TaskStateUpdated(map[string]*structs.TaskState{ "web": { @@ -172,7 +171,7 @@ func TestMaxRunDuration_TaskStateUpdated_DoesNotFireForNonBatchJobs(t *testing.T alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + m := NewMaxRunDuration(alloc, setter) m.TaskStateUpdated(map[string]*structs.TaskState{ "web": { @@ -198,7 +197,7 @@ func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenDesiredStatusNotRun(t *t alloc.DesiredStatus = structs.AllocDesiredStatusStop setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + m := NewMaxRunDuration(alloc, setter) m.TaskStateUpdated(map[string]*structs.TaskState{ "web": { @@ -224,7 +223,7 @@ func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenAllocTerminal(t *testing alloc.ClientStatus = structs.AllocClientStatusComplete setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + m := NewMaxRunDuration(alloc, setter) m.TaskStateUpdated(map[string]*structs.TaskState{ "web": { @@ -249,7 +248,7 @@ func TestMaxRunDuration_SetAlloc_UsesLatestAllocConfig(t *testing.T) { alloc.Job.TaskGroups[0].MaxRunDuration = &initial setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + m := NewMaxRunDuration(alloc, setter) updated := alloc.Copy() latest := 40 * time.Millisecond @@ -281,7 +280,7 @@ func TestMaxRunDuration_Stop_CancelsTimer(t *testing.T) { alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(log.NewNullLogger(), alloc, setter) + m := NewMaxRunDuration(alloc, setter) m.TaskStateUpdated(map[string]*structs.TaskState{ "web": { From c44ba70361c22b3f904b7d2e186bfc2f048dfa4b Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Thu, 9 Apr 2026 17:35:21 +0200 Subject: [PATCH 17/37] corrections --- client/allocrunner/alloc_runner_test.go | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index 94446849ada..4b2c42735b2 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -2702,7 +2702,7 @@ func TestAllocRunner_MaxRunDuration_StopsExpiredAlloc(t *testing.T) { 
	testutil.WaitForResult(func() (bool, error) {
 		state := ar.AllocState()
 		if state == nil {
-			return false, fmt.Errorf("No alloc state")
+			return false, fmt.Errorf("no alloc state")
 		}
 		if state.ClientStatus != structs.AllocClientStatusComplete {
 			return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusComplete)
@@ -2710,6 +2710,9 @@ func TestAllocRunner_MaxRunDuration_StopsExpiredAlloc(t *testing.T) {
 		if state.ClientDescription != structs.AllocTimeoutReasonMaxRunDuration {
 			return false, fmt.Errorf("got description %q; want %q", state.ClientDescription, structs.AllocTimeoutReasonMaxRunDuration)
 		}
+		if !state.MaxRunDurationExceeded {
+			return false, fmt.Errorf("max run duration was not marked exceeded")
+		}
 		return true, nil
 	}, func(err error) {
 		state := ar.AllocState()
@@ -2717,7 +2720,7 @@ func TestAllocRunner_MaxRunDuration_StopsExpiredAlloc(t *testing.T) {
 	})
 }
 
-func TestAllocRunner_MaxRunDuration_PreservesConfigAcrossAllocUpdatesWithoutJob(t *testing.T) {
+func TestAllocRunner_MaxRunDuration_PreservesConfigAcrossPartialAllocUpdate(t *testing.T) {
 	ci.Parallel(t)
 
 	alloc := mock.BatchAlloc()
@@ -2743,7 +2746,7 @@ func TestAllocRunner_MaxRunDuration_PreservesConfigAcrossAllocUpdatesWithoutJob(
 	testutil.WaitForResult(func() (bool, error) {
 		state := ar.AllocState()
 		if state == nil {
-			return false, fmt.Errorf("No alloc state")
+			return false, fmt.Errorf("no alloc state")
 		}
 		if state.ClientStatus != structs.AllocClientStatusRunning {
 			return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusRunning)
@@ -2761,7 +2764,7 @@
 	testutil.WaitForResult(func() (bool, error) {
 		state := ar.AllocState()
 		if state == nil {
-			return false, fmt.Errorf("No alloc state")
+			return false, fmt.Errorf("no alloc state")
 		}
 		if state.ClientStatus != structs.AllocClientStatusComplete {
 			return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusComplete)
@@ -2769,10 +2772,13 @@
 		if state.ClientDescription != structs.AllocTimeoutReasonMaxRunDuration {
 			return false, fmt.Errorf("got description %q; want %q", state.ClientDescription, structs.AllocTimeoutReasonMaxRunDuration)
 		}
+		if !state.MaxRunDurationExceeded {
+			return false, fmt.Errorf("max run duration was not marked exceeded")
+		}
 		return true, nil
 	}, func(err error) {
 		state := ar.AllocState()
-		t.Fatalf("timed out waiting for alloc runner max_run_duration enforcement after alloc update without job: %v; state=%#v", err, state)
+		t.Fatalf("timed out waiting for alloc runner max_run_duration enforcement after partial alloc update: %v; state=%#v", err, state)
 	})
 }
 
From 9abba4f26aca9cee5ec25bbf7218a389399a3197 Mon Sep 17 00:00:00 2001
From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com>
Date: Thu, 9 Apr 2026 17:38:37 +0200
Subject: [PATCH 18/37] test refactor

---
 .../tasklifecycle/max_run_duration_test.go    | 207 ++++++------------
 1 file changed, 69 insertions(+), 138 deletions(-)

diff --git a/client/allocrunner/tasklifecycle/max_run_duration_test.go b/client/allocrunner/tasklifecycle/max_run_duration_test.go
index 7d70c1a92c7..5050677f358 100644
--- a/client/allocrunner/tasklifecycle/max_run_duration_test.go
+++ b/client/allocrunner/tasklifecycle/max_run_duration_test.go
@@ -48,15 +48,7 @@ func TestMaxRunDuration_FullyRunningSince(t *testing.T) {
 	must.Eq(t, later, got)
 }
 
-func
TestMaxRunDuration_FullyRunningSince_FalseWhenEmpty(t *testing.T) { - t.Parallel() - - got, ok := FullyRunningSince(nil) - must.False(t, ok) - must.Eq(t, time.Time{}, got) -} - -func TestMaxRunDuration_FullyRunningSince_FalseWhenTaskNotRunning(t *testing.T) { +func TestMaxRunDuration_FullyRunningSince_FalseWhenNotFullyRunning(t *testing.T) { t.Parallel() _, ok := FullyRunningSince(map[string]*structs.TaskState{ @@ -72,18 +64,6 @@ func TestMaxRunDuration_FullyRunningSince_FalseWhenTaskNotRunning(t *testing.T) must.False(t, ok) } -func TestMaxRunDuration_FullyRunningSince_FalseWhenStartedAtMissing(t *testing.T) { - t.Parallel() - - _, ok := FullyRunningSince(map[string]*structs.TaskState{ - "a": { - State: structs.TaskStateRunning, - }, - }) - - must.False(t, ok) -} - func TestMaxRunDuration_TaskStateUpdated_ArmsTimerAndFires(t *testing.T) { t.Parallel() @@ -111,131 +91,82 @@ func TestMaxRunDuration_TaskStateUpdated_ArmsTimerAndFires(t *testing.T) { } } -func TestMaxRunDuration_TaskStateUpdated_ImmediateEnforcementWhenExpired(t *testing.T) { +func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenAllocNotEligible(t *testing.T) { t.Parallel() - alloc := mock.BatchAlloc() - maxRunDuration := 25 * time.Millisecond - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - - setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(alloc, setter) - - startedAt := time.Now().Add(-2 * maxRunDuration).UTC() - m.TaskStateUpdated(map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: startedAt, + cases := []struct { + name string + alloc *structs.Allocation + }{ + { + name: "not fully running", + alloc: func() *structs.Allocation { + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + return alloc + }(), }, - }) - - select { - case deadline := <-setter.deadlines: - must.Eq(t, startedAt.Add(maxRunDuration), deadline) - case <-time.After(250 * time.Millisecond): - t.Fatal("timed out waiting for immediate max_run_duration enforcement") - } -} - -func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenNotFullyRunning(t *testing.T) { - t.Parallel() - - alloc := mock.BatchAlloc() - maxRunDuration := 25 * time.Millisecond - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - - setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(alloc, setter) - - m.TaskStateUpdated(map[string]*structs.TaskState{ - "web": { - State: structs.TaskStatePending, + { + name: "non-batch job", + alloc: func() *structs.Allocation { + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + alloc.Job.Type = structs.JobTypeService + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + return alloc + }(), }, - }) - - select { - case deadline := <-setter.deadlines: - t.Fatalf("unexpected deadline fired: %v", deadline) - case <-time.After(100 * time.Millisecond): - } -} - -func TestMaxRunDuration_TaskStateUpdated_DoesNotFireForNonBatchJobs(t *testing.T) { - t.Parallel() - - alloc := mock.BatchAlloc() - maxRunDuration := 25 * time.Millisecond - alloc.Job.Type = structs.JobTypeService - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - - setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(alloc, setter) - - m.TaskStateUpdated(map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: time.Now().UTC(), + { + 
name: "desired status not run", + alloc: func() *structs.Allocation { + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + alloc.DesiredStatus = structs.AllocDesiredStatusStop + return alloc + }(), }, - }) - - select { - case deadline := <-setter.deadlines: - t.Fatalf("unexpected deadline fired: %v", deadline) - case <-time.After(100 * time.Millisecond): - } -} - -func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenDesiredStatusNotRun(t *testing.T) { - t.Parallel() - - alloc := mock.BatchAlloc() - maxRunDuration := 25 * time.Millisecond - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - alloc.DesiredStatus = structs.AllocDesiredStatusStop - - setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(alloc, setter) - - m.TaskStateUpdated(map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: time.Now().UTC(), + { + name: "terminal alloc", + alloc: func() *structs.Allocation { + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + alloc.ClientStatus = structs.AllocClientStatusComplete + return alloc + }(), }, - }) - - select { - case deadline := <-setter.deadlines: - t.Fatalf("unexpected deadline fired: %v", deadline) - case <-time.After(100 * time.Millisecond): } -} - -func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenAllocTerminal(t *testing.T) { - t.Parallel() - alloc := mock.BatchAlloc() - maxRunDuration := 25 * time.Millisecond - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - alloc.ClientStatus = structs.AllocClientStatusComplete - - setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(alloc, setter) - - m.TaskStateUpdated(map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: time.Now().UTC(), - }, - }) - - select { - case deadline := <-setter.deadlines: - t.Fatalf("unexpected deadline fired: %v", deadline) - case <-time.After(100 * time.Millisecond): + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + setter := newTestMaxRunDurationSetter() + m := NewMaxRunDuration(tc.alloc, setter) + + taskStates := map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: time.Now().UTC(), + }, + } + if tc.name == "not fully running" { + taskStates["web"] = &structs.TaskState{State: structs.TaskStatePending} + } + + m.TaskStateUpdated(taskStates) + + select { + case deadline := <-setter.deadlines: + t.Fatalf("unexpected deadline fired: %v", deadline) + case <-time.After(100 * time.Millisecond): + } + }) } } From 0aecff5175fefb183e851fbe87fe1f5dbbaef24a Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Thu, 9 Apr 2026 17:57:01 +0200 Subject: [PATCH 19/37] tidying up --- client/allocrunner/alloc_runner.go | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 392e1b5f43c..1e16ded0f34 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -1094,22 +1094,8 @@ func (ar *allocRunner) handleAllocUpdates() { // If there is already a pending update it will be discarded and replaced by // the latest update. 
func (ar *allocRunner) handleAllocUpdate(update *structs.Allocation) { - current := ar.Alloc() - // Detect Stop updates - stopping := !current.TerminalStatus() && update.TerminalStatus() - - if update.Job == nil && current != nil && current.Job != nil { - update = update.Copy() - update.Job = current.Job.Copy() - if update.TaskGroup == "" { - update.TaskGroup = current.TaskGroup - } - if update.DesiredStatus == "" { - update.DesiredStatus = current.DesiredStatus - } - - } + stopping := !ar.Alloc().TerminalStatus() && update.TerminalStatus() // Update ar.alloc ar.setAlloc(update) @@ -1142,12 +1128,10 @@ func (ar *allocRunner) EnforceMaxRunDurationTimeout(deadline time.Time) { now := time.Now().UTC() if ar.isShuttingDown() { - return } if now.Before(deadline) { - return } From f4545261a6ff232803b53067dffc43616b3d0464 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Thu, 9 Apr 2026 17:57:22 +0200 Subject: [PATCH 20/37] more tidying up --- client/allocrunner/alloc_runner_test.go | 62 ------------------------- 1 file changed, 62 deletions(-) diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index 4b2c42735b2..01dce5c61d3 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -2720,68 +2720,6 @@ func TestAllocRunner_MaxRunDuration_StopsExpiredAlloc(t *testing.T) { }) } -func TestAllocRunner_MaxRunDuration_PreservesConfigAcrossPartialAllocUpdate(t *testing.T) { - ci.Parallel(t) - - alloc := mock.BatchAlloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "run_for": "10s", - } - task.KillTimeout = 10 * time.Millisecond - maxRunDuration := 50 * time.Millisecond - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - - conf, cleanup := testAllocRunnerConfig(t, alloc) - defer cleanup() - - arIface, err := NewAllocRunner(conf) - must.NoError(t, err) - ar := arIface.(*allocRunner) - - go ar.Run() - defer destroy(ar) - - testutil.WaitForResult(func() (bool, error) { - state := ar.AllocState() - if state == nil { - return false, fmt.Errorf("no alloc state") - } - if state.ClientStatus != structs.AllocClientStatusRunning { - return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusRunning) - } - return true, nil - }, func(err error) { - state := ar.AllocState() - t.Fatalf("timed out waiting for alloc runner to start: %v; state=%#v", err, state) - }) - - update := ar.Alloc().CopySkipJob() - update.AllocModifyIndex++ - ar.Update(update) - - testutil.WaitForResult(func() (bool, error) { - state := ar.AllocState() - if state == nil { - return false, fmt.Errorf("no alloc state") - } - if state.ClientStatus != structs.AllocClientStatusComplete { - return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusComplete) - } - if state.ClientDescription != structs.AllocTimeoutReasonMaxRunDuration { - return false, fmt.Errorf("got description %q; want %q", state.ClientDescription, structs.AllocTimeoutReasonMaxRunDuration) - } - if !state.MaxRunDurationExceeded { - return false, fmt.Errorf("max run duration was not marked exceeded") - } - return true, nil - }, func(err error) { - state := ar.AllocState() - t.Fatalf("timed out waiting for alloc runner max_run_duration enforcement after partial alloc update: %v; state=%#v", err, state) - }) -} - func TestAllocRunner_setHookStatsHandler(t *testing.T) { ci.Parallel(t) From 
9b735a022a4c143b61ac2565a7d73a172e2729cd Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Thu, 9 Apr 2026 18:24:19 +0200 Subject: [PATCH 21/37] bugfix --- .../tasklifecycle/max_run_duration.go | 42 +++++++++++++---- .../tasklifecycle/max_run_duration_test.go | 47 +++++++++++++++++++ 2 files changed, 80 insertions(+), 9 deletions(-) diff --git a/client/allocrunner/tasklifecycle/max_run_duration.go b/client/allocrunner/tasklifecycle/max_run_duration.go index 144720da702..5a450cf54f6 100644 --- a/client/allocrunner/tasklifecycle/max_run_duration.go +++ b/client/allocrunner/tasklifecycle/max_run_duration.go @@ -24,9 +24,11 @@ type MaxRunDurationSetter interface { type MaxRunDuration struct { mu sync.Mutex - alloc *structs.Allocation - timer *time.Timer - setter MaxRunDurationSetter + alloc *structs.Allocation + deadline time.Time + hasDeadline bool + timer *time.Timer + setter MaxRunDurationSetter } func NewMaxRunDuration( @@ -54,7 +56,8 @@ func (m *MaxRunDuration) TaskStateUpdated(taskStates map[string]*structs.TaskSta m.mu.Lock() defer m.mu.Unlock() - m.resetTimerLocked(taskStates) + m.updateDeadlineLocked(taskStates) + m.resetTimerLocked() } // Stop cancels any active timer. @@ -65,25 +68,35 @@ func (m *MaxRunDuration) Stop() { m.stopTimerLocked() } -func (m *MaxRunDuration) resetTimerLocked(taskStates map[string]*structs.TaskState) { - m.stopTimerLocked() - +func (m *MaxRunDuration) updateDeadlineLocked(taskStates map[string]*structs.TaskState) { if m.alloc == nil { + m.deadline = time.Time{} + m.hasDeadline = false return } // Only running allocations with a valid max_run_duration and a fully - // running task set should have an active timer. + // running task set should establish a deadline. if m.alloc.TerminalStatus() { + m.deadline = time.Time{} + m.hasDeadline = false return } if m.alloc.DesiredStatus != "" && m.alloc.DesiredStatus != structs.AllocDesiredStatusRun { + m.deadline = time.Time{} + m.hasDeadline = false return } maxRunDuration, ok := m.alloc.MaxRunDuration() if !ok { + m.deadline = time.Time{} + m.hasDeadline = false + return + } + + if m.hasDeadline { return } @@ -92,7 +105,18 @@ func (m *MaxRunDuration) resetTimerLocked(taskStates map[string]*structs.TaskSta return } - deadline := startedAt.Add(maxRunDuration) + m.deadline = startedAt.Add(maxRunDuration) + m.hasDeadline = true +} + +func (m *MaxRunDuration) resetTimerLocked() { + m.stopTimerLocked() + + if !m.hasDeadline { + return + } + + deadline := m.deadline remaining := time.Until(deadline) if remaining <= 0 { go m.setter.EnforceMaxRunDurationTimeout(deadline) diff --git a/client/allocrunner/tasklifecycle/max_run_duration_test.go b/client/allocrunner/tasklifecycle/max_run_duration_test.go index 5050677f358..649c5bd88a2 100644 --- a/client/allocrunner/tasklifecycle/max_run_duration_test.go +++ b/client/allocrunner/tasklifecycle/max_run_duration_test.go @@ -91,6 +91,53 @@ func TestMaxRunDuration_TaskStateUpdated_ArmsTimerAndFires(t *testing.T) { } } +func TestMaxRunDuration_TaskStateUpdated_PreservesDeadlineWhenOneTaskFinishesEarly(t *testing.T) { + t.Parallel() + + alloc := mock.BatchAlloc() + maxRunDuration := 50 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + task2 := alloc.Job.TaskGroups[0].Tasks[0].Copy() + task2.Name = "web2" + alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2) + + setter := newTestMaxRunDurationSetter() + m := NewMaxRunDuration(alloc, setter) 
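+	// Arm the deadline with both tasks running, then report web2 as dead;
+	// the original deadline must survive the second update.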
+ + startedAt := time.Now().UTC() + m.TaskStateUpdated(map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: startedAt, + }, + "web2": { + State: structs.TaskStateRunning, + StartedAt: startedAt, + }, + }) + + m.TaskStateUpdated(map[string]*structs.TaskState{ + "web": { + State: structs.TaskStateRunning, + StartedAt: startedAt, + }, + "web2": { + State: structs.TaskStateDead, + StartedAt: startedAt, + FinishedAt: startedAt.Add(2 * time.Millisecond), + }, + }) + + select { + case deadline := <-setter.deadlines: + must.Eq(t, startedAt.Add(maxRunDuration), deadline) + case <-time.After(500 * time.Millisecond): + t.Fatal("timed out waiting for max_run_duration deadline after one task finished early") + } +} + func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenAllocNotEligible(t *testing.T) { t.Parallel() From 7e0509a7c80a2e817eaef6540ab0e993d9a671e6 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Mon, 13 Apr 2026 10:53:52 +0200 Subject: [PATCH 22/37] unify fully running since --- .../tasklifecycle/max_run_duration.go | 31 ++----------------- .../tasklifecycle/max_run_duration_test.go | 4 +-- nomad/state/state_store.go | 24 ++++---------- nomad/state/state_store_test.go | 21 ++----------- nomad/structs/alloc.go | 16 +++++++--- 5 files changed, 25 insertions(+), 71 deletions(-) diff --git a/client/allocrunner/tasklifecycle/max_run_duration.go b/client/allocrunner/tasklifecycle/max_run_duration.go index 5a450cf54f6..127a443b82c 100644 --- a/client/allocrunner/tasklifecycle/max_run_duration.go +++ b/client/allocrunner/tasklifecycle/max_run_duration.go @@ -56,7 +56,7 @@ func (m *MaxRunDuration) TaskStateUpdated(taskStates map[string]*structs.TaskSta m.mu.Lock() defer m.mu.Unlock() - m.updateDeadlineLocked(taskStates) + m.setDeadline(taskStates) m.resetTimerLocked() } @@ -68,7 +68,7 @@ func (m *MaxRunDuration) Stop() { m.stopTimerLocked() } -func (m *MaxRunDuration) updateDeadlineLocked(taskStates map[string]*structs.TaskState) { +func (m *MaxRunDuration) setDeadline(taskStates map[string]*structs.TaskState) { if m.alloc == nil { m.deadline = time.Time{} m.hasDeadline = false @@ -100,7 +100,7 @@ func (m *MaxRunDuration) updateDeadlineLocked(taskStates map[string]*structs.Tas return } - startedAt, ok := FullyRunningSince(taskStates) + startedAt, ok := structs.FullyRunningSince(taskStates) if !ok { return } @@ -154,28 +154,3 @@ func (m *MaxRunDuration) stopTimerLocked() { } m.timer = nil } - -// FullyRunningSince returns the latest StartedAt timestamp across all task -// states, but only when every known task state is running with a non-zero -// start time. 
-func FullyRunningSince(taskStates map[string]*structs.TaskState) (time.Time, bool) { - if len(taskStates) == 0 { - return time.Time{}, false - } - - var latest time.Time - for _, ts := range taskStates { - if ts == nil || ts.State != structs.TaskStateRunning || ts.StartedAt.IsZero() { - return time.Time{}, false - } - if ts.StartedAt.After(latest) { - latest = ts.StartedAt - } - } - - if latest.IsZero() { - return time.Time{}, false - } - - return latest, true -} diff --git a/client/allocrunner/tasklifecycle/max_run_duration_test.go b/client/allocrunner/tasklifecycle/max_run_duration_test.go index 649c5bd88a2..c9c9d8b6821 100644 --- a/client/allocrunner/tasklifecycle/max_run_duration_test.go +++ b/client/allocrunner/tasklifecycle/max_run_duration_test.go @@ -33,7 +33,7 @@ func TestMaxRunDuration_FullyRunningSince(t *testing.T) { earlier := now.Add(-2 * time.Second) later := now.Add(-1 * time.Second) - got, ok := FullyRunningSince(map[string]*structs.TaskState{ + got, ok := structs.FullyRunningSince(map[string]*structs.TaskState{ "a": { State: structs.TaskStateRunning, StartedAt: earlier, @@ -51,7 +51,7 @@ func TestMaxRunDuration_FullyRunningSince(t *testing.T) { func TestMaxRunDuration_FullyRunningSince_FalseWhenNotFullyRunning(t *testing.T) { t.Parallel() - _, ok := FullyRunningSince(map[string]*structs.TaskState{ + _, ok := structs.FullyRunningSince(map[string]*structs.TaskState{ "a": { State: structs.TaskStateRunning, StartedAt: time.Now().Add(-time.Second).UTC(), diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 2232dd6ca82..f660d9ad94d 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -4054,9 +4054,7 @@ func (s *StateStore) nestedUpdateAllocFromClient(txn *txn, index uint64, alloc * // Pull in anything the client is the authority on copyAlloc.ClientStatus = alloc.ClientStatus - if alloc.ClientDescription != "" { - copyAlloc.ClientDescription = alloc.ClientDescription - } + copyAlloc.ClientDescription = alloc.ClientDescription copyAlloc.TaskStates = alloc.TaskStates copyAlloc.NetworkStatus = alloc.NetworkStatus @@ -4290,22 +4288,12 @@ func (s *StateStore) upsertAllocsImpl(index uint64, allocs []*structs.Allocation // Keep the clients task states alloc.TaskStates = exist.TaskStates - // If the scheduler is marking this allocation as lost, unknown, or another - // explicit terminal client status, preserve the scheduler-provided status - // and description instead of reusing the existing allocation values. - switch alloc.ClientStatus { - case structs.AllocClientStatusLost, - structs.AllocClientStatusUnknown, - structs.AllocClientStatusFailed, - structs.AllocClientStatusComplete: - if alloc.ClientDescription == "" { - alloc.ClientDescription = exist.ClientDescription - } - default: + // If the scheduler is marking this allocation as lost or unknown we do not + // want to reuse the status of the existing allocation. 
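+	// Every other status stays client-authoritative, so the existing values
+	// are carried forward below.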
+ if alloc.ClientStatus != structs.AllocClientStatusLost && + alloc.ClientStatus != structs.AllocClientStatusUnknown { alloc.ClientStatus = exist.ClientStatus - if alloc.ClientDescription == "" { - alloc.ClientDescription = exist.ClientDescription - } + alloc.ClientDescription = exist.ClientDescription } // The job has been denormalized so re-attach the original job diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index ba6ae7a26c1..a04e1262119 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -171,15 +171,6 @@ func TestStateStore_UpsertPlanResults_AllocationsDenormalized(t *testing.T) { DesiredDescription: "desired desc", ClientStatus: structs.AllocClientStatusLost, } - - timeoutStoppedAlloc := mock.Alloc() - timeoutStoppedAlloc.Job = job - timeoutStoppedAlloc.JobID = job.ID - timeoutStoppedAllocDiff := &structs.AllocationDiff{ - ID: timeoutStoppedAlloc.ID, - DesiredDescription: structs.AllocTimeoutReasonMaxRunDuration, - ClientStatus: structs.AllocClientStatusFailed, - } preemptedAlloc := mock.Alloc() preemptedAlloc.Job = job preemptedAlloc.JobID = job.ID @@ -190,7 +181,7 @@ func TestStateStore_UpsertPlanResults_AllocationsDenormalized(t *testing.T) { must.NoError(t, state.UpsertJob(structs.MsgTypeTestSetup, 900, nil, job)) must.NoError(t, state.UpsertAllocs( - structs.MsgTypeTestSetup, 999, []*structs.Allocation{stoppedAlloc, timeoutStoppedAlloc, preemptedAlloc})) + structs.MsgTypeTestSetup, 999, []*structs.Allocation{stoppedAlloc, preemptedAlloc})) // modify job and ensure that stopped and preempted alloc point to original Job mJob := job.Copy() @@ -207,7 +198,7 @@ func TestStateStore_UpsertPlanResults_AllocationsDenormalized(t *testing.T) { // Create a plan result res := structs.ApplyPlanResultsRequest{ AllocsUpdated: []*structs.Allocation{alloc}, - AllocsStopped: []*structs.AllocationDiff{stoppedAllocDiff, timeoutStoppedAllocDiff}, + AllocsStopped: []*structs.AllocationDiff{stoppedAllocDiff}, Job: mJob, EvalID: eval.ID, AllocsPreempted: []*structs.AllocationDiff{preemptedAllocDiff}, @@ -236,14 +227,6 @@ func TestStateStore_UpsertPlanResults_AllocationsDenormalized(t *testing.T) { test.Eq(t, planModifyIndex, updatedStoppedAlloc.AllocModifyIndex) test.Eq(t, job.TaskGroups, updatedStoppedAlloc.Job.TaskGroups) - updatedTimeoutStoppedAlloc, err := state.AllocByID(ws, timeoutStoppedAlloc.ID) - must.NoError(t, err) - test.Eq(t, timeoutStoppedAllocDiff.DesiredDescription, updatedTimeoutStoppedAlloc.DesiredDescription) - test.Eq(t, structs.AllocDesiredStatusStop, updatedTimeoutStoppedAlloc.DesiredStatus) - test.Eq(t, timeoutStoppedAllocDiff.ClientStatus, updatedTimeoutStoppedAlloc.ClientStatus) - test.Eq(t, planModifyIndex, updatedTimeoutStoppedAlloc.AllocModifyIndex) - test.Eq(t, job.TaskGroups, updatedTimeoutStoppedAlloc.Job.TaskGroups) - updatedPreemptedAlloc, err := state.AllocByID(ws, preemptedAlloc.ID) must.NoError(t, err) test.Eq(t, structs.AllocDesiredStatusEvict, updatedPreemptedAlloc.DesiredStatus) diff --git a/nomad/structs/alloc.go b/nomad/structs/alloc.go index 7af3e6683b8..e662b634175 100644 --- a/nomad/structs/alloc.go +++ b/nomad/structs/alloc.go @@ -384,14 +384,14 @@ func (a *Allocation) MaxRunDuration() (time.Duration, bool) { // FullyRunningSince returns the latest StartedAt timestamp across all task // states, but only when every known task state is running with a non-zero start // time. 
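+// It is shared by the client-side max_run_duration timer and by the
+// MaxRunDurationDeadline computation below.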
-func (a *Allocation) FullyRunningSince() (time.Time, bool) {
-	if a == nil {
+func FullyRunningSince(taskStates map[string]*TaskState) (time.Time, bool) {
+	if len(taskStates) == 0 {
 		return time.Time{}, false
 	}
 
 	var latest time.Time
-	for _, ts := range a.TaskStates {
+	for _, ts := range taskStates {
 		if ts == nil || ts.State != TaskStateRunning || ts.StartedAt.IsZero() {
 			return time.Time{}, false
 		}
@@ -407,6 +407,14 @@
 	return latest, true
 }
 
+func (a *Allocation) fullyRunningSince() (time.Time, bool) {
+	if a == nil {
+		return time.Time{}, false
+	}
+
+	return FullyRunningSince(a.TaskStates)
+}
+
 // MaxRunDurationDeadline returns the deadline at which the allocation should be
 // considered timed out based on max_run_duration.
 func (a *Allocation) MaxRunDurationDeadline() (time.Time, bool) {
@@ -415,7 +423,7 @@
 		return time.Time{}, false
 	}
 
-	startedAt, ok := a.FullyRunningSince()
+	startedAt, ok := a.fullyRunningSince()
 	if !ok {
 		return time.Time{}, false
 	}
 
From ea2f1ad4c259275723e106a33f509042e46f7ad0 Mon Sep 17 00:00:00 2001
From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com>
Date: Mon, 13 Apr 2026 10:54:01 +0200
Subject: [PATCH 23/37] hook alloc can never be nil

---
 client/allocrunner/alloc_runner.go | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go
index 1e16ded0f34..076c30a369d 100644
--- a/client/allocrunner/alloc_runner.go
+++ b/client/allocrunner/alloc_runner.go
@@ -699,19 +699,17 @@ func (ar *allocRunner) handleTaskStateUpdates() {
 		calloc := ar.clientAlloc(states)
 
 		hookAlloc := calloc
-		if hookAlloc != nil {
-			serverAlloc := ar.Alloc()
-			hookAlloc = &structs.Allocation{
-				ID:                calloc.ID,
-				TaskStates:        calloc.TaskStates,
-				ClientStatus:      calloc.ClientStatus,
-				ClientDescription: calloc.ClientDescription,
-				DeploymentStatus:  calloc.DeploymentStatus,
-				NetworkStatus:     calloc.NetworkStatus,
-				Job:               serverAlloc.Job,
-				TaskGroup:         serverAlloc.TaskGroup,
-				DesiredStatus:     serverAlloc.DesiredStatus,
-			}
+		serverAlloc := ar.Alloc()
+		hookAlloc = &structs.Allocation{
+			ID:                calloc.ID,
+			TaskStates:        calloc.TaskStates,
+			ClientStatus:      calloc.ClientStatus,
+			ClientDescription: calloc.ClientDescription,
+			DeploymentStatus:  calloc.DeploymentStatus,
+			NetworkStatus:     calloc.NetworkStatus,
+			Job:               serverAlloc.Job,
+			TaskGroup:         serverAlloc.TaskGroup,
+			DesiredStatus:     serverAlloc.DesiredStatus,
 		}
 
 		req := &interfaces.RunnerUpdateRequest{
From f99673109f7fddc0b4bd9bcc293c637f944b2eeb Mon Sep 17 00:00:00 2001
From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com>
Date: Mon, 13 Apr 2026 10:55:29 +0200
Subject: [PATCH 24/37] event stream corrections

---
 nomad/state/events.go      |  3 +-
 nomad/state/events_test.go | 70 ++++++++++++++++++--------------------
 2 files changed, 35 insertions(+), 38 deletions(-)

diff --git a/nomad/state/events.go b/nomad/state/events.go
index 3470e06bcf8..396d4d42fd0 100644
--- a/nomad/state/events.go
+++ b/nomad/state/events.go
@@ -358,7 +358,8 @@ func eventFromChange(change memdb.Change) (structs.Event, bool) {
 		alloc.Job = nil
 
 		allocEvent := &structs.AllocationEvent{Allocation: alloc}
-		if alloc.ClientDescription == structs.AllocTimeoutReasonMaxRunDuration {
+		if alloc.ClientStatus == structs.AllocClientStatusComplete &&
+			alloc.ClientDescription == structs.AllocTimeoutReasonMaxRunDuration {
 			allocEvent.Timeout = true
allocEvent.TimeoutReason = alloc.ClientDescription } diff --git a/nomad/state/events_test.go b/nomad/state/events_test.go index 57dad2710b2..4f885e6db20 100644 --- a/nomad/state/events_test.go +++ b/nomad/state/events_test.go @@ -546,50 +546,46 @@ func TestEventsFromChanges_ApplyPlanResultsRequestType(t *testing.T) { } } -func TestEventsFromChanges_ApplyPlanResultsRequestType_TimeoutStoppedAlloc(t *testing.T) { +func TestEventFromChange_AllocationTimeoutFields(t *testing.T) { ci.Parallel(t) s := TestStateStoreCfg(t, TestStateStorePublisher(t)) defer s.StopEventBroker() - mockJob := mock.Job() - must.NoError(t, s.UpsertJob(structs.MsgTypeTestSetup, 9, nil, mockJob)) - - stoppedAlloc := mock.Alloc() - stoppedAlloc.Job = mockJob - stoppedAlloc.JobID = mockJob.ID - stoppedAlloc.ClientDescription = structs.AllocTimeoutReasonMaxRunDuration + timeoutAlloc := mock.Alloc() + timeoutAlloc.ClientStatus = structs.AllocClientStatusComplete + timeoutAlloc.ClientDescription = structs.AllocTimeoutReasonMaxRunDuration - must.NoError(t, s.UpsertAllocs(structs.MsgTypeTestSetup, 10, []*structs.Allocation{stoppedAlloc})) - - msgType := structs.ApplyPlanResultsRequestType - req := &structs.ApplyPlanResultsRequest{ - AllocsStopped: []*structs.AllocationDiff{{ - ID: stoppedAlloc.ID, - ClientStatus: structs.AllocClientStatusFailed, - ClientDescription: structs.AllocTimeoutReasonMaxRunDuration, - }}, - Job: mockJob, - } - - must.NoError(t, s.UpsertPlanResults(msgType, 100, req)) - - events := WaitForEvents(t, s, 100, 1, 1*time.Second) - must.Len(t, 2, events) + timeoutEvent, ok := eventFromChange(memdb.Change{ + Table: "allocs", + Before: nil, + After: timeoutAlloc, + }) + must.True(t, ok) - var allocEvent *structs.AllocationEvent - for _, e := range events { - must.Eq(t, structs.TypePlanResult, e.Type) - if e.Topic == structs.TopicAllocation { - allocEvent = e.Payload.(*structs.AllocationEvent) - } - } + timeoutPayload, ok := timeoutEvent.Payload.(*structs.AllocationEvent) + must.True(t, ok) + must.True(t, timeoutPayload.Timeout) + must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, timeoutPayload.TimeoutReason) + must.Eq(t, structs.AllocClientStatusComplete, timeoutPayload.Allocation.ClientStatus) + must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, timeoutPayload.Allocation.ClientDescription) + + nonTimeoutAlloc := mock.Alloc() + nonTimeoutAlloc.ClientStatus = structs.AllocClientStatusFailed + nonTimeoutAlloc.ClientDescription = structs.AllocTimeoutReasonMaxRunDuration + + nonTimeoutEvent, ok := eventFromChange(memdb.Change{ + Table: "allocs", + Before: nil, + After: nonTimeoutAlloc, + }) + must.True(t, ok) - must.NotNil(t, allocEvent) - must.True(t, allocEvent.Timeout) - must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, allocEvent.TimeoutReason) - must.Eq(t, structs.AllocDesiredStatusStop, allocEvent.Allocation.DesiredStatus) - must.Eq(t, structs.AllocClientStatusFailed, allocEvent.Allocation.ClientStatus) - must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, allocEvent.Allocation.ClientDescription) + nonTimeoutPayload, ok := nonTimeoutEvent.Payload.(*structs.AllocationEvent) + must.True(t, ok) + must.False(t, nonTimeoutPayload.Timeout) + must.Eq(t, "", nonTimeoutPayload.TimeoutReason) + must.Eq(t, structs.AllocClientStatusFailed, nonTimeoutPayload.Allocation.ClientStatus) + must.Eq(t, structs.AllocTimeoutReasonMaxRunDuration, nonTimeoutPayload.Allocation.ClientDescription) } func TestEventsFromChanges_BatchNodeUpdateDrainRequestType(t *testing.T) { From a5212394e65abc46c771a0027a59544851d3eddc Mon Sep 
17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 14 Apr 2026 17:58:12 +0200 Subject: [PATCH 25/37] desired status comment --- client/allocrunner/tasklifecycle/max_run_duration.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/client/allocrunner/tasklifecycle/max_run_duration.go b/client/allocrunner/tasklifecycle/max_run_duration.go index 127a443b82c..71a607032f2 100644 --- a/client/allocrunner/tasklifecycle/max_run_duration.go +++ b/client/allocrunner/tasklifecycle/max_run_duration.go @@ -83,6 +83,9 @@ func (m *MaxRunDuration) setDeadline(taskStates map[string]*structs.TaskState) { return } + // DesiredStatus defaults to the zero value for normal running allocations. + // Treat an empty status the same as "run" and only clear the deadline when + // the alloc is explicitly marked with a non-run desired status. if m.alloc.DesiredStatus != "" && m.alloc.DesiredStatus != structs.AllocDesiredStatusRun { m.deadline = time.Time{} m.hasDeadline = false From 2f420ac0426be138b79410695234f6e5038bc15e Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 14 Apr 2026 18:00:15 +0200 Subject: [PATCH 26/37] allocrunner unnecessary copy fix --- client/allocrunner/alloc_runner.go | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 076c30a369d..5ab5469864b 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -698,18 +698,16 @@ func (ar *allocRunner) handleTaskStateUpdates() { // Get the client allocation calloc := ar.clientAlloc(states) - hookAlloc := calloc - serverAlloc := ar.Alloc() - hookAlloc = &structs.Allocation{ + hookAlloc := &structs.Allocation{ ID: calloc.ID, TaskStates: calloc.TaskStates, ClientStatus: calloc.ClientStatus, ClientDescription: calloc.ClientDescription, DeploymentStatus: calloc.DeploymentStatus, NetworkStatus: calloc.NetworkStatus, - Job: serverAlloc.Job, - TaskGroup: serverAlloc.TaskGroup, - DesiredStatus: serverAlloc.DesiredStatus, + Job: ar.Alloc().Job, + TaskGroup: ar.Alloc().TaskGroup, + DesiredStatus: ar.Alloc().DesiredStatus, } req := &interfaces.RunnerUpdateRequest{ From ae2d08a4abe925b098e67071d02e4fbcb9fcb8ae Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 14 Apr 2026 18:13:54 +0200 Subject: [PATCH 27/37] fix UTC and interface --- client/allocrunner/alloc_runner.go | 10 +++------- client/allocrunner/alloc_runner_hooks.go | 5 ++--- client/allocrunner/interfaces/runner_lifecycle.go | 5 ++--- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 5ab5469864b..83363d316f2 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -710,13 +710,9 @@ func (ar *allocRunner) handleTaskStateUpdates() { DesiredStatus: ar.Alloc().DesiredStatus, } - req := &interfaces.RunnerUpdateRequest{ - Alloc: hookAlloc, - TaskStates: states, - } if ar.maxRunDuration != nil { - ar.maxRunDuration.SetAlloc(req.Alloc) - ar.maxRunDuration.TaskStateUpdated(req.TaskStates) + ar.maxRunDuration.SetAlloc(hookAlloc) + ar.maxRunDuration.TaskStateUpdated(states) } // Update the server @@ -1121,7 +1117,7 @@ func (ar *allocRunner) Listener() *cstructs.AllocListener { } func (ar *allocRunner) EnforceMaxRunDurationTimeout(deadline time.Time) { - now := time.Now().UTC() + now := time.Now() if 
ar.isShuttingDown() { return diff --git a/client/allocrunner/alloc_runner_hooks.go b/client/allocrunner/alloc_runner_hooks.go index d57d94167c4..abc2898b08b 100644 --- a/client/allocrunner/alloc_runner_hooks.go +++ b/client/allocrunner/alloc_runner_hooks.go @@ -224,9 +224,8 @@ func (ar *allocRunner) update(update *structs.Allocation) error { ).SetAllocDir(ar.allocDir.AllocDirPath()).Build() req := &interfaces.RunnerUpdateRequest{ - Alloc: update, - AllocEnv: allocEnv, - TaskStates: ar.AllocState().TaskStates, + Alloc: update, + AllocEnv: allocEnv, } var merr multierror.Error diff --git a/client/allocrunner/interfaces/runner_lifecycle.go b/client/allocrunner/interfaces/runner_lifecycle.go index 6267c978e00..9e1f0e43852 100644 --- a/client/allocrunner/interfaces/runner_lifecycle.go +++ b/client/allocrunner/interfaces/runner_lifecycle.go @@ -56,9 +56,8 @@ type RunnerUpdateHook interface { } type RunnerUpdateRequest struct { - Alloc *structs.Allocation - AllocEnv *taskenv.TaskEnv - TaskStates map[string]*structs.TaskState + Alloc *structs.Allocation + AllocEnv *taskenv.TaskEnv } // A RunnerTaskRestartHook is executed just before the allocation runner is From bc6344c3176901b9277ef896080d575a864d848c Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 14 Apr 2026 18:29:03 +0200 Subject: [PATCH 28/37] max_run_duration cleanups (thanks for the comments @mismithhisler) --- client/allocrunner/alloc_runner.go | 2 +- .../tasklifecycle/max_run_duration.go | 36 ++++--------- .../tasklifecycle/max_run_duration_test.go | 52 +++++++++---------- 3 files changed, 37 insertions(+), 53 deletions(-) diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 83363d316f2..cbb01a08792 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -296,7 +296,7 @@ func NewAllocRunner(config *config.AllocRunnerConfig) (interfaces.AllocRunner, e ) ar.taskCoordinator = tasklifecycle.NewCoordinator(ar.logger, tg.Tasks, ar.waitCh) - ar.maxRunDuration = tasklifecycle.NewMaxRunDuration(ar.alloc, ar) + ar.maxRunDuration = tasklifecycle.NewMaxRunDuration(ar.EnforceMaxRunDurationTimeout) shutdownDelayCtx, shutdownDelayCancel := context.WithCancel(context.Background()) ar.shutdownDelayCtx = shutdownDelayCtx diff --git a/client/allocrunner/tasklifecycle/max_run_duration.go b/client/allocrunner/tasklifecycle/max_run_duration.go index 71a607032f2..40d2cd44a86 100644 --- a/client/allocrunner/tasklifecycle/max_run_duration.go +++ b/client/allocrunner/tasklifecycle/max_run_duration.go @@ -10,9 +10,7 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) -type MaxRunDurationSetter interface { - EnforceMaxRunDurationTimeout(time.Time) -} +type MaxRunDurationTimeoutFn func(time.Time) // MaxRunDuration coordinates local enforcement of task group // `max_run_duration`. 
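+//
+// A group opts in from its jobspec. A minimal sketch (the HCL shape is
+// assumed here from the group-level MaxRunDuration field exercised in the
+// tests below):
+//
+//	group "batch" {
+//	  max_run_duration = "2h"
+//	}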
@@ -24,21 +22,14 @@ type MaxRunDurationSetter interface { type MaxRunDuration struct { mu sync.Mutex - alloc *structs.Allocation - deadline time.Time - hasDeadline bool - timer *time.Timer - setter MaxRunDurationSetter + alloc *structs.Allocation + deadline time.Time + timer *time.Timer + onTimeout MaxRunDurationTimeoutFn } -func NewMaxRunDuration( - alloc *structs.Allocation, - setter MaxRunDurationSetter, -) *MaxRunDuration { - return &MaxRunDuration{ - alloc: alloc, - setter: setter, - } +func NewMaxRunDuration(onTimeout MaxRunDurationTimeoutFn) *MaxRunDuration { + return &MaxRunDuration{onTimeout: onTimeout} } // SetAlloc updates the authoritative allocation used for @@ -71,7 +62,6 @@ func (m *MaxRunDuration) Stop() { func (m *MaxRunDuration) setDeadline(taskStates map[string]*structs.TaskState) { if m.alloc == nil { m.deadline = time.Time{} - m.hasDeadline = false return } @@ -79,7 +69,6 @@ func (m *MaxRunDuration) setDeadline(taskStates map[string]*structs.TaskState) { // running task set should establish a deadline. if m.alloc.TerminalStatus() { m.deadline = time.Time{} - m.hasDeadline = false return } @@ -88,18 +77,16 @@ func (m *MaxRunDuration) setDeadline(taskStates map[string]*structs.TaskState) { // the alloc is explicitly marked with a non-run desired status. if m.alloc.DesiredStatus != "" && m.alloc.DesiredStatus != structs.AllocDesiredStatusRun { m.deadline = time.Time{} - m.hasDeadline = false return } maxRunDuration, ok := m.alloc.MaxRunDuration() if !ok { m.deadline = time.Time{} - m.hasDeadline = false return } - if m.hasDeadline { + if !m.deadline.IsZero() { return } @@ -109,20 +96,19 @@ func (m *MaxRunDuration) setDeadline(taskStates map[string]*structs.TaskState) { } m.deadline = startedAt.Add(maxRunDuration) - m.hasDeadline = true } func (m *MaxRunDuration) resetTimerLocked() { m.stopTimerLocked() - if !m.hasDeadline { + if m.deadline.IsZero() { return } deadline := m.deadline remaining := time.Until(deadline) if remaining <= 0 { - go m.setter.EnforceMaxRunDurationTimeout(deadline) + go m.onTimeout(deadline) return } @@ -140,7 +126,7 @@ func (m *MaxRunDuration) resetTimerLocked() { m.timer = nil m.mu.Unlock() - m.setter.EnforceMaxRunDurationTimeout(deadline) + m.onTimeout(deadline) }(timer, deadline) } diff --git a/client/allocrunner/tasklifecycle/max_run_duration_test.go b/client/allocrunner/tasklifecycle/max_run_duration_test.go index c9c9d8b6821..eea7f38326d 100644 --- a/client/allocrunner/tasklifecycle/max_run_duration_test.go +++ b/client/allocrunner/tasklifecycle/max_run_duration_test.go @@ -12,18 +12,11 @@ import ( "github.com/shoenig/test/must" ) -type testMaxRunDurationSetter struct { - deadlines chan time.Time -} - -func newTestMaxRunDurationSetter() *testMaxRunDurationSetter { - return &testMaxRunDurationSetter{ - deadlines: make(chan time.Time, 8), - } -} - -func (s *testMaxRunDurationSetter) EnforceMaxRunDurationTimeout(deadline time.Time) { - s.deadlines <- deadline +func newTestMaxRunDurationCallback() (func(time.Time), chan time.Time) { + deadlines := make(chan time.Time, 8) + return func(deadline time.Time) { + deadlines <- deadline + }, deadlines } func TestMaxRunDuration_FullyRunningSince(t *testing.T) { @@ -72,8 +65,9 @@ func TestMaxRunDuration_TaskStateUpdated_ArmsTimerAndFires(t *testing.T) { alloc.Job.Type = structs.JobTypeBatch alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(alloc, setter) + onTimeout, deadlines := newTestMaxRunDurationCallback() + m := 
NewMaxRunDuration(onTimeout) + m.SetAlloc(alloc) startedAt := time.Now().UTC() m.TaskStateUpdated(map[string]*structs.TaskState{ @@ -84,7 +78,7 @@ func TestMaxRunDuration_TaskStateUpdated_ArmsTimerAndFires(t *testing.T) { }) select { - case deadline := <-setter.deadlines: + case deadline := <-deadlines: must.Eq(t, startedAt.Add(maxRunDuration), deadline) case <-time.After(500 * time.Millisecond): t.Fatal("timed out waiting for max_run_duration deadline") @@ -103,8 +97,9 @@ func TestMaxRunDuration_TaskStateUpdated_PreservesDeadlineWhenOneTaskFinishesEar task2.Name = "web2" alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2) - setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(alloc, setter) + onTimeout, deadlines := newTestMaxRunDurationCallback() + m := NewMaxRunDuration(onTimeout) + m.SetAlloc(alloc) startedAt := time.Now().UTC() m.TaskStateUpdated(map[string]*structs.TaskState{ @@ -131,7 +126,7 @@ func TestMaxRunDuration_TaskStateUpdated_PreservesDeadlineWhenOneTaskFinishesEar }) select { - case deadline := <-setter.deadlines: + case deadline := <-deadlines: must.Eq(t, startedAt.Add(maxRunDuration), deadline) case <-time.After(500 * time.Millisecond): t.Fatal("timed out waiting for max_run_duration deadline after one task finished early") @@ -193,8 +188,9 @@ func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenAllocNotEligible(t *test t.Run(tc.name, func(t *testing.T) { t.Parallel() - setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(tc.alloc, setter) + onTimeout, deadlines := newTestMaxRunDurationCallback() + m := NewMaxRunDuration(onTimeout) + m.SetAlloc(tc.alloc) taskStates := map[string]*structs.TaskState{ "web": { @@ -209,7 +205,7 @@ func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenAllocNotEligible(t *test m.TaskStateUpdated(taskStates) select { - case deadline := <-setter.deadlines: + case deadline := <-deadlines: t.Fatalf("unexpected deadline fired: %v", deadline) case <-time.After(100 * time.Millisecond): } @@ -225,8 +221,9 @@ func TestMaxRunDuration_SetAlloc_UsesLatestAllocConfig(t *testing.T) { alloc.Job.Type = structs.JobTypeBatch alloc.Job.TaskGroups[0].MaxRunDuration = &initial - setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(alloc, setter) + onTimeout, deadlines := newTestMaxRunDurationCallback() + m := NewMaxRunDuration(onTimeout) + m.SetAlloc(alloc) updated := alloc.Copy() latest := 40 * time.Millisecond @@ -242,7 +239,7 @@ func TestMaxRunDuration_SetAlloc_UsesLatestAllocConfig(t *testing.T) { }) select { - case deadline := <-setter.deadlines: + case deadline := <-deadlines: must.Eq(t, startedAt.Add(latest), deadline) case <-time.After(500 * time.Millisecond): t.Fatal("timed out waiting for updated max_run_duration deadline") @@ -257,8 +254,9 @@ func TestMaxRunDuration_Stop_CancelsTimer(t *testing.T) { alloc.Job.Type = structs.JobTypeBatch alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - setter := newTestMaxRunDurationSetter() - m := NewMaxRunDuration(alloc, setter) + onTimeout, deadlines := newTestMaxRunDurationCallback() + m := NewMaxRunDuration(onTimeout) + m.SetAlloc(alloc) m.TaskStateUpdated(map[string]*structs.TaskState{ "web": { @@ -270,7 +268,7 @@ func TestMaxRunDuration_Stop_CancelsTimer(t *testing.T) { m.Stop() select { - case deadline := <-setter.deadlines: + case deadline := <-deadlines: t.Fatalf("unexpected deadline fired after stop: %v", deadline) case <-time.After(250 * time.Millisecond): } From f246b59acc8401dbcc79962b15e7798435358039 Mon Sep 17 00:00:00 2001 From: 
Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Fri, 17 Apr 2026 13:55:38 +0200 Subject: [PATCH 29/37] client: enforce max_run_duration regardless of task state (#27827) --- client/allocrunner/alloc_runner.go | 31 -- client/allocrunner/alloc_runner_hooks.go | 1 + client/allocrunner/max_run_duration_hook.go | 159 ++++++++++ .../allocrunner/max_run_duration_hook_test.go | 234 +++++++++++++++ .../tasklifecycle/max_run_duration.go | 145 --------- .../tasklifecycle/max_run_duration_test.go | 275 ------------------ 6 files changed, 394 insertions(+), 451 deletions(-) create mode 100644 client/allocrunner/max_run_duration_hook.go create mode 100644 client/allocrunner/max_run_duration_hook_test.go delete mode 100644 client/allocrunner/tasklifecycle/max_run_duration.go delete mode 100644 client/allocrunner/tasklifecycle/max_run_duration_test.go diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index cbb01a08792..eaae5d9de0b 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -152,10 +152,6 @@ type allocRunner struct { // transitions. runnerHooks []interfaces.RunnerHook - // maxRunDuration coordinates alloc-level max_run_duration enforcement from - // authoritative alloc and task state updates. - maxRunDuration *tasklifecycle.MaxRunDuration - // hookResources holds the output from allocrunner hooks so that later // allocrunner hooks or task runner hooks can read them hookResources *cstructs.AllocHookResources @@ -296,7 +292,6 @@ func NewAllocRunner(config *config.AllocRunnerConfig) (interfaces.AllocRunner, e ) ar.taskCoordinator = tasklifecycle.NewCoordinator(ar.logger, tg.Tasks, ar.waitCh) - ar.maxRunDuration = tasklifecycle.NewMaxRunDuration(ar.EnforceMaxRunDurationTimeout) shutdownDelayCtx, shutdownDelayCancel := context.WithCancel(context.Background()) ar.shutdownDelayCtx = shutdownDelayCtx @@ -413,10 +408,6 @@ func (ar *allocRunner) Run() { return } - if ar.maxRunDuration != nil { - ar.maxRunDuration.Stop() - } - // Run the postrun hooks if err := ar.postrun(); err != nil { ar.logger.Error("postrun failed", "error", err) @@ -698,23 +689,6 @@ func (ar *allocRunner) handleTaskStateUpdates() { // Get the client allocation calloc := ar.clientAlloc(states) - hookAlloc := &structs.Allocation{ - ID: calloc.ID, - TaskStates: calloc.TaskStates, - ClientStatus: calloc.ClientStatus, - ClientDescription: calloc.ClientDescription, - DeploymentStatus: calloc.DeploymentStatus, - NetworkStatus: calloc.NetworkStatus, - Job: ar.Alloc().Job, - TaskGroup: ar.Alloc().TaskGroup, - DesiredStatus: ar.Alloc().DesiredStatus, - } - - if ar.maxRunDuration != nil { - ar.maxRunDuration.SetAlloc(hookAlloc) - ar.maxRunDuration.TaskStateUpdated(states) - } - // Update the server ar.stateUpdater.AllocStateUpdated(calloc) @@ -1311,11 +1285,6 @@ func (ar *allocRunner) Shutdown() { go func() { ar.logger.Trace("shutting down") - // Shutdown tasks gracefully if they were run - if ar.maxRunDuration != nil { - ar.maxRunDuration.Stop() - } - // Shutdown task runners var wg sync.WaitGroup for _, tr := range ar.tasks { diff --git a/client/allocrunner/alloc_runner_hooks.go b/client/allocrunner/alloc_runner_hooks.go index abc2898b08b..51a14368599 100644 --- a/client/allocrunner/alloc_runner_hooks.go +++ b/client/allocrunner/alloc_runner_hooks.go @@ -111,6 +111,7 @@ func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error { ar.runnerHooks = []interfaces.RunnerHook{ newIdentityHook(hookLogger, ar.widmgr), 
newAllocDirHook(hookLogger, ar.allocDir), + newMaxRunDurationHook(hookLogger, alloc, ar.EnforceMaxRunDurationTimeout), newConsulHook(consulHookConfig{ alloc: ar.alloc, allocdir: ar.allocDir, diff --git a/client/allocrunner/max_run_duration_hook.go b/client/allocrunner/max_run_duration_hook.go new file mode 100644 index 00000000000..bf71e20d9f8 --- /dev/null +++ b/client/allocrunner/max_run_duration_hook.go @@ -0,0 +1,159 @@ +// Copyright IBM Corp. 2015, 2025 +// SPDX-License-Identifier: BUSL-1.1 + +package allocrunner + +import ( + "sync" + "time" + + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/client/allocrunner/interfaces" + "github.com/hashicorp/nomad/client/taskenv" + "github.com/hashicorp/nomad/nomad/structs" +) + +var ( + _ interfaces.RunnerPrerunHook = (*maxRunDurationHook)(nil) + _ interfaces.RunnerPostrunHook = (*maxRunDurationHook)(nil) + _ interfaces.RunnerUpdateHook = (*maxRunDurationHook)(nil) + _ interfaces.ShutdownHook = (*maxRunDurationHook)(nil) +) + +type maxRunDurationHook struct { + mu sync.Mutex + + alloc *structs.Allocation + + timer *time.Timer + deadline time.Time + maxRunDuration time.Duration + hasMaxRunDuration bool + + onTimeout func(time.Time) + logger hclog.Logger +} + +func newMaxRunDurationHook( + logger hclog.Logger, + alloc *structs.Allocation, + onTimeout func(time.Time), +) interfaces.RunnerHook { + return &maxRunDurationHook{ + alloc: alloc, + onTimeout: onTimeout, + logger: logger.Named("max_run_duration"), + } +} + +func (h *maxRunDurationHook) Name() string { + return "max_run_duration" +} + +func (h *maxRunDurationHook) Prerun(*taskenv.TaskEnv) error { + h.mu.Lock() + defer h.mu.Unlock() + + h.resetTimer() + return nil +} + +func (h *maxRunDurationHook) Update(req *interfaces.RunnerUpdateRequest) error { + h.mu.Lock() + defer h.mu.Unlock() + + h.alloc = req.Alloc + h.resetTimer() + return nil +} + +func (h *maxRunDurationHook) Postrun() error { + h.mu.Lock() + defer h.mu.Unlock() + + h.stopTimer() + return nil +} + +func (h *maxRunDurationHook) Shutdown() { + h.mu.Lock() + defer h.mu.Unlock() + + h.stopTimer() +} + +func (h *maxRunDurationHook) resetTimer() { + maxRunDuration, ok := h.currentMaxRunDuration() + if !ok { + h.stopTimer() + h.deadline = time.Time{} + h.maxRunDuration = 0 + h.hasMaxRunDuration = false + return + } + + if h.hasMaxRunDuration && h.maxRunDuration == maxRunDuration && !h.deadline.IsZero() { + return + } + + h.stopTimer() + + h.maxRunDuration = maxRunDuration + h.hasMaxRunDuration = true + h.deadline = time.Now().Add(maxRunDuration) + + deadline := h.deadline + remaining := time.Until(deadline) + + if remaining <= 0 { + h.logger.Debug("allocation exceeded max_run_duration, enforcing immediately", "deadline", deadline) + go h.onTimeout(deadline) + return + } + + timer := time.NewTimer(remaining) + h.timer = timer + + h.logger.Trace("armed max_run_duration timer", "deadline", deadline, "remaining", remaining) + + go func(t *time.Timer, deadline time.Time) { + <-t.C + + h.mu.Lock() + if h.timer != t { + h.mu.Unlock() + return + } + h.timer = nil + h.mu.Unlock() + + h.onTimeout(deadline) + }(timer, deadline) +} + +func (h *maxRunDurationHook) stopTimer() { + if h.timer == nil { + return + } + + if !h.timer.Stop() { + select { + case <-h.timer.C: + default: + } + } + + h.timer = nil +} + +func (h *maxRunDurationHook) currentMaxRunDuration() (time.Duration, bool) { + if h.alloc.TerminalStatus() { + return 0, false + } + + if h.alloc.DesiredStatus != "" && h.alloc.DesiredStatus != structs.AllocDesiredStatusRun { + 
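+ // A non-empty, non-run desired status means the server already wants this
+ // alloc stopped or evicted, so no client-side timeout timer is needed.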
return 0, false + } + + return h.alloc.MaxRunDuration() +} diff --git a/client/allocrunner/max_run_duration_hook_test.go b/client/allocrunner/max_run_duration_hook_test.go new file mode 100644 index 00000000000..0002b3ccd3c --- /dev/null +++ b/client/allocrunner/max_run_duration_hook_test.go @@ -0,0 +1,234 @@ +// Copyright IBM Corp. 2015, 2025 +// SPDX-License-Identifier: BUSL-1.1 + +package allocrunner + +import ( + "testing" + "time" + + log "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/client/allocrunner/interfaces" + "github.com/hashicorp/nomad/client/taskenv" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" +) + +func newTestMaxRunDurationHookCallback() (func(time.Time), chan time.Time) { + deadlines := make(chan time.Time, 8) + return func(deadline time.Time) { + deadlines <- deadline + }, deadlines +} + +func newTestMaxRunDurationHook( + alloc *structs.Allocation, + onTimeout func(time.Time), +) *maxRunDurationHook { + hook := newMaxRunDurationHook(log.NewNullLogger(), alloc, onTimeout) + + h, ok := hook.(*maxRunDurationHook) + if !ok { + panic("newMaxRunDurationHook returned unexpected hook type") + } + + return h +} + +func TestMaxRunDurationHook_Prerun_ArmsTimerImmediately(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + maxRunDuration := 50 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + onTimeout, deadlines := newTestMaxRunDurationHookCallback() + hook := newTestMaxRunDurationHook(alloc, onTimeout) + + err := hook.Prerun((*taskenv.TaskEnv)(nil)) + must.NoError(t, err) + + select { + case deadline := <-deadlines: + must.False(t, deadline.IsZero()) + case <-time.After(500 * time.Millisecond): + t.Fatal("timed out waiting for max_run_duration deadline") + } +} + +func TestMaxRunDurationHook_Update_DoesNotExtendDeadlineOnUnrelatedAllocChange(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + maxRunDuration := 50 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + onTimeout, deadlines := newTestMaxRunDurationHookCallback() + hook := newTestMaxRunDurationHook(alloc, onTimeout) + + err := hook.Prerun((*taskenv.TaskEnv)(nil)) + must.NoError(t, err) + + updated := alloc.Copy() + updated.ClientDescription = "unrelated alloc update" + + time.Sleep(20 * time.Millisecond) + + err = hook.Update(&interfaces.RunnerUpdateRequest{Alloc: updated}) + must.NoError(t, err) + + select { + case <-deadlines: + case <-time.After(200 * time.Millisecond): + t.Fatal("timed out waiting for original max_run_duration deadline after unrelated update") + } +} + +func TestMaxRunDurationHook_Update_RearmsOnDurationChange(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + initial := 200 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &initial + + onTimeout, deadlines := newTestMaxRunDurationHookCallback() + hook := newTestMaxRunDurationHook(alloc, onTimeout) + + err := hook.Prerun((*taskenv.TaskEnv)(nil)) + must.NoError(t, err) + + updated := alloc.Copy() + latest := 40 * time.Millisecond + updated.Job.TaskGroups[0].MaxRunDuration = &latest + + err = hook.Update(&interfaces.RunnerUpdateRequest{Alloc: updated}) + must.NoError(t, err) + + select { + case <-deadlines: + case <-time.After(500 * time.Millisecond): + t.Fatal("timed out waiting for updated 
max_run_duration deadline") + } +} + +func TestMaxRunDurationHook_DoesNotFireWhenAllocNotEligible(t *testing.T) { + ci.Parallel(t) + + cases := []struct { + name string + alloc *structs.Allocation + }{ + { + name: "non-batch job", + alloc: func() *structs.Allocation { + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + alloc.Job.Type = structs.JobTypeService + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + return alloc + }(), + }, + { + name: "desired status not run", + alloc: func() *structs.Allocation { + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + alloc.DesiredStatus = structs.AllocDesiredStatusStop + return alloc + }(), + }, + { + name: "terminal alloc", + alloc: func() *structs.Allocation { + alloc := mock.BatchAlloc() + maxRunDuration := 25 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + alloc.ClientStatus = structs.AllocClientStatusComplete + return alloc + }(), + }, + { + name: "no max run duration", + alloc: func() *structs.Allocation { + alloc := mock.BatchAlloc() + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = nil + return alloc + }(), + }, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + onTimeout, deadlines := newTestMaxRunDurationHookCallback() + hook := newTestMaxRunDurationHook(tc.alloc, onTimeout) + + err := hook.Prerun((*taskenv.TaskEnv)(nil)) + must.NoError(t, err) + + select { + case deadline := <-deadlines: + t.Fatalf("unexpected deadline fired: %v", deadline) + case <-time.After(100 * time.Millisecond): + } + }) + } +} + +func TestMaxRunDurationHook_Postrun_CancelsTimer(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + maxRunDuration := 150 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + onTimeout, deadlines := newTestMaxRunDurationHookCallback() + hook := newTestMaxRunDurationHook(alloc, onTimeout) + + err := hook.Prerun((*taskenv.TaskEnv)(nil)) + must.NoError(t, err) + + err = hook.Postrun() + must.NoError(t, err) + + select { + case deadline := <-deadlines: + t.Fatalf("unexpected deadline fired after postrun: %v", deadline) + case <-time.After(250 * time.Millisecond): + } +} + +func TestMaxRunDurationHook_Shutdown_CancelsTimer(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + maxRunDuration := 150 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + onTimeout, deadlines := newTestMaxRunDurationHookCallback() + hook := newTestMaxRunDurationHook(alloc, onTimeout) + + err := hook.Prerun((*taskenv.TaskEnv)(nil)) + must.NoError(t, err) + + hook.Shutdown() + + select { + case deadline := <-deadlines: + t.Fatalf("unexpected deadline fired after shutdown: %v", deadline) + case <-time.After(250 * time.Millisecond): + } +} diff --git a/client/allocrunner/tasklifecycle/max_run_duration.go b/client/allocrunner/tasklifecycle/max_run_duration.go deleted file mode 100644 index 40d2cd44a86..00000000000 --- a/client/allocrunner/tasklifecycle/max_run_duration.go +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright IBM Corp. 
2015, 2025 -// SPDX-License-Identifier: BUSL-1.1 - -package tasklifecycle - -import ( - "sync" - "time" - - "github.com/hashicorp/nomad/nomad/structs" -) - -type MaxRunDurationTimeoutFn func(time.Time) - -// MaxRunDuration coordinates local enforcement of task group -// `max_run_duration`. -// -// This component is alloc-scoped, not task-scoped. It observes the current -// authoritative allocation plus task state snapshots and arms a timer once the -// allocation is fully running. The timer is canceled and recomputed whenever -// alloc or task state changes are observed. -type MaxRunDuration struct { - mu sync.Mutex - - alloc *structs.Allocation - deadline time.Time - timer *time.Timer - onTimeout MaxRunDurationTimeoutFn -} - -func NewMaxRunDuration(onTimeout MaxRunDurationTimeoutFn) *MaxRunDuration { - return &MaxRunDuration{onTimeout: onTimeout} -} - -// SetAlloc updates the authoritative allocation used for -// `max_run_duration` evaluation. -func (m *MaxRunDuration) SetAlloc(alloc *structs.Allocation) { - m.mu.Lock() - defer m.mu.Unlock() - - m.alloc = alloc -} - -// TaskStateUpdated recomputes the `max_run_duration` timer from the latest -// alloc and task state snapshot. -func (m *MaxRunDuration) TaskStateUpdated(taskStates map[string]*structs.TaskState) { - m.mu.Lock() - defer m.mu.Unlock() - - m.setDeadline(taskStates) - m.resetTimerLocked() -} - -// Stop cancels any active timer. -func (m *MaxRunDuration) Stop() { - m.mu.Lock() - defer m.mu.Unlock() - - m.stopTimerLocked() -} - -func (m *MaxRunDuration) setDeadline(taskStates map[string]*structs.TaskState) { - if m.alloc == nil { - m.deadline = time.Time{} - return - } - - // Only running allocations with a valid max_run_duration and a fully - // running task set should establish a deadline. - if m.alloc.TerminalStatus() { - m.deadline = time.Time{} - return - } - - // DesiredStatus defaults to the zero value for normal running allocations. - // Treat an empty status the same as "run" and only clear the deadline when - // the alloc is explicitly marked with a non-run desired status. - if m.alloc.DesiredStatus != "" && m.alloc.DesiredStatus != structs.AllocDesiredStatusRun { - m.deadline = time.Time{} - return - } - - maxRunDuration, ok := m.alloc.MaxRunDuration() - if !ok { - m.deadline = time.Time{} - return - } - - if !m.deadline.IsZero() { - return - } - - startedAt, ok := structs.FullyRunningSince(taskStates) - if !ok { - return - } - - m.deadline = startedAt.Add(maxRunDuration) -} - -func (m *MaxRunDuration) resetTimerLocked() { - m.stopTimerLocked() - - if m.deadline.IsZero() { - return - } - - deadline := m.deadline - remaining := time.Until(deadline) - if remaining <= 0 { - go m.onTimeout(deadline) - return - } - - timer := time.NewTimer(remaining) - m.timer = timer - - go func(t *time.Timer, deadline time.Time) { - <-t.C - - m.mu.Lock() - if m.timer != t { - m.mu.Unlock() - return - } - m.timer = nil - m.mu.Unlock() - - m.onTimeout(deadline) - }(timer, deadline) -} - -func (m *MaxRunDuration) stopTimerLocked() { - if m.timer == nil { - return - } - - if !m.timer.Stop() { - select { - case <-m.timer.C: - default: - } - } - m.timer = nil -} diff --git a/client/allocrunner/tasklifecycle/max_run_duration_test.go b/client/allocrunner/tasklifecycle/max_run_duration_test.go deleted file mode 100644 index eea7f38326d..00000000000 --- a/client/allocrunner/tasklifecycle/max_run_duration_test.go +++ /dev/null @@ -1,275 +0,0 @@ -// Copyright IBM Corp. 
2015, 2025 -// SPDX-License-Identifier: BUSL-1.1 - -package tasklifecycle - -import ( - "testing" - "time" - - "github.com/hashicorp/nomad/nomad/mock" - "github.com/hashicorp/nomad/nomad/structs" - "github.com/shoenig/test/must" -) - -func newTestMaxRunDurationCallback() (func(time.Time), chan time.Time) { - deadlines := make(chan time.Time, 8) - return func(deadline time.Time) { - deadlines <- deadline - }, deadlines -} - -func TestMaxRunDuration_FullyRunningSince(t *testing.T) { - t.Parallel() - - now := time.Now().UTC() - earlier := now.Add(-2 * time.Second) - later := now.Add(-1 * time.Second) - - got, ok := structs.FullyRunningSince(map[string]*structs.TaskState{ - "a": { - State: structs.TaskStateRunning, - StartedAt: earlier, - }, - "b": { - State: structs.TaskStateRunning, - StartedAt: later, - }, - }) - - must.True(t, ok) - must.Eq(t, later, got) -} - -func TestMaxRunDuration_FullyRunningSince_FalseWhenNotFullyRunning(t *testing.T) { - t.Parallel() - - _, ok := structs.FullyRunningSince(map[string]*structs.TaskState{ - "a": { - State: structs.TaskStateRunning, - StartedAt: time.Now().Add(-time.Second).UTC(), - }, - "b": { - State: structs.TaskStatePending, - }, - }) - - must.False(t, ok) -} - -func TestMaxRunDuration_TaskStateUpdated_ArmsTimerAndFires(t *testing.T) { - t.Parallel() - - alloc := mock.BatchAlloc() - maxRunDuration := 50 * time.Millisecond - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - - onTimeout, deadlines := newTestMaxRunDurationCallback() - m := NewMaxRunDuration(onTimeout) - m.SetAlloc(alloc) - - startedAt := time.Now().UTC() - m.TaskStateUpdated(map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: startedAt, - }, - }) - - select { - case deadline := <-deadlines: - must.Eq(t, startedAt.Add(maxRunDuration), deadline) - case <-time.After(500 * time.Millisecond): - t.Fatal("timed out waiting for max_run_duration deadline") - } -} - -func TestMaxRunDuration_TaskStateUpdated_PreservesDeadlineWhenOneTaskFinishesEarly(t *testing.T) { - t.Parallel() - - alloc := mock.BatchAlloc() - maxRunDuration := 50 * time.Millisecond - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - - task2 := alloc.Job.TaskGroups[0].Tasks[0].Copy() - task2.Name = "web2" - alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2) - - onTimeout, deadlines := newTestMaxRunDurationCallback() - m := NewMaxRunDuration(onTimeout) - m.SetAlloc(alloc) - - startedAt := time.Now().UTC() - m.TaskStateUpdated(map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: startedAt, - }, - "web2": { - State: structs.TaskStateRunning, - StartedAt: startedAt, - }, - }) - - m.TaskStateUpdated(map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: startedAt, - }, - "web2": { - State: structs.TaskStateDead, - StartedAt: startedAt, - FinishedAt: startedAt.Add(2 * time.Millisecond), - }, - }) - - select { - case deadline := <-deadlines: - must.Eq(t, startedAt.Add(maxRunDuration), deadline) - case <-time.After(500 * time.Millisecond): - t.Fatal("timed out waiting for max_run_duration deadline after one task finished early") - } -} - -func TestMaxRunDuration_TaskStateUpdated_DoesNotFireWhenAllocNotEligible(t *testing.T) { - t.Parallel() - - cases := []struct { - name string - alloc *structs.Allocation - }{ - { - name: "not fully running", - alloc: func() *structs.Allocation { - alloc := 
mock.BatchAlloc() - maxRunDuration := 25 * time.Millisecond - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - return alloc - }(), - }, - { - name: "non-batch job", - alloc: func() *structs.Allocation { - alloc := mock.BatchAlloc() - maxRunDuration := 25 * time.Millisecond - alloc.Job.Type = structs.JobTypeService - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - return alloc - }(), - }, - { - name: "desired status not run", - alloc: func() *structs.Allocation { - alloc := mock.BatchAlloc() - maxRunDuration := 25 * time.Millisecond - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - alloc.DesiredStatus = structs.AllocDesiredStatusStop - return alloc - }(), - }, - { - name: "terminal alloc", - alloc: func() *structs.Allocation { - alloc := mock.BatchAlloc() - maxRunDuration := 25 * time.Millisecond - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - alloc.ClientStatus = structs.AllocClientStatusComplete - return alloc - }(), - }, - } - - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - - onTimeout, deadlines := newTestMaxRunDurationCallback() - m := NewMaxRunDuration(onTimeout) - m.SetAlloc(tc.alloc) - - taskStates := map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: time.Now().UTC(), - }, - } - if tc.name == "not fully running" { - taskStates["web"] = &structs.TaskState{State: structs.TaskStatePending} - } - - m.TaskStateUpdated(taskStates) - - select { - case deadline := <-deadlines: - t.Fatalf("unexpected deadline fired: %v", deadline) - case <-time.After(100 * time.Millisecond): - } - }) - } -} - -func TestMaxRunDuration_SetAlloc_UsesLatestAllocConfig(t *testing.T) { - t.Parallel() - - alloc := mock.BatchAlloc() - initial := 200 * time.Millisecond - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = &initial - - onTimeout, deadlines := newTestMaxRunDurationCallback() - m := NewMaxRunDuration(onTimeout) - m.SetAlloc(alloc) - - updated := alloc.Copy() - latest := 40 * time.Millisecond - updated.Job.TaskGroups[0].MaxRunDuration = &latest - m.SetAlloc(updated) - - startedAt := time.Now().UTC() - m.TaskStateUpdated(map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: startedAt, - }, - }) - - select { - case deadline := <-deadlines: - must.Eq(t, startedAt.Add(latest), deadline) - case <-time.After(500 * time.Millisecond): - t.Fatal("timed out waiting for updated max_run_duration deadline") - } -} - -func TestMaxRunDuration_Stop_CancelsTimer(t *testing.T) { - t.Parallel() - - alloc := mock.BatchAlloc() - maxRunDuration := 150 * time.Millisecond - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration - - onTimeout, deadlines := newTestMaxRunDurationCallback() - m := NewMaxRunDuration(onTimeout) - m.SetAlloc(alloc) - - m.TaskStateUpdated(map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: time.Now().UTC(), - }, - }) - - m.Stop() - - select { - case deadline := <-deadlines: - t.Fatalf("unexpected deadline fired after stop: %v", deadline) - case <-time.After(250 * time.Millisecond): - } -} From 3b6f4df95ab812f8785a1161230091f86f5266b6 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Fri, 17 Apr 2026 15:46:26 +0200 Subject: [PATCH 30/37] allow for updates to 
max_run_duration during job run --- client/allocrunner/alloc_runner_test.go | 74 ++++++++++++++++++ client/allocrunner/max_run_duration_hook.go | 25 ++++-- scheduler/generic_sched_test.go | 85 +++++++++++++++++++++ scheduler/util.go | 16 ++++ 4 files changed, 192 insertions(+), 8 deletions(-) diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index 01dce5c61d3..ee965ab2853 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -2720,6 +2720,80 @@ func TestAllocRunner_MaxRunDuration_StopsExpiredAlloc(t *testing.T) { }) } +func TestAllocRunner_MaxRunDuration_UpdateExtendsRunningAlloc(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + task := alloc.Job.TaskGroups[0].Tasks[0] + task.Driver = "mock_driver" + task.Config = map[string]interface{}{ + "run_for": "10s", + } + task.KillTimeout = 10 * time.Millisecond + + initialMaxRunDuration := 75 * time.Millisecond + alloc.Job.TaskGroups[0].MaxRunDuration = &initialMaxRunDuration + + conf, cleanup := testAllocRunnerConfig(t, alloc) + defer cleanup() + + arIface, err := NewAllocRunner(conf) + must.NoError(t, err) + ar := arIface.(*allocRunner) + + go ar.Run() + defer destroy(ar) + + testutil.WaitForResult(func() (bool, error) { + state := ar.AllocState() + if state == nil { + return false, fmt.Errorf("no alloc state") + } + if state.ClientStatus != structs.AllocClientStatusRunning { + return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusRunning) + } + return true, nil + }, func(err error) { + state := ar.AllocState() + t.Fatalf("timed out waiting for alloc runner to start: %v; state=%#v", err, state) + }) + + time.Sleep(40 * time.Millisecond) + + updatedAlloc := ar.Alloc().Copy() + updatedAlloc.AllocModifyIndex++ + updatedMaxRunDuration := 200 * time.Millisecond + updatedAlloc.Job.TaskGroups[0].MaxRunDuration = &updatedMaxRunDuration + ar.Update(updatedAlloc) + + time.Sleep(60 * time.Millisecond) + + state := ar.AllocState() + must.NotNil(t, state) + must.False(t, state.MaxRunDurationExceeded) + must.Eq(t, structs.AllocClientStatusRunning, state.ClientStatus) + + testutil.WaitForResult(func() (bool, error) { + state := ar.AllocState() + if state == nil { + return false, fmt.Errorf("no alloc state") + } + if state.ClientStatus != structs.AllocClientStatusComplete { + return false, fmt.Errorf("got status %v; want %v", state.ClientStatus, structs.AllocClientStatusComplete) + } + if state.ClientDescription != structs.AllocTimeoutReasonMaxRunDuration { + return false, fmt.Errorf("got description %q; want %q", state.ClientDescription, structs.AllocTimeoutReasonMaxRunDuration) + } + if !state.MaxRunDurationExceeded { + return false, fmt.Errorf("max run duration was not marked exceeded") + } + return true, nil + }, func(err error) { + state := ar.AllocState() + t.Fatalf("timed out waiting for alloc runner max_run_duration enforcement after update: %v; state=%#v", err, state) + }) +} + func TestAllocRunner_setHookStatsHandler(t *testing.T) { ci.Parallel(t) diff --git a/client/allocrunner/max_run_duration_hook.go b/client/allocrunner/max_run_duration_hook.go index bf71e20d9f8..827231b92f4 100644 --- a/client/allocrunner/max_run_duration_hook.go +++ b/client/allocrunner/max_run_duration_hook.go @@ -83,7 +83,7 @@ func (h *maxRunDurationHook) Shutdown() { } func (h *maxRunDurationHook) resetTimer() { - maxRunDuration, ok := h.currentMaxRunDuration() + deadline, maxRunDuration, ok := h.currentDeadline() if !ok { 
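// No enforceable deadline: drop any armed timer and clear the cached
// duration so a later alloc update can re-arm from scratch.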
h.stopTimer() h.deadline = time.Time{} @@ -92,7 +92,7 @@ func (h *maxRunDurationHook) resetTimer() { return } - if h.hasMaxRunDuration && h.maxRunDuration == maxRunDuration && !h.deadline.IsZero() { + if h.hasMaxRunDuration && h.maxRunDuration == maxRunDuration && h.deadline.Equal(deadline) { return } @@ -100,9 +100,8 @@ func (h *maxRunDurationHook) resetTimer() { h.maxRunDuration = maxRunDuration h.hasMaxRunDuration = true - h.deadline = time.Now().Add(maxRunDuration) + h.deadline = deadline - deadline := h.deadline remaining := time.Until(deadline) if remaining <= 0 { @@ -146,14 +145,24 @@ func (h *maxRunDurationHook) stopTimer() { h.timer = nil } -func (h *maxRunDurationHook) currentMaxRunDuration() (time.Duration, bool) { +func (h *maxRunDurationHook) currentDeadline() (time.Time, time.Duration, bool) { if h.alloc.TerminalStatus() { - return 0, false + return time.Time{}, 0, false } if h.alloc.DesiredStatus != "" && h.alloc.DesiredStatus != structs.AllocDesiredStatusRun { - return 0, false + return time.Time{}, 0, false } - return h.alloc.MaxRunDuration() + maxRunDuration, ok := h.alloc.MaxRunDuration() + if !ok { + return time.Time{}, 0, false + } + + deadline, ok := h.alloc.MaxRunDurationDeadline() + if !ok { + return time.Time{}, 0, false + } + + return deadline, maxRunDuration, true } diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index ff008e91258..3b4c6211e46 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -3380,6 +3380,91 @@ func TestServiceSched_JobModify_InPlace08(t *testing.T) { must.NotNil(t, newAlloc.AllocatedResources) } +func TestServiceSched_JobModify_MaxRunDuration_InPlace(t *testing.T) { + ci.Parallel(t) + + h := tests.NewHarness(t) + + // Create a node + node := mock.Node() + must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)) + + // Create a batch job with a running allocation + job := mock.BatchJob() + job.TaskGroups[0].Count = 1 + initialMaxRunDuration := 1 * time.Hour + job.TaskGroups[0].MaxRunDuration = &initialMaxRunDuration + must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job)) + + alloc := mock.BatchAlloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = node.ID + alloc.Name = fmt.Sprintf("%s.%s[%d]", job.ID, job.TaskGroups[0].Name, 0) + must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{alloc})) + + // Update only max_run_duration + job2 := job.Copy() + updatedMaxRunDuration := 4 * time.Hour + job2.TaskGroups[0].MaxRunDuration = &updatedMaxRunDuration + must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2)) + + // Create a mock evaluation + eval := &structs.Evaluation{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Priority: 50, + TriggeredBy: structs.EvalTriggerJobRegister, + JobID: job.ID, + Status: structs.EvalStatusPending, + } + must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval})) + + // Process the evaluation + err := h.Process(NewBatchScheduler, eval) + must.NoError(t, err) + + // Ensure a single plan + must.SliceLen(t, 1, h.Plans) + plan := h.Plans[0] + + // Ensure the plan did not evict any allocs + var update []*structs.Allocation + for _, updateList := range plan.NodeUpdate { + update = append(update, updateList...) 
+ } + must.SliceLen(t, 0, update) + + // Ensure the plan updated the existing alloc in place + var planned []*structs.Allocation + for _, allocList := range plan.NodeAllocation { + planned = append(planned, allocList...) + } + must.SliceLen(t, 1, planned) + must.Nil(t, planned[0].Job) + must.NotNil(t, plan.Annotations) + must.NotNil(t, plan.Annotations.DesiredTGUpdates) + must.NotNil(t, plan.Annotations.DesiredTGUpdates[job.TaskGroups[0].Name]) + must.Eq(t, uint64(1), plan.Annotations.DesiredTGUpdates[job.TaskGroups[0].Name].InPlaceUpdate) + must.Eq(t, uint64(0), plan.Annotations.DesiredTGUpdates[job.TaskGroups[0].Name].DestructiveUpdate) + must.Eq(t, uint64(0), plan.Annotations.DesiredTGUpdates[job.TaskGroups[0].Name].Place) + must.Eq(t, uint64(0), plan.Annotations.DesiredTGUpdates[job.TaskGroups[0].Name].Stop) + + // Lookup the allocations by JobID + ws := memdb.NewWatchSet() + out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) + must.NoError(t, err) + must.Len(t, 1, out) + + updatedAlloc := out[0] + maxRunDuration, ok := updatedAlloc.MaxRunDuration() + must.True(t, ok) + must.Eq(t, updatedMaxRunDuration, maxRunDuration) + must.Greater(t, updatedAlloc.AllocModifyIndex, alloc.AllocModifyIndex) + + h.AssertEvalStatus(t, structs.EvalStatusComplete) +} + func TestServiceSched_JobModify_DistinctProperty(t *testing.T) { ci.Parallel(t) diff --git a/scheduler/util.go b/scheduler/util.go index 0af7dd72de9..b8b972ede73 100644 --- a/scheduler/util.go +++ b/scheduler/util.go @@ -844,6 +844,22 @@ func genericAllocUpdateFn(ctx feasible.Context, stack feasible.Stack, evalID str return false, true, nil } + // max_run_duration-only updates. This field does not affect placement + // or allocated resources, so we can update the alloc in place without + // re-running feasibility. 
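+ // When the old and new values differ, the stripped copy below is returned
+ // as an in-place update instead of falling through to placement checks.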
+ if existingTG := existing.Job.LookupTaskGroup(newTG.Name); existingTG != nil { + oldMax, oldOK := existing.MaxRunDuration() + newAlloc := existing.Copy() + newAlloc.EvalID = evalID + newAlloc.Job = nil + newAlloc.Resources = nil + + newMax, newOK := newAlloc.MaxRunDuration() + if oldOK != newOK || oldMax != newMax { + return false, false, newAlloc + } + } + // Set the existing node as the base set stack.SetNodes([]*structs.Node{node}) From 53f621064971686ea00d1a1fd260f2167c2e9616 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Fri, 17 Apr 2026 16:03:58 +0200 Subject: [PATCH 31/37] metrics --- client/allocrunner/alloc_runner_hooks.go | 2 +- client/allocrunner/max_run_duration_hook.go | 31 +++++-- .../allocrunner/max_run_duration_hook_test.go | 82 +++++++++++++++++-- 3 files changed, 102 insertions(+), 13 deletions(-) diff --git a/client/allocrunner/alloc_runner_hooks.go b/client/allocrunner/alloc_runner_hooks.go index 51a14368599..f17d9c03c43 100644 --- a/client/allocrunner/alloc_runner_hooks.go +++ b/client/allocrunner/alloc_runner_hooks.go @@ -111,7 +111,7 @@ func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error { ar.runnerHooks = []interfaces.RunnerHook{ newIdentityHook(hookLogger, ar.widmgr), newAllocDirHook(hookLogger, ar.allocDir), - newMaxRunDurationHook(hookLogger, alloc, ar.EnforceMaxRunDurationTimeout), + newMaxRunDurationHook(hookLogger, alloc, ar.clientBaseLabels, ar.EnforceMaxRunDurationTimeout), newConsulHook(consulHookConfig{ alloc: ar.alloc, allocdir: ar.allocDir, diff --git a/client/allocrunner/max_run_duration_hook.go b/client/allocrunner/max_run_duration_hook.go index 827231b92f4..2726dbfea4e 100644 --- a/client/allocrunner/max_run_duration_hook.go +++ b/client/allocrunner/max_run_duration_hook.go @@ -8,6 +8,7 @@ import ( "time" "github.com/hashicorp/go-hclog" + metrics "github.com/hashicorp/go-metrics/compat" "github.com/hashicorp/nomad/client/allocrunner/interfaces" "github.com/hashicorp/nomad/client/taskenv" "github.com/hashicorp/nomad/nomad/structs" @@ -30,19 +31,22 @@ type maxRunDurationHook struct { maxRunDuration time.Duration hasMaxRunDuration bool - onTimeout func(time.Time) - logger hclog.Logger + onTimeout func(time.Time) + logger hclog.Logger + baseLabels []metrics.Label } func newMaxRunDurationHook( logger hclog.Logger, alloc *structs.Allocation, + baseLabels []metrics.Label, onTimeout func(time.Time), ) interfaces.RunnerHook { return &maxRunDurationHook{ - alloc: alloc, - onTimeout: onTimeout, - logger: logger.Named("max_run_duration"), + alloc: alloc, + onTimeout: onTimeout, + logger: logger.Named("max_run_duration"), + baseLabels: baseLabels, } } @@ -101,6 +105,7 @@ func (h *maxRunDurationHook) resetTimer() { h.maxRunDuration = maxRunDuration h.hasMaxRunDuration = true h.deadline = deadline + h.emitMetrics(maxRunDuration, deadline) remaining := time.Until(deadline) @@ -145,6 +150,22 @@ func (h *maxRunDurationHook) stopTimer() { h.timer = nil } +func (h *maxRunDurationHook) emitMetrics(maxRunDuration time.Duration, deadline time.Time) { + labels := h.baseLabels + labels = append(labels, metrics.Label{Name: "task_group", Value: h.alloc.TaskGroup}) + + metrics.SetGaugeWithLabels( + []string{"client", "allocs", "max_run_duration", "configured_seconds"}, + float32(maxRunDuration.Seconds()), + labels, + ) + metrics.SetGaugeWithLabels( + []string{"client", "allocs", "max_run_duration", "remaining_seconds"}, + float32(time.Until(deadline).Seconds()), + labels, + ) +} + func (h 
*maxRunDurationHook) currentDeadline() (time.Time, time.Duration, bool) { if h.alloc.TerminalStatus() { return time.Time{}, 0, false diff --git a/client/allocrunner/max_run_duration_hook_test.go b/client/allocrunner/max_run_duration_hook_test.go index 0002b3ccd3c..57606255224 100644 --- a/client/allocrunner/max_run_duration_hook_test.go +++ b/client/allocrunner/max_run_duration_hook_test.go @@ -8,6 +8,7 @@ import ( "time" log "github.com/hashicorp/go-hclog" + metrics "github.com/hashicorp/go-metrics/compat" "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/client/allocrunner/interfaces" "github.com/hashicorp/nomad/client/taskenv" @@ -25,9 +26,10 @@ func newTestMaxRunDurationHookCallback() (func(time.Time), chan time.Time) { func newTestMaxRunDurationHook( alloc *structs.Allocation, + baseLabels []metrics.Label, onTimeout func(time.Time), ) *maxRunDurationHook { - hook := newMaxRunDurationHook(log.NewNullLogger(), alloc, onTimeout) + hook := newMaxRunDurationHook(log.NewNullLogger(), alloc, baseLabels, onTimeout) h, ok := hook.(*maxRunDurationHook) if !ok { @@ -46,7 +48,7 @@ func TestMaxRunDurationHook_Prerun_ArmsTimerImmediately(t *testing.T) { alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration onTimeout, deadlines := newTestMaxRunDurationHookCallback() - hook := newTestMaxRunDurationHook(alloc, onTimeout) + hook := newTestMaxRunDurationHook(alloc, nil, onTimeout) err := hook.Prerun((*taskenv.TaskEnv)(nil)) must.NoError(t, err) @@ -68,7 +70,7 @@ func TestMaxRunDurationHook_Update_DoesNotExtendDeadlineOnUnrelatedAllocChange(t alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration onTimeout, deadlines := newTestMaxRunDurationHookCallback() - hook := newTestMaxRunDurationHook(alloc, onTimeout) + hook := newTestMaxRunDurationHook(alloc, nil, onTimeout) err := hook.Prerun((*taskenv.TaskEnv)(nil)) must.NoError(t, err) @@ -97,7 +99,7 @@ func TestMaxRunDurationHook_Update_RearmsOnDurationChange(t *testing.T) { alloc.Job.TaskGroups[0].MaxRunDuration = &initial onTimeout, deadlines := newTestMaxRunDurationHookCallback() - hook := newTestMaxRunDurationHook(alloc, onTimeout) + hook := newTestMaxRunDurationHook(alloc, nil, onTimeout) err := hook.Prerun((*taskenv.TaskEnv)(nil)) must.NoError(t, err) @@ -172,7 +174,7 @@ func TestMaxRunDurationHook_DoesNotFireWhenAllocNotEligible(t *testing.T) { t.Parallel() onTimeout, deadlines := newTestMaxRunDurationHookCallback() - hook := newTestMaxRunDurationHook(tc.alloc, onTimeout) + hook := newTestMaxRunDurationHook(tc.alloc, nil, onTimeout) err := hook.Prerun((*taskenv.TaskEnv)(nil)) must.NoError(t, err) @@ -195,7 +197,7 @@ func TestMaxRunDurationHook_Postrun_CancelsTimer(t *testing.T) { alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration onTimeout, deadlines := newTestMaxRunDurationHookCallback() - hook := newTestMaxRunDurationHook(alloc, onTimeout) + hook := newTestMaxRunDurationHook(alloc, nil, onTimeout) err := hook.Prerun((*taskenv.TaskEnv)(nil)) must.NoError(t, err) @@ -219,7 +221,7 @@ func TestMaxRunDurationHook_Shutdown_CancelsTimer(t *testing.T) { alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration onTimeout, deadlines := newTestMaxRunDurationHookCallback() - hook := newTestMaxRunDurationHook(alloc, onTimeout) + hook := newTestMaxRunDurationHook(alloc, nil, onTimeout) err := hook.Prerun((*taskenv.TaskEnv)(nil)) must.NoError(t, err) @@ -232,3 +234,69 @@ func TestMaxRunDurationHook_Shutdown_CancelsTimer(t *testing.T) { case <-time.After(250 * time.Millisecond): } } + +func TestMaxRunDurationHook_EmitMetrics(t *testing.T) { + 
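+ // Route gauges into an in-memory sink so the configured and remaining
+ // values can be asserted without a real telemetry backend.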
ci.Parallel(t) + + inMemorySink := metrics.NewInmemSink(10*time.Millisecond, 50*time.Millisecond) + _, err := metrics.NewGlobal(metrics.DefaultConfig("nomad_test"), inMemorySink) + must.NoError(t, err) + + alloc := mock.BatchAlloc() + maxRunDuration := 2 * time.Minute + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + baseLabels := []metrics.Label{ + {Name: "node_id", Value: "node-123"}, + } + + onTimeout, _ := newTestMaxRunDurationHookCallback() + hook := newTestMaxRunDurationHook(alloc, baseLabels, onTimeout) + + err = hook.Prerun((*taskenv.TaskEnv)(nil)) + must.NoError(t, err) + + data := inMemorySink.Data() + + var configuredFound bool + for _, interval := range data { + for _, gauge := range interval.Gauges { + if gauge.Name != "nomad_test.client.allocs.max_run_duration.configured_seconds" { + continue + } + + labels := make(map[string]string, len(gauge.Labels)) + for _, label := range gauge.Labels { + labels[label.Name] = label.Value + } + + if labels["node_id"] == "node-123" && labels["task_group"] == alloc.TaskGroup { + must.Eq(t, float32(maxRunDuration.Seconds()), gauge.Value) + configuredFound = true + } + } + } + must.True(t, configuredFound) + + var remainingFound bool + for _, interval := range data { + for _, gauge := range interval.Gauges { + if gauge.Name != "nomad_test.client.allocs.max_run_duration.remaining_seconds" { + continue + } + + labels := make(map[string]string, len(gauge.Labels)) + for _, label := range gauge.Labels { + labels[label.Name] = label.Value + } + + if labels["node_id"] == "node-123" && labels["task_group"] == alloc.TaskGroup { + must.Positive(t, gauge.Value) + must.LessEq(t, gauge.Value, float32(maxRunDuration.Seconds())) + remainingFound = true + } + } + } + must.True(t, remainingFound) +} From 7841eda5c06917b293f8e2d374336116def3bbe5 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Fri, 17 Apr 2026 16:04:52 +0200 Subject: [PATCH 32/37] better logs --- client/allocrunner/max_run_duration_hook.go | 29 +++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/client/allocrunner/max_run_duration_hook.go b/client/allocrunner/max_run_duration_hook.go index 2726dbfea4e..64357156026 100644 --- a/client/allocrunner/max_run_duration_hook.go +++ b/client/allocrunner/max_run_duration_hook.go @@ -100,6 +100,10 @@ func (h *maxRunDurationHook) resetTimer() { return } + prevMaxRunDuration := h.maxRunDuration + prevDeadline := h.deadline + hadMaxRunDuration := h.hasMaxRunDuration + h.stopTimer() h.maxRunDuration = maxRunDuration @@ -109,8 +113,24 @@ func (h *maxRunDurationHook) resetTimer() { remaining := time.Until(deadline) + if hadMaxRunDuration { + h.logger.Debug("updated max_run_duration", + "task_group", h.alloc.TaskGroup, + "old_configured", prevMaxRunDuration, + "new_configured", maxRunDuration, + "old_deadline", prevDeadline, + "new_deadline", deadline, + "remaining", remaining, + ) + } + if remaining <= 0 { - h.logger.Debug("allocation exceeded max_run_duration, enforcing immediately", "deadline", deadline) + h.logger.Debug("allocation exceeded max_run_duration, enforcing immediately", + "task_group", h.alloc.TaskGroup, + "configured", maxRunDuration, + "remaining", remaining, + "deadline", deadline, + ) go h.onTimeout(deadline) return } @@ -118,7 +138,12 @@ func (h *maxRunDurationHook) resetTimer() { timer := time.NewTimer(remaining) h.timer = timer - h.logger.Trace("armed max_run_duration timer", "deadline", deadline, 
"remaining", remaining) + h.logger.Trace("armed max_run_duration timer", + "task_group", h.alloc.TaskGroup, + "configured", maxRunDuration, + "remaining", remaining, + "deadline", deadline, + ) go func(t *time.Timer, deadline time.Time) { <-t.C From ce45ae80436547284042f78cd993b99696e32b8d Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Fri, 17 Apr 2026 16:17:53 +0200 Subject: [PATCH 33/37] fix clock arming bug --- client/allocrunner/max_run_duration_hook.go | 7 ++--- .../allocrunner/max_run_duration_hook_test.go | 30 +++++++++++++++++++ 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/client/allocrunner/max_run_duration_hook.go b/client/allocrunner/max_run_duration_hook.go index 64357156026..7369e01571e 100644 --- a/client/allocrunner/max_run_duration_hook.go +++ b/client/allocrunner/max_run_duration_hook.go @@ -205,10 +205,9 @@ func (h *maxRunDurationHook) currentDeadline() (time.Time, time.Duration, bool) return time.Time{}, 0, false } - deadline, ok := h.alloc.MaxRunDurationDeadline() - if !ok { - return time.Time{}, 0, false + if deadline, ok := h.alloc.MaxRunDurationDeadline(); ok { + return deadline, maxRunDuration, true } - return deadline, maxRunDuration, true + return time.Now().Add(maxRunDuration), maxRunDuration, true } diff --git a/client/allocrunner/max_run_duration_hook_test.go b/client/allocrunner/max_run_duration_hook_test.go index 57606255224..daa4ae0e298 100644 --- a/client/allocrunner/max_run_duration_hook_test.go +++ b/client/allocrunner/max_run_duration_hook_test.go @@ -235,6 +235,36 @@ func TestMaxRunDurationHook_Shutdown_CancelsTimer(t *testing.T) { } } +func TestMaxRunDurationHook_Prerun_ArmsTimerBeforeTasksAreFullyRunning(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + maxRunDuration := 50 * time.Millisecond + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration + + // Simulate the initial prerun state where tasks have not yet started and + // therefore no StartedAt timestamps are available to compute a fully-running + // deadline from task state. 
+	for _, ts := range alloc.TaskStates {
+		ts.State = structs.TaskStatePending
+		ts.StartedAt = time.Time{}
+	}
+
+	onTimeout, deadlines := newTestMaxRunDurationHookCallback()
+	hook := newTestMaxRunDurationHook(alloc, nil, onTimeout)
+
+	err := hook.Prerun((*taskenv.TaskEnv)(nil))
+	must.NoError(t, err)
+
+	select {
+	case deadline := <-deadlines:
+		must.False(t, deadline.IsZero())
+	case <-time.After(500 * time.Millisecond):
+		t.Fatal("timed out waiting for max_run_duration deadline before tasks were fully running")
+	}
+}
+
 func TestMaxRunDurationHook_EmitMetrics(t *testing.T) {
 	ci.Parallel(t)

From 638056127e329af7cbb41715b339998651836992 Mon Sep 17 00:00:00 2001
From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com>
Date: Fri, 17 Apr 2026 16:28:06 +0200
Subject: [PATCH 34/37] don't start poststop tasks if the timeout has passed

---
 client/allocrunner/alloc_runner.go      | 17 +++++
 client/allocrunner/alloc_runner_test.go | 89 +++++++++++++++++++++++++
 2 files changed, 106 insertions(+)

diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go
index eaae5d9de0b..e289ea27c65 100644
--- a/client/allocrunner/alloc_runner.go
+++ b/client/allocrunner/alloc_runner.go
@@ -815,6 +815,23 @@ func (ar *allocRunner) killTasks() map[string]*structs.TaskState {
 	}
 	wg.Wait()
 
+	// Skip poststop tasks entirely when max_run_duration has been exceeded so
+	// they are not started after the allocation has timed out.
+	if ar.state.MaxRunDurationExceeded {
+		for name, tr := range ar.tasks {
+			if !tr.IsPoststopTask() {
+				continue
+			}
+
+			state := tr.TaskState()
+			if state != nil {
+				states[name] = state
+			}
+		}
+
+		return states
+	}
+
 	// Perform no action on post stop tasks, but retain their states if they exist. This
 	// commonly happens at the time of alloc GC from the client node.
 	for name, tr := range ar.tasks {

diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go
index ee965ab2853..01401358998 100644
--- a/client/allocrunner/alloc_runner_test.go
+++ b/client/allocrunner/alloc_runner_test.go
@@ -497,6 +497,95 @@ func TestAllocRunner_Lifecycle_Poststop(t *testing.T) {
 
 }
 
+func TestAllocRunner_MaxRunDuration_SkipsPoststopTasks(t *testing.T) {
+	ci.Parallel(t)
+
+	alloc := mock.LifecycleAlloc()
+	tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name]
+
+	alloc.Job.Type = structs.JobTypeBatch
+	maxRunDuration := 50 * time.Millisecond
+	alloc.Job.TaskGroups[0].MaxRunDuration = &maxRunDuration
+
+	mainTask := alloc.Job.TaskGroups[0].Tasks[0]
+	mainTask.Config["run_for"] = "100s"
+	mainTask.KillTimeout = 10 * time.Millisecond
+
+	poststopTask := alloc.Job.TaskGroups[0].Tasks[1]
+	poststopTask.Name = "poststop"
+	poststopTask.Lifecycle.Hook = structs.TaskLifecycleHookPoststop
+	poststopTask.Config["run_for"] = "10s"
+
+	alloc.Job.TaskGroups[0].Tasks = []*structs.Task{mainTask, poststopTask}
+	alloc.AllocatedResources.Tasks = map[string]*structs.AllocatedTaskResources{
+		mainTask.Name:     tr,
+		poststopTask.Name: tr,
+	}
+
+	conf, cleanup := testAllocRunnerConfig(t, alloc)
+	defer cleanup()
+
+	arIface, err := NewAllocRunner(conf)
+	must.NoError(t, err)
+	ar := arIface.(*allocRunner)
+
+	go ar.Run()
+	defer destroy(ar)
+
+	upd := conf.StateUpdater.(*MockStateUpdater)
+
+	testutil.WaitForResult(func() (bool, error) {
+		last := upd.Last()
+		if last == nil {
+			return false, fmt.Errorf("no updates")
+		}
+
+		if last.ClientStatus != structs.AllocClientStatusRunning {
+			return false, fmt.Errorf("expected alloc to be running not %s", last.ClientStatus)
+		}
+
+		if s := last.TaskStates[mainTask.Name].State; s != structs.TaskStateRunning {
+			return false, fmt.Errorf("expected main task to be running not %s", s)
+		}
+
+		if s := last.TaskStates[poststopTask.Name].State; s != structs.TaskStatePending {
+			return false, fmt.Errorf("expected poststop task to be pending not %s", s)
+		}
+
+		return true, nil
+	}, func(err error) {
+		t.Fatalf("error waiting for initial state:\n%v", err)
+	})
+
+	testutil.WaitForResult(func() (bool, error) {
+		last := upd.Last()
+		if last == nil {
+			return false, fmt.Errorf("no updates")
+		}
+
+		if last.ClientStatus != structs.AllocClientStatusComplete {
+			return false, fmt.Errorf("expected alloc to be complete not %s", last.ClientStatus)
+		}
+
+		if last.ClientDescription != structs.AllocTimeoutReasonMaxRunDuration {
+			return false, fmt.Errorf("expected alloc description %q not %q", structs.AllocTimeoutReasonMaxRunDuration, last.ClientDescription)
+		}
+
+		if s := last.TaskStates[mainTask.Name].State; s != structs.TaskStateDead {
+			return false, fmt.Errorf("expected main task to be dead not %s", s)
+		}
+
+		if s := last.TaskStates[poststopTask.Name].State; s != structs.TaskStatePending {
+			return false, fmt.Errorf("expected poststop task to remain pending not %s", s)
+		}
+
+		return true, nil
+	}, func(err error) {
+		last := upd.Last()
+		t.Fatalf("error waiting for max_run_duration state:\n%v\nlast=%#v", err, last)
+	})
+}
+
 func TestAllocRunner_Lifecycle_Restart(t *testing.T) {
 	ci.Parallel(t)

From 879219a6559352ff501437f0a795e81aa9a93766 Mon Sep 17 00:00:00 2001
From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com>
Date: Fri, 17 Apr 2026 16:48:08 +0200
Subject: [PATCH 35/37] TestMaxRunDurationHook_EmitMetrics correction

---
 .../allocrunner/max_run_duration_hook_test.go | 54 ++++++-------------
 1 file changed, 16 insertions(+), 38 deletions(-)

diff --git a/client/allocrunner/max_run_duration_hook_test.go b/client/allocrunner/max_run_duration_hook_test.go
index daa4ae0e298..18930fb9019 100644
--- a/client/allocrunner/max_run_duration_hook_test.go
+++ b/client/allocrunner/max_run_duration_hook_test.go
@@ -287,46 +287,24 @@ func TestMaxRunDurationHook_EmitMetrics(t *testing.T) {
 	err = hook.Prerun((*taskenv.TaskEnv)(nil))
 	must.NoError(t, err)
 
-	data := inMemorySink.Data()
-
-	var configuredFound bool
-	for _, interval := range data {
-		for _, gauge := range interval.Gauges {
-			if gauge.Name != "nomad_test.client.allocs.max_run_duration.configured_seconds" {
-				continue
-			}
-
-			labels := make(map[string]string, len(gauge.Labels))
-			for _, label := range gauge.Labels {
-				labels[label.Name] = label.Value
-			}
-
-			if labels["node_id"] == "node-123" && labels["task_group"] == alloc.TaskGroup {
-				must.Eq(t, float32(maxRunDuration.Seconds()), gauge.Value)
-				configuredFound = true
-			}
-		}
+	var metricKeySuffix string
+	for _, label := range baseLabels {
+		metricKeySuffix += ";" + label.Name + "=" + label.Value
 	}
-	must.True(t, configuredFound)
+	metricKeySuffix += ";task_group=" + alloc.TaskGroup
 
-	var remainingFound bool
-	for _, interval := range data {
-		for _, gauge := range interval.Gauges {
-			if gauge.Name != "nomad_test.client.allocs.max_run_duration.remaining_seconds" {
-				continue
-			}
+	configuredName := "nomad_test.client.allocs.max_run_duration.configured_seconds" + metricKeySuffix
+	remainingName := "nomad_test.client.allocs.max_run_duration.remaining_seconds" + metricKeySuffix
 
-			labels := make(map[string]string, len(gauge.Labels))
-			for _, label := range gauge.Labels {
-				labels[label.Name] = label.Value
-			}
+	data := inMemorySink.Data()
+	must.Len(t, 1, data)
+	must.MapContainsKey(t, data[0].Gauges, configuredName)
+	must.MapContainsKey(t, data[0].Gauges, remainingName)
 
-			if labels["node_id"] == "node-123" && labels["task_group"] == alloc.TaskGroup {
-				must.Positive(t, gauge.Value)
-				must.LessEq(t, gauge.Value, float32(maxRunDuration.Seconds()))
-				remainingFound = true
-			}
-		}
-	}
-	must.True(t, remainingFound)
+	configuredGauge := data[0].Gauges[configuredName]
+	must.Eq(t, float32(maxRunDuration.Seconds()), configuredGauge.Value)
+
+	remainingGauge := data[0].Gauges[remainingName]
+	must.Positive(t, remainingGauge.Value)
+	must.LessEq(t, remainingGauge.Value, float32(maxRunDuration.Seconds()))
 }

From 6a6e6a6d80b50583aa6c6d4221c76917a9a8fa87 Mon Sep 17 00:00:00 2001
From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com>
Date: Fri, 17 Apr 2026 16:57:17 +0200
Subject: [PATCH 36/37] TestServiceSched_JobModify_MaxRunDuration_InPlace fix

---
 scheduler/generic_sched_test.go | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go
index 3b4c6211e46..5c2351b6aff 100644
--- a/scheduler/generic_sched_test.go
+++ b/scheduler/generic_sched_test.go
@@ -3411,12 +3411,13 @@ func TestServiceSched_JobModify_MaxRunDuration_InPlace(t *testing.T) {
 
 	// Create a mock evaluation
 	eval := &structs.Evaluation{
-		Namespace:   structs.DefaultNamespace,
-		ID:          uuid.Generate(),
-		Priority:    50,
-		TriggeredBy: structs.EvalTriggerJobRegister,
-		JobID:       job.ID,
-		Status:      structs.EvalStatusPending,
+		Namespace:    structs.DefaultNamespace,
+		ID:           uuid.Generate(),
+		Priority:     50,
+		TriggeredBy:  structs.EvalTriggerJobRegister,
+		JobID:        job.ID,
+		Status:       structs.EvalStatusPending,
+		AnnotatePlan: true,
 	}
 	must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
@@ -3441,7 +3442,7 @@ func TestServiceSched_JobModify_MaxRunDuration_InPlace(t *testing.T) {
 		planned = append(planned, allocList...)
 	}
 	must.SliceLen(t, 1, planned)
-	must.Nil(t, planned[0].Job)
+	must.Eq(t, job2, planned[0].Job)
 	must.NotNil(t, plan.Annotations)
 	must.NotNil(t, plan.Annotations.DesiredTGUpdates)
 	must.NotNil(t, plan.Annotations.DesiredTGUpdates[job.TaskGroups[0].Name])
@@ -3460,7 +3461,8 @@
 	maxRunDuration, ok := updatedAlloc.MaxRunDuration()
 	must.True(t, ok)
 	must.Eq(t, updatedMaxRunDuration, maxRunDuration)
-	must.Greater(t, updatedAlloc.AllocModifyIndex, alloc.AllocModifyIndex)
+	must.Eq(t, alloc.ID, planned[0].ID)
+	must.Eq(t, updatedAlloc.ID, planned[0].ID)
 
 	h.AssertEvalStatus(t, structs.EvalStatusComplete)
 }

From e34dbc9dbbd292aaeeb5e3020acc74b60d529e03 Mon Sep 17 00:00:00 2001
From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com>
Date: Fri, 17 Apr 2026 17:22:11 +0200
Subject: [PATCH 37/37] yet another fix to TestMaxRunDurationHook_EmitMetrics

---
 .../allocrunner/max_run_duration_hook_test.go | 37 +++++++++++--------
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/client/allocrunner/max_run_duration_hook_test.go b/client/allocrunner/max_run_duration_hook_test.go
index 18930fb9019..e24a2489179 100644
--- a/client/allocrunner/max_run_duration_hook_test.go
+++ b/client/allocrunner/max_run_duration_hook_test.go
@@ -4,6 +4,7 @@
 package allocrunner
 
 import (
+	"strings"
 	"testing"
 	"time"
 
@@ -287,24 +288,28 @@ func TestMaxRunDurationHook_EmitMetrics(t *testing.T) {
 	err = hook.Prerun((*taskenv.TaskEnv)(nil))
 	must.NoError(t, err)
 
-	var metricKeySuffix string
-	for _, label := range baseLabels {
-		metricKeySuffix += ";" + label.Name + "=" + label.Value
-	}
-	metricKeySuffix += ";task_group=" + alloc.TaskGroup
-
-	configuredName := "nomad_test.client.allocs.max_run_duration.configured_seconds" + metricKeySuffix
-	remainingName := "nomad_test.client.allocs.max_run_duration.remaining_seconds" + metricKeySuffix
-
 	data := inMemorySink.Data()
 	must.Len(t, 1, data)
-	must.MapContainsKey(t, data[0].Gauges, configuredName)
-	must.MapContainsKey(t, data[0].Gauges, remainingName)
 
-	configuredGauge := data[0].Gauges[configuredName]
-	must.Eq(t, float32(maxRunDuration.Seconds()), configuredGauge.Value)
+	configuredSuffix := "client.allocs.max_run_duration.configured_seconds;node_id=node-123;task_group=" + alloc.TaskGroup
+	remainingSuffix := "client.allocs.max_run_duration.remaining_seconds;node_id=node-123;task_group=" + alloc.TaskGroup
+
+	var configuredFound bool
+	var remainingFound bool
+
+	for key, gauge := range data[0].Gauges {
+		if strings.HasSuffix(key, configuredSuffix) {
+			must.Eq(t, float32(maxRunDuration.Seconds()), gauge.Value)
+			configuredFound = true
+		}
+
+		if strings.HasSuffix(key, remainingSuffix) {
+			must.Positive(t, gauge.Value)
+			must.True(t, gauge.Value <= float32(maxRunDuration.Seconds()))
+			remainingFound = true
+		}
+	}
 
-	remainingGauge := data[0].Gauges[remainingName]
-	must.Positive(t, remainingGauge.Value)
-	must.LessEq(t, remainingGauge.Value, float32(maxRunDuration.Seconds()))
+	must.True(t, configuredFound)
+	must.True(t, remainingFound)
 }
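
A note on the "fix clock arming bug" change (PATCH 33): the rewritten currentDeadline prefers a deadline derived from the allocation's recorded task start times, but when no task has a StartedAt yet (the Prerun case exercised by TestMaxRunDurationHook_Prerun_ArmsTimerBeforeTasksAreFullyRunning), it falls back to the current wall clock so an enforcement timer is always armed. A minimal standalone sketch of that decision, using a hypothetical helper name (deadlineFor) that does not appear in the patch:

package main

import (
	"fmt"
	"time"
)

// deadlineFor sketches the fallback in currentDeadline: use the task-derived
// deadline when one exists; otherwise anchor max_run_duration at "now" so the
// timer is still armed before any task reports a StartedAt timestamp.
func deadlineFor(taskDeadline time.Time, ok bool, maxRun time.Duration) time.Time {
	if ok {
		return taskDeadline
	}
	return time.Now().Add(maxRun)
}

func main() {
	// No task has started yet: the deadline falls back to now + max_run_duration.
	fmt.Println(deadlineFor(time.Time{}, false, 50*time.Millisecond))
}

The trade-off is that the pre-start deadline is re-derived once tasks actually start, which is exactly what the boundary test above pins down.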
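A note on the two TestMaxRunDurationHook_EmitMetrics revisions (PATCH 35 and PATCH 37): with stock go-metrics, DefaultConfig resolves the local hostname and, because EnableHostname defaults to true, prepends it to every metric key, while the inmem sink flattens labels into the gauge-map key as ";name=value" pairs. An exact-key lookup therefore only works when the host prefix is predictable, which is why the final revision matches keys with strings.HasSuffix. The standalone sketch below illustrates the key shape; it calls the upstream hashicorp/go-metrics package directly (the tests go through the go-metrics/compat shim, which mirrors this API), and the node-123/web values are illustrative:

package main

import (
	"fmt"
	"strings"
	"time"

	metrics "github.com/hashicorp/go-metrics"
)

func main() {
	sink := metrics.NewInmemSink(10*time.Millisecond, 50*time.Millisecond)

	// DefaultConfig fills HostName from os.Hostname() and leaves
	// EnableHostname true, so keys come out host-prefixed, e.g.
	// "nomad_test.<hostname>.client.allocs...;node_id=node-123;task_group=web".
	if _, err := metrics.NewGlobal(metrics.DefaultConfig("nomad_test"), sink); err != nil {
		panic(err)
	}

	metrics.SetGaugeWithLabels(
		[]string{"client", "allocs", "max_run_duration", "configured_seconds"},
		120, // e.g. a 2m max_run_duration expressed in seconds
		[]metrics.Label{
			{Name: "node_id", Value: "node-123"},
			{Name: "task_group", Value: "web"},
		},
	)

	suffix := "client.allocs.max_run_duration.configured_seconds;node_id=node-123;task_group=web"
	for _, interval := range sink.Data() {
		for key, gauge := range interval.Gauges {
			// The label set is flattened into the map key, and the hostname
			// prefix varies per machine, hence suffix matching in the test.
			fmt.Println(key, strings.HasSuffix(key, suffix), gauge.Value)
		}
	}
}

Suffix matching keeps the assertion stable across developer machines and CI runners whose hostnames differ, at the cost of a linear scan over the interval's gauges.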