From fb018d252abeee314eb185ce78a2cbef13129042 Mon Sep 17 00:00:00 2001 From: Kannan Rajah Date: Tue, 11 Nov 2025 11:21:08 -0800 Subject: [PATCH 1/4] Add lease duration to worker heartbeat request --- temporal/api/workflowservice/v1/request_response.proto | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/temporal/api/workflowservice/v1/request_response.proto b/temporal/api/workflowservice/v1/request_response.proto index bd9282998..3f13c1fc7 100644 --- a/temporal/api/workflowservice/v1/request_response.proto +++ b/temporal/api/workflowservice/v1/request_response.proto @@ -2467,6 +2467,11 @@ message RecordWorkerHeartbeatRequest { string identity = 2; repeated temporal.api.worker.v1.WorkerHeartbeat worker_heartbeat = 3; + + // Duration for which the worker lease should be valid. + // Server will calculate the actual expiration time based on when it receives this request. + // If not specified or zero, the server will use a default lease duration (typically 60 seconds). + google.protobuf.Duration lease_duration = 4; } message RecordWorkerHeartbeatResponse { From 8db1c113fa953d63378eb6eb4ca0b5e7fff5d1c6 Mon Sep 17 00:00:00 2001 From: Kannan Rajah Date: Tue, 11 Nov 2025 11:53:51 -0800 Subject: [PATCH 2/4] Update openapi defs --- openapi/openapiv2.json | 4 ++++ openapi/openapiv3.yaml | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/openapi/openapiv2.json b/openapi/openapiv2.json index dfe59394e..3ec310505 100644 --- a/openapi/openapiv2.json +++ b/openapi/openapiv2.json @@ -8125,6 +8125,10 @@ "type": "object", "$ref": "#/definitions/v1WorkerHeartbeat" } + }, + "leaseDuration": { + "type": "string", + "description": "Duration for which the worker lease should be valid.\nServer will calculate the actual expiration time based on when it receives this request.\nIf not specified or zero, the server will use a default lease duration (typically 60 seconds)." 
} } }, diff --git a/openapi/openapiv3.yaml b/openapi/openapiv3.yaml index 7c91c447f..17884b8d9 100644 --- a/openapi/openapiv3.yaml +++ b/openapi/openapiv3.yaml @@ -10355,6 +10355,13 @@ components: type: array items: $ref: '#/components/schemas/WorkerHeartbeat' + leaseDuration: + pattern: ^-?(?:0|[1-9][0-9]{0,11})(?:\.[0-9]{1,9})?s$ + type: string + description: |- + Duration for which the worker lease should be valid. + Server will calculate the actual expiration time based on when it receives this request. + If not specified or zero, the server will use a default lease duration (typically 60 seconds). RecordWorkerHeartbeatResponse: type: object properties: {} From a92e463cb1829ce7b3a01c25a0185d019c2109d9 Mon Sep 17 00:00:00 2001 From: Kannan Rajah Date: Wed, 12 Nov 2025 10:43:04 -0800 Subject: [PATCH 3/4] Add more comments --- openapi/openapiv2.json | 2 +- openapi/openapiv3.yaml | 12 +++++++++++- .../api/workflowservice/v1/request_response.proto | 12 +++++++++++- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/openapi/openapiv2.json b/openapi/openapiv2.json index 3ec310505..a056aa775 100644 --- a/openapi/openapiv2.json +++ b/openapi/openapiv2.json @@ -8128,7 +8128,7 @@ }, "leaseDuration": { "type": "string", - "description": "Duration for which the worker lease should be valid.\nServer will calculate the actual expiration time based on when it receives this request.\nIf not specified or zero, the server will use a default lease duration (typically 60 seconds)." + "description": "Duration for which the worker lease should be valid. 
During this time, the server considers the worker to be active.\nThe worker is expected to send periodic heartbeats to renew its lease before it expires.\n\nServer will calculate the actual expiration time based on when it receives this request.\nIf not specified or zero, the server will use a default lease duration (typically 60 seconds).\n\nThere are 3 states for a worker: Active, Inactive, and CleanedUp.\nLifecycle transitions:\n- Active->Active: Each time the server receives a heartbeat from the worker, it will renew the lease and keep the worker in the active state.\n- Active->Inactive: When the lease expires, the server will consider the worker to be inactive, and reschedule activities that were known to be running as of that time.\n- Inactive->Active: If the server receives subsequent heartbeat from this worker, then it will transition it back to the active state.\n- Inactive->CleanedUp: If the worker remains inactive for a prolonged period, the server will cleanup the worker state. This is a terminal state.\n If the server receives subsequent heartbeat from this worker, then it will return an error." } } }, diff --git a/openapi/openapiv3.yaml b/openapi/openapiv3.yaml index 17884b8d9..79215f28c 100644 --- a/openapi/openapiv3.yaml +++ b/openapi/openapiv3.yaml @@ -10359,9 +10359,19 @@ components: pattern: ^-?(?:0|[1-9][0-9]{0,11})(?:\.[0-9]{1,9})?s$ type: string description: |- - Duration for which the worker lease should be valid. + Duration for which the worker lease should be valid. During this time, the server considers the worker to be active. + The worker is expected to send periodic heartbeats to renew its lease before it expires. + Server will calculate the actual expiration time based on when it receives this request. If not specified or zero, the server will use a default lease duration (typically 60 seconds). + + There are 3 states for a worker: Active, Inactive, and CleanedUp. 
+ Lifecycle transitions: + - Active->Active: Each time the server receives a heartbeat from the worker, it will renew the lease and keep the worker in the active state. + - Active->Inactive: When the lease expires, the server will consider the worker to be inactive, and reschedule activities that were known to be running as of that time. + - Inactive->Active: If the server receives subsequent heartbeat from this worker, then it will transition it back to the active state. + - Inactive->CleanedUp: If the worker remains inactive for a prolonged period, the server will cleanup the worker state. This is a terminal state. + If the server receives subsequent heartbeat from this worker, then it will return an error. RecordWorkerHeartbeatResponse: type: object properties: {} diff --git a/temporal/api/workflowservice/v1/request_response.proto b/temporal/api/workflowservice/v1/request_response.proto index 3f13c1fc7..09631a2a3 100644 --- a/temporal/api/workflowservice/v1/request_response.proto +++ b/temporal/api/workflowservice/v1/request_response.proto @@ -2468,9 +2468,19 @@ message RecordWorkerHeartbeatRequest { repeated temporal.api.worker.v1.WorkerHeartbeat worker_heartbeat = 3; - // Duration for which the worker lease should be valid. + // Duration for which the worker lease should be valid. During this time, the server considers the worker to be active. + // The worker is expected to send periodic heartbeats to renew its lease before it expires. + // // Server will calculate the actual expiration time based on when it receives this request. // If not specified or zero, the server will use a default lease duration (typically 60 seconds). + // + // There are 3 states for a worker: Active, Inactive, and CleanedUp. + // Lifecycle transitions: + // - Active->Active: Each time the server receives a heartbeat from the worker, it will renew the lease and keep the worker in the active state. 
+ // - Active->Inactive: When the lease expires, the server will consider the worker to be inactive, and reschedule activities that were known to be running as of that time. + // - Inactive->Active: If the server receives subsequent heartbeat from this worker, then it will transition it back to the active state. + // - Inactive->CleanedUp: If the worker remains inactive for a prolonged period, the server will cleanup the worker state. This is a terminal state. + // If the server receives subsequent heartbeat from this worker, then it will return an error. google.protobuf.Duration lease_duration = 4; } From ee6f922d803b901597f4d10620f525183a2b1bfa Mon Sep 17 00:00:00 2001 From: Kannan Rajah Date: Wed, 12 Nov 2025 16:17:20 -0800 Subject: [PATCH 4/4] Clarify semantics of non retryable error in cleanedup state. --- openapi/openapiv2.json | 2 +- openapi/openapiv3.yaml | 8 ++++++-- temporal/api/workflowservice/v1/request_response.proto | 8 ++++++-- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/openapi/openapiv2.json b/openapi/openapiv2.json index a056aa775..6ab0afa2f 100644 --- a/openapi/openapiv2.json +++ b/openapi/openapiv2.json @@ -8128,7 +8128,7 @@ }, "leaseDuration": { "type": "string", - "description": "Duration for which the worker lease should be valid. 
During this time, the server considers the worker to be active.\nThe worker is expected to send periodic heartbeats to renew its lease before it expires.\n\nServer will calculate the actual expiration time based on when it receives this request.\nIf not specified or zero, the server will use a default lease duration (typically 60 seconds).\n\nThere are 3 states for a worker: Active, Inactive, and CleanedUp.\nLifecycle transitions:\n- Active->Active: Each time the server receives a heartbeat from the worker, it will renew the lease and keep the worker in the active state.\n- Active->Inactive: When the lease expires, the server will consider the worker to be inactive, and reschedule activities that were known to be running as of that time.\n- Inactive->Active: If the server receives subsequent heartbeat from this worker, then it will transition it back to the active state.\n- Inactive->CleanedUp: If the worker remains inactive for a prolonged period, the server will cleanup the worker state. This is a terminal state.\n If the server receives subsequent heartbeat from this worker, then it will return an error." + "description": "Duration for which the worker lease should be valid. 
During this time, the server considers the worker to be active.\nThe worker is expected to send periodic heartbeats to renew its lease before it expires.\n\nServer will calculate the actual expiration time based on when it receives this request.\nIf not specified or zero, the server will use a default lease duration of 1 minute.\n\nThere are 3 states for a worker: Active, Inactive, and CleanedUp.\nLifecycle transitions:\n- Active->Active: Each time the server receives a heartbeat from the worker, it will renew the lease and keep the worker in the active state.\n\n- Active->Inactive: When the lease expires, the server will consider the worker to be inactive, and reschedule activities that were known to be running as of that time.\n\n- Inactive->Active: If the server receives subsequent heartbeat from this worker, then it will transition it back to the active state.\n\n- Inactive->CleanedUp: If the worker remains inactive for a prolonged period, the server will cleanup the worker state. This is a terminal state.\n If the server receives subsequent heartbeat from this worker, then it will always return a non-retryable FailedPrecondition error.\n The worker will need to shut down and re-register using a different WorkerInstanceKey to become active again." } } }, diff --git a/openapi/openapiv3.yaml b/openapi/openapiv3.yaml index 79215f28c..d490939db 100644 --- a/openapi/openapiv3.yaml +++ b/openapi/openapiv3.yaml @@ -10363,15 +10363,19 @@ components: The worker is expected to send periodic heartbeats to renew its lease before it expires. Server will calculate the actual expiration time based on when it receives this request. - If not specified or zero, the server will use a default lease duration (typically 60 seconds). + If not specified or zero, the server will use a default lease duration of 1 minute. There are 3 states for a worker: Active, Inactive, and CleanedUp. 
Lifecycle transitions: - Active->Active: Each time the server receives a heartbeat from the worker, it will renew the lease and keep the worker in the active state. + - Active->Inactive: When the lease expires, the server will consider the worker to be inactive, and reschedule activities that were known to be running as of that time. + - Inactive->Active: If the server receives subsequent heartbeat from this worker, then it will transition it back to the active state. + - Inactive->CleanedUp: If the worker remains inactive for a prolonged period, the server will cleanup the worker state. This is a terminal state. - If the server receives subsequent heartbeat from this worker, then it will return an error. + If the server receives subsequent heartbeat from this worker, then it will always return a non-retryable FailedPrecondition error. + The worker will need to shut down and re-register using a different WorkerInstanceKey to become active again. RecordWorkerHeartbeatResponse: type: object properties: {} diff --git a/temporal/api/workflowservice/v1/request_response.proto b/temporal/api/workflowservice/v1/request_response.proto index 09631a2a3..ddbb3cbba 100644 --- a/temporal/api/workflowservice/v1/request_response.proto +++ b/temporal/api/workflowservice/v1/request_response.proto @@ -2472,15 +2472,19 @@ message RecordWorkerHeartbeatRequest { // The worker is expected to send periodic heartbeats to renew its lease before it expires. // // Server will calculate the actual expiration time based on when it receives this request. - // If not specified or zero, the server will use a default lease duration (typically 60 seconds). + // If not specified or zero, the server will use a default lease duration of 1 minute. // // There are 3 states for a worker: Active, Inactive, and CleanedUp. // Lifecycle transitions: // - Active->Active: Each time the server receives a heartbeat from the worker, it will renew the lease and keep the worker in the active state. 
+ // // - Active->Inactive: When the lease expires, the server will consider the worker to be inactive, and reschedule activities that were known to be running as of that time. + // // - Inactive->Active: If the server receives subsequent heartbeat from this worker, then it will transition it back to the active state. + // // - Inactive->CleanedUp: If the worker remains inactive for a prolonged period, the server will cleanup the worker state. This is a terminal state. - // If the server receives subsequent heartbeat from this worker, then it will return an error. + // If the server receives subsequent heartbeat from this worker, then it will always return a non-retryable FailedPrecondition error. + // The worker will need to shut down and re-register using a different WorkerInstanceKey to become active again. google.protobuf.Duration lease_duration = 4; }