diff --git a/README.md b/README.md index 1294941eaa..a99d2c6839 100644 --- a/README.md +++ b/README.md @@ -155,6 +155,8 @@ Join our discord community via [this invite link](https://discord.gg/bxgXW8jJGh) | [key\_name](#input\_key\_name) | Key pair name | `string` | `null` | no | | [kms\_key\_arn](#input\_kms\_key\_arn) | Optional CMK Key ARN to be used for Parameter Store. This key must be in the current account. | `string` | `null` | no | | [lambda\_architecture](#input\_lambda\_architecture) | AWS Lambda architecture. Lambda functions using Graviton processors ('arm64') tend to have better price/performance than 'x86\_64' functions. | `string` | `"arm64"` | no | +| [lambda\_event\_source\_mapping\_batch\_size](#input\_lambda\_event\_source\_mapping\_batch\_size) | Maximum number of records to pass to the lambda function in a single batch for the event source mapping. When not set, the AWS default of 10 events will be used. | `number` | `10` | no | +| [lambda\_event\_source\_mapping\_maximum\_batching\_window\_in\_seconds](#input\_lambda\_event\_source\_mapping\_maximum\_batching\_window\_in\_seconds) | Maximum amount of time to gather records before invoking the lambda function, in seconds. AWS requires this to be greater than 0 if batch\_size is greater than 10. Defaults to 0. | `number` | `0` | no | | [lambda\_principals](#input\_lambda\_principals) | (Optional) add extra principals to the role created for execution of the lambda, e.g. for local testing. |
list(object({
type = string
identifiers = list(string)
})) | `[]` | no |
| [lambda\_runtime](#input\_lambda\_runtime) | AWS Lambda runtime. | `string` | `"nodejs22.x"` | no |
| [lambda\_s3\_bucket](#input\_lambda\_s3\_bucket) | S3 bucket from which to specify lambda functions. This is an alternative to providing local files directly. | `string` | `null` | no |
diff --git a/lambdas/functions/control-plane/src/aws/runners.test.ts b/lambdas/functions/control-plane/src/aws/runners.test.ts
index a02f62cd36..5c973b4077 100644
--- a/lambdas/functions/control-plane/src/aws/runners.test.ts
+++ b/lambdas/functions/control-plane/src/aws/runners.test.ts
@@ -419,9 +419,12 @@ describe('create runner with errors', () => {
});
it('test ScaleError with multiple error.', async () => {
- createFleetMockWithErrors(['UnfulfillableCapacity', 'SomeError']);
+ createFleetMockWithErrors(['UnfulfillableCapacity', 'MaxSpotInstanceCountExceeded', 'NotMappedError']);
- await expect(createRunner(createRunnerConfig(defaultRunnerConfig))).rejects.toBeInstanceOf(ScaleError);
+ await expect(createRunner(createRunnerConfig(defaultRunnerConfig))).rejects.toMatchObject({
+ name: 'ScaleError',
+ failedInstanceCount: 2,
+ });
expect(mockEC2Client).toHaveReceivedCommandWith(
CreateFleetCommand,
expectedCreateFleetRequest(defaultExpectedFleetRequestValues),
@@ -530,7 +533,7 @@ describe('create runner with errors fail over to OnDemand', () => {
}),
});
- // second call with with OnDemand failback
+ // second call with OnDemand fallback
expect(mockEC2Client).toHaveReceivedNthCommandWith(2, CreateFleetCommand, {
...expectedCreateFleetRequest({
...defaultExpectedFleetRequestValues,
@@ -540,13 +543,13 @@ describe('create runner with errors fail over to OnDemand', () => {
});
});
- it('test InsufficientInstanceCapacity no failback.', async () => {
+ it('test InsufficientInstanceCapacity no fallback.', async () => {
await expect(
createRunner(createRunnerConfig({ ...defaultRunnerConfig, onDemandFailoverOnError: [] })),
).rejects.toBeInstanceOf(Error);
});
- it('test InsufficientInstanceCapacity with mutlipte instances and fallback to on demand .', async () => {
+ it('test InsufficientInstanceCapacity with multiple instances and fallback to on demand.', async () => {
const instancesIds = ['i-123', 'i-456'];
createFleetMockWithWithOnDemandFallback(['InsufficientInstanceCapacity'], instancesIds);
diff --git a/lambdas/functions/control-plane/src/aws/runners.ts b/lambdas/functions/control-plane/src/aws/runners.ts
index 6779dd39d2..d95dc99fa4 100644
--- a/lambdas/functions/control-plane/src/aws/runners.ts
+++ b/lambdas/functions/control-plane/src/aws/runners.ts
@@ -166,53 +166,62 @@ async function processFleetResult(
): Promiselist(object({
type = string
identifiers = list(string)
})) | `[]` | no |
| [lambda\_runtime](#input\_lambda\_runtime) | AWS Lambda runtime. | `string` | `"nodejs22.x"` | no |
| [lambda\_s3\_bucket](#input\_lambda\_s3\_bucket) | S3 bucket from which to specify lambda functions. This is an alternative to providing local files directly. | `string` | `null` | no |
diff --git a/modules/multi-runner/runners.tf b/modules/multi-runner/runners.tf
index 811ab36260..d58e61f6ac 100644
--- a/modules/multi-runner/runners.tf
+++ b/modules/multi-runner/runners.tf
@@ -58,28 +58,30 @@ module "runners" {
credit_specification = each.value.runner_config.credit_specification
cpu_options = each.value.runner_config.cpu_options
- enable_runner_binaries_syncer = each.value.runner_config.enable_runner_binaries_syncer
- lambda_s3_bucket = var.lambda_s3_bucket
- runners_lambda_s3_key = var.runners_lambda_s3_key
- runners_lambda_s3_object_version = var.runners_lambda_s3_object_version
- lambda_runtime = var.lambda_runtime
- lambda_architecture = var.lambda_architecture
- lambda_zip = var.runners_lambda_zip
- lambda_scale_up_memory_size = var.scale_up_lambda_memory_size
- lambda_timeout_scale_up = var.runners_scale_up_lambda_timeout
- lambda_scale_down_memory_size = var.scale_down_lambda_memory_size
- lambda_timeout_scale_down = var.runners_scale_down_lambda_timeout
- lambda_subnet_ids = var.lambda_subnet_ids
- lambda_security_group_ids = var.lambda_security_group_ids
- lambda_tags = var.lambda_tags
- tracing_config = var.tracing_config
- logging_retention_in_days = var.logging_retention_in_days
- logging_kms_key_id = var.logging_kms_key_id
- enable_cloudwatch_agent = each.value.runner_config.enable_cloudwatch_agent
- cloudwatch_config = try(coalesce(each.value.runner_config.cloudwatch_config, var.cloudwatch_config), null)
- runner_log_files = each.value.runner_config.runner_log_files
- runner_group_name = each.value.runner_config.runner_group_name
- runner_name_prefix = each.value.runner_config.runner_name_prefix
+ enable_runner_binaries_syncer = each.value.runner_config.enable_runner_binaries_syncer
+ lambda_s3_bucket = var.lambda_s3_bucket
+ runners_lambda_s3_key = var.runners_lambda_s3_key
+ runners_lambda_s3_object_version = var.runners_lambda_s3_object_version
+ lambda_runtime = var.lambda_runtime
+ lambda_architecture = var.lambda_architecture
+ lambda_zip = var.runners_lambda_zip
+ lambda_scale_up_memory_size = var.scale_up_lambda_memory_size
+ lambda_event_source_mapping_batch_size = var.lambda_event_source_mapping_batch_size
+ lambda_event_source_mapping_maximum_batching_window_in_seconds = var.lambda_event_source_mapping_maximum_batching_window_in_seconds
+ lambda_timeout_scale_up = var.runners_scale_up_lambda_timeout
+ lambda_scale_down_memory_size = var.scale_down_lambda_memory_size
+ lambda_timeout_scale_down = var.runners_scale_down_lambda_timeout
+ lambda_subnet_ids = var.lambda_subnet_ids
+ lambda_security_group_ids = var.lambda_security_group_ids
+ lambda_tags = var.lambda_tags
+ tracing_config = var.tracing_config
+ logging_retention_in_days = var.logging_retention_in_days
+ logging_kms_key_id = var.logging_kms_key_id
+ enable_cloudwatch_agent = each.value.runner_config.enable_cloudwatch_agent
+ cloudwatch_config = try(coalesce(each.value.runner_config.cloudwatch_config, var.cloudwatch_config), null)
+ runner_log_files = each.value.runner_config.runner_log_files
+ runner_group_name = each.value.runner_config.runner_group_name
+ runner_name_prefix = each.value.runner_config.runner_name_prefix
scale_up_reserved_concurrent_executions = each.value.runner_config.scale_up_reserved_concurrent_executions
diff --git a/modules/multi-runner/variables.tf b/modules/multi-runner/variables.tf
index be35ad09f8..fb48980908 100644
--- a/modules/multi-runner/variables.tf
+++ b/modules/multi-runner/variables.tf
@@ -718,3 +718,15 @@ variable "user_agent" {
type = string
default = "github-aws-runners"
}
+
+variable "lambda_event_source_mapping_batch_size" {
+ description = "Maximum number of records to pass to the lambda function in a single batch for the event source mapping. When not set, the AWS default of 10 events will be used."
+ type = number
+ default = 10
+}
+
+variable "lambda_event_source_mapping_maximum_batching_window_in_seconds" {
+ description = "Maximum amount of time to gather records before invoking the lambda function, in seconds. AWS requires this to be greater than 0 if batch_size is greater than 10. Defaults to 0."
+ type = number
+ default = 0
+}
diff --git a/modules/runners/README.md b/modules/runners/README.md
index 0f2db503e5..2397f1f576 100644
--- a/modules/runners/README.md
+++ b/modules/runners/README.md
@@ -177,6 +177,8 @@ yarn run dist
| [key\_name](#input\_key\_name) | Key pair name | `string` | `null` | no |
| [kms\_key\_arn](#input\_kms\_key\_arn) | Optional CMK Key ARN to be used for Parameter Store. | `string` | `null` | no |
| [lambda\_architecture](#input\_lambda\_architecture) | AWS Lambda architecture. Lambda functions using Graviton processors ('arm64') tend to have better price/performance than 'x86\_64' functions. | `string` | `"arm64"` | no |
+| [lambda\_event\_source\_mapping\_batch\_size](#input\_lambda\_event\_source\_mapping\_batch\_size) | Maximum number of records to pass to the lambda function in a single batch for the event source mapping. When not set, the AWS default of 10 events will be used. | `number` | `10` | no |
+| [lambda\_event\_source\_mapping\_maximum\_batching\_window\_in\_seconds](#input\_lambda\_event\_source\_mapping\_maximum\_batching\_window\_in\_seconds) | Maximum amount of time to gather records before invoking the lambda function, in seconds. AWS requires this to be greater than 0 if batch\_size is greater than 10. Defaults to 0. | `number` | `0` | no |
| [lambda\_runtime](#input\_lambda\_runtime) | AWS Lambda runtime. | `string` | `"nodejs22.x"` | no |
| [lambda\_s3\_bucket](#input\_lambda\_s3\_bucket) | S3 bucket from which to specify lambda functions. This is an alternative to providing local files directly. | `string` | `null` | no |
| [lambda\_scale\_down\_memory\_size](#input\_lambda\_scale\_down\_memory\_size) | Memory size limit in MB for scale down lambda. | `number` | `512` | no |
diff --git a/modules/runners/job-retry.tf b/modules/runners/job-retry.tf
index e51c3903d4..130992667f 100644
--- a/modules/runners/job-retry.tf
+++ b/modules/runners/job-retry.tf
@@ -3,30 +3,32 @@ locals {
job_retry_enabled = var.job_retry != null && var.job_retry.enable ? true : false
job_retry = {
- prefix = var.prefix
- tags = local.tags
- aws_partition = var.aws_partition
- architecture = var.lambda_architecture
- runtime = var.lambda_runtime
- security_group_ids = var.lambda_security_group_ids
- subnet_ids = var.lambda_subnet_ids
- kms_key_arn = var.kms_key_arn
- lambda_tags = var.lambda_tags
- log_level = var.log_level
- logging_kms_key_id = var.logging_kms_key_id
- logging_retention_in_days = var.logging_retention_in_days
- metrics = var.metrics
- role_path = var.role_path
- role_permissions_boundary = var.role_permissions_boundary
- s3_bucket = var.lambda_s3_bucket
- s3_key = var.runners_lambda_s3_key
- s3_object_version = var.runners_lambda_s3_object_version
- zip = var.lambda_zip
- tracing_config = var.tracing_config
- github_app_parameters = var.github_app_parameters
- enable_organization_runners = var.enable_organization_runners
- sqs_build_queue = var.sqs_build_queue
- ghes_url = var.ghes_url
+ prefix = var.prefix
+ tags = local.tags
+ aws_partition = var.aws_partition
+ architecture = var.lambda_architecture
+ runtime = var.lambda_runtime
+ security_group_ids = var.lambda_security_group_ids
+ subnet_ids = var.lambda_subnet_ids
+ kms_key_arn = var.kms_key_arn
+ lambda_tags = var.lambda_tags
+ log_level = var.log_level
+ logging_kms_key_id = var.logging_kms_key_id
+ logging_retention_in_days = var.logging_retention_in_days
+ metrics = var.metrics
+ role_path = var.role_path
+ role_permissions_boundary = var.role_permissions_boundary
+ s3_bucket = var.lambda_s3_bucket
+ s3_key = var.runners_lambda_s3_key
+ s3_object_version = var.runners_lambda_s3_object_version
+ zip = var.lambda_zip
+ tracing_config = var.tracing_config
+ github_app_parameters = var.github_app_parameters
+ enable_organization_runners = var.enable_organization_runners
+ sqs_build_queue = var.sqs_build_queue
+ ghes_url = var.ghes_url
+ lambda_event_source_mapping_batch_size = var.lambda_event_source_mapping_batch_size
+ lambda_event_source_mapping_maximum_batching_window_in_seconds = var.lambda_event_source_mapping_maximum_batching_window_in_seconds
}
}
diff --git a/modules/runners/job-retry/README.md b/modules/runners/job-retry/README.md
index 91089a213b..4f4c80921c 100644
--- a/modules/runners/job-retry/README.md
+++ b/modules/runners/job-retry/README.md
@@ -42,7 +42,7 @@ The module is an inner module and used by the runner module when the opt-in feat
| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
-| [config](#input\_config) | Configuration for the spot termination watcher lambda function.object({
aws_partition = optional(string, null)
architecture = optional(string, null)
enable_organization_runners = bool
environment_variables = optional(map(string), {})
ghes_url = optional(string, null)
user_agent = optional(string, null)
github_app_parameters = object({
key_base64 = map(string)
id = map(string)
})
kms_key_arn = optional(string, null)
lambda_tags = optional(map(string), {})
log_level = optional(string, null)
logging_kms_key_id = optional(string, null)
logging_retention_in_days = optional(number, null)
memory_size = optional(number, null)
metrics = optional(object({
enable = optional(bool, false)
namespace = optional(string, null)
metric = optional(object({
enable_github_app_rate_limit = optional(bool, true)
enable_job_retry = optional(bool, true)
}), {})
}), {})
prefix = optional(string, null)
principals = optional(list(object({
type = string
identifiers = list(string)
})), [])
queue_encryption = optional(object({
kms_data_key_reuse_period_seconds = optional(number, null)
kms_master_key_id = optional(string, null)
sqs_managed_sse_enabled = optional(bool, true)
}), {})
role_path = optional(string, null)
role_permissions_boundary = optional(string, null)
runtime = optional(string, null)
security_group_ids = optional(list(string), [])
subnet_ids = optional(list(string), [])
s3_bucket = optional(string, null)
s3_key = optional(string, null)
s3_object_version = optional(string, null)
sqs_build_queue = object({
url = string
arn = string
})
tags = optional(map(string), {})
timeout = optional(number, 30)
tracing_config = optional(object({
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
}), {})
zip = optional(string, null)
}) | n/a | yes |
+| [config](#input\_config) | Configuration for the spot termination watcher lambda function.object({
aws_partition = optional(string, null)
architecture = optional(string, null)
enable_organization_runners = bool
environment_variables = optional(map(string), {})
ghes_url = optional(string, null)
user_agent = optional(string, null)
github_app_parameters = object({
key_base64 = map(string)
id = map(string)
})
kms_key_arn = optional(string, null)
lambda_event_source_mapping_batch_size = optional(number, 10)
lambda_event_source_mapping_maximum_batching_window_in_seconds = optional(number, 0)
lambda_tags = optional(map(string), {})
log_level = optional(string, null)
logging_kms_key_id = optional(string, null)
logging_retention_in_days = optional(number, null)
memory_size = optional(number, null)
metrics = optional(object({
enable = optional(bool, false)
namespace = optional(string, null)
metric = optional(object({
enable_github_app_rate_limit = optional(bool, true)
enable_job_retry = optional(bool, true)
}), {})
}), {})
prefix = optional(string, null)
principals = optional(list(object({
type = string
identifiers = list(string)
})), [])
queue_encryption = optional(object({
kms_data_key_reuse_period_seconds = optional(number, null)
kms_master_key_id = optional(string, null)
sqs_managed_sse_enabled = optional(bool, true)
}), {})
role_path = optional(string, null)
role_permissions_boundary = optional(string, null)
runtime = optional(string, null)
security_group_ids = optional(list(string), [])
subnet_ids = optional(list(string), [])
s3_bucket = optional(string, null)
s3_key = optional(string, null)
s3_object_version = optional(string, null)
sqs_build_queue = object({
url = string
arn = string
})
tags = optional(map(string), {})
timeout = optional(number, 30)
tracing_config = optional(object({
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
}), {})
zip = optional(string, null)
}) | n/a | yes |
## Outputs
diff --git a/modules/runners/job-retry/main.tf b/modules/runners/job-retry/main.tf
index 9561c7db71..612c515f8c 100644
--- a/modules/runners/job-retry/main.tf
+++ b/modules/runners/job-retry/main.tf
@@ -44,9 +44,10 @@ module "job_retry" {
}
resource "aws_lambda_event_source_mapping" "job_retry" {
- event_source_arn = aws_sqs_queue.job_retry_check_queue.arn
- function_name = module.job_retry.lambda.function.arn
- batch_size = 1
+ event_source_arn = aws_sqs_queue.job_retry_check_queue.arn
+ function_name = module.job_retry.lambda.function.arn
+ batch_size = var.config.lambda_event_source_mapping_batch_size
+ maximum_batching_window_in_seconds = var.config.lambda_event_source_mapping_maximum_batching_window_in_seconds
}
resource "aws_lambda_permission" "job_retry" {
diff --git a/modules/runners/job-retry/variables.tf b/modules/runners/job-retry/variables.tf
index 4a8fe19fbf..f40bec1ba7 100644
--- a/modules/runners/job-retry/variables.tf
+++ b/modules/runners/job-retry/variables.tf
@@ -11,6 +11,8 @@ variable "config" {
'user_agent': Optional User-Agent header for GitHub API requests.
'github_app_parameters': Parameter Store for GitHub App Parameters.
'kms_key_arn': Optional CMK Key ARN instead of using the default AWS managed key.
+ 'lambda_event_source_mapping_batch_size': Maximum number of records to pass to the lambda function in a single batch for the event source mapping. When not set, the AWS default of 10 events will be used.
+ 'lambda_event_source_mapping_maximum_batching_window_in_seconds': Maximum amount of time to gather records before invoking the lambda function, in seconds. AWS requires this to be greater than 0 if batch_size is greater than 10.
`lambda_principals`: Add extra principals to the role created for execution of the lambda, e.g. for local testing.
`lambda_tags`: Map of tags that will be added to created resources. By default resources will be tagged with name and environment.
`log_level`: Logging level for lambda logging. Valid values are 'silly', 'trace', 'debug', 'info', 'warn', 'error', 'fatal'.
@@ -45,12 +47,14 @@ variable "config" {
key_base64 = map(string)
id = map(string)
})
- kms_key_arn = optional(string, null)
- lambda_tags = optional(map(string), {})
- log_level = optional(string, null)
- logging_kms_key_id = optional(string, null)
- logging_retention_in_days = optional(number, null)
- memory_size = optional(number, null)
+ kms_key_arn = optional(string, null)
+ lambda_event_source_mapping_batch_size = optional(number, 10)
+ lambda_event_source_mapping_maximum_batching_window_in_seconds = optional(number, 0)
+ lambda_tags = optional(map(string), {})
+ log_level = optional(string, null)
+ logging_kms_key_id = optional(string, null)
+ logging_retention_in_days = optional(number, null)
+ memory_size = optional(number, null)
metrics = optional(object({
enable = optional(bool, false)
namespace = optional(string, null)
diff --git a/modules/runners/scale-up.tf b/modules/runners/scale-up.tf
index 89d95a50d0..b1ea88652d 100644
--- a/modules/runners/scale-up.tf
+++ b/modules/runners/scale-up.tf
@@ -87,10 +87,12 @@ resource "aws_cloudwatch_log_group" "scale_up" {
}
resource "aws_lambda_event_source_mapping" "scale_up" {
- event_source_arn = var.sqs_build_queue.arn
- function_name = aws_lambda_function.scale_up.arn
- batch_size = 1
- tags = var.tags
+ event_source_arn = var.sqs_build_queue.arn
+ function_name = aws_lambda_function.scale_up.arn
+ function_response_types = ["ReportBatchItemFailures"]
+ batch_size = var.lambda_event_source_mapping_batch_size
+ maximum_batching_window_in_seconds = var.lambda_event_source_mapping_maximum_batching_window_in_seconds
+ tags = var.tags
}
resource "aws_lambda_permission" "scale_runners_lambda" {
diff --git a/modules/runners/variables.tf b/modules/runners/variables.tf
index 856014564c..6310b8a442 100644
--- a/modules/runners/variables.tf
+++ b/modules/runners/variables.tf
@@ -770,3 +770,23 @@ variable "user_agent" {
type = string
default = null
}
+
+variable "lambda_event_source_mapping_batch_size" {
+ description = "Maximum number of records to pass to the lambda function in a single batch for the event source mapping. When not set, the AWS default of 10 events will be used."
+ type = number
+ default = 10
+ validation {
+ condition = var.lambda_event_source_mapping_batch_size >= 1 && var.lambda_event_source_mapping_batch_size <= 1000
+ error_message = "The batch size for the lambda event source mapping must be between 1 and 1000."
+ }
+}
+
+variable "lambda_event_source_mapping_maximum_batching_window_in_seconds" {
+ description = "Maximum amount of time to gather records before invoking the lambda function, in seconds. AWS requires this to be greater than 0 if batch_size is greater than 10. Defaults to 0."
+ type = number
+ default = 0
+ validation {
+ condition = var.lambda_event_source_mapping_maximum_batching_window_in_seconds >= 0 && var.lambda_event_source_mapping_maximum_batching_window_in_seconds <= 300
+ error_message = "Maximum batching window must be between 0 and 300 seconds."
+ }
+}
diff --git a/variables.tf b/variables.tf
index bec2574392..adb7d76ea8 100644
--- a/variables.tf
+++ b/variables.tf
@@ -1016,3 +1016,19 @@ variable "user_agent" {
type = string
default = "github-aws-runners"
}
+
+variable "lambda_event_source_mapping_batch_size" {
+ description = "Maximum number of records to pass to the lambda function in a single batch for the event source mapping. When not set, the AWS default of 10 events will be used."
+ type = number
+ default = 10
+}
+
+variable "lambda_event_source_mapping_maximum_batching_window_in_seconds" {
+ description = "Maximum amount of time to gather records before invoking the lambda function, in seconds. AWS requires this to be greater than 0 if batch_size is greater than 10. Defaults to 0."
+ type = number
+ default = 0
+ validation {
+ condition = var.lambda_event_source_mapping_maximum_batching_window_in_seconds >= 0 && var.lambda_event_source_mapping_maximum_batching_window_in_seconds <= 300
+ error_message = "Maximum batching window must be between 0 and 300 seconds."
+ }
+}