From 74bd132e74f18ebd7df7ee814fbaf78900c59cc3 Mon Sep 17 00:00:00 2001 From: jamesthompson26-nhs Date: Thu, 21 May 2026 12:30:05 +0100 Subject: [PATCH 1/4] CCM-14782: Lambda Alarms --- .../terraform/modules/lambda/README.md | 67 ++++++++++++++++++- ...oudwatch_metric_alarm_lambda_error_rate.tf | 48 +++++++++++++ .../terraform/modules/lambda/outputs.tf | 10 +++ .../terraform/modules/lambda/variables.tf | 19 ++++++ 4 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_error_rate.tf diff --git a/infrastructure/terraform/modules/lambda/README.md b/infrastructure/terraform/modules/lambda/README.md index 9eb2e44..04ba83e 100644 --- a/infrastructure/terraform/modules/lambda/README.md +++ b/infrastructure/terraform/modules/lambda/README.md @@ -1,3 +1,20 @@ +## Pass-through Output Example + +If you are consuming this module from a higher-level module or environment stack, +you can re-expose the alarm details with outputs such as: + +```hcl +output "processor_lambda_error_rate_alarm_name" { + description = "CloudWatch alarm name for processor Lambda error rate" + value = module.my_lambda.lambda_error_rate_alarm_name +} + +output "processor_lambda_error_rate_alarm_arn" { + description = "CloudWatch alarm ARN for processor Lambda error rate" + value = module.my_lambda.lambda_error_rate_alarm_arn +} +``` + @@ -9,6 +26,51 @@ |------|---------| | [terraform](#requirement\_terraform) | >= 0.12 | +## Providers + +| Name | Version | +|------|---------| +| [archive](#provider\_archive) | n/a | +| [aws](#provider\_aws) | n/a | +| [external](#provider\_external) | n/a | + +## Modules + +No modules. + +## Resources + +| Name | Type | +|------|------| +| [aws_cloudwatch_event_rule.main](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_rule) | resource | +| [aws_cloudwatch_event_target.main](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_target) | resource | +| [aws_cloudwatch_log_group.main](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource | +| [aws_cloudwatch_log_group.main_edge](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource | +| [aws_cloudwatch_log_subscription_filter.firehose](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_subscription_filter) | resource | +| [aws_cloudwatch_metric_alarm.lambda_error_rate](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_metric_alarm) | resource | +| [aws_iam_policy.main](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | +| [aws_iam_role.main](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | +| [aws_iam_role_policy.ecr](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy) | resource | +| [aws_iam_role_policy.publish](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy) | resource | +| [aws_iam_role_policy.put_logs](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy) | resource | +| [aws_iam_role_policy.send_message](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy) | resource | +| [aws_iam_role_policy_attachment.insights](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | +| [aws_iam_role_policy_attachment.main](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | +| [aws_iam_role_policy_attachment.xray](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | +| [aws_lambda_function.main](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function) | resource | +| [aws_lambda_function_event_invoke_config.lambda_destination](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function_event_invoke_config) | resource | +| [aws_lambda_permission.events](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_permission) | resource | +| [aws_lambda_permission.main](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_permission) | resource | +| [aws_s3_object.lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_object) | resource | +| [aws_sqs_queue.lambda_dlq](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sqs_queue) | resource | +| [archive_file.lambda](https://registry.terraform.io/providers/hashicorp/archive/latest/docs/data-sources/file) | data source | +| [aws_iam_policy_document.ecr](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.lambda_assumerole](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.publish](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.put_logs](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.send_message](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [external_external.git_commit](https://registry.terraform.io/providers/hashicorp/external/latest/docs/data-sources/external) | data source | + ## Inputs | Name | Description | Type | Default | Required | @@ -19,6 +81,7 @@ | [default\_tags](#input\_default\_tags) | A map of default tags to apply to all taggable resources within the component | `map(string)` | `{}` | no | | [description](#input\_description) | Description of the Lambda | `string` | n/a | yes | | [enable\_dlq\_and\_notifications](#input\_enable\_dlq\_and\_notifications) | Create an SQS Queue and on-failure destination to be used as the Lambda's Dead Letter Queue and notifications | `bool` | `false` | no | +| [enable\_error\_rate\_alarm](#input\_enable\_error\_rate\_alarm) | Create a CloudWatch alarm when Lambda error rate exceeds the configured percentage threshold | `bool` | `true` | no | | [enable\_lambda\_insights](#input\_enable\_lambda\_insights) | Enable the lambda insights layer, this must be disabled for lambda@edge usage | `bool` | `true` | no | | [enable\_xray\_tracing](#input\_enable\_xray\_tracing) | Enable AWS X-Ray active tracing for the Lambda function. | `bool` | `false` | no | | [environment](#input\_environment) | The name of the tfscaffold environment | `string` | n/a | yes | @@ -40,6 +103,7 @@ | [lambda\_at\_edge](#input\_lambda\_at\_edge) | Whether this Lambda is a Lambda@Edge function | `bool` | `false` | no | | [lambda\_dlq\_message\_retention\_seconds](#input\_lambda\_dlq\_message\_retention\_seconds) | The number of seconds to retain messages in the Lambda DLQ SQS queue | `number` | `1209600` | no | | [lambda\_env\_vars](#input\_lambda\_env\_vars) | Lambda environment parameters map | `map(string)` | `{}` | no | +| [lambda\_error\_rate\_alarm\_config](#input\_lambda\_error\_rate\_alarm\_config) | Object of optional CloudWatch alarm settings for the Lambda error rate alarm |
object({
comparison_operator = optional(string, "GreaterThanThreshold")
evaluation_periods = optional(number, 1)
period = optional(number, 300)
threshold = optional(number, 1)
actions_enabled = optional(bool, true)
treat_missing_data = optional(string, "notBreaching")
})
| `{}` | no | | [layers](#input\_layers) | Lambda layer arns to include | `list(any)` | `[]` | no | | [log\_destination\_arn](#input\_log\_destination\_arn) | Destination ARN to use for the log subscription filter | `string` | `""` | no | | [log\_level](#input\_log\_level) | The log level to be used in lambda functions within the component. Any log with a lower severity than the configured value will not be logged: https://docs.python.org/3/library/logging.html#levels | `string` | `"INFO"` | no | @@ -72,7 +136,8 @@ | [function\_qualified\_arn](#output\_function\_qualified\_arn) | Qualified ARN of the Lambda function, including version or alias | | [iam\_role\_arn](#output\_iam\_role\_arn) | ARN of the IAM role associated with the Lambda function | | [iam\_role\_name](#output\_iam\_role\_name) | Name of the IAM role associated with the Lambda function | - +| [lambda\_error\_rate\_alarm\_arn](#output\_lambda\_error\_rate\_alarm\_arn) | The ARN of the CloudWatch alarm for Lambda error rate | +| [lambda\_error\_rate\_alarm\_name](#output\_lambda\_error\_rate\_alarm\_name) | The name of the CloudWatch alarm for Lambda error rate | diff --git a/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_error_rate.tf b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_error_rate.tf new file mode 100644 index 0000000..87b6b7c --- /dev/null +++ b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_error_rate.tf @@ -0,0 +1,48 @@ +resource "aws_cloudwatch_metric_alarm" "lambda_error_rate" { + count = var.enable_error_rate_alarm ? 1 : 0 + + alarm_name = "${local.csi}-lambda-error-rate-alarm" + alarm_description = "RELIABILITY: Alarm when Lambda error rate exceeds the configured percentage threshold" + comparison_operator = var.lambda_error_rate_alarm_config.comparison_operator + evaluation_periods = var.lambda_error_rate_alarm_config.evaluation_periods + threshold = var.lambda_error_rate_alarm_config.threshold + actions_enabled = var.lambda_error_rate_alarm_config.actions_enabled + treat_missing_data = var.lambda_error_rate_alarm_config.treat_missing_data + + metric_query { + id = "e1" + expression = "IF(m2>0,(m1/m2)*100,0)" + label = "ErrorRatePercent" + return_data = true + } + + metric_query { + id = "m1" + + metric { + metric_name = "Errors" + namespace = "AWS/Lambda" + period = var.lambda_error_rate_alarm_config.period + stat = "Sum" + dimensions = { + FunctionName = aws_lambda_function.main.function_name + } + } + } + + metric_query { + id = "m2" + + metric { + metric_name = "Invocations" + namespace = "AWS/Lambda" + period = var.lambda_error_rate_alarm_config.period + stat = "Sum" + dimensions = { + FunctionName = aws_lambda_function.main.function_name + } + } + } + + tags = local.default_tags +} \ No newline at end of file diff --git a/infrastructure/terraform/modules/lambda/outputs.tf b/infrastructure/terraform/modules/lambda/outputs.tf index eb9191e..17b99c8 100644 --- a/infrastructure/terraform/modules/lambda/outputs.tf +++ b/infrastructure/terraform/modules/lambda/outputs.tf @@ -37,3 +37,13 @@ output "cloudwatch_log_group_name" { description = "Name of the CloudWatch Log Group for the Lambda function" value = aws_cloudwatch_log_group.main.name } + +output "lambda_error_rate_alarm_name" { + description = "The name of the CloudWatch alarm for Lambda error rate" + value = var.enable_error_rate_alarm ? aws_cloudwatch_metric_alarm.lambda_error_rate[0].alarm_name : null +} + +output "lambda_error_rate_alarm_arn" { + description = "The ARN of the CloudWatch alarm for Lambda error rate" + value = var.enable_error_rate_alarm ? aws_cloudwatch_metric_alarm.lambda_error_rate[0].arn : null +} diff --git a/infrastructure/terraform/modules/lambda/variables.tf b/infrastructure/terraform/modules/lambda/variables.tf index fe079e2..03bc628 100644 --- a/infrastructure/terraform/modules/lambda/variables.tf +++ b/infrastructure/terraform/modules/lambda/variables.tf @@ -274,6 +274,25 @@ variable "enable_xray_tracing" { default = false } +variable "enable_error_rate_alarm" { + type = bool + description = "Create a CloudWatch alarm when Lambda error rate exceeds the configured percentage threshold" + default = true +} + +variable "lambda_error_rate_alarm_config" { + description = "Object of optional CloudWatch alarm settings for the Lambda error rate alarm" + type = object({ + comparison_operator = optional(string, "GreaterThanThreshold") + evaluation_periods = optional(number, 1) + period = optional(number, 300) + threshold = optional(number, 1) + actions_enabled = optional(bool, true) + treat_missing_data = optional(string, "notBreaching") + }) + default = {} +} + variable "lambda_at_edge" { type = bool description = "Whether this Lambda is a Lambda@Edge function" From 17ab49ac6f544473f4515b1b2463cf19b20392ab Mon Sep 17 00:00:00 2001 From: jamesthompson26-nhs Date: Thu, 21 May 2026 12:30:19 +0100 Subject: [PATCH 2/4] CCM-14782: Lambda Alarms --- .../terraform/modules/lambda/README.md | 46 +------------------ 1 file changed, 1 insertion(+), 45 deletions(-) diff --git a/infrastructure/terraform/modules/lambda/README.md b/infrastructure/terraform/modules/lambda/README.md index 04ba83e..f570656 100644 --- a/infrastructure/terraform/modules/lambda/README.md +++ b/infrastructure/terraform/modules/lambda/README.md @@ -26,51 +26,6 @@ output "processor_lambda_error_rate_alarm_arn" { |------|---------| | [terraform](#requirement\_terraform) | >= 0.12 | -## Providers - -| Name | Version | -|------|---------| -| [archive](#provider\_archive) | n/a | -| [aws](#provider\_aws) | n/a | -| [external](#provider\_external) | n/a | - -## Modules - -No modules. - -## Resources - -| Name | Type | -|------|------| -| [aws_cloudwatch_event_rule.main](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_rule) | resource | -| [aws_cloudwatch_event_target.main](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_target) | resource | -| [aws_cloudwatch_log_group.main](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource | -| [aws_cloudwatch_log_group.main_edge](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource | -| [aws_cloudwatch_log_subscription_filter.firehose](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_subscription_filter) | resource | -| [aws_cloudwatch_metric_alarm.lambda_error_rate](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_metric_alarm) | resource | -| [aws_iam_policy.main](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | -| [aws_iam_role.main](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | -| [aws_iam_role_policy.ecr](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy) | resource | -| [aws_iam_role_policy.publish](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy) | resource | -| [aws_iam_role_policy.put_logs](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy) | resource | -| [aws_iam_role_policy.send_message](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy) | resource | -| [aws_iam_role_policy_attachment.insights](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | -| [aws_iam_role_policy_attachment.main](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | -| [aws_iam_role_policy_attachment.xray](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | -| [aws_lambda_function.main](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function) | resource | -| [aws_lambda_function_event_invoke_config.lambda_destination](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function_event_invoke_config) | resource | -| [aws_lambda_permission.events](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_permission) | resource | -| [aws_lambda_permission.main](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_permission) | resource | -| [aws_s3_object.lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_object) | resource | -| [aws_sqs_queue.lambda_dlq](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sqs_queue) | resource | -| [archive_file.lambda](https://registry.terraform.io/providers/hashicorp/archive/latest/docs/data-sources/file) | data source | -| [aws_iam_policy_document.ecr](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | -| [aws_iam_policy_document.lambda_assumerole](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | -| [aws_iam_policy_document.publish](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | -| [aws_iam_policy_document.put_logs](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | -| [aws_iam_policy_document.send_message](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | -| [external_external.git_commit](https://registry.terraform.io/providers/hashicorp/external/latest/docs/data-sources/external) | data source | - ## Inputs | Name | Description | Type | Default | Required | @@ -138,6 +93,7 @@ No modules. | [iam\_role\_name](#output\_iam\_role\_name) | Name of the IAM role associated with the Lambda function | | [lambda\_error\_rate\_alarm\_arn](#output\_lambda\_error\_rate\_alarm\_arn) | The ARN of the CloudWatch alarm for Lambda error rate | | [lambda\_error\_rate\_alarm\_name](#output\_lambda\_error\_rate\_alarm\_name) | The name of the CloudWatch alarm for Lambda error rate | + From 60abdc289afee4fa3544021425d490f3444f1357 Mon Sep 17 00:00:00 2001 From: jamesthompson26-nhs Date: Thu, 21 May 2026 13:14:00 +0100 Subject: [PATCH 3/4] CCM-14782: Lambda Alarms --- .../terraform/modules/lambda/README.md | 12 ++++ ...dwatch_metric_alarm_lambda_dlq_messages.tf | 21 +++++++ ...cloudwatch_metric_alarm_lambda_duration.tf | 21 +++++++ ...loudwatch_metric_alarm_lambda_throttles.tf | 21 +++++++ .../terraform/modules/lambda/outputs.tf | 30 ++++++++++ .../terraform/modules/lambda/variables.tf | 60 +++++++++++++++++++ 6 files changed, 165 insertions(+) create mode 100644 infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_dlq_messages.tf create mode 100644 infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_duration.tf create mode 100644 infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_throttles.tf diff --git a/infrastructure/terraform/modules/lambda/README.md b/infrastructure/terraform/modules/lambda/README.md index f570656..d30da55 100644 --- a/infrastructure/terraform/modules/lambda/README.md +++ b/infrastructure/terraform/modules/lambda/README.md @@ -36,8 +36,11 @@ output "processor_lambda_error_rate_alarm_arn" { | [default\_tags](#input\_default\_tags) | A map of default tags to apply to all taggable resources within the component | `map(string)` | `{}` | no | | [description](#input\_description) | Description of the Lambda | `string` | n/a | yes | | [enable\_dlq\_and\_notifications](#input\_enable\_dlq\_and\_notifications) | Create an SQS Queue and on-failure destination to be used as the Lambda's Dead Letter Queue and notifications | `bool` | `false` | no | +| [enable\_dlq\_messages\_alarm](#input\_enable\_dlq\_messages\_alarm) | Create a CloudWatch alarm when visible messages are present in the Lambda DLQ | `bool` | `true` | no | +| [enable\_duration\_alarm](#input\_enable\_duration\_alarm) | Create a CloudWatch alarm when Lambda duration percentile exceeds the configured threshold | `bool` | `true` | no | | [enable\_error\_rate\_alarm](#input\_enable\_error\_rate\_alarm) | Create a CloudWatch alarm when Lambda error rate exceeds the configured percentage threshold | `bool` | `true` | no | | [enable\_lambda\_insights](#input\_enable\_lambda\_insights) | Enable the lambda insights layer, this must be disabled for lambda@edge usage | `bool` | `true` | no | +| [enable\_throttles\_alarm](#input\_enable\_throttles\_alarm) | Create a CloudWatch alarm when Lambda throttles exceed the configured threshold | `bool` | `true` | no | | [enable\_xray\_tracing](#input\_enable\_xray\_tracing) | Enable AWS X-Ray active tracing for the Lambda function. | `bool` | `false` | no | | [environment](#input\_environment) | The name of the tfscaffold environment | `string` | n/a | yes | | [filter\_pattern](#input\_filter\_pattern) | Filter pattern to use for the log subscription filter | `string` | `""` | no | @@ -57,8 +60,11 @@ output "processor_lambda_error_rate_alarm_arn" { | [kms\_key\_arn](#input\_kms\_key\_arn) | KMS key arn to use for this function | `string` | n/a | yes | | [lambda\_at\_edge](#input\_lambda\_at\_edge) | Whether this Lambda is a Lambda@Edge function | `bool` | `false` | no | | [lambda\_dlq\_message\_retention\_seconds](#input\_lambda\_dlq\_message\_retention\_seconds) | The number of seconds to retain messages in the Lambda DLQ SQS queue | `number` | `1209600` | no | +| [lambda\_dlq\_messages\_alarm\_config](#input\_lambda\_dlq\_messages\_alarm\_config) | Object of optional CloudWatch alarm settings for the Lambda DLQ messages alarm |
object({
comparison_operator = optional(string, "GreaterThanThreshold")
evaluation_periods = optional(number, 1)
period = optional(number, 300)
statistic = optional(string, "Sum")
threshold = optional(number, 0)
actions_enabled = optional(bool, true)
treat_missing_data = optional(string, "notBreaching")
})
| `{}` | no | +| [lambda\_duration\_alarm\_config](#input\_lambda\_duration\_alarm\_config) | Object of optional CloudWatch alarm settings for the Lambda duration percentile alarm |
object({
comparison_operator = optional(string, "GreaterThanThreshold")
evaluation_periods = optional(number, 1)
period = optional(number, 300)
percentile = optional(string, "p95")
threshold_ms = optional(number)
actions_enabled = optional(bool, true)
treat_missing_data = optional(string, "notBreaching")
})
| `{}` | no | | [lambda\_env\_vars](#input\_lambda\_env\_vars) | Lambda environment parameters map | `map(string)` | `{}` | no | | [lambda\_error\_rate\_alarm\_config](#input\_lambda\_error\_rate\_alarm\_config) | Object of optional CloudWatch alarm settings for the Lambda error rate alarm |
object({
comparison_operator = optional(string, "GreaterThanThreshold")
evaluation_periods = optional(number, 1)
period = optional(number, 300)
threshold = optional(number, 1)
actions_enabled = optional(bool, true)
treat_missing_data = optional(string, "notBreaching")
})
| `{}` | no | +| [lambda\_throttles\_alarm\_config](#input\_lambda\_throttles\_alarm\_config) | Object of optional CloudWatch alarm settings for the Lambda throttles alarm |
object({
comparison_operator = optional(string, "GreaterThanThreshold")
evaluation_periods = optional(number, 1)
period = optional(number, 300)
statistic = optional(string, "Sum")
threshold = optional(number, 0)
actions_enabled = optional(bool, true)
treat_missing_data = optional(string, "notBreaching")
})
| `{}` | no | | [layers](#input\_layers) | Lambda layer arns to include | `list(any)` | `[]` | no | | [log\_destination\_arn](#input\_log\_destination\_arn) | Destination ARN to use for the log subscription filter | `string` | `""` | no | | [log\_level](#input\_log\_level) | The log level to be used in lambda functions within the component. Any log with a lower severity than the configured value will not be logged: https://docs.python.org/3/library/logging.html#levels | `string` | `"INFO"` | no | @@ -91,8 +97,14 @@ output "processor_lambda_error_rate_alarm_arn" { | [function\_qualified\_arn](#output\_function\_qualified\_arn) | Qualified ARN of the Lambda function, including version or alias | | [iam\_role\_arn](#output\_iam\_role\_arn) | ARN of the IAM role associated with the Lambda function | | [iam\_role\_name](#output\_iam\_role\_name) | Name of the IAM role associated with the Lambda function | +| [lambda\_dlq\_messages\_alarm\_arn](#output\_lambda\_dlq\_messages\_alarm\_arn) | The ARN of the CloudWatch alarm for Lambda DLQ messages | +| [lambda\_dlq\_messages\_alarm\_name](#output\_lambda\_dlq\_messages\_alarm\_name) | The name of the CloudWatch alarm for Lambda DLQ messages | +| [lambda\_duration\_alarm\_arn](#output\_lambda\_duration\_alarm\_arn) | The ARN of the CloudWatch alarm for Lambda duration percentile | +| [lambda\_duration\_alarm\_name](#output\_lambda\_duration\_alarm\_name) | The name of the CloudWatch alarm for Lambda duration percentile | | [lambda\_error\_rate\_alarm\_arn](#output\_lambda\_error\_rate\_alarm\_arn) | The ARN of the CloudWatch alarm for Lambda error rate | | [lambda\_error\_rate\_alarm\_name](#output\_lambda\_error\_rate\_alarm\_name) | The name of the CloudWatch alarm for Lambda error rate | +| [lambda\_throttles\_alarm\_arn](#output\_lambda\_throttles\_alarm\_arn) | The ARN of the CloudWatch alarm for Lambda throttles | +| [lambda\_throttles\_alarm\_name](#output\_lambda\_throttles\_alarm\_name) | The name of the CloudWatch alarm for Lambda throttles | diff --git a/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_dlq_messages.tf b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_dlq_messages.tf new file mode 100644 index 0000000..e8a37d7 --- /dev/null +++ b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_dlq_messages.tf @@ -0,0 +1,21 @@ +resource "aws_cloudwatch_metric_alarm" "lambda_dlq_messages" { + count = var.enable_dlq_and_notifications && var.enable_dlq_messages_alarm ? 1 : 0 + + alarm_name = "${local.csi}-lambda-dlq-messages-alarm" + alarm_description = "RELIABILITY: Alarm when visible messages are present in the Lambda DLQ" + comparison_operator = var.lambda_dlq_messages_alarm_config.comparison_operator + evaluation_periods = var.lambda_dlq_messages_alarm_config.evaluation_periods + metric_name = "ApproximateNumberOfMessagesVisible" + namespace = "AWS/SQS" + period = var.lambda_dlq_messages_alarm_config.period + statistic = var.lambda_dlq_messages_alarm_config.statistic + threshold = var.lambda_dlq_messages_alarm_config.threshold + actions_enabled = var.lambda_dlq_messages_alarm_config.actions_enabled + treat_missing_data = var.lambda_dlq_messages_alarm_config.treat_missing_data + + dimensions = { + QueueName = aws_sqs_queue.lambda_dlq[0].name + } + + tags = local.default_tags +} \ No newline at end of file diff --git a/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_duration.tf b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_duration.tf new file mode 100644 index 0000000..bddbffc --- /dev/null +++ b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_duration.tf @@ -0,0 +1,21 @@ +resource "aws_cloudwatch_metric_alarm" "lambda_duration" { + count = var.enable_duration_alarm ? 1 : 0 + + alarm_name = "${local.csi}-lambda-duration-alarm" + alarm_description = "RELIABILITY: Alarm when Lambda duration percentile exceeds the configured threshold" + comparison_operator = var.lambda_duration_alarm_config.comparison_operator + evaluation_periods = var.lambda_duration_alarm_config.evaluation_periods + metric_name = "Duration" + namespace = "AWS/Lambda" + period = var.lambda_duration_alarm_config.period + extended_statistic = var.lambda_duration_alarm_config.percentile + threshold = coalesce(var.lambda_duration_alarm_config.threshold_ms, var.timeout * 800) + actions_enabled = var.lambda_duration_alarm_config.actions_enabled + treat_missing_data = var.lambda_duration_alarm_config.treat_missing_data + + dimensions = { + FunctionName = aws_lambda_function.main.function_name + } + + tags = local.default_tags +} \ No newline at end of file diff --git a/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_throttles.tf b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_throttles.tf new file mode 100644 index 0000000..f445bb9 --- /dev/null +++ b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_throttles.tf @@ -0,0 +1,21 @@ +resource "aws_cloudwatch_metric_alarm" "lambda_throttles" { + count = var.enable_throttles_alarm ? 1 : 0 + + alarm_name = "${local.csi}-lambda-throttles-alarm" + alarm_description = "RELIABILITY: Alarm when Lambda throttles exceed the configured threshold" + comparison_operator = var.lambda_throttles_alarm_config.comparison_operator + evaluation_periods = var.lambda_throttles_alarm_config.evaluation_periods + metric_name = "Throttles" + namespace = "AWS/Lambda" + period = var.lambda_throttles_alarm_config.period + statistic = var.lambda_throttles_alarm_config.statistic + threshold = var.lambda_throttles_alarm_config.threshold + actions_enabled = var.lambda_throttles_alarm_config.actions_enabled + treat_missing_data = var.lambda_throttles_alarm_config.treat_missing_data + + dimensions = { + FunctionName = aws_lambda_function.main.function_name + } + + tags = local.default_tags +} \ No newline at end of file diff --git a/infrastructure/terraform/modules/lambda/outputs.tf b/infrastructure/terraform/modules/lambda/outputs.tf index 17b99c8..c93080b 100644 --- a/infrastructure/terraform/modules/lambda/outputs.tf +++ b/infrastructure/terraform/modules/lambda/outputs.tf @@ -47,3 +47,33 @@ output "lambda_error_rate_alarm_arn" { description = "The ARN of the CloudWatch alarm for Lambda error rate" value = var.enable_error_rate_alarm ? aws_cloudwatch_metric_alarm.lambda_error_rate[0].arn : null } + +output "lambda_throttles_alarm_name" { + description = "The name of the CloudWatch alarm for Lambda throttles" + value = var.enable_throttles_alarm ? aws_cloudwatch_metric_alarm.lambda_throttles[0].alarm_name : null +} + +output "lambda_throttles_alarm_arn" { + description = "The ARN of the CloudWatch alarm for Lambda throttles" + value = var.enable_throttles_alarm ? aws_cloudwatch_metric_alarm.lambda_throttles[0].arn : null +} + +output "lambda_duration_alarm_name" { + description = "The name of the CloudWatch alarm for Lambda duration percentile" + value = var.enable_duration_alarm ? aws_cloudwatch_metric_alarm.lambda_duration[0].alarm_name : null +} + +output "lambda_duration_alarm_arn" { + description = "The ARN of the CloudWatch alarm for Lambda duration percentile" + value = var.enable_duration_alarm ? aws_cloudwatch_metric_alarm.lambda_duration[0].arn : null +} + +output "lambda_dlq_messages_alarm_name" { + description = "The name of the CloudWatch alarm for Lambda DLQ messages" + value = var.enable_dlq_and_notifications && var.enable_dlq_messages_alarm ? aws_cloudwatch_metric_alarm.lambda_dlq_messages[0].alarm_name : null +} + +output "lambda_dlq_messages_alarm_arn" { + description = "The ARN of the CloudWatch alarm for Lambda DLQ messages" + value = var.enable_dlq_and_notifications && var.enable_dlq_messages_alarm ? aws_cloudwatch_metric_alarm.lambda_dlq_messages[0].arn : null +} diff --git a/infrastructure/terraform/modules/lambda/variables.tf b/infrastructure/terraform/modules/lambda/variables.tf index 03bc628..c4b488c 100644 --- a/infrastructure/terraform/modules/lambda/variables.tf +++ b/infrastructure/terraform/modules/lambda/variables.tf @@ -293,6 +293,66 @@ variable "lambda_error_rate_alarm_config" { default = {} } +variable "enable_throttles_alarm" { + type = bool + description = "Create a CloudWatch alarm when Lambda throttles exceed the configured threshold" + default = true +} + +variable "lambda_throttles_alarm_config" { + description = "Object of optional CloudWatch alarm settings for the Lambda throttles alarm" + type = object({ + comparison_operator = optional(string, "GreaterThanThreshold") + evaluation_periods = optional(number, 1) + period = optional(number, 300) + statistic = optional(string, "Sum") + threshold = optional(number, 0) + actions_enabled = optional(bool, true) + treat_missing_data = optional(string, "notBreaching") + }) + default = {} +} + +variable "enable_duration_alarm" { + type = bool + description = "Create a CloudWatch alarm when Lambda duration percentile exceeds the configured threshold" + default = true +} + +variable "lambda_duration_alarm_config" { + description = "Object of optional CloudWatch alarm settings for the Lambda duration percentile alarm" + type = object({ + comparison_operator = optional(string, "GreaterThanThreshold") + evaluation_periods = optional(number, 1) + period = optional(number, 300) + percentile = optional(string, "p95") + threshold_ms = optional(number) + actions_enabled = optional(bool, true) + treat_missing_data = optional(string, "notBreaching") + }) + default = {} +} + +variable "enable_dlq_messages_alarm" { + type = bool + description = "Create a CloudWatch alarm when visible messages are present in the Lambda DLQ" + default = true +} + +variable "lambda_dlq_messages_alarm_config" { + description = "Object of optional CloudWatch alarm settings for the Lambda DLQ messages alarm" + type = object({ + comparison_operator = optional(string, "GreaterThanThreshold") + evaluation_periods = optional(number, 1) + period = optional(number, 300) + statistic = optional(string, "Sum") + threshold = optional(number, 0) + actions_enabled = optional(bool, true) + treat_missing_data = optional(string, "notBreaching") + }) + default = {} +} + variable "lambda_at_edge" { type = bool description = "Whether this Lambda is a Lambda@Edge function" From 3c85af441620cdb6201e10698d2ff93f66d5bd23 Mon Sep 17 00:00:00 2001 From: jamesthompson26-nhs Date: Thu, 21 May 2026 13:17:57 +0100 Subject: [PATCH 4/4] CCM-14782: Lambda Alarms --- infrastructure/terraform/modules/lambda/README.md | 10 +++++----- .../cloudwatch_metric_alarm_lambda_dlq_messages.tf | 2 +- .../lambda/cloudwatch_metric_alarm_lambda_duration.tf | 2 +- .../cloudwatch_metric_alarm_lambda_error_rate.tf | 2 +- .../lambda/cloudwatch_metric_alarm_lambda_throttles.tf | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/infrastructure/terraform/modules/lambda/README.md b/infrastructure/terraform/modules/lambda/README.md index d30da55..84dca0b 100644 --- a/infrastructure/terraform/modules/lambda/README.md +++ b/infrastructure/terraform/modules/lambda/README.md @@ -1,17 +1,17 @@ -## Pass-through Output Example +# Pass-through Output Example If you are consuming this module from a higher-level module or environment stack, you can re-expose the alarm details with outputs such as: ```hcl output "processor_lambda_error_rate_alarm_name" { - description = "CloudWatch alarm name for processor Lambda error rate" - value = module.my_lambda.lambda_error_rate_alarm_name + description = "CloudWatch alarm name for processor Lambda error rate" + value = module.my_lambda.lambda_error_rate_alarm_name } output "processor_lambda_error_rate_alarm_arn" { - description = "CloudWatch alarm ARN for processor Lambda error rate" - value = module.my_lambda.lambda_error_rate_alarm_arn + description = "CloudWatch alarm ARN for processor Lambda error rate" + value = module.my_lambda.lambda_error_rate_alarm_arn } ``` diff --git a/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_dlq_messages.tf b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_dlq_messages.tf index e8a37d7..08f9ff9 100644 --- a/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_dlq_messages.tf +++ b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_dlq_messages.tf @@ -18,4 +18,4 @@ resource "aws_cloudwatch_metric_alarm" "lambda_dlq_messages" { } tags = local.default_tags -} \ No newline at end of file +} diff --git a/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_duration.tf b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_duration.tf index bddbffc..15f9b14 100644 --- a/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_duration.tf +++ b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_duration.tf @@ -18,4 +18,4 @@ resource "aws_cloudwatch_metric_alarm" "lambda_duration" { } tags = local.default_tags -} \ No newline at end of file +} diff --git a/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_error_rate.tf b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_error_rate.tf index 87b6b7c..59a38c6 100644 --- a/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_error_rate.tf +++ b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_error_rate.tf @@ -45,4 +45,4 @@ resource "aws_cloudwatch_metric_alarm" "lambda_error_rate" { } tags = local.default_tags -} \ No newline at end of file +} diff --git a/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_throttles.tf b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_throttles.tf index f445bb9..1180e4d 100644 --- a/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_throttles.tf +++ b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_throttles.tf @@ -18,4 +18,4 @@ resource "aws_cloudwatch_metric_alarm" "lambda_throttles" { } tags = local.default_tags -} \ No newline at end of file +}