diff --git a/infrastructure/terraform/modules/lambda/README.md b/infrastructure/terraform/modules/lambda/README.md
index 9eb2e44..84dca0b 100644
--- a/infrastructure/terraform/modules/lambda/README.md
+++ b/infrastructure/terraform/modules/lambda/README.md
@@ -1,3 +1,20 @@
+# Pass-through Output Example
+
+If you are consuming this module from a higher-level module or environment stack,
+you can re-expose the alarm details with outputs such as:
+
+```hcl
+output "processor_lambda_error_rate_alarm_name" {
+  description = "CloudWatch alarm name for processor Lambda error rate"
+  value       = module.my_lambda.lambda_error_rate_alarm_name
+}
+
+output "processor_lambda_error_rate_alarm_arn" {
+  description = "CloudWatch alarm ARN for processor Lambda error rate"
+  value       = module.my_lambda.lambda_error_rate_alarm_arn
+}
+```
+
@@ -19,7 +36,11 @@
 | [default\_tags](#input\_default\_tags) | A map of default tags to apply to all taggable resources within the component | `map(string)` | `{}` | no |
 | [description](#input\_description) | Description of the Lambda | `string` | n/a | yes |
 | [enable\_dlq\_and\_notifications](#input\_enable\_dlq\_and\_notifications) | Create an SQS Queue and on-failure destination to be used as the Lambda's Dead Letter Queue and notifications | `bool` | `false` | no |
+| [enable\_dlq\_messages\_alarm](#input\_enable\_dlq\_messages\_alarm) | Create a CloudWatch alarm when visible messages are present in the Lambda DLQ | `bool` | `true` | no |
+| [enable\_duration\_alarm](#input\_enable\_duration\_alarm) | Create a CloudWatch alarm when Lambda duration percentile exceeds the configured threshold | `bool` | `true` | no |
+| [enable\_error\_rate\_alarm](#input\_enable\_error\_rate\_alarm) | Create a CloudWatch alarm when Lambda error rate exceeds the configured percentage threshold | `bool` | `true` | no |
 | [enable\_lambda\_insights](#input\_enable\_lambda\_insights) | Enable the lambda insights layer, this must be disabled for lambda@edge usage | `bool` | `true` | no |
+| [enable\_throttles\_alarm](#input\_enable\_throttles\_alarm) | Create a CloudWatch alarm when Lambda throttles exceed the configured threshold | `bool` | `true` | no |
 | [enable\_xray\_tracing](#input\_enable\_xray\_tracing) | Enable AWS X-Ray active tracing for the Lambda function. | `bool` | `false` | no |
 | [environment](#input\_environment) | The name of the tfscaffold environment | `string` | n/a | yes |
 | [filter\_pattern](#input\_filter\_pattern) | Filter pattern to use for the log subscription filter | `string` | `""` | no |
@@ -39,7 +60,11 @@
 | [kms\_key\_arn](#input\_kms\_key\_arn) | KMS key arn to use for this function | `string` | n/a | yes |
 | [lambda\_at\_edge](#input\_lambda\_at\_edge) | Whether this Lambda is a Lambda@Edge function | `bool` | `false` | no |
 | [lambda\_dlq\_message\_retention\_seconds](#input\_lambda\_dlq\_message\_retention\_seconds) | The number of seconds to retain messages in the Lambda DLQ SQS queue | `number` | `1209600` | no |
+| [lambda\_dlq\_messages\_alarm\_config](#input\_lambda\_dlq\_messages\_alarm\_config) | Object of optional CloudWatch alarm settings for the Lambda DLQ messages alarm | <pre>object({<br/>    comparison_operator = optional(string, "GreaterThanThreshold")<br/>    evaluation_periods  = optional(number, 1)<br/>    period              = optional(number, 300)<br/>    statistic           = optional(string, "Sum")<br/>    threshold           = optional(number, 0)<br/>    actions_enabled     = optional(bool, true)<br/>    treat_missing_data  = optional(string, "notBreaching")<br/>  })</pre> | `{}` | no |
+| [lambda\_duration\_alarm\_config](#input\_lambda\_duration\_alarm\_config) | Object of optional CloudWatch alarm settings for the Lambda duration percentile alarm | <pre>object({<br/>    comparison_operator = optional(string, "GreaterThanThreshold")<br/>    evaluation_periods  = optional(number, 1)<br/>    period              = optional(number, 300)<br/>    percentile          = optional(string, "p95")<br/>    threshold_ms        = optional(number)<br/>    actions_enabled     = optional(bool, true)<br/>    treat_missing_data  = optional(string, "notBreaching")<br/>  })</pre> | `{}` | no |
| [lambda\_env\_vars](#input\_lambda\_env\_vars) | Lambda environment parameters map | `map(string)` | `{}` | no |
+| [lambda\_error\_rate\_alarm\_config](#input\_lambda\_error\_rate\_alarm\_config) | Object of optional CloudWatch alarm settings for the Lambda error rate alarm | <pre>object({<br/>    comparison_operator = optional(string, "GreaterThanThreshold")<br/>    evaluation_periods  = optional(number, 1)<br/>    period              = optional(number, 300)<br/>    threshold           = optional(number, 1)<br/>    actions_enabled     = optional(bool, true)<br/>    treat_missing_data  = optional(string, "notBreaching")<br/>  })</pre> | `{}` | no |
+| [lambda\_throttles\_alarm\_config](#input\_lambda\_throttles\_alarm\_config) | Object of optional CloudWatch alarm settings for the Lambda throttles alarm | <pre>object({<br/>    comparison_operator = optional(string, "GreaterThanThreshold")<br/>    evaluation_periods  = optional(number, 1)<br/>    period              = optional(number, 300)<br/>    statistic           = optional(string, "Sum")<br/>    threshold           = optional(number, 0)<br/>    actions_enabled     = optional(bool, true)<br/>    treat_missing_data  = optional(string, "notBreaching")<br/>  })</pre> | `{}` | no |
| [layers](#input\_layers) | Lambda layer arns to include | `list(any)` | `[]` | no |
| [log\_destination\_arn](#input\_log\_destination\_arn) | Destination ARN to use for the log subscription filter | `string` | `""` | no |
| [log\_level](#input\_log\_level) | The log level to be used in lambda functions within the component. Any log with a lower severity than the configured value will not be logged: https://docs.python.org/3/library/logging.html#levels | `string` | `"INFO"` | no |
@@ -72,6 +97,14 @@
| [function\_qualified\_arn](#output\_function\_qualified\_arn) | Qualified ARN of the Lambda function, including version or alias |
| [iam\_role\_arn](#output\_iam\_role\_arn) | ARN of the IAM role associated with the Lambda function |
| [iam\_role\_name](#output\_iam\_role\_name) | Name of the IAM role associated with the Lambda function |
+| [lambda\_dlq\_messages\_alarm\_arn](#output\_lambda\_dlq\_messages\_alarm\_arn) | The ARN of the CloudWatch alarm for Lambda DLQ messages |
+| [lambda\_dlq\_messages\_alarm\_name](#output\_lambda\_dlq\_messages\_alarm\_name) | The name of the CloudWatch alarm for Lambda DLQ messages |
+| [lambda\_duration\_alarm\_arn](#output\_lambda\_duration\_alarm\_arn) | The ARN of the CloudWatch alarm for Lambda duration percentile |
+| [lambda\_duration\_alarm\_name](#output\_lambda\_duration\_alarm\_name) | The name of the CloudWatch alarm for Lambda duration percentile |
+| [lambda\_error\_rate\_alarm\_arn](#output\_lambda\_error\_rate\_alarm\_arn) | The ARN of the CloudWatch alarm for Lambda error rate |
+| [lambda\_error\_rate\_alarm\_name](#output\_lambda\_error\_rate\_alarm\_name) | The name of the CloudWatch alarm for Lambda error rate |
+| [lambda\_throttles\_alarm\_arn](#output\_lambda\_throttles\_alarm\_arn) | The ARN of the CloudWatch alarm for Lambda throttles |
+| [lambda\_throttles\_alarm\_name](#output\_lambda\_throttles\_alarm\_name) | The name of the CloudWatch alarm for Lambda throttles |
diff --git a/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_dlq_messages.tf b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_dlq_messages.tf
new file mode 100644
index 0000000..08f9ff9
--- /dev/null
+++ b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_dlq_messages.tf
@@ -0,0 +1,21 @@
+resource "aws_cloudwatch_metric_alarm" "lambda_dlq_messages" {
+  # Exists only when the module's DLQ is enabled and this alarm is switched on.
+  count = (var.enable_dlq_and_notifications && var.enable_dlq_messages_alarm) ? 1 : 0
+
+  alarm_name        = "${local.csi}-lambda-dlq-messages-alarm"
+  alarm_description = "RELIABILITY: Alarm when visible messages are present in the Lambda DLQ"
+  actions_enabled   = var.lambda_dlq_messages_alarm_config.actions_enabled
+  tags              = local.default_tags
+
+  # Watched metric: visible-message count of the DLQ defined in this module.
+  namespace   = "AWS/SQS"
+  metric_name = "ApproximateNumberOfMessagesVisible"
+  dimensions  = { QueueName = aws_sqs_queue.lambda_dlq[0].name }
+
+  statistic           = var.lambda_dlq_messages_alarm_config.statistic
+  period              = var.lambda_dlq_messages_alarm_config.period
+  evaluation_periods  = var.lambda_dlq_messages_alarm_config.evaluation_periods
+  comparison_operator = var.lambda_dlq_messages_alarm_config.comparison_operator
+  threshold           = var.lambda_dlq_messages_alarm_config.threshold
+  treat_missing_data  = var.lambda_dlq_messages_alarm_config.treat_missing_data
+}
diff --git a/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_duration.tf b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_duration.tf
new file mode 100644
index 0000000..15f9b14
--- /dev/null
+++ b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_duration.tf
@@ -0,0 +1,21 @@
+resource "aws_cloudwatch_metric_alarm" "lambda_duration" {
+  count = var.enable_duration_alarm ? 1 : 0
+
+  alarm_name        = "${local.csi}-lambda-duration-alarm"
+  alarm_description = "RELIABILITY: Alarm when Lambda duration percentile exceeds the configured threshold"
+  actions_enabled   = var.lambda_duration_alarm_config.actions_enabled
+  tags              = local.default_tags
+
+  # Watched metric: the configured percentile of this function's Duration.
+  namespace          = "AWS/Lambda"
+  metric_name        = "Duration"
+  extended_statistic = var.lambda_duration_alarm_config.percentile
+  dimensions         = { FunctionName = aws_lambda_function.main.function_name }
+
+  # threshold_ms unset => var.timeout * 800 (80% of the timeout in ms, assuming var.timeout is in seconds).
+  threshold           = coalesce(var.lambda_duration_alarm_config.threshold_ms, var.timeout * 800)
+  comparison_operator = var.lambda_duration_alarm_config.comparison_operator
+  evaluation_periods  = var.lambda_duration_alarm_config.evaluation_periods
+  period              = var.lambda_duration_alarm_config.period
+  treat_missing_data  = var.lambda_duration_alarm_config.treat_missing_data
+}
diff --git a/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_error_rate.tf b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_error_rate.tf
new file mode 100644
index 0000000..59a38c6
--- /dev/null
+++ b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_error_rate.tf
@@ -0,0 +1,48 @@
+resource "aws_cloudwatch_metric_alarm" "lambda_error_rate" {
+  # Metric-math alarm on the function's error rate; created only when enabled.
+  count = var.enable_error_rate_alarm ? 1 : 0
+
+  alarm_name        = "${local.csi}-lambda-error-rate-alarm"
+  alarm_description = "RELIABILITY: Alarm when Lambda error rate exceeds the configured percentage threshold"
+  actions_enabled   = var.lambda_error_rate_alarm_config.actions_enabled
+  tags              = local.default_tags
+
+  # The threshold is compared against e1 below, so it is expressed as a percentage.
+  comparison_operator = var.lambda_error_rate_alarm_config.comparison_operator
+  evaluation_periods  = var.lambda_error_rate_alarm_config.evaluation_periods
+  threshold           = var.lambda_error_rate_alarm_config.threshold
+  treat_missing_data  = var.lambda_error_rate_alarm_config.treat_missing_data
+
+  # e1 is the series marked return_data = true: errors as a percentage of
+  # invocations, pinned to 0 when m2 is not above 0 so it never divides by zero.
+  metric_query {
+    id          = "e1"
+    label       = "ErrorRatePercent"
+    expression  = "IF(m2>0,(m1/m2)*100,0)"
+    return_data = true
+  }
+
+  # m1: Sum of Errors for this function over the configured period.
+  metric_query {
+    id = "m1"
+    metric {
+      namespace   = "AWS/Lambda"
+      metric_name = "Errors"
+      stat        = "Sum"
+      period      = var.lambda_error_rate_alarm_config.period
+      dimensions  = { FunctionName = aws_lambda_function.main.function_name }
+    }
+  }
+
+  # m2: Sum of Invocations for this function over the same period.
+  metric_query {
+    id = "m2"
+    metric {
+      namespace   = "AWS/Lambda"
+      metric_name = "Invocations"
+      stat        = "Sum"
+      period      = var.lambda_error_rate_alarm_config.period
+      dimensions  = { FunctionName = aws_lambda_function.main.function_name }
+    }
+  }
+}
diff --git a/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_throttles.tf b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_throttles.tf
new file mode 100644
index 0000000..1180e4d
--- /dev/null
+++ b/infrastructure/terraform/modules/lambda/cloudwatch_metric_alarm_lambda_throttles.tf
@@ -0,0 +1,21 @@
+resource "aws_cloudwatch_metric_alarm" "lambda_throttles" {
+  # Created unless var.enable_throttles_alarm is switched off.
+  count = var.enable_throttles_alarm ? 1 : 0
+
+  alarm_name        = "${local.csi}-lambda-throttles-alarm"
+  alarm_description = "RELIABILITY: Alarm when Lambda throttles exceed the configured threshold"
+  actions_enabled   = var.lambda_throttles_alarm_config.actions_enabled
+  tags              = local.default_tags
+
+  # Watched metric: Throttles reported for this module's Lambda function.
+  namespace   = "AWS/Lambda"
+  metric_name = "Throttles"
+  dimensions  = { FunctionName = aws_lambda_function.main.function_name }
+
+  statistic           = var.lambda_throttles_alarm_config.statistic
+  period              = var.lambda_throttles_alarm_config.period
+  evaluation_periods  = var.lambda_throttles_alarm_config.evaluation_periods
+  comparison_operator = var.lambda_throttles_alarm_config.comparison_operator
+  threshold           = var.lambda_throttles_alarm_config.threshold
+  treat_missing_data  = var.lambda_throttles_alarm_config.treat_missing_data
+}
diff --git a/infrastructure/terraform/modules/lambda/outputs.tf b/infrastructure/terraform/modules/lambda/outputs.tf
index eb9191e..c93080b 100644
--- a/infrastructure/terraform/modules/lambda/outputs.tf
+++ b/infrastructure/terraform/modules/lambda/outputs.tf
@@ -37,3 +37,43 @@ output "cloudwatch_log_group_name" {
description = "Name of the CloudWatch Log Group for the Lambda function"
value = aws_cloudwatch_log_group.main.name
}
+
+output "lambda_error_rate_alarm_name" {
+  description = "The name of the CloudWatch alarm for Lambda error rate"
+  value       = one(aws_cloudwatch_metric_alarm.lambda_error_rate[*].alarm_name) # null when the alarm is not created
+}
+
+output "lambda_error_rate_alarm_arn" {
+  description = "The ARN of the CloudWatch alarm for Lambda error rate"
+  value       = one(aws_cloudwatch_metric_alarm.lambda_error_rate[*].arn) # null when the alarm is not created
+}
+
+output "lambda_throttles_alarm_name" {
+  description = "The name of the CloudWatch alarm for Lambda throttles"
+  value       = one(aws_cloudwatch_metric_alarm.lambda_throttles[*].alarm_name) # null when the alarm is not created
+}
+
+output "lambda_throttles_alarm_arn" {
+  description = "The ARN of the CloudWatch alarm for Lambda throttles"
+  value       = one(aws_cloudwatch_metric_alarm.lambda_throttles[*].arn) # null when the alarm is not created
+}
+
+output "lambda_duration_alarm_name" {
+  description = "The name of the CloudWatch alarm for Lambda duration percentile"
+  value       = one(aws_cloudwatch_metric_alarm.lambda_duration[*].alarm_name) # null when the alarm is not created
+}
+
+output "lambda_duration_alarm_arn" {
+  description = "The ARN of the CloudWatch alarm for Lambda duration percentile"
+  value       = one(aws_cloudwatch_metric_alarm.lambda_duration[*].arn) # null when the alarm is not created
+}
+
+output "lambda_dlq_messages_alarm_name" {
+  description = "The name of the CloudWatch alarm for Lambda DLQ messages"
+  value       = one(aws_cloudwatch_metric_alarm.lambda_dlq_messages[*].alarm_name) # null when the DLQ or its alarm is disabled
+}
+
+output "lambda_dlq_messages_alarm_arn" {
+  description = "The ARN of the CloudWatch alarm for Lambda DLQ messages"
+  value       = one(aws_cloudwatch_metric_alarm.lambda_dlq_messages[*].arn) # null when the DLQ or its alarm is disabled
+}
diff --git a/infrastructure/terraform/modules/lambda/variables.tf b/infrastructure/terraform/modules/lambda/variables.tf
index fe079e2..c4b488c 100644
--- a/infrastructure/terraform/modules/lambda/variables.tf
+++ b/infrastructure/terraform/modules/lambda/variables.tf
@@ -274,6 +274,85 @@ variable "enable_xray_tracing" {
default = false
}
+variable "enable_error_rate_alarm" {
+  type        = bool
+  description = "Create a CloudWatch alarm when Lambda error rate exceeds the configured percentage threshold"
+  default     = true # opt-out: the error-rate alarm is created unless this is set to false
+}
+
+variable "lambda_error_rate_alarm_config" {
+  type = object({
+    comparison_operator = optional(string, "GreaterThanThreshold")
+    evaluation_periods  = optional(number, 1)
+    period              = optional(number, 300)
+    threshold           = optional(number, 1)
+    actions_enabled     = optional(bool, true)
+    treat_missing_data  = optional(string, "notBreaching")
+  })
+  description = "Object of optional CloudWatch alarm settings for the Lambda error rate alarm"
+  default     = {} # every field falls back to its optional() default; threshold is a percentage
+}
+
+variable "enable_throttles_alarm" {
+  type        = bool
+  description = "Create a CloudWatch alarm when Lambda throttles exceed the configured threshold"
+  default     = true # opt-out: the throttles alarm is created unless this is set to false
+}
+
+variable "lambda_throttles_alarm_config" {
+  type = object({
+    comparison_operator = optional(string, "GreaterThanThreshold")
+    evaluation_periods  = optional(number, 1)
+    period              = optional(number, 300)
+    statistic           = optional(string, "Sum")
+    threshold           = optional(number, 0)
+    actions_enabled     = optional(bool, true)
+    treat_missing_data  = optional(string, "notBreaching")
+  })
+  description = "Object of optional CloudWatch alarm settings for the Lambda throttles alarm"
+  default     = {} # every field falls back to its optional() default
+}
+
+variable "enable_duration_alarm" {
+  type        = bool
+  description = "Create a CloudWatch alarm when Lambda duration percentile exceeds the configured threshold"
+  default     = true # opt-out: the duration alarm is created unless this is set to false
+}
+
+variable "lambda_duration_alarm_config" {
+  type = object({
+    comparison_operator = optional(string, "GreaterThanThreshold")
+    evaluation_periods  = optional(number, 1)
+    period              = optional(number, 300)
+    percentile          = optional(string, "p95")
+    threshold_ms        = optional(number)
+    actions_enabled     = optional(bool, true)
+    treat_missing_data  = optional(string, "notBreaching")
+  })
+  description = "Object of optional CloudWatch alarm settings for the Lambda duration percentile alarm"
+  default     = {} # threshold_ms stays null here, so the alarm falls back to var.timeout * 800
+}
+
+variable "enable_dlq_messages_alarm" {
+  type        = bool
+  description = "Create a CloudWatch alarm when visible messages are present in the Lambda DLQ"
+  default     = true # only takes effect when var.enable_dlq_and_notifications is also true
+}
+
+variable "lambda_dlq_messages_alarm_config" {
+  type = object({
+    comparison_operator = optional(string, "GreaterThanThreshold")
+    evaluation_periods  = optional(number, 1)
+    period              = optional(number, 300)
+    statistic           = optional(string, "Sum")
+    threshold           = optional(number, 0)
+    actions_enabled     = optional(bool, true)
+    treat_missing_data  = optional(string, "notBreaching")
+  })
+  description = "Object of optional CloudWatch alarm settings for the Lambda DLQ messages alarm"
+  default     = {} # every field falls back to its optional() default
+}
+
variable "lambda_at_edge" {
type = bool
description = "Whether this Lambda is a Lambda@Edge function"