From a346bf5f5d3094b4a525a7d4dfcb21e41d179c6c Mon Sep 17 00:00:00 2001
From: Hardik Desai
Date: Fri, 22 May 2026 14:03:27 +0100
Subject: [PATCH] added new resource for the health check Failure Alert

---
 infrastructure/modules/function-app/alerts.tf | 45 +++++++++++++++++++++++++++++++++++++++++++++
 .../modules/function-app/variables.tf         | 29 +++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+)

diff --git a/infrastructure/modules/function-app/alerts.tf b/infrastructure/modules/function-app/alerts.tf
index 5c888fe7..63ed3973 100644
--- a/infrastructure/modules/function-app/alerts.tf
+++ b/infrastructure/modules/function-app/alerts.tf
@@ -61,3 +61,48 @@ resource "azurerm_monitor_metric_alert" "function_5xx" {
     ]
   }
 }
+
+# Health Check Failure Alert — fires when failed health check probes exceed threshold.
+# Only created when module alerting and this alert are both enabled and a health
+# check path is configured.
+resource "azurerm_monitor_scheduled_query_rules_alert_v2" "health_check_failures" {
+  count = var.enable_alerting && var.health_check_alert_enabled && var.health_check_path != null && var.health_check_path != "" ? 1 : 0
+
+  name                = "${azurerm_linux_function_app.function_app.name}-health-check-failures"
+  resource_group_name = var.resource_group_name_monitoring != null ? var.resource_group_name_monitoring : var.resource_group_name
+  location            = var.location
+
+  evaluation_frequency = var.health_check_alert_evaluation_frequency
+  window_duration      = var.health_check_alert_window_duration
+  scopes               = [var.log_analytics_workspace_id]
+  severity             = 1 # Error
+
+  criteria {
+    # Match the app's full resource ID (case-insensitively) rather than a `has` term
+    # search on its name, which would also count logs from any resource whose ID
+    # merely contains that name (e.g. "<name>-worker" or a like-named resource group).
+    # NOTE(review): Category assumes the probe is served by a function named "health" — confirm.
+    query = <<-KQL
+      FunctionAppLogs
+      | where _ResourceId =~ "${azurerm_linux_function_app.function_app.id}"
+      | where Category == "Function.health"
+      | where Level == "Error" or Message has "Failed"
+      | summarize FailureCount = count()
+    KQL
+
+    time_aggregation_method = "Total"
+    threshold               = var.health_check_alert_threshold
+    operator                = "GreaterThan"
+    metric_measure_column   = "FailureCount"
+  }
+
+  description = "Alert when health check failures for ${azurerm_linux_function_app.function_app.name} exceed ${var.health_check_alert_threshold} in ${var.health_check_alert_window_duration}."
+
+  action {
+    action_groups = [var.action_group_id]
+  }
+
+  lifecycle {
+    ignore_changes = [tags]
+  }
+}
diff --git a/infrastructure/modules/function-app/variables.tf b/infrastructure/modules/function-app/variables.tf
index 325372d8..3dc57df8 100644
--- a/infrastructure/modules/function-app/variables.tf
+++ b/infrastructure/modules/function-app/variables.tf
@@ -315,6 +315,35 @@ variable "enable_alerting" {
   default     = false
 }
 
+variable "health_check_alert_enabled" {
+  type        = bool
+  description = "Whether to create a Log Analytics alert for health check failures. Requires enable_alerting = true and health_check_path to be set."
+  default     = false
+}
+
+variable "health_check_alert_threshold" {
+  type        = number
+  description = "The alert fires when the number of health check failures in the evaluation window exceeds this value."
+  default     = 10
+
+  validation {
+    condition     = var.health_check_alert_threshold >= 0
+    error_message = "The health_check_alert_threshold value must be zero or greater."
+  }
+}
+
+variable "health_check_alert_evaluation_frequency" {
+  type        = string
+  description = "How often the scheduled query rule is evaluated. ISO 8601 duration (e.g., PT5M)."
+  default     = "PT5M"
+}
+
+variable "health_check_alert_window_duration" {
+  type        = string
+  description = "The time window over which health check failures are counted. ISO 8601 duration (e.g., PT5M)."
+  default     = "PT5M"
+}
+
 variable "log_analytics_workspace_id" {
   type        = string
   description = "id of the log analytics workspace to send resource logging to via diagnostic settings"