Skip to content

Commit b9849d9

Browse files
Copilot and tmikula-dev committed
Add /health endpoint for service monitoring
- Created HandlerHealth class with dependency checks for Kafka, EventBridge, and PostgreSQL - Added /health route to lambda_handler - Implemented uptime tracking and status reporting - Added comprehensive tests for healthy and degraded states - Updated OpenAPI spec with /health endpoint documentation Co-authored-by: tmikula-dev <72911271+tmikula-dev@users.noreply.github.com>
1 parent 61a9ba2 commit b9849d9

4 files changed

Lines changed: 305 additions & 0 deletions

File tree

conf/api.yaml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,41 @@ paths:
3838
'303':
3939
description: Redirect to actual address of Login service which performs auth up to its capabilities
4040

41+
/health:
42+
get:
43+
summary: Service health check
44+
description: Service health and dependency status check
45+
responses:
46+
'200':
47+
description: Service is healthy
48+
content:
49+
application/json:
50+
schema:
51+
type: object
52+
properties:
53+
status:
54+
type: string
55+
example: ok
56+
uptime_seconds:
57+
type: integer
58+
example: 12345
59+
'503':
60+
description: Service is degraded
61+
content:
62+
application/json:
63+
schema:
64+
type: object
65+
properties:
66+
status:
67+
type: string
68+
example: degraded
69+
details:
70+
type: object
71+
additionalProperties:
72+
type: string
73+
example:
74+
kafka: not_initialized
75+
4176
/topics:
4277
get:
4378
summary: Get a list of topics

src/event_gate_lambda.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
from src.handlers.handler_token import HandlerToken
2828
from src.handlers.handler_topic import HandlerTopic
29+
from src.handlers.handler_health import HandlerHealth
2930
from src.utils.constants import SSL_CA_BUNDLE_KEY
3031
from src.utils.utils import build_error_response
3132
from src.writers import writer_eventbridge, writer_kafka, writer_postgres
@@ -85,6 +86,9 @@
8586
# Initialize topic handler and load topic schemas
8687
handler_topic = HandlerTopic(CONF_DIR, ACCESS, handler_token).load_topic_schemas()
8788

89+
# Initialize health handler
90+
handler_health = HandlerHealth(logger, config)
91+
8892

8993
def get_api() -> Dict[str, Any]:
9094
"""Return the OpenAPI specification text."""
@@ -108,6 +112,8 @@ def lambda_handler(event: Dict[str, Any], _context: Any = None) -> Dict[str, Any
108112
return get_api()
109113
if resource == "/token":
110114
return handler_token.get_token_provider_info()
115+
if resource == "/health":
116+
return handler_health.get_health()
111117
if resource == "/topics":
112118
return handler_topic.get_topics_list()
113119
if resource == "/topics/{topic_name}":

src/handlers/handler_health.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
#
2+
# Copyright 2025 ABSA Group Limited
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#
16+
17+
"""
18+
This module provides the HandlerHealth class for service health monitoring.
19+
"""
20+
import json
21+
import logging
22+
import os
23+
from datetime import datetime, timezone
24+
from typing import Dict, Any
25+
26+
from src.writers import writer_eventbridge, writer_kafka, writer_postgres
27+
28+
logger = logging.getLogger(__name__)
29+
log_level = os.environ.get("LOG_LEVEL", "INFO")
30+
logger.setLevel(log_level)
31+
32+
33+
class HandlerHealth:
    """
    HandlerHealth manages service health checks and dependency status monitoring.

    The handler remembers when it was created (to report uptime) and inspects
    the writer modules to decide whether the service is healthy or degraded.
    """

    def __init__(self, logger_instance: logging.Logger, config: Dict[str, Any]):
        """
        Initialize the health handler.

        Args:
            logger_instance: Shared application logger; all health check logging goes through it.
            config: Configuration dictionary (stored on the instance; not read by get_health).
        """
        self.logger = logger_instance
        self.config = config
        # Timezone-aware creation time; uptime is measured from this instant.
        self.start_time = datetime.now(timezone.utc)

    def get_health(self) -> Dict[str, Any]:
        """
        Check service health and return status.

        Performs lightweight dependency checks by verifying that the writer STATE
        dictionaries contain their required keys and that the PostgreSQL writer
        module exposes a ``logger`` attribute. No network calls are made here.

        Returns:
            Dict[str, Any]: API Gateway response with health status.
                - 200: body ``{"status": "ok", "uptime_seconds": <int>}``
                - 503: body ``{"status": "degraded", "details": {<dependency>: "not_initialized"}}``
        """
        # Use the injected logger so health checks share the application's logging setup.
        self.logger.debug("Handling GET Health")

        # (details key, log label, STATE dict, keys that must be present).
        # STATE is looked up on every call so the result reflects the writers' current state.
        state_checks = (
            ("kafka", "Kafka", writer_kafka.STATE, ("logger", "producer")),
            (
                "eventbridge",
                "EventBridge",
                writer_eventbridge.STATE,
                ("logger", "client", "event_bus_arn"),
            ),
        )

        details: Dict[str, str] = {}
        for name, label, state, required_keys in state_checks:
            # Only key presence is verified; the stored values are not inspected.
            if not all(key in state for key in required_keys):
                details[name] = "not_initialized"
                self.logger.debug("%s writer not properly initialized", label)

        # The PostgreSQL writer is not checked through a STATE dict here; only the
        # presence of its module-level ``logger`` attribute is verified.
        if not hasattr(writer_postgres, "logger"):
            details["postgres"] = "not_initialized"
            self.logger.debug("PostgreSQL writer not accessible")

        if details:
            self.logger.debug("Health check degraded - some dependencies not initialized: %s", details)
            return {
                "statusCode": 503,
                "headers": {"Content-Type": "application/json"},
                "body": json.dumps({"status": "degraded", "details": details}),
            }

        # Whole seconds elapsed since the handler was constructed.
        uptime_seconds = int((datetime.now(timezone.utc) - self.start_time).total_seconds())

        self.logger.debug("Health check passed - all dependencies healthy")
        return {
            "statusCode": 200,
            "headers": {"Content-Type": "application/json"},
            "body": json.dumps({"status": "ok", "uptime_seconds": uptime_seconds}),
        }
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
#
2+
# Copyright 2025 ABSA Group Limited
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#
16+
17+
import json
18+
from unittest.mock import MagicMock, patch
19+
import logging
20+
21+
from src.handlers.handler_health import HandlerHealth
22+
23+
24+
## get_health() - healthy state
25+
def test_get_health_all_dependencies_healthy():
    """With every writer fully initialized, get_health answers 200/"ok" and reports an integer uptime."""
    test_logger = logging.getLogger("test")
    health_handler = HandlerHealth(test_logger, {})

    # Writer states carrying every key the health check looks for
    kafka_ready = {"logger": test_logger, "producer": MagicMock()}
    eventbridge_ready = {
        "logger": test_logger,
        "client": MagicMock(),
        "event_bus_arn": "arn:aws:events:us-east-1:123456789012:event-bus/my-bus",
    }

    with (
        patch("src.handlers.handler_health.writer_kafka.STATE", kafka_ready),
        patch("src.handlers.handler_health.writer_eventbridge.STATE", eventbridge_ready),
        patch("src.handlers.handler_health.writer_postgres.logger", test_logger),
    ):
        result = health_handler.get_health()

    assert result["statusCode"] == 200
    payload = json.loads(result["body"])
    assert payload["status"] == "ok"
    assert "uptime_seconds" in payload
    uptime = payload["uptime_seconds"]
    assert isinstance(uptime, int)
    assert uptime >= 0
52+
53+
54+
## get_health() - degraded state - kafka
55+
def test_get_health_kafka_not_initialized():
    """A Kafka STATE without its producer key makes get_health answer 503 and flag kafka."""
    test_logger = logging.getLogger("test")
    health_handler = HandlerHealth(test_logger, {})

    # Kafka lacks the "producer" key; the other writers are complete
    kafka_incomplete = {"logger": test_logger}
    eventbridge_ready = {"logger": test_logger, "client": MagicMock(), "event_bus_arn": "arn"}

    with (
        patch("src.handlers.handler_health.writer_kafka.STATE", kafka_incomplete),
        patch("src.handlers.handler_health.writer_eventbridge.STATE", eventbridge_ready),
        patch("src.handlers.handler_health.writer_postgres.logger", test_logger),
    ):
        result = health_handler.get_health()

    assert result["statusCode"] == 503
    payload = json.loads(result["body"])
    assert payload["status"] == "degraded"
    assert "details" in payload
    reported = payload["details"]
    assert "kafka" in reported
    assert reported["kafka"] == "not_initialized"
78+
79+
80+
## get_health() - degraded state - eventbridge
81+
def test_get_health_eventbridge_not_initialized():
    """An EventBridge STATE missing its client key makes get_health answer 503 and flag eventbridge."""
    test_logger = logging.getLogger("test")
    health_handler = HandlerHealth(test_logger, {})

    # EventBridge holds only the logger; Kafka and PostgreSQL look initialized
    kafka_ready = {"logger": test_logger, "producer": MagicMock()}
    eventbridge_incomplete = {"logger": test_logger}

    with (
        patch("src.handlers.handler_health.writer_kafka.STATE", kafka_ready),
        patch("src.handlers.handler_health.writer_eventbridge.STATE", eventbridge_incomplete),
        patch("src.handlers.handler_health.writer_postgres.logger", test_logger),
    ):
        result = health_handler.get_health()

    assert result["statusCode"] == 503
    payload = json.loads(result["body"])
    assert payload["status"] == "degraded"
    reported = payload["details"]
    assert "eventbridge" in reported
    assert reported["eventbridge"] == "not_initialized"
100+
101+
102+
## get_health() - degraded state - multiple failures
103+
def test_get_health_multiple_dependencies_not_initialized():
    """Health check returns 503 and reports every writer that is not initialized.

    All three dependencies are made unavailable, so the details mapping must
    list each of them; asserting the exact mapping also covers the PostgreSQL
    branch, which a mere length check would let slip through.
    """
    logger = logging.getLogger("test")
    config = {}
    handler = HandlerHealth(logger, config)

    # Mock all writers as not initialized
    with (
        patch("src.handlers.handler_health.writer_kafka.STATE", {}),
        patch("src.handlers.handler_health.writer_eventbridge.STATE", {}),
        # spec=[] yields a mock without attributes, so reading .logger raises AttributeError
        patch("src.handlers.handler_health.writer_postgres", spec=[]),
    ):
        response = handler.get_health()

    assert response["statusCode"] == 503
    body = json.loads(response["body"])
    assert body["status"] == "degraded"
    assert body["details"] == {
        "kafka": "not_initialized",
        "eventbridge": "not_initialized",
        "postgres": "not_initialized",
    }
123+
124+
125+
## get_health() - uptime calculation
126+
def test_get_health_uptime_is_positive():
    """Verify uptime_seconds reflects the time elapsed since the handler started.

    The handler's start_time is moved two minutes into the past so the reported
    uptime must be strictly positive. A freshly constructed handler reports 0,
    which cannot tell a working calculation from one that always returns zero.
    """
    # Local import: this test module does not import datetime at file level.
    from datetime import datetime, timedelta, timezone

    logger = logging.getLogger("test")
    config = {}
    handler = HandlerHealth(logger, config)

    backdated_seconds = 120
    handler.start_time = datetime.now(timezone.utc) - timedelta(seconds=backdated_seconds)

    with (
        patch("src.handlers.handler_health.writer_kafka.STATE", {"logger": logger, "producer": MagicMock()}),
        patch(
            "src.handlers.handler_health.writer_eventbridge.STATE",
            {"logger": logger, "client": MagicMock(), "event_bus_arn": "arn"},
        ),
        patch("src.handlers.handler_health.writer_postgres.logger", logger),
    ):
        response = handler.get_health()

    assert response["statusCode"] == 200
    body = json.loads(response["body"])
    assert "uptime_seconds" in body
    assert isinstance(body["uptime_seconds"], int)
    # Generous upper bound keeps the test stable on slow runners while still
    # rejecting values unrelated to the backdated start time.
    assert backdated_seconds <= body["uptime_seconds"] < backdated_seconds + 60
146+
147+
148+
## Integration test with event_gate_module
149+
def test_health_endpoint_integration(event_gate_module, make_event):
    """Send a /health event through lambda_handler and expect a healthy response."""
    result = event_gate_module.lambda_handler(make_event("/health"))

    # Expected to be 200 because conftest is said to mock the writers as initialized
    assert result["statusCode"] == 200
    payload = json.loads(result["body"])
    assert payload["status"] == "ok"
    assert "uptime_seconds" in payload

0 commit comments

Comments
 (0)