From c91ffff75dfc65436122c0e8b45281baeb688ad8 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 13 Mar 2026 19:20:22 +0000 Subject: [PATCH 01/15] feat(logging): Logging improvements --- pyproject.toml | 3 + src/common/core/constants.py | 2 + src/common/core/logging.py | 180 ++++++++++++- src/common/core/main.py | 8 +- src/common/core/sentry.py | 22 ++ src/common/gunicorn/logging.py | 29 ++- tests/unit/common/core/test_logging.py | 285 ++++++++++++++++++++- tests/unit/common/core/test_main.py | 45 ++++ tests/unit/common/core/test_sentry.py | 51 ++++ tests/unit/common/gunicorn/test_logging.py | 4 +- uv.lock | 28 ++ 11 files changed, 636 insertions(+), 21 deletions(-) create mode 100644 src/common/core/sentry.py create mode 100644 tests/unit/common/core/test_main.py create mode 100644 tests/unit/common/core/test_sentry.py diff --git a/pyproject.toml b/pyproject.toml index 911f3645..823c4462 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,8 @@ optional-dependencies = { test-tools = [ "psycopg2-binary (>=2.9,<3)", "requests", "simplejson (>=3,<4)", + "structlog (>=24.4,<26)", + "typing_extensions", ], task-processor = [ "backoff (>=2.2.1,<3.0.0)", "django (>4,<6)", @@ -82,6 +84,7 @@ dev = [ "pytest-freezegun (>=0.4.2, <1.0.0)", "pytest-httpserver (>=1.1.3, <2.0.0)", "pytest-mock (>=3.14.0, <4.0.0)", + "sentry-sdk (>=2.0.0, <3.0.0)", "setuptools (>=78.1.1, <79.0.0)", "types-python-dateutil (>=2.9.0.20250516, <3.0.0)", "types-simplejson (>=3.20.0.20250326, <4.0.0)", diff --git a/src/common/core/constants.py b/src/common/core/constants.py index b55a6ca0..9e41f99b 100644 --- a/src/common/core/constants.py +++ b/src/common/core/constants.py @@ -1 +1,3 @@ DEFAULT_PROMETHEUS_MULTIPROC_DIR_NAME = "flagsmith-prometheus" + +LOGGING_DEFAULT_ROOT_LOG_LEVEL = "WARNING" diff --git a/src/common/core/logging.py b/src/common/core/logging.py index 44534688..60c180ac 100644 --- a/src/common/core/logging.py +++ b/src/common/core/logging.py @@ -1,14 +1,39 @@ import json import logging +import logging.config +import os +import threading from typing import Any +import structlog +import structlog.contextvars +import structlog.dev +import structlog.processors +import structlog.stdlib +from structlog.typing import EventDict, Processor, WrappedLogger +from typing_extensions import TypedDict + +from common.core.constants import LOGGING_DEFAULT_ROOT_LOG_LEVEL + +logger = logging.getLogger(__name__) + + +class JsonRecord(TypedDict, extra_items=Any, total=False): # type: ignore[call-arg] # TODO https://github.com/python/mypy/issues/18176 + levelname: str + message: str + timestamp: str + logger_name: str + pid: int | None + thread_name: str | None + exc_info: str + class JsonFormatter(logging.Formatter): """Custom formatter for json logs.""" - def get_json_record(self, record: logging.LogRecord) -> dict[str, Any]: + def get_json_record(self, record: logging.LogRecord) -> JsonRecord: formatted_message = record.getMessage() - json_record = { + json_record: JsonRecord = { "levelname": record.levelname, "message": formatted_message, "timestamp": self.formatTime(record, self.datefmt), @@ -22,3 +47,154 @@ def get_json_record(self, record: logging.LogRecord) -> dict[str, Any]: def format(self, record: logging.LogRecord) -> str: return json.dumps(self.get_json_record(record)) + + +def setup_logging( + log_level: str = "INFO", + log_format: str = "generic", + logging_configuration_file: str | None = None, + extra_processors: list[Processor] | None = None, + application_loggers: list[str] | None = None, +) -> None: + """ + Set up logging for the application. + + This should be called early, before Django settings are loaded, to ensure + that all log output is properly formatted from the start. + + Args: + log_level: The log level for application code (e.g. "DEBUG", "INFO"). + log_format: Either "generic" or "json". + logging_configuration_file: Path to a JSON logging config file. + If provided, this takes precedence over other format options. + extra_processors: Additional structlog processors (e.g. OTel, Sentry). + application_loggers: Top-level logger names for application packages. + These loggers are set to ``log_level`` while the root logger uses + ``max(log_level, WARNING)`` to suppress noise from third-party + libraries. If ``log_level`` is DEBUG, everything logs at DEBUG. + """ + if extra_processors is None: + extra_processors = [] + + if logging_configuration_file: + with open(logging_configuration_file) as f: + config = json.load(f) + logging.config.dictConfig(config) + else: + log_level_int = logging.getLevelNamesMapping()[log_level.upper()] + root_level_int = logging.getLevelNamesMapping()[LOGGING_DEFAULT_ROOT_LOG_LEVEL] + # Suppress third-party noise at WARNING, but if the user requests + # DEBUG, honour that for the entire process. + effective_root_level = ( + log_level_int if log_level_int < logging.INFO else root_level_int + ) + + dict_config: dict[str, Any] = { + "version": 1, + "disable_existing_loggers": False, + "handlers": { + "console": { + "class": "logging.StreamHandler", + "stream": "ext://sys.stdout", + "level": log_level, + }, + }, + "root": { + "level": effective_root_level, + "handlers": ["console"], + }, + "loggers": { + name: {"level": log_level, "handlers": [], "propagate": True} + for name in application_loggers or [] + }, + } + logging.config.dictConfig(dict_config) + + setup_structlog(log_format=log_format, extra_processors=extra_processors) + + +def map_event_to_json_record( + logger: WrappedLogger, + method_name: str, + event_dict: EventDict, +) -> EventDict: + """Map structlog fields to match :class:`JsonFormatter` output schema.""" + record: JsonRecord = { + "message": event_dict.pop("event", ""), + "levelname": event_dict.pop("level", "").upper(), + "logger_name": event_dict.pop("logger", ""), + "pid": os.getpid(), + "thread_name": threading.current_thread().name, + } + if "exception" in event_dict: + record["exc_info"] = event_dict.pop("exception") + # Merge remaining structlog keys (e.g. extra_key from bind()) with the + # canonical record so they appear in the JSON output. + event_dict.update(record) + return event_dict + + +def build_processor_formatter(log_format: str) -> structlog.stdlib.ProcessorFormatter: + """Build a ``ProcessorFormatter`` for the given log format. + + This is used both by :func:`setup_structlog` (for the root logger) and by + Gunicorn logger classes so that all log output shares the same rendering. + """ + if log_format == "json": + renderer_processors: list[Processor] = [ + map_event_to_json_record, + structlog.processors.JSONRenderer(), + ] + else: + renderer_processors = [ + structlog.dev.ConsoleRenderer(), + ] + + foreign_pre_chain: list[Processor] = [ + structlog.contextvars.merge_contextvars, + structlog.stdlib.add_logger_name, + structlog.stdlib.add_log_level, + structlog.processors.TimeStamper(fmt="iso"), + structlog.stdlib.ExtraAdder(), + ] + + return structlog.stdlib.ProcessorFormatter( + processors=[ + structlog.stdlib.ProcessorFormatter.remove_processors_meta, + *renderer_processors, + ], + foreign_pre_chain=foreign_pre_chain, + ) + + +def setup_structlog( + log_format: str, + extra_processors: list[Processor], +) -> None: + """Configure structlog to route through stdlib logging.""" + formatter = build_processor_formatter(log_format) + + # Replace the formatter on existing root handlers with ProcessorFormatter. + root = logging.getLogger() + for handler in root.handlers: + handler.setFormatter(formatter) + + structlog.configure( + processors=[ + structlog.contextvars.merge_contextvars, + structlog.stdlib.filter_by_level, + structlog.stdlib.add_logger_name, + structlog.stdlib.add_log_level, + structlog.stdlib.PositionalArgumentsFormatter(), + structlog.processors.StackInfoRenderer(), + structlog.processors.UnicodeDecoder(), + structlog.processors.format_exc_info, + structlog.processors.TimeStamper(fmt="iso"), + *extra_processors, + structlog.stdlib.ProcessorFormatter.wrap_for_formatter, + ], + wrapper_class=structlog.stdlib.BoundLogger, + context_class=dict, + logger_factory=structlog.stdlib.LoggerFactory(), + cache_logger_on_first_use=True, + ) diff --git a/src/common/core/main.py b/src/common/core/main.py index c1638064..6a47a7f9 100644 --- a/src/common/core/main.py +++ b/src/common/core/main.py @@ -10,6 +10,7 @@ ) from common.core.cli import healthcheck +from common.core.logging import setup_logging logger = logging.getLogger(__name__) @@ -32,7 +33,12 @@ def ensure_cli_env() -> typing.Generator[None, None, None]: """ ctx = contextlib.ExitStack() - # TODO @khvn26 Move logging setup to here + # Set up logging early, before Django settings are loaded. + setup_logging( + log_level=os.environ.get("LOG_LEVEL", "INFO"), + log_format=os.environ.get("LOG_FORMAT", "generic"), + logging_configuration_file=os.environ.get("LOGGING_CONFIGURATION_FILE"), + ) # Prometheus multiproc support if not os.environ.get("PROMETHEUS_MULTIPROC_DIR"): diff --git a/src/common/core/sentry.py b/src/common/core/sentry.py new file mode 100644 index 00000000..3bfd552f --- /dev/null +++ b/src/common/core/sentry.py @@ -0,0 +1,22 @@ +import sentry_sdk +from structlog.typing import EventDict, WrappedLogger + +_SKIP_CONTEXT_FIELDS = frozenset({"event", "level", "timestamp", "_record"}) + + +def sentry_processor( + logger: WrappedLogger, + method_name: str, + event_dict: EventDict, +) -> EventDict: + """ + Structlog processor that enriches Sentry with structured context and tags. + + Since structlog routes through stdlib, Sentry's LoggingIntegration + automatically captures ERROR+ logs as Sentry events. This processor + adds structured context on top of that. + """ + context = {k: v for k, v in event_dict.items() if k not in _SKIP_CONTEXT_FIELDS} + sentry_sdk.set_context("structlog", context) + + return event_dict diff --git a/src/common/gunicorn/logging.py b/src/common/gunicorn/logging.py index 16a54508..01f5cbb8 100644 --- a/src/common/gunicorn/logging.py +++ b/src/common/gunicorn/logging.py @@ -11,7 +11,7 @@ Statsd as StatsdGunicornLogger, ) -from common.core.logging import JsonFormatter +from common.core.logging import JsonFormatter, JsonRecord, build_processor_formatter from common.gunicorn import metrics from common.gunicorn.constants import ( WSGI_EXTRA_PREFIX, @@ -21,6 +21,18 @@ from common.gunicorn.utils import get_extra +class GunicornAccessLogJsonRecord(JsonRecord, extra_items=Any, total=False): # type: ignore[call-arg] # TODO https://github.com/python/mypy/issues/18176 + time: str + path: str + remote_ip: str + route: str + method: str + status: str + user_agent: str + duration_in_ms: int + response_size_in_bytes: int + + class GunicornAccessLogJsonFormatter(JsonFormatter): def _get_extra(self, record_args: dict[str, Any]) -> dict[str, Any]: ret: dict[str, dict[str, Any]] = {} @@ -49,7 +61,7 @@ def _get_extra(self, record_args: dict[str, Any]) -> dict[str, Any]: return ret - def get_json_record(self, record: logging.LogRecord) -> dict[str, Any]: + def get_json_record(self, record: logging.LogRecord) -> GunicornAccessLogJsonRecord: args = record.args if TYPE_CHECKING: @@ -61,6 +73,7 @@ def get_json_record(self, record: logging.LogRecord) -> dict[str, Any]: return { **super().get_json_record(record), + **self._get_extra(args), # type: ignore[typeddict-item] # TODO https://github.com/python/mypy/issues/18176 "time": datetime.strptime(args["t"], "[%d/%b/%Y:%H:%M:%S %z]").isoformat(), "path": url, "remote_ip": args["h"], @@ -70,7 +83,6 @@ def get_json_record(self, record: logging.LogRecord) -> dict[str, Any]: "user_agent": args["a"], "duration_in_ms": args["M"], "response_size_in_bytes": args["B"] or 0, - **self._get_extra(args), } @@ -106,15 +118,14 @@ def access( class GunicornJsonCapableLogger(PrometheusGunicornLogger): def setup(self, cfg: Config) -> None: super().setup(cfg) - if getattr(settings, "LOG_FORMAT", None) == "json": - self._set_handler( - self.error_log, - cfg.errorlog, - JsonFormatter(), - ) + log_format = getattr(settings, "LOG_FORMAT", "generic") + if log_format == "json": self._set_handler( self.access_log, cfg.accesslog, GunicornAccessLogJsonFormatter(), stream=sys.stdout, ) + formatter = build_processor_formatter(log_format) + for handler in self.error_log.handlers: + handler.setFormatter(formatter) diff --git a/tests/unit/common/core/test_logging.py b/tests/unit/common/core/test_logging.py index e42bd70e..83902d52 100644 --- a/tests/unit/common/core/test_logging.py +++ b/tests/unit/common/core/test_logging.py @@ -1,10 +1,27 @@ import json import logging import os +from pathlib import Path +from unittest.mock import MagicMock import pytest +import structlog -from common.core.logging import JsonFormatter +from common.core.logging import JsonFormatter, setup_logging + + +@pytest.fixture(autouse=True) +def reset_logging() -> None: + """Reset logging and structlog state before each test.""" + root = logging.getLogger() + for handler in root.handlers[:]: + root.removeHandler(handler) + structlog.reset_defaults() + + +@pytest.fixture +def test_app_loggers() -> list[str]: + return ["test"] @pytest.mark.freeze_time("2023-12-08T06:05:47+00:00") @@ -21,12 +38,6 @@ def test_json_formatter__format_log__outputs_expected( expected_pid = os.getpid() expected_module_path = os.path.abspath(request.path) - expected_tb_string = ( - "Traceback (most recent call last):\n" - f' File "{expected_module_path}",' - " line 33, in _log_traceback\n" - " raise Exception()\nException" - ) def _log_traceback() -> None: try: @@ -34,6 +45,14 @@ def _log_traceback() -> None: except Exception as exc: logger.error("this is an error", exc_info=exc) + expected_lineno = _log_traceback.__code__.co_firstlineno + 2 + expected_tb_string = ( + "Traceback (most recent call last):\n" + f' File "{expected_module_path}",' + f" line {expected_lineno}, in _log_traceback\n" + " raise Exception()\nException" + ) + # When logger.info("hello %s, %d", "arg1", 22.22) _log_traceback() @@ -58,3 +77,255 @@ def _log_traceback() -> None: "exc_info": expected_tb_string, }, ] + + +def test_setup_logging__generic_format__configures_stdlib( + capsys: pytest.CaptureFixture[str], + test_app_loggers: list[str], +) -> None: + # Given + setup_logging( + log_level="DEBUG", log_format="generic", application_loggers=test_app_loggers + ) + test_logger = logging.getLogger("test.generic") + + # When + test_logger.info("hello world") + + # Then + output = capsys.readouterr().out + assert "hello world" in output + + +def test_setup_logging__json_format__stdlib_outputs_json( + capsys: pytest.CaptureFixture[str], + test_app_loggers: list[str], +) -> None: + # Given + setup_logging( + log_level="DEBUG", log_format="json", application_loggers=test_app_loggers + ) + test_logger = logging.getLogger("test.json_stdlib") + + # When + test_logger.info("stdlib json message") + + # Then + output = capsys.readouterr().out.strip() + parsed = json.loads(output) + assert parsed["message"] == "stdlib json message" + assert parsed["levelname"] == "INFO" + assert parsed["logger_name"] == "test.json_stdlib" + assert "pid" in parsed + assert "thread_name" in parsed + + +def test_setup_logging__json_format__structlog_outputs_json( + capsys: pytest.CaptureFixture[str], + test_app_loggers: list[str], +) -> None: + # Given + setup_logging( + log_level="DEBUG", log_format="json", application_loggers=test_app_loggers + ) + structured_logger = structlog.get_logger("test.json_structlog") + + # When + structured_logger.info("structlog json message", extra_key="extra_value") + + # Then + output = capsys.readouterr().out.strip() + parsed = json.loads(output) + assert parsed["message"] == "structlog json message" + assert parsed["levelname"] == "INFO" + assert parsed["logger_name"] == "test.json_structlog" + assert parsed["pid"] == os.getpid() + assert parsed["extra_key"] == "extra_value" + + +def test_setup_logging__generic_format__structlog_outputs_console( + capsys: pytest.CaptureFixture[str], + test_app_loggers: list[str], +) -> None: + # Given + setup_logging( + log_level="DEBUG", log_format="generic", application_loggers=test_app_loggers + ) + structured_logger = structlog.get_logger("test.generic_structlog") + + # When + structured_logger.info("structlog console message") + + # Then + output = capsys.readouterr().out + assert "structlog console message" in output + + +def test_setup_logging__custom_config_file__loads_dictconfig( + tmp_path: Path, +) -> None: + # Given + config = { + "version": 1, + "disable_existing_loggers": False, + "handlers": { + "console": { + "class": "logging.StreamHandler", + "stream": "ext://sys.stdout", + }, + }, + "loggers": { + "custom_config_test": { + "level": "CRITICAL", + "handlers": ["console"], + }, + }, + } + config_file = tmp_path / "logging.json" + config_file.write_text(json.dumps(config)) + + # When + setup_logging(logging_configuration_file=str(config_file)) + + # Then + custom_logger = logging.getLogger("custom_config_test") + assert custom_logger.level == logging.CRITICAL + + +def test_setup_logging__extra_processors__passed_to_structlog( + test_app_loggers: list[str], +) -> None: + # Given + mock_processor = MagicMock() + setup_logging( + log_level="DEBUG", + log_format="generic", + extra_processors=[mock_processor], + application_loggers=test_app_loggers, + ) + structured_logger = structlog.get_logger("test.extra_proc") + + # When + structured_logger.info("test extra") + + # Then + mock_processor.assert_called_once() + event_dict = mock_processor.call_args[0][2] + assert event_dict["event"] == "test extra" + + +def test_setup_logging__json_format__structlog_exception_included( + capsys: pytest.CaptureFixture[str], + test_app_loggers: list[str], +) -> None: + # Given + setup_logging( + log_level="DEBUG", log_format="json", application_loggers=test_app_loggers + ) + structured_logger = structlog.get_logger("test.exc") + + # When + try: + raise ValueError("boom") + except ValueError: + structured_logger.exception("something failed") + + # Then + output = capsys.readouterr().out.strip() + parsed = json.loads(output) + assert parsed["message"] == "something failed" + assert parsed["levelname"] == "ERROR" + assert "exc_info" in parsed + assert "ValueError: boom" in parsed["exc_info"] + + +def test_setup_logging__json_format__stdlib_and_structlog_same_schema( + capsys: pytest.CaptureFixture[str], + test_app_loggers: list[str], +) -> None: + # Given + setup_logging( + log_level="DEBUG", log_format="json", application_loggers=test_app_loggers + ) + stdlib_logger = logging.getLogger("test.schema.stdlib") + structured_logger = structlog.get_logger("test.schema.structlog") + + # When + stdlib_logger.info("stdlib message") + stdlib_output = capsys.readouterr().out.strip() + + structured_logger.info("structlog message") + structlog_output = capsys.readouterr().out.strip() + + # Then + expected_keys = { + "timestamp", + "message", + "levelname", + "logger_name", + "pid", + "thread_name", + } + stdlib_keys = set(json.loads(stdlib_output).keys()) + structlog_keys = set(json.loads(structlog_output).keys()) + assert stdlib_keys == structlog_keys == expected_keys + + +def test_setup_logging__info_level__root_at_warning() -> None: + # Given / When + setup_logging(log_level="INFO", log_format="generic") + + # Then + root = logging.getLogger() + assert root.level == logging.WARNING + + +def test_setup_logging__debug_level__root_at_debug() -> None: + # Given / When + setup_logging(log_level="DEBUG", log_format="generic") + + # Then + root = logging.getLogger() + assert root.level == logging.DEBUG + + +def test_setup_logging__application_loggers__set_to_log_level() -> None: + # Given / When + setup_logging( + log_level="INFO", + log_format="generic", + application_loggers=["myapp", "mylib"], + ) + + # Then + root = logging.getLogger() + assert root.level == logging.WARNING + assert logging.getLogger("myapp").level == logging.INFO + assert logging.getLogger("mylib").level == logging.INFO + + +def test_setup_logging__application_loggers__third_party_filtered( + capsys: pytest.CaptureFixture[str], +) -> None: + # Given + setup_logging(log_level="INFO", log_format="generic", application_loggers=["myapp"]) + app_logger = logging.getLogger("myapp.views") + third_party_logger = logging.getLogger("urllib3.connectionpool") + + # When + app_logger.info("app message") + third_party_logger.info("noisy third-party message") + + # Then + output = capsys.readouterr().out + assert "app message" in output + assert "noisy third-party message" not in output + + +def test_setup_logging__default_args__root_at_warning() -> None: + # Given / When + setup_logging() + + # Then + root = logging.getLogger() + assert root.level == logging.WARNING diff --git a/tests/unit/common/core/test_main.py b/tests/unit/common/core/test_main.py new file mode 100644 index 00000000..1cb6aae2 --- /dev/null +++ b/tests/unit/common/core/test_main.py @@ -0,0 +1,45 @@ +import os +from unittest.mock import patch + +from common.core.main import ensure_cli_env + + +def test_ensure_cli_env__env_vars_set__calls_setup_logging_with_env_values() -> None: + # Given + env = { + "LOG_LEVEL": "WARNING", + "LOG_FORMAT": "json", + "LOGGING_CONFIGURATION_FILE": "/tmp/logging.json", + } + + # When + with ( + patch.dict(os.environ, env, clear=False), + patch("common.core.main.setup_logging") as mock_setup, + ensure_cli_env(), + ): + pass + + # Then + mock_setup.assert_called_once_with( + log_level="WARNING", + log_format="json", + logging_configuration_file="/tmp/logging.json", + ) + + +def test_ensure_cli_env__no_env_vars__calls_setup_logging_with_defaults() -> None: + # Given / When + with ( + patch.dict(os.environ, {}, clear=False), + patch("common.core.main.setup_logging") as mock_setup, + ensure_cli_env(), + ): + pass + + # Then + mock_setup.assert_called_once_with( + log_level="INFO", + log_format="generic", + logging_configuration_file=None, + ) diff --git a/tests/unit/common/core/test_sentry.py b/tests/unit/common/core/test_sentry.py new file mode 100644 index 00000000..7f7ce263 --- /dev/null +++ b/tests/unit/common/core/test_sentry.py @@ -0,0 +1,51 @@ +from unittest.mock import patch + +from common.core.sentry import sentry_processor + + +def test_sentry_processor__context_set__includes_non_meta_fields() -> None: + # Given + event_dict = { + "event": "something happened", + "level": "error", + "timestamp": "2024-01-01T00:00:00", + "environment_id": "123", + "extra_field": "value", + } + + # When + with patch("common.core.sentry.sentry_sdk") as mock_sentry: + result = sentry_processor(None, "error", event_dict) + + # Then + assert result is event_dict + mock_sentry.set_context.assert_called_once_with( + "structlog", + { + "environment_id": "123", + "extra_field": "value", + }, + ) + + +def test_sentry_processor__meta_fields_present__excludes_from_context() -> None: + # Given + event_dict = { + "event": "test", + "level": "info", + "timestamp": "2024-01-01T00:00:00", + "_record": "internal", + "custom_field": "keep_me", + } + + # When + with patch("common.core.sentry.sentry_sdk") as mock_sentry: + sentry_processor(None, "info", event_dict) + + # Then + context = mock_sentry.set_context.call_args[0][1] + assert "event" not in context + assert "level" not in context + assert "timestamp" not in context + assert "_record" not in context + assert context["custom_field"] == "keep_me" diff --git a/tests/unit/common/gunicorn/test_logging.py b/tests/unit/common/gunicorn/test_logging.py index f501b52d..e84990d4 100644 --- a/tests/unit/common/gunicorn/test_logging.py +++ b/tests/unit/common/gunicorn/test_logging.py @@ -3,11 +3,11 @@ from datetime import datetime, timedelta import pytest +import structlog.stdlib from gunicorn.config import AccessLogFormat, Config # type: ignore[import-untyped] from pytest_django.fixtures import SettingsWrapper from pytest_mock import MockerFixture -from common.core.logging import JsonFormatter from common.gunicorn.logging import ( GunicornAccessLogJsonFormatter, GunicornJsonCapableLogger, @@ -150,7 +150,7 @@ def test_gunicorn_json_capable_logger__json_log_format__sets_expected_formatters # Then assert isinstance( logger.error_log.handlers[0].formatter, - JsonFormatter, + structlog.stdlib.ProcessorFormatter, ) assert isinstance( logger.access_log.handlers[0].formatter, diff --git a/uv.lock b/uv.lock index 11c16d11..531ad7d0 100644 --- a/uv.lock +++ b/uv.lock @@ -457,6 +457,8 @@ common-core = [ { name = "psycopg2-binary" }, { name = "requests" }, { name = "simplejson" }, + { name = "structlog" }, + { name = "typing-extensions" }, ] flagsmith-schemas = [ { name = "flagsmith-flag-engine" }, @@ -491,6 +493,7 @@ dev = [ { name = "pytest-freezegun" }, { name = "pytest-httpserver" }, { name = "pytest-mock" }, + { name = "sentry-sdk" }, { name = "setuptools" }, { name = "types-python-dateutil" }, { name = "types-simplejson" }, @@ -518,6 +521,8 @@ requires-dist = [ { name = "requests", marker = "extra == 'common-core'" }, { name = "simplejson", marker = "extra == 'common-core'", specifier = ">=3,<4" }, { name = "simplejson", marker = "extra == 'flagsmith-schemas'" }, + { name = "structlog", marker = "extra == 'common-core'", specifier = ">=24.4,<26" }, + { name = "typing-extensions", marker = "extra == 'common-core'" }, { name = "typing-extensions", marker = "extra == 'flagsmith-schemas'" }, ] provides-extras = ["test-tools", "common-core", "task-processor", "flagsmith-schemas"] @@ -539,6 +544,7 @@ dev = [ { name = "pytest-freezegun", specifier = ">=0.4.2,<1.0.0" }, { name = "pytest-httpserver", specifier = ">=1.1.3,<2.0.0" }, { name = "pytest-mock", specifier = ">=3.14.0,<4.0.0" }, + { name = "sentry-sdk", specifier = ">=2.0.0,<3.0.0" }, { name = "setuptools", specifier = ">=78.1.1,<79.0.0" }, { name = "types-python-dateutil", specifier = ">=2.9.0.20250516,<3.0.0" }, { name = "types-simplejson", specifier = ">=3.20.0.20250326,<4.0.0" }, @@ -1543,6 +1549,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a6/24/4d91e05817e92e3a61c8a21e08fd0f390f5301f1c448b137c57c4bc6e543/semver-3.0.4-py3-none-any.whl", hash = "sha256:9c824d87ba7f7ab4a1890799cec8596f15c1241cb473404ea1cb0c55e4b04746", size = 17912, upload-time = "2025-01-24T13:19:24.949Z" }, ] +[[package]] +name = "sentry-sdk" +version = "2.54.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c8/e9/2e3a46c304e7fa21eaa70612f60354e32699c7102eb961f67448e222ad7c/sentry_sdk-2.54.0.tar.gz", hash = "sha256:2620c2575128d009b11b20f7feb81e4e4e8ae08ec1d36cbc845705060b45cc1b", size = 413813, upload-time = "2026-03-02T15:12:41.355Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/53/39/be412cc86bc6247b8f69e9383d7950711bd86f8d0a4a4b0fe8fad685bc21/sentry_sdk-2.54.0-py2.py3-none-any.whl", hash = "sha256:fd74e0e281dcda63afff095d23ebcd6e97006102cdc8e78a29f19ecdf796a0de", size = 439198, upload-time = "2026-03-02T15:12:39.546Z" }, +] + [[package]] name = "setuptools" version = "78.1.1" @@ -1618,6 +1637,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/25/70/001ee337f7aa888fb2e3f5fd7592a6afc5283adb1ed44ce8df5764070f22/sqlparse-0.5.4-py3-none-any.whl", hash = "sha256:99a9f0314977b76d776a0fcb8554de91b9bb8a18560631d6bc48721d07023dcb", size = 45933, upload-time = "2025-11-28T07:10:19.73Z" }, ] +[[package]] +name = "structlog" +version = "25.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ef/52/9ba0f43b686e7f3ddfeaa78ac3af750292662284b3661e91ad5494f21dbc/structlog-25.5.0.tar.gz", hash = "sha256:098522a3bebed9153d4570c6d0288abf80a031dfdb2048d59a49e9dc2190fc98", size = 1460830, upload-time = "2025-10-27T08:28:23.028Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/45/a132b9074aa18e799b891b91ad72133c98d8042c70f6240e4c5f9dabee2f/structlog-25.5.0-py3-none-any.whl", hash = "sha256:a8453e9b9e636ec59bd9e79bbd4a72f025981b3ba0f5837aebf48f02f37a7f9f", size = 72510, upload-time = "2025-10-27T08:28:21.535Z" }, +] + [[package]] name = "tomli" version = "2.3.0" From 59a8c129af21bb488aca9a39b8e4dd86c42965c8 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 13 Mar 2026 19:34:09 +0000 Subject: [PATCH 02/15] intro APPLICATION_LOGGERS --- src/common/core/main.py | 7 +++++++ tests/unit/common/core/test_main.py | 3 +++ 2 files changed, 10 insertions(+) diff --git a/src/common/core/main.py b/src/common/core/main.py index 6a47a7f9..fed21db4 100644 --- a/src/common/core/main.py +++ b/src/common/core/main.py @@ -34,10 +34,17 @@ def ensure_cli_env() -> typing.Generator[None, None, None]: ctx = contextlib.ExitStack() # Set up logging early, before Django settings are loaded. + application_loggers_env = os.environ.get("APPLICATION_LOGGERS", "") setup_logging( log_level=os.environ.get("LOG_LEVEL", "INFO"), log_format=os.environ.get("LOG_FORMAT", "generic"), logging_configuration_file=os.environ.get("LOGGING_CONFIGURATION_FILE"), + application_loggers=[ + logger_name + for name in application_loggers_env.split(",") + if (logger_name := name.strip()) + ] + or None, ) # Prometheus multiproc support diff --git a/tests/unit/common/core/test_main.py b/tests/unit/common/core/test_main.py index 1cb6aae2..f8a40b98 100644 --- a/tests/unit/common/core/test_main.py +++ b/tests/unit/common/core/test_main.py @@ -10,6 +10,7 @@ def test_ensure_cli_env__env_vars_set__calls_setup_logging_with_env_values() -> "LOG_LEVEL": "WARNING", "LOG_FORMAT": "json", "LOGGING_CONFIGURATION_FILE": "/tmp/logging.json", + "APPLICATION_LOGGERS": "myapp,mylib", } # When @@ -25,6 +26,7 @@ def test_ensure_cli_env__env_vars_set__calls_setup_logging_with_env_values() -> log_level="WARNING", log_format="json", logging_configuration_file="/tmp/logging.json", + application_loggers=["myapp", "mylib"], ) @@ -42,4 +44,5 @@ def test_ensure_cli_env__no_env_vars__calls_setup_logging_with_defaults() -> Non log_level="INFO", log_format="generic", logging_configuration_file=None, + application_loggers=None, ) From b32ea42772cd6953cd49503d247a1a66608d9b92 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 13 Mar 2026 19:40:13 +0000 Subject: [PATCH 03/15] fix tests --- .../task_processor/test_unit_task_processor_decorators.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/unit/task_processor/test_unit_task_processor_decorators.py b/tests/unit/task_processor/test_unit_task_processor_decorators.py index 27c577bf..d54a380a 100644 --- a/tests/unit/task_processor/test_unit_task_processor_decorators.py +++ b/tests/unit/task_processor/test_unit_task_processor_decorators.py @@ -61,7 +61,8 @@ def my_function(*args: str, **kwargs: str) -> None: assert len(caplog.records) == 1 assert ( - caplog.records[0].message == "Running function my_function in unmanaged thread." + caplog.records[0].getMessage() + == "Running function my_function in unmanaged thread." ) @@ -92,7 +93,8 @@ def my_function(*args: typing.Any, **kwargs: typing.Any) -> None: assert len(caplog.records) == 1 assert ( - caplog.records[0].message == "Running function my_function in unmanaged thread." + caplog.records[0].getMessage() + == "Running function my_function in unmanaged thread." ) From 83cb8a18f5907e4c7fb612060f37f4e60ee685a9 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Sun, 15 Mar 2026 10:58:51 +0000 Subject: [PATCH 04/15] no optional sentry --- pyproject.toml | 2 +- src/common/core/logging.py | 12 ++++-------- tests/unit/common/core/test_logging.py | 18 +++++++++--------- uv.lock | 4 ++-- 4 files changed, 16 insertions(+), 20 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 823c4462..9a67eeba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ optional-dependencies = { test-tools = [ "prometheus-client (>=0.0.16)", "psycopg2-binary (>=2.9,<3)", "requests", + "sentry-sdk (>=2.0.0,<3.0.0)", "simplejson (>=3,<4)", "structlog (>=24.4,<26)", "typing_extensions", @@ -84,7 +85,6 @@ dev = [ "pytest-freezegun (>=0.4.2, <1.0.0)", "pytest-httpserver (>=1.1.3, <2.0.0)", "pytest-mock (>=3.14.0, <4.0.0)", - "sentry-sdk (>=2.0.0, <3.0.0)", "setuptools (>=78.1.1, <79.0.0)", "types-python-dateutil (>=2.9.0.20250516, <3.0.0)", "types-simplejson (>=3.20.0.20250326, <4.0.0)", diff --git a/src/common/core/logging.py b/src/common/core/logging.py index 60c180ac..e8f98b7f 100644 --- a/src/common/core/logging.py +++ b/src/common/core/logging.py @@ -53,7 +53,6 @@ def setup_logging( log_level: str = "INFO", log_format: str = "generic", logging_configuration_file: str | None = None, - extra_processors: list[Processor] | None = None, application_loggers: list[str] | None = None, ) -> None: """ @@ -67,15 +66,11 @@ def setup_logging( log_format: Either "generic" or "json". logging_configuration_file: Path to a JSON logging config file. If provided, this takes precedence over other format options. - extra_processors: Additional structlog processors (e.g. OTel, Sentry). application_loggers: Top-level logger names for application packages. These loggers are set to ``log_level`` while the root logger uses ``max(log_level, WARNING)`` to suppress noise from third-party libraries. If ``log_level`` is DEBUG, everything logs at DEBUG. """ - if extra_processors is None: - extra_processors = [] - if logging_configuration_file: with open(logging_configuration_file) as f: config = json.load(f) @@ -110,7 +105,7 @@ def setup_logging( } logging.config.dictConfig(dict_config) - setup_structlog(log_format=log_format, extra_processors=extra_processors) + setup_structlog(log_format=log_format) def map_event_to_json_record( @@ -169,9 +164,10 @@ def build_processor_formatter(log_format: str) -> structlog.stdlib.ProcessorForm def setup_structlog( log_format: str, - extra_processors: list[Processor], ) -> None: """Configure structlog to route through stdlib logging.""" + from common.core.sentry import sentry_processor + formatter = build_processor_formatter(log_format) # Replace the formatter on existing root handlers with ProcessorFormatter. @@ -190,7 +186,7 @@ def setup_structlog( structlog.processors.UnicodeDecoder(), structlog.processors.format_exc_info, structlog.processors.TimeStamper(fmt="iso"), - *extra_processors, + sentry_processor, structlog.stdlib.ProcessorFormatter.wrap_for_formatter, ], wrapper_class=structlog.stdlib.BoundLogger, diff --git a/tests/unit/common/core/test_logging.py b/tests/unit/common/core/test_logging.py index 83902d52..1e1b3c7a 100644 --- a/tests/unit/common/core/test_logging.py +++ b/tests/unit/common/core/test_logging.py @@ -2,7 +2,7 @@ import logging import os from pathlib import Path -from unittest.mock import MagicMock +from unittest.mock import patch import pytest import structlog @@ -192,26 +192,26 @@ def test_setup_logging__custom_config_file__loads_dictconfig( assert custom_logger.level == logging.CRITICAL -def test_setup_logging__extra_processors__passed_to_structlog( +def test_setup_logging__structlog_configured__sentry_processor_wired( + capsys: pytest.CaptureFixture[str], test_app_loggers: list[str], ) -> None: # Given - mock_processor = MagicMock() setup_logging( log_level="DEBUG", log_format="generic", - extra_processors=[mock_processor], application_loggers=test_app_loggers, ) - structured_logger = structlog.get_logger("test.extra_proc") + structured_logger = structlog.get_logger("test.sentry_proc") # When - structured_logger.info("test extra") + with patch("common.core.sentry.sentry_sdk") as mock_sentry: + structured_logger.info("test sentry context", environment_id="env-1") # Then - mock_processor.assert_called_once() - event_dict = mock_processor.call_args[0][2] - assert event_dict["event"] == "test extra" + mock_sentry.set_context.assert_called_once() + context = mock_sentry.set_context.call_args[0][1] + assert context["environment_id"] == "env-1" def test_setup_logging__json_format__structlog_exception_included( diff --git a/uv.lock b/uv.lock index 531ad7d0..8b09a512 100644 --- a/uv.lock +++ b/uv.lock @@ -456,6 +456,7 @@ common-core = [ { name = "prometheus-client" }, { name = "psycopg2-binary" }, { name = "requests" }, + { name = "sentry-sdk" }, { name = "simplejson" }, { name = "structlog" }, { name = "typing-extensions" }, @@ -493,7 +494,6 @@ dev = [ { name = "pytest-freezegun" }, { name = "pytest-httpserver" }, { name = "pytest-mock" }, - { name = "sentry-sdk" }, { name = "setuptools" }, { name = "types-python-dateutil" }, { name = "types-simplejson" }, @@ -519,6 +519,7 @@ requires-dist = [ { name = "pyfakefs", marker = "extra == 'test-tools'", specifier = ">=5,<6" }, { name = "pytest-django", marker = "extra == 'test-tools'", specifier = ">=4,<5" }, { name = "requests", marker = "extra == 'common-core'" }, + { name = "sentry-sdk", marker = "extra == 'common-core'", specifier = ">=2.0.0,<3.0.0" }, { name = "simplejson", marker = "extra == 'common-core'", specifier = ">=3,<4" }, { name = "simplejson", marker = "extra == 'flagsmith-schemas'" }, { name = "structlog", marker = "extra == 'common-core'", specifier = ">=24.4,<26" }, @@ -544,7 +545,6 @@ dev = [ { name = "pytest-freezegun", specifier = ">=0.4.2,<1.0.0" }, { name = "pytest-httpserver", specifier = ">=1.1.3,<2.0.0" }, { name = "pytest-mock", specifier = ">=3.14.0,<4.0.0" }, - { name = "sentry-sdk", specifier = ">=2.0.0,<3.0.0" }, { name = "setuptools", specifier = ">=78.1.1,<79.0.0" }, { name = "types-python-dateutil", specifier = ">=2.9.0.20250516,<3.0.0" }, { name = "types-simplejson", specifier = ">=3.20.0.20250326,<4.0.0" }, From ca49486412d3961292cbd4f417755e31dd56d30e Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Sun, 15 Mar 2026 11:12:17 +0000 Subject: [PATCH 05/15] update docs --- src/common/gunicorn/logging.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/src/common/gunicorn/logging.py b/src/common/gunicorn/logging.py index 01f5cbb8..ffaba199 100644 --- a/src/common/gunicorn/logging.py +++ b/src/common/gunicorn/logging.py @@ -34,6 +34,17 @@ class GunicornAccessLogJsonRecord(JsonRecord, extra_items=Any, total=False): # class GunicornAccessLogJsonFormatter(JsonFormatter): + """JSON formatter for Gunicorn access logs. + + Extends :class:`~common.core.logging.JsonFormatter` with request-specific + fields extracted from Gunicorn's ``LogRecord.args`` dict. + + This uses a stdlib ``Formatter`` rather than a structlog + ``ProcessorFormatter`` because Gunicorn populates ``record.args`` with + its own format variables — working with the ``LogRecord`` directly is + the natural abstraction for this data. + """ + def _get_extra(self, record_args: dict[str, Any]) -> dict[str, Any]: ret: dict[str, dict[str, Any]] = {} @@ -43,7 +54,7 @@ def _get_extra(self, record_args: dict[str, Any]) -> dict[str, Any]: # Gunicorn's access log format string for # request headers, response headers and environ variables # without the % prefix, e.g. "{origin}i" or "{flagsmith.environment_id}e" - # https://docs.gunicorn.org/en/stable/settings.html#access-log-format + # https://gunicorn.org/reference/settings/#access_log_format for extra_key in extra_items_to_log: extra_key_lower = extra_key.lower() if ( @@ -87,6 +98,8 @@ def get_json_record(self, record: logging.LogRecord) -> GunicornAccessLogJsonRec class PrometheusGunicornLogger(StatsdGunicornLogger): # type: ignore[misc] + """Gunicorn logger that records Prometheus metrics on each access log entry.""" + def access( self, resp: Response, @@ -116,6 +129,22 @@ def access( class GunicornJsonCapableLogger(PrometheusGunicornLogger): + """Gunicorn logger that aligns formatting with the application logging setup. + + Gunicorn manages its own loggers (``gunicorn.error``, ``gunicorn.access``) + with ``propagate=False``, so they bypass the root handler configured by + :func:`~common.core.logging.setup_logging`. This class bridges that gap: + + * **Error log** — receives a ``ProcessorFormatter`` (via + :func:`~common.core.logging.build_processor_formatter`) so that + Gunicorn's operational messages share the same format and ISO timestamps + as application logs. + * **Access log (JSON mode)** — receives a + :class:`GunicornAccessLogJsonFormatter` that produces structured JSON + with request-specific fields. In generic mode the access log keeps + Gunicorn's default CLF format. + """ + def setup(self, cfg: Config) -> None: super().setup(cfg) log_format = getattr(settings, "LOG_FORMAT", "generic") From f0efca565cf543f7df7a75a6b4dfec071413a75f Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Sat, 21 Mar 2026 11:43:29 +0000 Subject: [PATCH 06/15] sentry loggingintegration support --- src/common/core/logging.py | 31 ++++++++- tests/unit/common/core/test_logging.py | 96 ++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 3 deletions(-) diff --git a/src/common/core/logging.py b/src/common/core/logging.py index e8f98b7f..f57c3d88 100644 --- a/src/common/core/logging.py +++ b/src/common/core/logging.py @@ -129,8 +129,33 @@ def map_event_to_json_record( return event_dict -def build_processor_formatter(log_format: str) -> structlog.stdlib.ProcessorFormatter: - """Build a ``ProcessorFormatter`` for the given log format. +class _SentryFriendlyProcessorFormatter(structlog.stdlib.ProcessorFormatter): + """Preserves ``record.msg`` and ``record.args`` across formatting. + + Sentry's ``LoggingIntegration`` reads these fields *after* handlers run; + structlog's ``ProcessorFormatter`` replaces them with rendered output, breaking event + grouping. We snapshot before and restore after so Sentry sees the originals + and avoids failed event deduplication. + """ + + def format(self, record: logging.LogRecord) -> str: + # Snapshot the original fields before ProcessorFormatter + # replaces them with rendered output. + original_msg = record.msg + original_args = record.args + + formatted = super().format(record) + + # Restore so Sentry (and any other post-handler hook) sees + # the original message template and substitution args. + record.msg = original_msg + record.args = original_args + + return formatted + + +def build_processor_formatter(log_format: str) -> _SentryFriendlyProcessorFormatter: + """Build a ``_SentryFriendlyProcessorFormatter`` for the given log format. This is used both by :func:`setup_structlog` (for the root logger) and by Gunicorn logger classes so that all log output shares the same rendering. @@ -153,7 +178,7 @@ def build_processor_formatter(log_format: str) -> structlog.stdlib.ProcessorForm structlog.stdlib.ExtraAdder(), ] - return structlog.stdlib.ProcessorFormatter( + return _SentryFriendlyProcessorFormatter( processors=[ structlog.stdlib.ProcessorFormatter.remove_processors_meta, *renderer_processors, diff --git a/tests/unit/common/core/test_logging.py b/tests/unit/common/core/test_logging.py index 1e1b3c7a..e50b388d 100644 --- a/tests/unit/common/core/test_logging.py +++ b/tests/unit/common/core/test_logging.py @@ -2,13 +2,20 @@ import logging import os from pathlib import Path +from typing import TYPE_CHECKING, Generator from unittest.mock import patch import pytest +import sentry_sdk import structlog +from sentry_sdk.envelope import Envelope +from sentry_sdk.transport import Transport from common.core.logging import JsonFormatter, setup_logging +if TYPE_CHECKING: + from sentry_sdk._types import Event + @pytest.fixture(autouse=True) def reset_logging() -> None: @@ -24,6 +31,24 @@ def test_app_loggers() -> list[str]: return ["test"] +class MockSentryTransport(Transport): + def __init__(self) -> None: + self.events: list[Event] = [] + + def capture_envelope(self, envelope: Envelope) -> None: + if event := envelope.get_event(): + self.events.append(event) + + +@pytest.fixture +def sentry_transport_mock() -> Generator[MockSentryTransport, None, None]: + transport = MockSentryTransport() + sentry_sdk.init(transport=transport, transport_queue_size=0) + yield transport + sentry_sdk.flush() + sentry_sdk.init(transport=None) + + @pytest.mark.freeze_time("2023-12-08T06:05:47+00:00") def test_json_formatter__format_log__outputs_expected( caplog: pytest.LogCaptureFixture, @@ -322,6 +347,77 @@ def test_setup_logging__application_loggers__third_party_filtered( assert "noisy third-party message" not in output +def test_setup_logging__stdlib_error__sentry_captures_clean_message( + capsys: pytest.CaptureFixture[str], + test_app_loggers: list[str], + sentry_transport_mock: "MockSentryTransport", +) -> None: + """Verify Sentry receives the original message, not the rendered output. + + Sentry's ``LoggingIntegration`` monkey-patches ``Logger.callHandlers`` + and reads ``record.msg`` / ``record.args`` in a ``finally`` block + *after* all handlers (and formatters) have run:: + + try: + old_callhandlers(self, record) # formatters run here + finally: + integration._handle_record(record) # reads record.msg + + structlog's ``ProcessorFormatter.format()`` replaces ``record.msg`` + with rendered output (JSON/console) and clears ``record.args`` to + ``()``. Without ``_SentryFriendlyProcessorFormatter``, Sentry would + see a JSON blob as the message — breaking event grouping because every + rendered string (containing timestamps, PIDs, …) is unique. + + ``_SentryFriendlyProcessorFormatter`` snapshots and restores the + originals so Sentry sees the clean message template and args. + """ + # Given + setup_logging( + log_level="DEBUG", log_format="json", application_loggers=test_app_loggers + ) + test_logger = logging.getLogger("test.sentry_compat") + + # When + test_logger.error("order %s failed for %s", "ORD-123", "alice") + + # Then — the stream got JSON + output = capsys.readouterr().out.strip() + parsed = json.loads(output) + assert parsed["message"] == "order ORD-123 failed for alice" + + # And — Sentry received the original message template, not the JSON blob. + assert len(sentry_transport_mock.events) == 1 + logentry = sentry_transport_mock.events[0]["logentry"] + assert logentry["message"] == "order %s failed for %s" + assert logentry["params"] == ["ORD-123", "alice"] + assert logentry["formatted"] == "order ORD-123 failed for alice" + + +def test_setup_logging__structlog_error__sentry_captures_with_context( + capsys: pytest.CaptureFixture[str], + test_app_loggers: list[str], + sentry_transport_mock: "MockSentryTransport", +) -> None: + """Verify structlog errors reach Sentry with context from sentry_processor.""" + # Given + setup_logging( + log_level="DEBUG", log_format="json", application_loggers=test_app_loggers + ) + structured_logger = structlog.get_logger("test.sentry_structlog") + + # When + structured_logger.error("payment failed", order_id="ORD-456") + + # Then — Sentry captured the event + assert len(sentry_transport_mock.events) == 1 + event = sentry_transport_mock.events[0] + + # And — sentry_processor set the structlog context + assert "structlog" in event["contexts"] + assert event["contexts"]["structlog"]["order_id"] == "ORD-456" + + def test_setup_logging__default_args__root_at_warning() -> None: # Given / When setup_logging() From 7c84484676ad16e86a120560fa515a4e132687b1 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Sat, 21 Mar 2026 13:42:36 +0000 Subject: [PATCH 07/15] streamline gunicorn logging --- src/common/core/logging.py | 42 +++- src/common/core/main.py | 24 +- src/common/gunicorn/logging.py | 154 +++++------- tests/unit/common/core/test_main.py | 5 +- tests/unit/common/gunicorn/test_logging.py | 275 +++++++++++++-------- 5 files changed, 292 insertions(+), 208 deletions(-) diff --git a/src/common/core/logging.py b/src/common/core/logging.py index f57c3d88..e025940b 100644 --- a/src/common/core/logging.py +++ b/src/common/core/logging.py @@ -2,6 +2,7 @@ import logging import logging.config import os +import sys import threading from typing import Any @@ -54,6 +55,7 @@ def setup_logging( log_format: str = "generic", logging_configuration_file: str | None = None, application_loggers: list[str] | None = None, + extra_foreign_processors: list[Processor] | None = None, ) -> None: """ Set up logging for the application. @@ -70,6 +72,9 @@ def setup_logging( These loggers are set to ``log_level`` while the root logger uses ``max(log_level, WARNING)`` to suppress noise from third-party libraries. If ``log_level`` is DEBUG, everything logs at DEBUG. + extra_foreign_processors: Additional structlog processors to run in + the ``foreign_pre_chain`` for stdlib log records (e.g. the + Gunicorn access log field extractor). """ if logging_configuration_file: with open(logging_configuration_file) as f: @@ -105,7 +110,9 @@ def setup_logging( } logging.config.dictConfig(dict_config) - setup_structlog(log_format=log_format) + setup_structlog( + log_format=log_format, extra_foreign_processors=extra_foreign_processors + ) def map_event_to_json_record( @@ -144,21 +151,38 @@ def format(self, record: logging.LogRecord) -> str: original_msg = record.msg original_args = record.args + # Stash original args on the record so foreign_pre_chain + # processors (e.g. the Gunicorn access log extractor) can + # access them — ProcessorFormatter clears record.args to () + # before running the chain. + record._original_args = original_args # noqa: SLF001 + formatted = super().format(record) # Restore so Sentry (and any other post-handler hook) sees # the original message template and substitution args. record.msg = original_msg record.args = original_args + record.__dict__.pop("_original_args", None) return formatted -def build_processor_formatter(log_format: str) -> _SentryFriendlyProcessorFormatter: +def _drop_internal_keys( + logger: WrappedLogger, method_name: str, event_dict: EventDict +) -> EventDict: + """Remove internal attributes that leak via ``ExtraAdder``.""" + event_dict.pop("_original_args", None) + return event_dict + + +def build_processor_formatter( + log_format: str, + extra_foreign_processors: list[Processor] | None = None, +) -> _SentryFriendlyProcessorFormatter: """Build a ``_SentryFriendlyProcessorFormatter`` for the given log format. - This is used both by :func:`setup_structlog` (for the root logger) and by - Gunicorn logger classes so that all log output shares the same rendering. + This is used by :func:`setup_structlog` for the root logger. """ if log_format == "json": renderer_processors: list[Processor] = [ @@ -166,8 +190,9 @@ def build_processor_formatter(log_format: str) -> _SentryFriendlyProcessorFormat structlog.processors.JSONRenderer(), ] else: + colors = sys.stdout.isatty() and structlog.dev._has_colors renderer_processors = [ - structlog.dev.ConsoleRenderer(), + structlog.dev.ConsoleRenderer(colors=colors), ] foreign_pre_chain: list[Processor] = [ @@ -176,6 +201,8 @@ def build_processor_formatter(log_format: str) -> _SentryFriendlyProcessorFormat structlog.stdlib.add_log_level, structlog.processors.TimeStamper(fmt="iso"), structlog.stdlib.ExtraAdder(), + _drop_internal_keys, + *(extra_foreign_processors or []), ] return _SentryFriendlyProcessorFormatter( @@ -189,11 +216,14 @@ def build_processor_formatter(log_format: str) -> _SentryFriendlyProcessorFormat def setup_structlog( log_format: str, + extra_foreign_processors: list[Processor] | None = None, ) -> None: """Configure structlog to route through stdlib logging.""" from common.core.sentry import sentry_processor - formatter = build_processor_formatter(log_format) + formatter = build_processor_formatter( + log_format, extra_foreign_processors=extra_foreign_processors + ) # Replace the formatter on existing root handlers with ProcessorFormatter. root = logging.getLogger() diff --git a/src/common/core/main.py b/src/common/core/main.py index fed21db4..dacfe786 100644 --- a/src/common/core/main.py +++ b/src/common/core/main.py @@ -8,10 +8,13 @@ from django.core.management import ( execute_from_command_line as django_execute_from_command_line, ) +from environs import Env from common.core.cli import healthcheck from common.core.logging import setup_logging +env = Env() + logger = logging.getLogger(__name__) @@ -34,17 +37,18 @@ def ensure_cli_env() -> typing.Generator[None, None, None]: ctx = contextlib.ExitStack() # Set up logging early, before Django settings are loaded. - application_loggers_env = os.environ.get("APPLICATION_LOGGERS", "") + from common.gunicorn.logging import make_gunicorn_access_processor + setup_logging( - log_level=os.environ.get("LOG_LEVEL", "INFO"), - log_format=os.environ.get("LOG_FORMAT", "generic"), - logging_configuration_file=os.environ.get("LOGGING_CONFIGURATION_FILE"), - application_loggers=[ - logger_name - for name in application_loggers_env.split(",") - if (logger_name := name.strip()) - ] - or None, + log_level=env.str("LOG_LEVEL", "INFO"), + log_format=env.str("LOG_FORMAT", "generic"), + logging_configuration_file=env.str("LOGGING_CONFIGURATION_FILE", None), + application_loggers=env.list("APPLICATION_LOGGERS", []) or None, + extra_foreign_processors=[ + make_gunicorn_access_processor( + env.list("ACCESS_LOG_EXTRA_ITEMS", []) or None, + ), + ], ) # Prometheus multiproc support diff --git a/src/common/gunicorn/logging.py b/src/common/gunicorn/logging.py index ffaba199..4efd991f 100644 --- a/src/common/gunicorn/logging.py +++ b/src/common/gunicorn/logging.py @@ -1,100 +1,87 @@ -import logging -import sys +import os from datetime import datetime, timedelta -from typing import TYPE_CHECKING, Any +from typing import Any -from django.conf import settings from gunicorn.config import Config # type: ignore[import-untyped] from gunicorn.http.message import Request # type: ignore[import-untyped] from gunicorn.http.wsgi import Response # type: ignore[import-untyped] from gunicorn.instrument.statsd import ( # type: ignore[import-untyped] Statsd as StatsdGunicornLogger, ) +from structlog.typing import EventDict, Processor, WrappedLogger -from common.core.logging import JsonFormatter, JsonRecord, build_processor_formatter from common.gunicorn import metrics from common.gunicorn.constants import ( - WSGI_EXTRA_PREFIX, WSGI_EXTRA_SUFFIX_TO_CATEGORY, wsgi_extra_key_regex, ) from common.gunicorn.utils import get_extra -class GunicornAccessLogJsonRecord(JsonRecord, extra_items=Any, total=False): # type: ignore[call-arg] # TODO https://github.com/python/mypy/issues/18176 - time: str - path: str - remote_ip: str - route: str - method: str - status: str - user_agent: str - duration_in_ms: int - response_size_in_bytes: int +def make_gunicorn_access_processor( + access_log_extra_items: list[str] | None = None, +) -> Processor: + """Create a processor that extracts structured fields from Gunicorn access logs. + Gunicorn populates ``record.args`` with a dict of request/response data + (keyed by format variables like ``h``, ``m``, ``s``, ``U``, etc.). This + processor detects those records and promotes the data into the event dict + so it flows through the normal rendering pipeline. -class GunicornAccessLogJsonFormatter(JsonFormatter): - """JSON formatter for Gunicorn access logs. - - Extends :class:`~common.core.logging.JsonFormatter` with request-specific - fields extracted from Gunicorn's ``LogRecord.args`` dict. - - This uses a stdlib ``Formatter`` rather than a structlog - ``ProcessorFormatter`` because Gunicorn populates ``record.args`` with - its own format variables — working with the ``LogRecord`` directly is - the natural abstraction for this data. + Pass the returned processor to :func:`~common.core.logging.setup_logging` + via ``extra_foreign_processors``. """ - def _get_extra(self, record_args: dict[str, Any]) -> dict[str, Any]: - ret: dict[str, dict[str, Any]] = {} - - extra_items_to_log: list[str] | None - if extra_items_to_log := getattr(settings, "ACCESS_LOG_EXTRA_ITEMS", None): - # We expect the extra items to be in the form of - # Gunicorn's access log format string for - # request headers, response headers and environ variables - # without the % prefix, e.g. "{origin}i" or "{flagsmith.environment_id}e" - # https://gunicorn.org/reference/settings/#access_log_format - for extra_key in extra_items_to_log: + def processor( + logger: WrappedLogger, + method_name: str, + event_dict: EventDict, + ) -> EventDict: + record = event_dict.get("_record") + if record is None or record.name != "gunicorn.access": + return event_dict + # ProcessorFormatter clears record.args before running + # foreign_pre_chain; the originals are stashed on the record + # by _SentryFriendlyProcessorFormatter.format(). + args = getattr(record, "_original_args", record.args) + if not isinstance(args, dict): + return event_dict + + url = args.get("U", "") + if q := args.get("q"): + url += f"?{q}" + + if t := args.get("t"): + event_dict["time"] = datetime.strptime( + t, "[%d/%b/%Y:%H:%M:%S %z]" + ).isoformat() + event_dict["path"] = url + event_dict["remote_ip"] = args.get("h", "") + event_dict["method"] = args.get("m", "") + event_dict["status"] = str(args.get("s", "")) + event_dict["user_agent"] = args.get("a", "") + event_dict["duration_in_ms"] = args.get("M", 0) + event_dict["response_size_in_bytes"] = args.get("B") or 0 + + if access_log_extra_items: + for extra_key in access_log_extra_items: extra_key_lower = extra_key.lower() if ( - (extra_value := record_args.get(extra_key_lower)) + (extra_value := args.get(extra_key_lower)) and (re_match := wsgi_extra_key_regex.match(extra_key_lower)) and ( - extra_category := WSGI_EXTRA_SUFFIX_TO_CATEGORY.get( + category := WSGI_EXTRA_SUFFIX_TO_CATEGORY.get( re_match.group("suffix") ) ) ): - ret.setdefault(extra_category, {})[re_match.group("key")] = ( + event_dict.setdefault(category, {})[re_match.group("key")] = ( extra_value ) - return ret - - def get_json_record(self, record: logging.LogRecord) -> GunicornAccessLogJsonRecord: - args = record.args + return event_dict - if TYPE_CHECKING: - assert isinstance(args, dict) - - url = args["U"] - if q := args["q"]: - url += f"?{q}" - - return { - **super().get_json_record(record), - **self._get_extra(args), # type: ignore[typeddict-item] # TODO https://github.com/python/mypy/issues/18176 - "time": datetime.strptime(args["t"], "[%d/%b/%Y:%H:%M:%S %z]").isoformat(), - "path": url, - "remote_ip": args["h"], - "route": args.get(f"{{{WSGI_EXTRA_PREFIX}route}}e") or "", - "method": args["m"], - "status": str(args["s"]), - "user_agent": args["a"], - "duration_in_ms": args["M"], - "response_size_in_bytes": args["B"] or 0, - } + return processor class PrometheusGunicornLogger(StatsdGunicornLogger): # type: ignore[misc] @@ -129,32 +116,19 @@ def access( class GunicornJsonCapableLogger(PrometheusGunicornLogger): - """Gunicorn logger that aligns formatting with the application logging setup. - - Gunicorn manages its own loggers (``gunicorn.error``, ``gunicorn.access``) - with ``propagate=False``, so they bypass the root handler configured by - :func:`~common.core.logging.setup_logging`. This class bridges that gap: - - * **Error log** — receives a ``ProcessorFormatter`` (via - :func:`~common.core.logging.build_processor_formatter`) so that - Gunicorn's operational messages share the same format and ISO timestamps - as application logs. - * **Access log (JSON mode)** — receives a - :class:`GunicornAccessLogJsonFormatter` that produces structured JSON - with request-specific fields. In generic mode the access log keeps - Gunicorn's default CLF format. - """ + """Gunicorn logger that integrates with the application logging setup.""" def setup(self, cfg: Config) -> None: super().setup(cfg) - log_format = getattr(settings, "LOG_FORMAT", "generic") - if log_format == "json": - self._set_handler( - self.access_log, - cfg.accesslog, - GunicornAccessLogJsonFormatter(), - stream=sys.stdout, - ) - formatter = build_processor_formatter(log_format) - for handler in self.error_log.handlers: - handler.setFormatter(formatter) + + # Error log always propagates to root. + for handler in self.error_log.handlers[:]: + self.error_log.removeHandler(handler) + self.error_log.propagate = True + + # Access log: JSON propagates to root for structured output; + # generic keeps Gunicorn's own CLF handler set by super().setup(). + if os.environ.get("LOG_FORMAT") == "json": + for handler in self.access_log.handlers[:]: + self.access_log.removeHandler(handler) + self.access_log.propagate = True diff --git a/tests/unit/common/core/test_main.py b/tests/unit/common/core/test_main.py index f8a40b98..c2eb9f35 100644 --- a/tests/unit/common/core/test_main.py +++ b/tests/unit/common/core/test_main.py @@ -1,5 +1,5 @@ import os -from unittest.mock import patch +from unittest.mock import ANY, patch from common.core.main import ensure_cli_env @@ -11,6 +11,7 @@ def test_ensure_cli_env__env_vars_set__calls_setup_logging_with_env_values() -> "LOG_FORMAT": "json", "LOGGING_CONFIGURATION_FILE": "/tmp/logging.json", "APPLICATION_LOGGERS": "myapp,mylib", + "ACCESS_LOG_EXTRA_ITEMS": "{flagsmith.route}e,{origin}i", } # When @@ -27,6 +28,7 @@ def test_ensure_cli_env__env_vars_set__calls_setup_logging_with_env_values() -> log_format="json", logging_configuration_file="/tmp/logging.json", application_loggers=["myapp", "mylib"], + extra_foreign_processors=ANY, ) @@ -45,4 +47,5 @@ def test_ensure_cli_env__no_env_vars__calls_setup_logging_with_defaults() -> Non log_format="generic", logging_configuration_file=None, application_loggers=None, + extra_foreign_processors=ANY, ) diff --git a/tests/unit/common/gunicorn/test_logging.py b/tests/unit/common/gunicorn/test_logging.py index e84990d4..9292edf6 100644 --- a/tests/unit/common/gunicorn/test_logging.py +++ b/tests/unit/common/gunicorn/test_logging.py @@ -1,100 +1,29 @@ +import json import logging import os -from datetime import datetime, timedelta +from datetime import timedelta import pytest -import structlog.stdlib -from gunicorn.config import AccessLogFormat, Config # type: ignore[import-untyped] -from pytest_django.fixtures import SettingsWrapper +import structlog +from gunicorn.config import Config # type: ignore[import-untyped] from pytest_mock import MockerFixture +from common.core.logging import setup_logging from common.gunicorn.logging import ( - GunicornAccessLogJsonFormatter, GunicornJsonCapableLogger, PrometheusGunicornLogger, + make_gunicorn_access_processor, ) +from common.gunicorn.utils import DEFAULT_ACCESS_LOG_FORMAT from common.test_tools import AssertMetricFixture -@pytest.mark.freeze_time("2023-12-08T06:05:47+00:00") -def test_gunicorn_access_log_json_formatter__format_log__outputs_expected( - settings: SettingsWrapper, -) -> None: - # Given - settings.ACCESS_LOG_EXTRA_ITEMS = [ - "{flagsmith.route}e", - "{X-LOG-ME-STATUS}o", - "{x-log-me}i", - ] - - gunicorn_access_log_json_formatter = GunicornAccessLogJsonFormatter() - log_record = logging.LogRecord( - name="gunicorn.access", - level=logging.INFO, - pathname="", - lineno=1, - msg=AccessLogFormat.default, - args={ - "{flagsmith.route}e": "/test/{test_id}", - "{wsgi.version}e": (1, 0), - "{x-log-me-status}o": "acked", - "{x-log-me}i": "42", - "a": "requests", - "b": "42", - "B": 42, - "D": 1000000, - "f": "-", - "h": "192.168.0.1", - "H": None, - "l": "-", - "L": "1.0", - "m": "GET", - "M": 1000, - "p": "<42>", - "q": "foo=bar", - "r": "GET", - "s": 200, - "T": 1, - "t": datetime.fromisoformat("2023-12-08T06:05:47+00:00").strftime( - "[%d/%b/%Y:%H:%M:%S %z]" - ), - "u": "-", - "U": "/test/42", - }, - exc_info=None, - ) - expected_pid = os.getpid() - - # When - json_log = gunicorn_access_log_json_formatter.get_json_record(log_record) - - # Then - assert json_log == { - "duration_in_ms": 1000, - "environ_variables": { - "flagsmith.route": "/test/{test_id}", - }, - "levelname": "INFO", - "logger_name": "gunicorn.access", - "message": '192.168.0.1 - - [08/Dec/2023:06:05:47 +0000] "GET" 200 42 "-" "requests"', - "method": "GET", - "path": "/test/42?foo=bar", - "pid": expected_pid, - "remote_ip": "192.168.0.1", - "request_headers": { - "x-log-me": "42", - }, - "response_headers": { - "x-log-me-status": "acked", - }, - "response_size_in_bytes": 42, - "route": "/test/{test_id}", - "status": "200", - "thread_name": "MainThread", - "time": "2023-12-08T06:05:47+00:00", - "timestamp": "2023-12-08 06:05:47,000", - "user_agent": "requests", - } +@pytest.fixture(autouse=True) +def _reset_logging() -> None: + root = logging.getLogger() + for handler in root.handlers[:]: + root.removeHandler(handler) + structlog.reset_defaults() def test_gunicorn_prometheus_gunicorn_logger__access_logged__expected_metrics( @@ -136,36 +65,180 @@ def test_gunicorn_prometheus_gunicorn_logger__access_logged__expected_metrics( ) -def test_gunicorn_json_capable_logger__json_log_format__sets_expected_formatters( - settings: SettingsWrapper, +def test_gunicorn_json_capable_logger__json_format__access_log_propagates( + monkeypatch: pytest.MonkeyPatch, ) -> None: # Given - settings.LOG_FORMAT = "json" + monkeypatch.setenv("LOG_FORMAT", "json") config = Config() config.set("accesslog", "-") # When logger = GunicornJsonCapableLogger(config) - # Then - assert isinstance( - logger.error_log.handlers[0].formatter, - structlog.stdlib.ProcessorFormatter, - ) - assert isinstance( - logger.access_log.handlers[0].formatter, - GunicornAccessLogJsonFormatter, + # Then — error log propagates + assert logger.error_log.handlers == [] + assert logger.error_log.propagate is True + + # And — access log propagates (structured fields via foreign_pre_chain) + assert logger.access_log.handlers == [] + assert logger.access_log.propagate is True + + +def test_gunicorn_json_capable_logger__generic_format__access_log_keeps_clf( + monkeypatch: pytest.MonkeyPatch, +) -> None: + # Given + monkeypatch.delenv("LOG_FORMAT", raising=False) + config = Config() + config.set("accesslog", "-") + + # When + logger = GunicornJsonCapableLogger(config) + + # Then — error log propagates + assert logger.error_log.handlers == [] + assert logger.error_log.propagate is True + + # And — access log has its own CLF handler, does not propagate + assert len(logger.access_log.handlers) == 1 + assert logger.access_log.propagate is False + + +@pytest.mark.freeze_time("2023-12-08T06:05:47+00:00") +def test_gunicorn_access_processor__json_format__extracts_structured_fields( + capsys: pytest.CaptureFixture[str], + monkeypatch: pytest.MonkeyPatch, +) -> None: + # Given + monkeypatch.setenv("LOG_FORMAT", "json") + setup_logging( + log_level="DEBUG", + log_format="json", + application_loggers=["gunicorn"], + extra_foreign_processors=[ + make_gunicorn_access_processor( + [ + "{origin}i", + "{access-control-allow-origin}o", + "{flagsmith.route}e", + "{x-log-me-status}o", + "{x-log-me}i", + ] + ), + ], ) + config = Config() + config.set("accesslog", "-") + gunicorn_logger = GunicornJsonCapableLogger(config) + access_logger = gunicorn_logger.access_log + + record_args = { + "h": "192.168.0.1", + "l": "-", + "u": "-", + "t": "[08/Dec/2023:06:05:47 +0000]", + "r": "GET /test/42?foo=bar HTTP/1.1", + "m": "GET", + "U": "/test/42", + "q": "foo=bar", + "s": 200, + "B": 42, + "b": "42", + "f": "-", + "a": "requests", + "T": 1, + "M": 1000, + "D": 1000000, + "L": "1.0", + "p": "<42>", + "{origin}i": "https://app.flagsmith.com", + "{access-control-allow-origin}o": "https://app.flagsmith.com", + "{flagsmith.route}e": "/test/{test_id}", + "{x-log-me-status}o": "acked", + "{x-log-me}i": "42", + } + # When + access_logger.info(DEFAULT_ACCESS_LOG_FORMAT, record_args) -def test_gunicorn_json_capable_logger__non_existent_setting__not_raises( - settings: SettingsWrapper, + # Then + output = capsys.readouterr().out.strip() + parsed = json.loads(output) + assert parsed == { + "duration_in_ms": 1000, + "environ_variables": {"flagsmith.route": "/test/{test_id}"}, + "levelname": "INFO", + "logger_name": "gunicorn.access", + "message": ( + '192.168.0.1 - - [08/Dec/2023:06:05:47 +0000] "GET /test/42?foo=bar HTTP/1.1"' + ' 200 42 "-" "requests" https://app.flagsmith.com https://app.flagsmith.com' + ), + "method": "GET", + "path": "/test/42?foo=bar", + "pid": os.getpid(), + "remote_ip": "192.168.0.1", + "request_headers": { + "origin": "https://app.flagsmith.com", + "x-log-me": "42", + }, + "response_headers": { + "access-control-allow-origin": "https://app.flagsmith.com", + "x-log-me-status": "acked", + }, + "response_size_in_bytes": 42, + "status": "200", + "thread_name": "MainThread", + "time": "2023-12-08T06:05:47+00:00", + "timestamp": "2023-12-08T06:05:47Z", + "user_agent": "requests", + } + + +def test_gunicorn_json_capable_logger__generic_format__outputs_pure_clf( + capsys: pytest.CaptureFixture[str], + monkeypatch: pytest.MonkeyPatch, ) -> None: # Given - del settings.LOG_FORMAT + monkeypatch.delenv("LOG_FORMAT", raising=False) + setup_logging( + log_level="DEBUG", + log_format="generic", + application_loggers=["gunicorn"], + ) config = Config() + config.set("accesslog", "-") + gunicorn_logger = GunicornJsonCapableLogger(config) + + record_args = { + "h": "192.168.0.1", + "l": "-", + "u": "-", + "t": "[08/Dec/2023:06:05:47 +0000]", + "r": "GET /api/flags HTTP/1.1", + "m": "GET", + "U": "/api/flags", + "q": "", + "s": 200, + "B": 1234, + "b": "1234", + "f": "-", + "a": "python-requests/2.31.0", + "T": 0, + "M": 42, + "D": 42000, + "L": "0.042", + "p": "<12345>", + "{origin}i": "-", + "{access-control-allow-origin}o": "-", + } - # When & Then - from common.gunicorn.logging import GunicornJsonCapableLogger + # When + gunicorn_logger.access_log.info(DEFAULT_ACCESS_LOG_FORMAT, record_args) - GunicornJsonCapableLogger(config) + # Then — pure CLF, no structlog metadata + output = capsys.readouterr().out.strip() + assert output == ( + '192.168.0.1 - - [08/Dec/2023:06:05:47 +0000] "GET /api/flags HTTP/1.1"' + ' 200 1234 "-" "python-requests/2.31.0" - -' + ) From f795bb93c2d44f0ce641dfbc2b590f2f4afb2cdf Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Sat, 21 Mar 2026 13:58:47 +0000 Subject: [PATCH 08/15] respect ACCESS_LOG_LOCATION --- src/common/gunicorn/logging.py | 12 ++-- tests/unit/common/gunicorn/test_logging.py | 70 ++++++++++++++++++++-- 2 files changed, 72 insertions(+), 10 deletions(-) diff --git a/src/common/gunicorn/logging.py b/src/common/gunicorn/logging.py index 4efd991f..94f05e23 100644 --- a/src/common/gunicorn/logging.py +++ b/src/common/gunicorn/logging.py @@ -1,3 +1,4 @@ +import logging import os from datetime import datetime, timedelta from typing import Any @@ -126,9 +127,10 @@ def setup(self, cfg: Config) -> None: self.error_log.removeHandler(handler) self.error_log.propagate = True - # Access log: JSON propagates to root for structured output; - # generic keeps Gunicorn's own CLF handler set by super().setup(). + # In JSON mode, replace the access log formatter with the root's + # ProcessorFormatter. In generic mode, keep Gunicorn's CLF formatter. + # The handler itself is preserved so ACCESS_LOG_LOCATION is respected. if os.environ.get("LOG_FORMAT") == "json": - for handler in self.access_log.handlers[:]: - self.access_log.removeHandler(handler) - self.access_log.propagate = True + root_formatter = logging.getLogger().handlers[0].formatter + for handler in self.access_log.handlers: + handler.setFormatter(root_formatter) diff --git a/tests/unit/common/gunicorn/test_logging.py b/tests/unit/common/gunicorn/test_logging.py index 9292edf6..b6cfaa48 100644 --- a/tests/unit/common/gunicorn/test_logging.py +++ b/tests/unit/common/gunicorn/test_logging.py @@ -2,6 +2,7 @@ import logging import os from datetime import timedelta +from pathlib import Path import pytest import structlog @@ -65,24 +66,28 @@ def test_gunicorn_prometheus_gunicorn_logger__access_logged__expected_metrics( ) -def test_gunicorn_json_capable_logger__json_format__access_log_propagates( +def test_gunicorn_json_capable_logger__json_format__access_log_uses_processor_formatter( monkeypatch: pytest.MonkeyPatch, ) -> None: # Given monkeypatch.setenv("LOG_FORMAT", "json") + setup_logging(log_level="INFO", log_format="json") config = Config() config.set("accesslog", "-") # When logger = GunicornJsonCapableLogger(config) - # Then — error log propagates + # Then — error log propagates to root assert logger.error_log.handlers == [] assert logger.error_log.propagate is True - # And — access log propagates (structured fields via foreign_pre_chain) - assert logger.access_log.handlers == [] - assert logger.access_log.propagate is True + # And — access log keeps its handler (respecting accesslog destination) + # but uses the root's ProcessorFormatter for structured JSON output + assert len(logger.access_log.handlers) == 1 + assert logger.access_log.propagate is False + root_formatter = logging.getLogger().handlers[0].formatter + assert logger.access_log.handlers[0].formatter is root_formatter def test_gunicorn_json_capable_logger__generic_format__access_log_keeps_clf( @@ -105,6 +110,61 @@ def test_gunicorn_json_capable_logger__generic_format__access_log_keeps_clf( assert logger.access_log.propagate is False +@pytest.mark.freeze_time("2023-12-08T06:05:47+00:00") +def test_gunicorn_json_capable_logger__json_format_file__writes_to_access_log_location( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + # Given + monkeypatch.setenv("LOG_FORMAT", "json") + access_log_file = tmp_path / "access.log" + setup_logging( + log_level="DEBUG", + log_format="json", + application_loggers=["gunicorn"], + extra_foreign_processors=[make_gunicorn_access_processor()], + ) + config = Config() + config.set("accesslog", str(access_log_file)) + gunicorn_logger = GunicornJsonCapableLogger(config) + + # When + gunicorn_logger.access_log.info( + '%(h)s "%(r)s" %(s)s', + { + "h": "10.0.0.1", + "r": "GET /health HTTP/1.1", + "s": 200, + "m": "GET", + "U": "/health", + "q": "", + "a": "curl", + "M": 5, + "B": 2, + "t": "[08/Dec/2023:06:05:47 +0000]", + }, + ) + + # Then — output went to the file, not stdout + content = access_log_file.read_text() + assert json.loads(content) == { + "duration_in_ms": 5, + "levelname": "INFO", + "logger_name": "gunicorn.access", + "message": '10.0.0.1 "GET /health HTTP/1.1" 200', + "method": "GET", + "path": "/health", + "pid": os.getpid(), + "remote_ip": "10.0.0.1", + "response_size_in_bytes": 2, + "status": "200", + "thread_name": "MainThread", + "time": "2023-12-08T06:05:47+00:00", + "timestamp": "2023-12-08T06:05:47Z", + "user_agent": "curl", + } + + @pytest.mark.freeze_time("2023-12-08T06:05:47+00:00") def test_gunicorn_access_processor__json_format__extracts_structured_fields( capsys: pytest.CaptureFixture[str], From 0391a59ff718a38103ff99a1a2758e6035d9fc66 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Sat, 21 Mar 2026 14:19:06 +0000 Subject: [PATCH 09/15] retire custom Histogram --- src/common/gunicorn/metrics.py | 7 +------ src/common/prometheus/utils.py | 5 +---- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/src/common/gunicorn/metrics.py b/src/common/gunicorn/metrics.py index 3a3c6ded..4b8711d8 100644 --- a/src/common/gunicorn/metrics.py +++ b/src/common/gunicorn/metrics.py @@ -1,5 +1,4 @@ import prometheus_client -from django.conf import settings from common.gunicorn.constants import HTTP_SERVER_RESPONSE_SIZE_DEFAULT_BUCKETS from common.prometheus import Histogram @@ -18,9 +17,5 @@ "flagsmith_http_server_response_size_bytes", "HTTP response size in bytes.", ["route", "method", "response_status"], - buckets=getattr( - settings, - "PROMETHEUS_HTTP_SERVER_RESPONSE_SIZE_HISTOGRAM_BUCKETS", - HTTP_SERVER_RESPONSE_SIZE_DEFAULT_BUCKETS, - ), + buckets=HTTP_SERVER_RESPONSE_SIZE_DEFAULT_BUCKETS, ) diff --git a/src/common/prometheus/utils.py b/src/common/prometheus/utils.py index c1793c54..4e45b3ff 100644 --- a/src/common/prometheus/utils.py +++ b/src/common/prometheus/utils.py @@ -1,13 +1,10 @@ import importlib import prometheus_client -from django.conf import settings from prometheus_client.metrics import MetricWrapperBase from prometheus_client.multiprocess import MultiProcessCollector - -class Histogram(prometheus_client.Histogram): - DEFAULT_BUCKETS = settings.PROMETHEUS_HISTOGRAM_BUCKETS +Histogram = prometheus_client.Histogram def get_registry() -> prometheus_client.CollectorRegistry: From b355ee17f868e983aa3c4b13812f45308d92f085 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Sat, 21 Mar 2026 14:28:13 +0000 Subject: [PATCH 10/15] fix imports --- src/common/core/main.py | 3 +- src/common/gunicorn/logging.py | 73 +-------------------- src/common/gunicorn/processors.py | 74 ++++++++++++++++++++++ tests/unit/common/gunicorn/test_logging.py | 2 +- 4 files changed, 77 insertions(+), 75 deletions(-) create mode 100644 src/common/gunicorn/processors.py diff --git a/src/common/core/main.py b/src/common/core/main.py index dacfe786..964fc817 100644 --- a/src/common/core/main.py +++ b/src/common/core/main.py @@ -12,6 +12,7 @@ from common.core.cli import healthcheck from common.core.logging import setup_logging +from common.gunicorn.processors import make_gunicorn_access_processor env = Env() @@ -37,8 +38,6 @@ def ensure_cli_env() -> typing.Generator[None, None, None]: ctx = contextlib.ExitStack() # Set up logging early, before Django settings are loaded. - from common.gunicorn.logging import make_gunicorn_access_processor - setup_logging( log_level=env.str("LOG_LEVEL", "INFO"), log_format=env.str("LOG_FORMAT", "generic"), diff --git a/src/common/gunicorn/logging.py b/src/common/gunicorn/logging.py index 94f05e23..6c7096d6 100644 --- a/src/common/gunicorn/logging.py +++ b/src/common/gunicorn/logging.py @@ -1,6 +1,6 @@ import logging import os -from datetime import datetime, timedelta +from datetime import timedelta from typing import Any from gunicorn.config import Config # type: ignore[import-untyped] @@ -9,82 +9,11 @@ from gunicorn.instrument.statsd import ( # type: ignore[import-untyped] Statsd as StatsdGunicornLogger, ) -from structlog.typing import EventDict, Processor, WrappedLogger from common.gunicorn import metrics -from common.gunicorn.constants import ( - WSGI_EXTRA_SUFFIX_TO_CATEGORY, - wsgi_extra_key_regex, -) from common.gunicorn.utils import get_extra -def make_gunicorn_access_processor( - access_log_extra_items: list[str] | None = None, -) -> Processor: - """Create a processor that extracts structured fields from Gunicorn access logs. - - Gunicorn populates ``record.args`` with a dict of request/response data - (keyed by format variables like ``h``, ``m``, ``s``, ``U``, etc.). This - processor detects those records and promotes the data into the event dict - so it flows through the normal rendering pipeline. - - Pass the returned processor to :func:`~common.core.logging.setup_logging` - via ``extra_foreign_processors``. - """ - - def processor( - logger: WrappedLogger, - method_name: str, - event_dict: EventDict, - ) -> EventDict: - record = event_dict.get("_record") - if record is None or record.name != "gunicorn.access": - return event_dict - # ProcessorFormatter clears record.args before running - # foreign_pre_chain; the originals are stashed on the record - # by _SentryFriendlyProcessorFormatter.format(). - args = getattr(record, "_original_args", record.args) - if not isinstance(args, dict): - return event_dict - - url = args.get("U", "") - if q := args.get("q"): - url += f"?{q}" - - if t := args.get("t"): - event_dict["time"] = datetime.strptime( - t, "[%d/%b/%Y:%H:%M:%S %z]" - ).isoformat() - event_dict["path"] = url - event_dict["remote_ip"] = args.get("h", "") - event_dict["method"] = args.get("m", "") - event_dict["status"] = str(args.get("s", "")) - event_dict["user_agent"] = args.get("a", "") - event_dict["duration_in_ms"] = args.get("M", 0) - event_dict["response_size_in_bytes"] = args.get("B") or 0 - - if access_log_extra_items: - for extra_key in access_log_extra_items: - extra_key_lower = extra_key.lower() - if ( - (extra_value := args.get(extra_key_lower)) - and (re_match := wsgi_extra_key_regex.match(extra_key_lower)) - and ( - category := WSGI_EXTRA_SUFFIX_TO_CATEGORY.get( - re_match.group("suffix") - ) - ) - ): - event_dict.setdefault(category, {})[re_match.group("key")] = ( - extra_value - ) - - return event_dict - - return processor - - class PrometheusGunicornLogger(StatsdGunicornLogger): # type: ignore[misc] """Gunicorn logger that records Prometheus metrics on each access log entry.""" diff --git a/src/common/gunicorn/processors.py b/src/common/gunicorn/processors.py new file mode 100644 index 00000000..28029a1c --- /dev/null +++ b/src/common/gunicorn/processors.py @@ -0,0 +1,74 @@ +from datetime import datetime + +from structlog.typing import EventDict, Processor, WrappedLogger + +from common.gunicorn.constants import ( + WSGI_EXTRA_SUFFIX_TO_CATEGORY, + wsgi_extra_key_regex, +) + + +def make_gunicorn_access_processor( + access_log_extra_items: list[str] | None = None, +) -> Processor: + """Create a processor that extracts structured fields from Gunicorn access logs. + + Gunicorn populates ``record.args`` with a dict of request/response data + (keyed by format variables like ``h``, ``m``, ``s``, ``U``, etc.). This + processor detects those records and promotes the data into the event dict + so it flows through the normal rendering pipeline. + + Pass the returned processor to :func:`~common.core.logging.setup_logging` + via ``extra_foreign_processors``. + """ + + def processor( + logger: WrappedLogger, + method_name: str, + event_dict: EventDict, + ) -> EventDict: + record = event_dict.get("_record") + if record is None or record.name != "gunicorn.access": + return event_dict + # ProcessorFormatter clears record.args before running + # foreign_pre_chain; the originals are stashed on the record + # by _SentryFriendlyProcessorFormatter.format(). + args = getattr(record, "_original_args", record.args) + if not isinstance(args, dict): + return event_dict + + url = args.get("U", "") + if q := args.get("q"): + url += f"?{q}" + + if t := args.get("t"): + event_dict["time"] = datetime.strptime( + t, "[%d/%b/%Y:%H:%M:%S %z]" + ).isoformat() + event_dict["path"] = url + event_dict["remote_ip"] = args.get("h", "") + event_dict["method"] = args.get("m", "") + event_dict["status"] = str(args.get("s", "")) + event_dict["user_agent"] = args.get("a", "") + event_dict["duration_in_ms"] = args.get("M", 0) + event_dict["response_size_in_bytes"] = args.get("B") or 0 + + if access_log_extra_items: + for extra_key in access_log_extra_items: + extra_key_lower = extra_key.lower() + if ( + (extra_value := args.get(extra_key_lower)) + and (re_match := wsgi_extra_key_regex.match(extra_key_lower)) + and ( + category := WSGI_EXTRA_SUFFIX_TO_CATEGORY.get( + re_match.group("suffix") + ) + ) + ): + event_dict.setdefault(category, {})[re_match.group("key")] = ( + extra_value + ) + + return event_dict + + return processor diff --git a/tests/unit/common/gunicorn/test_logging.py b/tests/unit/common/gunicorn/test_logging.py index b6cfaa48..49151e67 100644 --- a/tests/unit/common/gunicorn/test_logging.py +++ b/tests/unit/common/gunicorn/test_logging.py @@ -13,8 +13,8 @@ from common.gunicorn.logging import ( GunicornJsonCapableLogger, PrometheusGunicornLogger, - make_gunicorn_access_processor, ) +from common.gunicorn.processors import make_gunicorn_access_processor from common.gunicorn.utils import DEFAULT_ACCESS_LOG_FORMAT from common.test_tools import AssertMetricFixture From 852780f06c77101d841ca3a13e822c096f827cc4 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Sun, 22 Mar 2026 13:04:11 +0000 Subject: [PATCH 11/15] fix coverage --- tests/unit/common/gunicorn/test_logging.py | 25 ++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/unit/common/gunicorn/test_logging.py b/tests/unit/common/gunicorn/test_logging.py index 49151e67..317c6458 100644 --- a/tests/unit/common/gunicorn/test_logging.py +++ b/tests/unit/common/gunicorn/test_logging.py @@ -255,6 +255,31 @@ def test_gunicorn_access_processor__json_format__extracts_structured_fields( } +@pytest.mark.freeze_time("2023-12-08T06:05:47+00:00") +def test_gunicorn_access_processor__non_dict_args__passes_through( + capsys: pytest.CaptureFixture[str], + monkeypatch: pytest.MonkeyPatch, +) -> None: + # Given + monkeypatch.setenv("LOG_FORMAT", "json") + setup_logging( + log_level="DEBUG", + log_format="json", + application_loggers=["gunicorn"], + extra_foreign_processors=[make_gunicorn_access_processor()], + ) + access_logger = logging.getLogger("gunicorn.access") + + # When + access_logger.info("Worker %s booting on port %d", "web", 8000) + + # Then + output = capsys.readouterr().out.strip() + parsed = json.loads(output) + assert parsed["message"] == "Worker web booting on port 8000" + assert "method" not in parsed + + def test_gunicorn_json_capable_logger__generic_format__outputs_pure_clf( capsys: pytest.CaptureFixture[str], monkeypatch: pytest.MonkeyPatch, From 297026f173ed7fa3b348cb30ddc91aeaf683890d Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Thu, 26 Mar 2026 18:04:13 +0000 Subject: [PATCH 12/15] refactor: use del instead of __dict__.pop for _original_args cleanup --- src/common/core/logging.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/core/logging.py b/src/common/core/logging.py index e025940b..68e602de 100644 --- a/src/common/core/logging.py +++ b/src/common/core/logging.py @@ -155,7 +155,7 @@ def format(self, record: logging.LogRecord) -> str: # processors (e.g. the Gunicorn access log extractor) can # access them — ProcessorFormatter clears record.args to () # before running the chain. - record._original_args = original_args # noqa: SLF001 + record._original_args = original_args formatted = super().format(record) @@ -163,7 +163,7 @@ def format(self, record: logging.LogRecord) -> str: # the original message template and substitution args. record.msg = original_msg record.args = original_args - record.__dict__.pop("_original_args", None) + del record._original_args # type: ignore[attr-defined] return formatted From 1d30cfb940f54222f04b2a48dc4c91b965a11d5f Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Thu, 26 Mar 2026 18:06:04 +0000 Subject: [PATCH 13/15] refactor: move drop_internal_keys to _SentryFriendlyProcessorFormatter staticmethod --- src/common/core/logging.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/common/core/logging.py b/src/common/core/logging.py index 68e602de..ecb04e1b 100644 --- a/src/common/core/logging.py +++ b/src/common/core/logging.py @@ -167,13 +167,13 @@ def format(self, record: logging.LogRecord) -> str: return formatted - -def _drop_internal_keys( - logger: WrappedLogger, method_name: str, event_dict: EventDict -) -> EventDict: - """Remove internal attributes that leak via ``ExtraAdder``.""" - event_dict.pop("_original_args", None) - return event_dict + @staticmethod + def drop_internal_keys( + _: WrappedLogger, __: str, event_dict: EventDict + ) -> EventDict: + """Remove internal attributes that leak via ``ExtraAdder``.""" + event_dict.pop("_original_args", None) + return event_dict def build_processor_formatter( @@ -201,7 +201,7 @@ def build_processor_formatter( structlog.stdlib.add_log_level, structlog.processors.TimeStamper(fmt="iso"), structlog.stdlib.ExtraAdder(), - _drop_internal_keys, + _SentryFriendlyProcessorFormatter.drop_internal_keys, *(extra_foreign_processors or []), ] From fb584456eefe964539988ab0b22422b97c17d6cf Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Thu, 26 Mar 2026 18:08:00 +0000 Subject: [PATCH 14/15] refactor: move build_processor_formatter logic to __init__ --- src/common/core/logging.py | 78 +++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 40 deletions(-) diff --git a/src/common/core/logging.py b/src/common/core/logging.py index ecb04e1b..121dede9 100644 --- a/src/common/core/logging.py +++ b/src/common/core/logging.py @@ -145,6 +145,42 @@ class _SentryFriendlyProcessorFormatter(structlog.stdlib.ProcessorFormatter): and avoids failed event deduplication. """ + def __init__( + self, + log_format: str = "generic", + extra_foreign_processors: list[Processor] | None = None, + **kwargs: Any, + ) -> None: + if log_format == "json": + renderer_processors: list[Processor] = [ + map_event_to_json_record, + structlog.processors.JSONRenderer(), + ] + else: + colors = sys.stdout.isatty() and structlog.dev._has_colors + renderer_processors = [ + structlog.dev.ConsoleRenderer(colors=colors), + ] + + foreign_pre_chain: list[Processor] = [ + structlog.contextvars.merge_contextvars, + structlog.stdlib.add_logger_name, + structlog.stdlib.add_log_level, + structlog.processors.TimeStamper(fmt="iso"), + structlog.stdlib.ExtraAdder(), + self.drop_internal_keys, + *(extra_foreign_processors or []), + ] + + super().__init__( + processors=[ + structlog.stdlib.ProcessorFormatter.remove_processors_meta, + *renderer_processors, + ], + foreign_pre_chain=foreign_pre_chain, + **kwargs, + ) + def format(self, record: logging.LogRecord) -> str: # Snapshot the original fields before ProcessorFormatter # replaces them with rendered output. @@ -176,44 +212,6 @@ def drop_internal_keys( return event_dict -def build_processor_formatter( - log_format: str, - extra_foreign_processors: list[Processor] | None = None, -) -> _SentryFriendlyProcessorFormatter: - """Build a ``_SentryFriendlyProcessorFormatter`` for the given log format. - - This is used by :func:`setup_structlog` for the root logger. - """ - if log_format == "json": - renderer_processors: list[Processor] = [ - map_event_to_json_record, - structlog.processors.JSONRenderer(), - ] - else: - colors = sys.stdout.isatty() and structlog.dev._has_colors - renderer_processors = [ - structlog.dev.ConsoleRenderer(colors=colors), - ] - - foreign_pre_chain: list[Processor] = [ - structlog.contextvars.merge_contextvars, - structlog.stdlib.add_logger_name, - structlog.stdlib.add_log_level, - structlog.processors.TimeStamper(fmt="iso"), - structlog.stdlib.ExtraAdder(), - _SentryFriendlyProcessorFormatter.drop_internal_keys, - *(extra_foreign_processors or []), - ] - - return _SentryFriendlyProcessorFormatter( - processors=[ - structlog.stdlib.ProcessorFormatter.remove_processors_meta, - *renderer_processors, - ], - foreign_pre_chain=foreign_pre_chain, - ) - - def setup_structlog( log_format: str, extra_foreign_processors: list[Processor] | None = None, @@ -221,8 +219,8 @@ def setup_structlog( """Configure structlog to route through stdlib logging.""" from common.core.sentry import sentry_processor - formatter = build_processor_formatter( - log_format, extra_foreign_processors=extra_foreign_processors + formatter = _SentryFriendlyProcessorFormatter( + log_format=log_format, extra_foreign_processors=extra_foreign_processors ) # Replace the formatter on existing root handlers with ProcessorFormatter. From 394df5904cfe2e83024f01575fd761d7440c3aea Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Thu, 26 Mar 2026 18:08:33 +0000 Subject: [PATCH 15/15] refactor: guard against empty root handlers in GunicornJsonCapableLogger --- src/common/gunicorn/logging.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/common/gunicorn/logging.py b/src/common/gunicorn/logging.py index 6c7096d6..00bc1792 100644 --- a/src/common/gunicorn/logging.py +++ b/src/common/gunicorn/logging.py @@ -59,7 +59,8 @@ def setup(self, cfg: Config) -> None: # In JSON mode, replace the access log formatter with the root's # ProcessorFormatter. In generic mode, keep Gunicorn's CLF formatter. # The handler itself is preserved so ACCESS_LOG_LOCATION is respected. - if os.environ.get("LOG_FORMAT") == "json": - root_formatter = logging.getLogger().handlers[0].formatter + root_handlers = logging.getLogger().handlers + if os.environ.get("LOG_FORMAT") == "json" and root_handlers: + root_formatter = root_handlers[0].formatter for handler in self.access_log.handlers: handler.setFormatter(root_formatter)