Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/run-end-to-end.yml
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,9 @@ jobs:
- name: Run TRACE_STATS_COMPUTATION scenario
if: always() && steps.build.outcome == 'success' && contains(inputs.scenarios, '"TRACE_STATS_COMPUTATION"')
run: ./run.sh TRACE_STATS_COMPUTATION
- name: Run TRACE_STATS_COMPUTATION_OBFUSCATION_DISABLED scenario
  if: always() && steps.build.outcome == 'success' && contains(inputs.scenarios, '"TRACE_STATS_COMPUTATION_OBFUSCATION_DISABLED"')
  run: ./run.sh TRACE_STATS_COMPUTATION_OBFUSCATION_DISABLED
# The FUTURE_OBFUSCATION_VERSION scenario is declared next to the OBFUSCATION_DISABLED one
# and has its own test class, so it needs its own step to be executed by this workflow.
- name: Run TRACE_STATS_COMPUTATION_FUTURE_OBFUSCATION_VERSION scenario
  if: always() && steps.build.outcome == 'success' && contains(inputs.scenarios, '"TRACE_STATS_COMPUTATION_FUTURE_OBFUSCATION_VERSION"')
  run: ./run.sh TRACE_STATS_COMPUTATION_FUTURE_OBFUSCATION_VERSION
- name: Run IAST_STANDALONE scenario
if: always() && steps.build.outcome == 'success' && contains(inputs.scenarios, '"IAST_STANDALONE"')
run: ./run.sh IAST_STANDALONE
Expand Down
143 changes: 143 additions & 0 deletions tests/stats/test_stats.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest

from utils import features, interfaces, logger, scenarios, weblog

"""
Expand Down Expand Up @@ -133,6 +134,148 @@ def test_grpc_status_code(self):
)


@features.client_side_stats_supported
@scenarios.trace_stats_computation
class Test_Client_Stats_With_Client_Obfuscation:
    """Test client-side stats do the obfuscation beforehand when available"""

    # One weblog request is issued per id; the expected number of SQL stats entries
    # is derived from this list so both stay in sync.
    TEST_USER_IDS = ["1", "2", "admin", "test"]

    def setup_obfuscation(self):
        """Generate SQL spans by calling the weblog SQL endpoint once per test user id."""
        for user_id in self.TEST_USER_IDS:
            weblog.get(f"/rasp/sqli?user_id={user_id}")

    def test_obfuscation(self):
        """Test that SQL resources are obfuscated in the stats payloads sent by the library.

        Validates:
        - Datadog-Obfuscation-Version header is present on at least one stats payload,
          and its value is an integer >= 1 wherever it is present
        - exactly one SQL stats entry is collected per generated request
        - every collected SQL resource is obfuscated (literal replaced with ?)
        """
        want = "SELECT * FROM users WHERE id = ?"
        expected_count = len(self.TEST_USER_IDS)
        sql_stats = []
        obfuscation_header_found = False

        for data in interfaces.library.get_data("/v0.6/stats"):
            # Header names are lower-cased so the lookup is case-insensitive.
            headers = {h[0].lower(): h[1] for h in data["request"]["headers"]}
            if "datadog-obfuscation-version" in headers:
                obfuscation_header_found = True
                assert int(headers["datadog-obfuscation-version"]) >= 1, (
                    f"Expected obfuscation version to be >= 1, got '{headers['datadog-obfuscation-version']}'"
                )

            payload = data["request"]["content"]
            # The payload nests "Stats" twice: outer buckets, each holding grouped stats entries.
            for bucket in payload.get("Stats", []):
                for stat in bucket.get("Stats", []):
                    if stat.get("Type") == "sql" and stat["Resource"].startswith("SELECT"):
                        sql_stats.append(stat)

        assert obfuscation_header_found, "Datadog-Obfuscation-Version header not found on any stats payload"

        # NOTE(review): once obfuscated, the four queries share the same resource name. If the
        # tracer merges them into a single grouped entry, this count would have to become a sum
        # over the entries' hit counters instead - confirm against a real payload.
        assert len(sql_stats) == expected_count, (
            f"Expected exactly {expected_count} SQL stats entries, got {len(sql_stats)}"
        )
        for stat in sql_stats:
            assert stat["Resource"] == want, f"Expected obfuscated resource '{want}', got '{stat['Resource']}'"


@features.client_side_stats_supported
@scenarios.trace_stats_computation_obfuscation_disabled
class Test_Client_Stats_With_Client_Obfuscation_Disabled:
    """Test that libraries read the agent /info to respect the obfuscation config"""

    TEST_USER_IDS = ["1", "2", "admin", "test"]

    def setup_obfuscation(self):
        """Generate SQL spans by calling the weblog SQL endpoint once per test user id."""
        for user_id in self.TEST_USER_IDS:
            weblog.get(f"/rasp/sqli?user_id={user_id}")

    def test_obfuscation(self):
        """Test that SQL resources are only normalized, not obfuscated, in stats payloads.

        Validates:
        - Datadog-Obfuscation-Version header is present on at least one stats payload,
          and its value is an integer >= 1 wherever it is present
        - exactly one SQL stats entry is collected per generated request
        - SQL resource names are not obfuscated, only normalized: each one still ends
          with one of the original user ids
        """
        want_prefix = "SELECT * FROM users WHERE id = "
        expected_count = len(self.TEST_USER_IDS)
        sql_stats = []
        obfuscation_header_found = False

        for data in interfaces.library.get_data("/v0.6/stats"):
            # Header names are lower-cased so the lookup is case-insensitive.
            headers = {h[0].lower(): h[1] for h in data["request"]["headers"]}
            if "datadog-obfuscation-version" in headers:
                obfuscation_header_found = True
                assert int(headers["datadog-obfuscation-version"]) >= 1, (
                    f"Expected obfuscation version to be >= 1, got '{headers['datadog-obfuscation-version']}'"
                )

            payload = data["request"]["content"]
            # The payload nests "Stats" twice: outer buckets, each holding grouped stats entries.
            for bucket in payload.get("Stats", []):
                for stat in bucket.get("Stats", []):
                    if stat.get("Type") == "sql" and stat["Resource"].startswith("SELECT"):
                        sql_stats.append(stat)

        assert obfuscation_header_found, "Datadog-Obfuscation-Version header not found on any stats payload"

        assert len(sql_stats) == expected_count, (
            f"Expected exactly {expected_count} SQL stats entries, got {len(sql_stats)}"
        )
        # NormalizeOnly mode preserves string literals including surrounding single quotes.
        # The SQL uses string-quoted IDs (e.g. WHERE id='1'), so after normalization the
        # suffix appears as e.g. "'1'" (with quotes). Accept both quoted and unquoted forms
        # to be compatible with tracers that may strip the quotes.
        quoted_user_ids = {f"'{uid}'" for uid in self.TEST_USER_IDS}
        accepted_suffixes = set(self.TEST_USER_IDS) | quoted_user_ids
        for stat in sql_stats:
            query = stat["Resource"]
            # The query must have the form: SELECT * FROM users WHERE id = [one of the user ids]
            assert query.startswith(want_prefix), (
                f"Expected SQL resource to start with '{want_prefix}', got '{query}'"
            )
            suffix = query.removeprefix(want_prefix)
            assert suffix in accepted_suffixes, (
                f"Expected SQL resource to end with one of {sorted(accepted_suffixes)}, got '{query}'"
            )


@features.client_side_stats_supported
@scenarios.trace_stats_computation_future_obfuscation_version
class Test_Client_Stats_Future_Obfuscation_Version:
    """Test that the SDK skips client-side obfuscation when the agent advertises a future/unknown obfuscation version"""

    # One weblog request is issued per id; the expected number of SQL stats entries
    # is derived from this list so both stay in sync.
    TEST_USER_IDS = ["1", "2", "admin", "test"]

    def setup_no_obfuscation(self):
        """Generate SQL spans by calling the weblog SQL endpoint once per test user id."""
        for user_id in self.TEST_USER_IDS:
            weblog.get(f"/rasp/sqli?user_id={user_id}")

    def test_no_obfuscation(self):
        """Test that the SDK does not obfuscate stats and does not send the obfuscation header
        when the agent reports an obfuscation_version higher than what the SDK supports (99).

        Validates:
        - Datadog-Obfuscation-Version header is NOT present on any stats payload
        - exactly one SQL stats entry is collected per generated request
        - SQL resource names are NOT obfuscated (no "?" placeholder in the resource)
        """
        expected_count = len(self.TEST_USER_IDS)
        sql_stats = []
        obfuscation_header_found = False

        for data in interfaces.library.get_data("/v0.6/stats"):
            # Header names are lower-cased so the lookup is case-insensitive.
            headers = {h[0].lower(): h[1] for h in data["request"]["headers"]}
            if "datadog-obfuscation-version" in headers:
                obfuscation_header_found = True

            payload = data["request"]["content"]
            # The payload nests "Stats" twice: outer buckets, each holding grouped stats entries.
            for bucket in payload.get("Stats", []):
                for stat in bucket.get("Stats", []):
                    if stat.get("Type") == "sql" and stat["Resource"].startswith("SELECT"):
                        sql_stats.append(stat)

        assert not obfuscation_header_found, (
            "Datadog-Obfuscation-Version header should NOT be present when agent reports a future obfuscation version"
        )

        assert len(sql_stats) == expected_count, (
            f"Expected exactly {expected_count} SQL stats entries, got {len(sql_stats)}"
        )
        for stat in sql_stats:
            # An obfuscated resource would have its literal replaced by "?".
            assert "?" not in stat["Resource"], (
                f"SQL resource should NOT be obfuscated when agent reports a future obfuscation version, "
                f"but got: '{stat['Resource']}'"
            )


@features.service_override_source
@scenarios.trace_stats_computation
class Test_Stats_Service_Source:
Expand Down
37 changes: 37 additions & 0 deletions utils/_context/_scenarios/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,43 @@ class _Scenarios:
scenario_groups=[scenario_groups.appsec],
)

# Scenario used by the tests that check the tracer's behavior when the agent
# advertises an obfuscation version the tracer does not know.
trace_stats_computation_future_obfuscation_version = EndToEndScenario(
    name="TRACE_STATS_COMPUTATION_FUTURE_OBFUSCATION_VERSION",
    # Same as trace_stats_computation but with the agent advertising an obfuscation_version
    # higher than what any current SDK supports (99), to test that the SDK correctly falls
    # back to no client-side obfuscation when it encounters an unknown/future version.
    weblog_env={
        "DD_TRACE_STATS_COMPUTATION_ENABLED": "true",  # default env var for CSS
        "DD_TRACE_COMPUTE_STATS": "true",
        "DD_TRACE_FEATURES": "discovery",
        "DD_TRACE_TRACER_METRICS_ENABLED": "true",  # java
    },
    # Passed down to the proxy's SetObfuscationVersion mock, which overwrites the
    # "obfuscation_version" field of the agent's /info response with this value.
    obfuscation_version=99,
    doc=(
        "End to end testing with DD_TRACE_COMPUTE_STATS=1 and agent reporting obfuscation_version: 99. "
        "Tests that tracers correctly skip client-side obfuscation and omit the Datadog-Obfuscation-Version "
        "header when the agent advertises an obfuscation version higher than what the SDK supports."
    ),
    scenario_groups=[scenario_groups.appsec],
)

trace_stats_computation_obfuscation_disabled = EndToEndScenario(
name="TRACE_STATS_COMPUTATION_OBFUSCATION_DISABLED",
# Same as trace_stats_computation but with the agent being configured with obfuscation disabled, to test that
# the SDK correctly reads the obfuscation config from agent's /info and respects it.
weblog_env={
"DD_TRACE_STATS_COMPUTATION_ENABLED": "true", # default env var for CSS
"DD_TRACE_COMPUTE_STATS": "true",
"DD_TRACE_FEATURES": "discovery",
"DD_TRACE_TRACER_METRICS_ENABLED": "true", # java
},
agent_env={
"DD_APM_SQL_OBFUSCATION_MODE": "normalize_only",
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see anything about disabling the obfuscation in the spec. Is it done by setting the obfuscation version to 0? If not, maybe we need a scenario for the missing/version-0 case.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not a setting from the CSS spec, but to avoid breaking changes in the obfuscation behavior when switching from agent obfuscation to client obfuscation, I decided that following as much of the agent config as possible would be preferable.

https://github.com/DataDog/datadog-agent/blob/main/pkg/config/config_template.yaml#L1633

},
doc=("End to end testing with DD_TRACE_COMPUTE_STATS=1 and obfuscation disabled."),
scenario_groups=[scenario_groups.appsec],
)

sampling = EndToEndScenario(
"SAMPLING",
tracer_sampling_rate=0.5,
Expand Down
5 changes: 5 additions & 0 deletions utils/_context/_scenarios/endtoend.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def __init__(
meta_structs_disabled: bool = False,
span_events: bool = True,
client_drop_p0s: bool | None = None,
obfuscation_version: int | None = None,
extra_containers: tuple[type[TestedContainer], ...] = (),
) -> None:
super().__init__(name, doc=doc, github_workflow=github_workflow, scenario_groups=scenario_groups)
Expand All @@ -57,6 +58,7 @@ def __init__(
self.meta_structs_disabled = False
self.span_events = span_events
self.client_drop_p0s = client_drop_p0s
self.obfuscation_version = obfuscation_version

if not self.use_proxy and self.rc_api_enabled:
raise ValueError("rc_api_enabled requires use_proxy")
Expand All @@ -74,6 +76,7 @@ def __init__(
meta_structs_disabled=meta_structs_disabled,
span_events=span_events,
client_drop_p0s=client_drop_p0s,
obfuscation_version=obfuscation_version,
enable_ipv6=enable_ipv6,
mocked_backend=mocked_backend,
)
Expand Down Expand Up @@ -201,6 +204,7 @@ def __init__(
meta_structs_disabled: bool = False,
span_events: bool = True,
client_drop_p0s: bool | None = None,
obfuscation_version: int | None = None,
runtime_metrics_enabled: bool = False,
backend_interface_timeout: int = 0,
include_buddies: bool = False,
Expand All @@ -225,6 +229,7 @@ def __init__(
meta_structs_disabled=meta_structs_disabled,
span_events=span_events,
client_drop_p0s=client_drop_p0s,
obfuscation_version=obfuscation_version,
)

self._use_proxy_for_agent = use_proxy_for_agent
Expand Down
5 changes: 5 additions & 0 deletions utils/_context/containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
MockedBackendResponse,
SetSpanEventFlags,
SetClientDropP0s,
SetObfuscationVersion,
AddRemoteConfigEndpoint,
StaticJsonMockedTracerResponse,
)
Expand Down Expand Up @@ -589,6 +590,7 @@ def __init__(
meta_structs_disabled: bool,
span_events: bool,
client_drop_p0s: bool | None = None,
obfuscation_version: int | None = None,
enable_ipv6: bool,
mocked_backend: bool = True,
) -> None:
Expand Down Expand Up @@ -635,6 +637,9 @@ def __init__(
if client_drop_p0s is not None:
self.internal_mocked_tracer_responses.append(SetClientDropP0s(client_drop_p0s=client_drop_p0s))

if obfuscation_version is not None:
self.internal_mocked_tracer_responses.append(SetObfuscationVersion(obfuscation_version=obfuscation_version))

if rc_api_enabled:
# add the remote config endpoint on available agent endpoints
self.internal_mocked_tracer_responses.append(AddRemoteConfigEndpoint())
Expand Down
25 changes: 25 additions & 0 deletions utils/proxy/mocked_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,31 @@ def to_json(self) -> dict:
}


class SetObfuscationVersion(_InternalMockedTracerResponse):
    """Force the ``obfuscation_version`` value reported by the agent's /info endpoint.

    The mock is registered on the ``/info`` path and rewrites the JSON body of
    successful responses so the agent appears to advertise the configured version.
    A tracer facing a version higher than the ones it supports is expected to skip
    client-side obfuscation and to omit the Datadog-Obfuscation-Version header
    from its stats payloads.
    """

    def __init__(self, *, obfuscation_version: int):
        super().__init__(path="/info")
        self.obfuscation_version = obfuscation_version

    def execute(self, flow: HTTPFlow) -> None:
        """Overwrite ``obfuscation_version`` in the response body when the status is 200."""
        response = flow.response
        if response.status_code != HTTPStatus.OK:
            # Anything other than a 200 response is passed through untouched.
            return

        info = json.loads(response.content)
        info["obfuscation_version"] = self.obfuscation_version
        response.content = json.dumps(info).encode()

    def to_json(self) -> dict:
        """Return a dict holding the mock's class name and its configured version."""
        mock_type = self.__class__.__name__
        return {"type": mock_type, "obfuscation_version": self.obfuscation_version}


class MockedBackendResponse(MockedResponse):
"""Base class for mocking responses from backend to agent.

Expand Down
Loading