Skip to content

Commit 1b586e4

Browse files
author
Andrzej Pijanowski
committed
fix: improve queryables handling and CQL2 filter property normalization
Combines the following fixes: - Normalize property names by stripping 'properties.' prefix in CQL2 filter - Enhance field mapping to handle 'properties.' prefix in Elasticsearch queries - Fix nested property traversal and improve error messages - Implement exclusion of fields from queryables based on environment variable - Enhance exclusion logic for queryables to support top-level fields and properties prefix
1 parent 405a4df commit 1b586e4

File tree

7 files changed

+610
-38
lines changed

7 files changed

+610
-38
lines changed

stac_fastapi/core/stac_fastapi/core/queryables.py

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ def __init__(self, database_logic: Any):
2525
self._lock = asyncio.Lock()
2626
self.validation_enabled: bool = False
2727
self.cache_ttl: int = 1800 # How often to refresh cache (in seconds)
28-
self.excluded_queryables: Set[str] = set()
2928
self.reload_settings()
3029

3130
def reload_settings(self):
@@ -35,17 +34,6 @@ def reload_settings(self):
3534
)
3635
self.cache_ttl = int(os.getenv("QUERYABLES_CACHE_TTL", "1800"))
3736

38-
excluded = os.getenv("EXCLUDED_FROM_QUERYABLES", "")
39-
self.excluded_queryables = set()
40-
if excluded:
41-
for field in excluded.split(","):
42-
field = field.strip()
43-
if field:
44-
# Remove 'properties.' prefix if present
45-
if field.startswith("properties."):
46-
field = field[11:]
47-
self.excluded_queryables.add(field)
48-
4937
async def _update_cache(self):
5038
"""Update the cache with the latest queryables from the database."""
5139
if not self.validation_enabled:
@@ -58,9 +46,6 @@ async def _update_cache(self):
5846
queryables_mapping = await self._db_logic.get_queryables_mapping()
5947
all_queryables_set = set(queryables_mapping.keys())
6048

61-
if self.excluded_queryables:
62-
all_queryables_set = all_queryables_set - self.excluded_queryables
63-
6449
self._all_queryables = all_queryables_set
6550

6651
self._cache = {"*": list(all_queryables_set)}
@@ -93,18 +78,28 @@ async def validate(self, fields: Set[str]) -> None:
9378
if invalid_fields:
9479
raise HTTPException(
9580
status_code=400,
96-
detail=f"Invalid query fields: {', '.join(invalid_fields)}.",
81+
detail=f"Invalid query fields: {', '.join(sorted(invalid_fields))}. "
82+
"These fields are not defined in the collection's queryables. "
83+
"Use the /queryables endpoint to see available fields.",
9784
)
9885

9986

10087
def get_properties_from_cql2_filter(cql2_filter: Dict[str, Any]) -> Set[str]:
101-
"""Recursively extract property names from a CQL2 filter."""
88+
"""Recursively extract property names from a CQL2 filter.
89+
90+
Property names are normalized by stripping the 'properties.' prefix
91+
if present, to match queryables stored without the prefix.
92+
"""
10293
props: Set[str] = set()
10394
if "op" in cql2_filter and "args" in cql2_filter:
10495
for arg in cql2_filter["args"]:
10596
if isinstance(arg, dict):
10697
if "op" in arg:
10798
props.update(get_properties_from_cql2_filter(arg))
10899
elif "property" in arg:
109-
props.add(arg["property"])
100+
prop_name = arg["property"]
101+
# Strip 'properties.' prefix if present
102+
if prop_name.startswith("properties."):
103+
prop_name = prop_name[11:]
104+
props.add(prop_name)
110105
return props

stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/mapping.py

Lines changed: 83 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,62 @@
33
This module provides functions for working with Elasticsearch/OpenSearch mappings.
44
"""
55

6-
from typing import Any, Dict
6+
import os
7+
from collections import deque
8+
from typing import Any, Dict, Set
9+
10+
11+
def _get_excluded_from_queryables() -> Set[str]:
12+
"""Get fields to exclude from queryables endpoint and filtering.
13+
14+
Reads from EXCLUDED_FROM_QUERYABLES environment variable.
15+
Supports comma-separated list of field names.
16+
17+
For each exclusion pattern, both the original and the version with/without
18+
'properties.' prefix are included. This ensures fields are excluded regardless
19+
of whether they appear at the top level or under 'properties' in the mapping.
20+
21+
Example:
22+
EXCLUDED_FROM_QUERYABLES="properties.auth:schemes,storage:schemes"
23+
24+
This will exclude:
25+
- properties.auth:schemes (and children like properties.auth:schemes.s3.type)
26+
- auth:schemes (and children like auth:schemes.s3.type)
27+
- storage:schemes (and children)
28+
- properties.storage:schemes (and children)
29+
30+
Returns:
31+
Set[str]: Set of field names to exclude from queryables
32+
"""
33+
excluded = os.getenv("EXCLUDED_FROM_QUERYABLES", "")
34+
if not excluded:
35+
return set()
36+
37+
result = set()
38+
for field in excluded.split(","):
39+
field = field.strip()
40+
if not field:
41+
continue
42+
43+
result.add(field)
44+
45+
if field.startswith("properties."):
46+
result.add(field.removeprefix("properties."))
47+
else:
48+
result.add(f"properties.{field}")
49+
50+
return result
751

852

953
async def get_queryables_mapping_shared(
10-
mappings: Dict[str, Dict[str, Any]], collection_id: str = "*"
54+
mappings: Dict[str, Dict[str, Any]],
55+
collection_id: str = "*",
1156
) -> Dict[str, str]:
1257
"""Retrieve mapping of Queryables for search.
1358
59+
Fields listed in the EXCLUDED_FROM_QUERYABLES environment variable will be
60+
excluded from the result, along with their children.
61+
1462
Args:
1563
mappings (Dict[str, Dict[str, Any]]): The mapping information returned from
1664
Elasticsearch/OpenSearch client's indices.get_mapping() method.
@@ -20,19 +68,44 @@ async def get_queryables_mapping_shared(
2068
2169
Returns:
2270
Dict[str, str]: A dictionary containing the Queryables mappings, where keys are
23-
field names and values are the corresponding paths in the Elasticsearch/OpenSearch
24-
document structure.
71+
field names (with 'properties.' prefix removed) and values are the
72+
corresponding paths in the Elasticsearch/OpenSearch document structure.
2573
"""
2674
queryables_mapping = {}
75+
excluded = _get_excluded_from_queryables()
76+
77+
def is_excluded(path: str) -> bool:
78+
"""Check if the path starts with any excluded prefix."""
79+
return any(
80+
path == prefix or path.startswith(prefix + ".") for prefix in excluded
81+
)
2782

2883
for mapping in mappings.values():
29-
fields = mapping["mappings"].get("properties", {})
30-
properties = fields.pop("properties", {}).get("properties", {}).keys()
84+
mapping_properties = mapping["mappings"].get("properties", {})
85+
86+
stack: deque[tuple[str, Dict[str, Any]]] = deque(mapping_properties.items())
87+
88+
while stack:
89+
field_fqn, field_def = stack.popleft()
90+
91+
nested_properties = field_def.get("properties")
92+
if nested_properties:
93+
stack.extend(
94+
(f"{field_fqn}.{k}", v)
95+
for k, v in nested_properties.items()
96+
if v.get("enabled", True) and not is_excluded(f"{field_fqn}.{k}")
97+
)
98+
99+
field_type = field_def.get("type")
100+
if (
101+
not field_type
102+
or not field_def.get("enabled", True)
103+
or is_excluded(field_fqn)
104+
):
105+
continue
31106

32-
for field_key in fields:
33-
queryables_mapping[field_key] = field_key
107+
field_name = field_fqn.removeprefix("properties.")
34108

35-
for property_key in properties:
36-
queryables_mapping[property_key] = f"properties.{property_key}"
109+
queryables_mapping[field_name] = field_fqn
37110

38111
return queryables_mapping

stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,54 @@ def _get_excluded_from_queryables() -> set[str]:
2626
Reads from EXCLUDED_FROM_QUERYABLES environment variable.
2727
Supports comma-separated list of field names.
2828
29+
For each exclusion pattern, both the original and the version with/without
30+
'properties.' prefix are included. This ensures fields are excluded regardless
31+
of whether they appear at the top level or under 'properties' in the mapping.
32+
2933
Example:
30-
EXCLUDED_FROM_QUERYABLES="auth:schemes,storage:schemes"
34+
EXCLUDED_FROM_QUERYABLES="properties.auth:schemes,storage:schemes"
3135
3236
Returns:
3337
Set[str]: Set of field names to exclude from queryables
3438
"""
3539
excluded = os.getenv("EXCLUDED_FROM_QUERYABLES", "")
3640
if not excluded:
3741
return set()
38-
return {field.strip() for field in excluded.split(",") if field.strip()}
42+
43+
result = set()
44+
for field in excluded.split(","):
45+
field = field.strip()
46+
if not field:
47+
continue
48+
49+
result.add(field)
50+
51+
if field.startswith("properties."):
52+
result.add(field.removeprefix("properties."))
53+
else:
54+
result.add(f"properties.{field}")
55+
56+
return result
57+
58+
@staticmethod
59+
def _is_excluded(field_fqn: str, excluded: set[str]) -> bool:
60+
"""Check if a field should be excluded based on prefix matching.
61+
62+
A field is excluded if:
63+
- It exactly matches an exclusion pattern
64+
- It starts with an exclusion pattern followed by a dot (nested child)
65+
66+
Args:
67+
field_fqn: Fully qualified field name (e.g., "properties.auth:schemes.s3.type")
68+
excluded: Set of exclusion patterns
69+
70+
Returns:
71+
True if field should be excluded, False otherwise
72+
"""
73+
for prefix in excluded:
74+
if field_fqn == prefix or field_fqn.startswith(prefix + "."):
75+
return True
76+
return False
3977

4078
async def get_queryables(
4179
self,
@@ -92,23 +130,20 @@ async def get_queryables(
92130
while stack:
93131
field_fqn, field_def = stack.popleft()
94132

95-
# Iterate over nested fields
133+
if self._is_excluded(field_fqn, excluded_fields):
134+
continue
135+
96136
field_properties = field_def.get("properties")
97137
if field_properties:
98138
stack.extend(
99139
(f"{field_fqn}.{k}", v)
100140
for k, v in field_properties.items()
101141
if v.get("enabled", True)
102-
and f"{field_fqn}.{k}" not in excluded_fields
103142
)
104143

105144
# Skip non-indexed or disabled fields
106145
field_type = field_def.get("type")
107-
if (
108-
not field_type
109-
or not field_def.get("enabled", True)
110-
or field_fqn in excluded_fields
111-
):
146+
if not field_type or not field_def.get("enabled", True):
112147
continue
113148

114149
# Fields in Item Properties should be exposed with their un-prefixed names,

stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/transform.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,20 @@ def to_es_field(queryables_mapping: Dict[str, Any], field: str) -> str:
2222
Returns:
2323
str: The mapped field name suitable for Elasticsearch queries.
2424
"""
25-
return queryables_mapping.get(field, field)
25+
# First, try to find the field as-is in the mapping
26+
if field in queryables_mapping:
27+
return queryables_mapping[field]
28+
29+
# If field has 'properties.' prefix, try without it
30+
# This handles cases where users specify 'properties.eo:cloud_cover'
31+
# but queryables_mapping uses 'eo:cloud_cover' as the key
32+
if field.startswith("properties."):
33+
normalized_field = field[11:] # len("properties.") == 11
34+
if normalized_field in queryables_mapping:
35+
return queryables_mapping[normalized_field]
36+
37+
# If not found, return the original field
38+
return field
2639

2740

2841
def to_es(queryables_mapping: Dict[str, Any], query: Dict[str, Any]) -> Dict[str, Any]:

stac_fastapi/tests/api/test_api_query_validation.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ def enable_validation():
3030
client = app_config["client"]
3131
with mock.patch.dict(os.environ, {"VALIDATE_QUERYABLES": "true"}):
3232
client.queryables_cache.reload_settings()
33+
client.queryables_cache._cache = {}
34+
client.queryables_cache._last_updated = 0
3335
yield
3436
client.queryables_cache.reload_settings()
3537

@@ -114,3 +116,56 @@ async def test_validate_queryables_excluded(app_client, ctx):
114116
assert resp.status_code == 200
115117

116118
client.queryables_cache.reload_settings()
119+
120+
121+
@pytest.mark.asyncio
122+
async def test_search_get_cql2_text_invalid_param(app_client, ctx):
123+
"""Test GET /search with an invalid cql2-text filter parameter."""
124+
params = {
125+
"filter-lang": "cql2-text",
126+
"filter": "properties.invalid_param < 5",
127+
}
128+
resp = await app_client.get("/search", params=params)
129+
assert resp.status_code == 400
130+
resp_json = resp.json()
131+
assert "Invalid query fields: invalid_param" in resp_json["detail"]
132+
133+
134+
@pytest.mark.asyncio
135+
async def test_search_get_cql2_text_valid_param(app_client, ctx):
136+
"""Test GET /search with a valid cql2-text filter parameter."""
137+
params = {
138+
"filter-lang": "cql2-text",
139+
"filter": "eo:cloud_cover < 10",
140+
}
141+
resp = await app_client.get("/search", params=params)
142+
assert resp.status_code == 200
143+
144+
145+
@pytest.mark.asyncio
146+
async def test_item_collection_get_cql2_text_invalid_param(app_client, ctx):
147+
"""Test GET /collections/{collection_id}/items with invalid cql2-text filter."""
148+
collection_id = ctx.item["collection"]
149+
params = {
150+
"filter-lang": "cql2-text",
151+
"filter": "properties.invalid_param < 5",
152+
}
153+
resp = await app_client.get(f"/collections/{collection_id}/items", params=params)
154+
assert resp.status_code == 400
155+
resp_json = resp.json()
156+
assert "Invalid query fields: invalid_param" in resp_json["detail"]
157+
158+
159+
@pytest.mark.asyncio
160+
async def test_search_get_cql2_text_with_properties_prefix(app_client, ctx):
161+
"""Test GET /search with a valid cql2-text filter using properties. prefix.
162+
163+
This tests the case where users specify 'properties.eo:cloud_cover' instead of
164+
just 'eo:cloud_cover'. Both formats should work correctly.
165+
"""
166+
params = {
167+
"filter-lang": "cql2-text",
168+
"filter": "properties.eo:cloud_cover < 10",
169+
}
170+
resp = await app_client.get("/search", params=params)
171+
assert resp.status_code == 200

stac_fastapi/tests/core/test_queryables.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,3 +116,22 @@ def test_get_properties_from_cql2_filter():
116116

117117
# Empty/invalid
118118
assert get_properties_from_cql2_filter({}) == set()
119+
120+
121+
def test_get_properties_from_cql2_filter_strips_properties_prefix():
122+
"""Test that 'properties.' prefix is stripped from property names."""
123+
# Single property with prefix
124+
cql2 = {"op": "<", "args": [{"property": "properties.none"}, 5]}
125+
props = get_properties_from_cql2_filter(cql2)
126+
assert props == {"none"}
127+
128+
# Mixed with and without prefix
129+
cql2_nested = {
130+
"op": "and",
131+
"args": [
132+
{"op": "=", "args": [{"property": "properties.test"}, "v1"]},
133+
{"op": "<", "args": [{"property": "eo:cloud_cover"}, 10]},
134+
],
135+
}
136+
props = get_properties_from_cql2_filter(cql2_nested)
137+
assert props == {"test", "eo:cloud_cover"}

0 commit comments

Comments
 (0)