From e02c29e0ef6cd53b2b113492dbb3367c5e3bcb7a Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Mon, 29 Dec 2025 16:35:22 -0800 Subject: [PATCH 01/26] Fix unhandled ValueError parsing X-Workspace header --- api/main.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/api/main.py b/api/main.py index 91c7f73..af66673 100644 --- a/api/main.py +++ b/api/main.py @@ -90,7 +90,13 @@ async def catch_all( authorizedWorkspace = None if request.headers.get("X-Workspace") is not None: - workspace_id = int(request.headers.get("X-Workspace") or "-1") + try: + workspace_id = int(request.headers.get("X-Workspace") or "-1") + except ValueError: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="X-Workspace header must be a valid integer", + ) if not current_user.isWorkspaceContributor(workspace_id): raise HTTPException( From dd81b6c399ffce8009b42c157f670c5de5f1091e Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Mon, 29 Dec 2025 16:47:29 -0800 Subject: [PATCH 02/26] Fix type mismatch in loading project group/workspace access --- api/core/security.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/core/security.py b/api/core/security.py index cc16b30..7531677 100644 --- a/api/core/security.py +++ b/api/core/security.py @@ -226,7 +226,7 @@ async def _validate_token_uncached( accessibleWorkspaces = list(result.mappings().all()) r.accessibleWorkspaceIds = {} for i in accessibleWorkspaces: - pgid = i["tdeiProjectGroupId"] + pgid = str(i["tdeiProjectGroupId"]) # SQLAlchemy outputs UUID wsid = i["id"] if pgid not in r.accessibleWorkspaceIds: r.accessibleWorkspaceIds[pgid] = [] From ea3a35982f1802ae2f8a53f99fdccd041011153d Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Tue, 30 Dec 2025 12:22:49 -0800 Subject: [PATCH 03/26] Switch JWKS client to singleton for cert/key caching --- api/core/jwt.py | 33 ++++++++++++++++++++++++++++++++ api/core/security.py | 45 ++++++++++++++++++++++---------------------- 2 files changed, 56 insertions(+), 22 deletions(-) create mode 100644 api/core/jwt.py diff --git a/api/core/jwt.py b/api/core/jwt.py new file mode 100644 index 0000000..1f2c268 --- /dev/null +++ b/api/core/jwt.py @@ -0,0 +1,33 @@ +import jwt + +from api.core.config import settings + +# Singleton JWKS client reused to take advantage of internal cert/key caching: +_jwks_client: jwt.PyJWKClient | None = None + + +def _get_jwks_client() -> jwt.PyJWKClient: + global _jwks_client + + if _jwks_client is None: + _jwks_client = jwt.PyJWKClient( + f"{settings.TDEI_OIDC_URL}realms/{settings.TDEI_OIDC_REALM}" + f"/protocol/openid-connect/certs" + ) + + return _jwks_client + + +def validate_and_decode_token(token: str) -> dict: + # TODO: use an async client like pyjwt-key-fetcher + signing_key = _get_jwks_client().get_signing_key_from_jwt(token) + + decoded = jwt.decode_complete( + token, + key=signing_key.key, + algorithms=["RS256"], + # OIDC server does not currently differentiate tokens by audience + options={"verify_aud": False}, + ) + + return decoded.get("payload", {}) diff --git a/api/core/security.py b/api/core/security.py index 7531677..02e3e9e 100644 --- a/api/core/security.py +++ b/api/core/security.py @@ -3,7 +3,6 @@ from uuid import UUID import cachetools -import jwt import requests from fastapi import Depends, HTTPException, status from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer @@ -12,6 +11,7 @@ from api.core.config import settings from api.core.database import get_osm_session, get_task_session +from api.core.jwt import validate_and_decode_token from api.core.logging import get_logger from api.src.workspaces.schemas import WorkspaceUserRoleType @@ -129,19 +129,39 @@ async def validate_token( """ token = credentials.credentials + credentials_exception = HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid authentication credentials", + headers={"WWW-Authenticate": "Bearer"}, + ) + + try: + payload = validate_and_decode_token(token) + except Exception: + raise credentials_exception + + user_id: str | None = payload.get("sub") + if user_id is None: + raise credentials_exception + # Check cache first if token in _token_cache: logger.info("Token validation cache hit") return _token_cache[token] # Cache miss - perform full validation - user_info = await _validate_token_uncached(token, osm_db_session, task_db_session) + user_info = await _validate_token_uncached( + token, user_id, payload, osm_db_session, task_db_session + ) _token_cache[token] = user_info + return user_info async def _validate_token_uncached( token: str, + user_id: str, + payload: dict, osm_db_session: AsyncSession, task_db_session: AsyncSession, ) -> UserInfo: @@ -153,25 +173,6 @@ async def _validate_token_uncached( headers={"WWW-Authenticate": "Bearer"}, ) - jwks_client = jwt.PyJWKClient( - f"{settings.TDEI_OIDC_URL}realms/{settings.TDEI_OIDC_REALM}/protocol/openid-connect/certs" - ) - - signing_key = jwks_client.get_signing_key_from_jwt(token) - - jwtDecoded = jwt.decode_complete( - token, - key=signing_key.key, - algorithms=["RS256"], - # OIDC server does not currently differentiate tokens by audience - options={"verify_aud": False} - ) - payload = jwtDecoded.get("payload", {}) - - user_id: str | None = payload.get("sub") - if user_id is None: - raise credentials_exception - headers = { "Authorization": "Bearer " + token, "Content-Type": "application/json", @@ -200,7 +201,7 @@ async def _validate_token_uncached( r = UserInfo() r.credentials = token - r.user_uuid = UUID(payload.get("sub", "unknown")) + r.user_uuid = UUID(user_id) r.user_name = payload.get("preferred_username", "unknown") # project groups and roles from TDEI KeyCloak From 6e4f03e09c35fd3d3e3e203c7ebd13a273706321 Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Tue, 30 Dec 2025 14:31:19 -0800 Subject: [PATCH 04/26] Fix ValueError raised in project group verification --- api/core/security.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/api/core/security.py b/api/core/security.py index 02e3e9e..10cef9e 100644 --- a/api/core/security.py +++ b/api/core/security.py @@ -84,7 +84,9 @@ def isWorkspaceLead(self, workspaceId: int) -> bool: for pg in self.projectGroups: if TdeiProjectGroupRole.POINT_OF_CONTACT in pg.tdeiRoles: - if workspaceId in self.accessibleWorkspaceIds[pg.project_group_id]: + if workspaceId in self.accessibleWorkspaceIds.get( + pg.project_group_id, [] + ): return True return False From 3a8cb57ff75176cd57ab9158a5f6ea070085d611 Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Wed, 31 Dec 2025 18:03:11 -0800 Subject: [PATCH 05/26] Fix the blocking requests for a user's project groups --- api/core/security.py | 58 +++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/api/core/security.py b/api/core/security.py index 10cef9e..4c36ce6 100644 --- a/api/core/security.py +++ b/api/core/security.py @@ -1,9 +1,8 @@ -import json from enum import StrEnum from uuid import UUID import cachetools -import requests +import httpx from fastapi import Depends, HTTPException, status from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer from sqlalchemy import text @@ -180,42 +179,39 @@ async def _validate_token_uncached( "Content-Type": "application/json", } - # get user's project groups and roles from TDEI - # TODO: fix if user has > 50 PGs - authorizationUrl = ( - settings.TDEI_BACKEND_URL - + "/project-group-roles/" - + user_id - + "?page_no=1&page_size=50" - ) - - response = requests.get(authorizationUrl, headers=headers) - - # token is not valid or server unavailable - if response.status_code != 200: - raise credentials_exception - - try: - content = response.text - j = json.loads(content) - except json.JSONDecodeError: - raise credentials_exception - r = UserInfo() r.credentials = token r.user_uuid = UUID(user_id) r.user_name = payload.get("preferred_username", "unknown") - # project groups and roles from TDEI KeyCloak + # get user's project groups and roles from TDEI + pg_base_url = f"{settings.TDEI_BACKEND_URL}/project-group-roles/{user_id}" pgs = [] - for i in j: - pgs.append( - UserInfoPGMembership( - project_group_id=i["tdei_project_group_id"], - project_group_name=i["project_group_name"], - tdeiRoles=i["roles"], - ) + async with httpx.AsyncClient() as http_client: + response = await http_client.get( + pg_base_url, + headers=headers, + params={"page_no": 1, "page_size": 1000}, ) + + # token is not valid or server unavailable + if response.status_code != 200: + raise credentials_exception + + try: + pg_data = response.json() + except Exception: + raise credentials_exception + + for i in pg_data: + pgs.append( + UserInfoPGMembership( + project_group_id=i["tdei_project_group_id"], + project_group_name=i["project_group_name"], + tdeiRoles=i["roles"], + ) + ) + r.projectGroups = pgs # workspaces within our set of PGs from tasking manager DB From 06c3bbe6e3227bcd109c929ae32d4b7112a0b873 Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Mon, 15 Dec 2025 11:19:33 -0800 Subject: [PATCH 06/26] Plug potential SQL injection issue --- api/src/workspaces/repository.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/api/src/workspaces/repository.py b/api/src/workspaces/repository.py index 080f103..ada15ba 100644 --- a/api/src/workspaces/repository.py +++ b/api/src/workspaces/repository.py @@ -211,8 +211,13 @@ async def getWorkspaceBBox( current_user: UserInfo, workspace_id: int, ): + # Postgres does not support parameter binding for `SET search_path`, so + # workspace_id is interpolated directly. The explicit int() cast guards + # against SQL injection if this method is ever called from outside of a + # FastAPI path handler (where the type annotation acts as a safeguard). + # await self.session.execute( - text(f"SET search_path TO 'workspace-{workspace_id}', public") + text(f"SET search_path TO 'workspace-{int(workspace_id)}', public") ) sql_query = text( From 2001befc4e2b9797a634822885925a84fff46663 Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Mon, 15 Dec 2025 11:22:41 -0800 Subject: [PATCH 07/26] Fix OSM proxy routing for CGImap --- api/core/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/core/config.py b/api/core/config.py index 94edba6..a861236 100644 --- a/api/core/config.py +++ b/api/core/config.py @@ -18,8 +18,8 @@ class Settings(BaseSettings): "https://raw.githubusercontent.com/TaskarCenterAtUW/asr-quests/refs/heads/main/schema/schema.json" ) - # proxy destination--"osm-rails" is a virtual docker network endpoint - WS_OSM_HOST: str = "http://osm-rails:3000" + # proxy destination--"osm-web" is a virtual docker network endpoint + WS_OSM_HOST: str = "http://osm-web" #WS_OSM_HOST: str = "https://osm.workspaces-dev.sidewalks.washington.edu" SENTRY_DSN: str = "" From da0548e50434512aa4f3d1b2251f27799cdb50b7 Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Wed, 17 Dec 2025 13:12:51 -0800 Subject: [PATCH 08/26] Fix missing X-Workspace header in OSM proxy --- api/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/api/main.py b/api/main.py index af66673..d6f112f 100644 --- a/api/main.py +++ b/api/main.py @@ -104,7 +104,8 @@ async def catch_all( detail="Invalid authentication credentials", headers={"WWW-Authenticate": "Bearer"}, ) - return + + authorizedWorkspace = workspace_id else: if not any( re.search(pattern, request.url.path) for pattern in AUTH_WHITELIST_PATHS From 6790c451a21d2af63be2efbd18db95f62d6a5f6a Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Thu, 18 Dec 2025 10:16:30 -0800 Subject: [PATCH 09/26] Fix improper use of HTTP 401 in OSM proxy --- api/main.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/api/main.py b/api/main.py index d6f112f..49608f5 100644 --- a/api/main.py +++ b/api/main.py @@ -100,9 +100,8 @@ async def catch_all( if not current_user.isWorkspaceContributor(workspace_id): raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="Invalid authentication credentials", - headers={"WWW-Authenticate": "Bearer"}, + status_code=status.HTTP_403_FORBIDDEN, + detail="You do not have access to this workspace", ) authorizedWorkspace = workspace_id @@ -111,10 +110,9 @@ async def catch_all( re.search(pattern, request.url.path) for pattern in AUTH_WHITELIST_PATHS ): raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="You must set your workspace in the X-Workspace header to access OSM methods.", + status_code=status.HTTP_400_BAD_REQUEST, + detail="No X-Workspace header supplied", ) - return url = httpx.URL( path=request.url.path.strip(), query=request.url.query.encode("utf-8") From bc3e19a3cc3b7738add7336e16035f7452888efd Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Fri, 19 Dec 2025 14:46:08 -0800 Subject: [PATCH 10/26] Fix bogus regex for OSM proxy auth path whitelist --- api/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/api/main.py b/api/main.py index 49608f5..35b7831 100644 --- a/api/main.py +++ b/api/main.py @@ -70,8 +70,8 @@ def get_workspace_repository( # Define paths that do not require X-Workspace header AUTH_WHITELIST_PATHS = [ - "/api/0.6/user/*", # used during authentication - "/api/0.6/workspaces/[0-9]*/bbox.json", # used to get workspace bbox without workspace header, to be removed + r"^/api/0\.6/user/.*$", # used during authentication + r"^/api/0\.6/workspaces/[0-9]+/bbox\.json$", # used to get workspace bbox without workspace header, to be removed ] @@ -107,7 +107,7 @@ async def catch_all( authorizedWorkspace = workspace_id else: if not any( - re.search(pattern, request.url.path) for pattern in AUTH_WHITELIST_PATHS + re.fullmatch(pattern, request.url.path) for pattern in AUTH_WHITELIST_PATHS ): raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, From 205770e1a4ea1a22d2388abebbc912e456f6d917 Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Tue, 30 Dec 2025 20:15:44 -0800 Subject: [PATCH 11/26] Fix resource leak/exhaustion in OSM proxy http client --- api/main.py | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/api/main.py b/api/main.py index 35b7831..56127df 100644 --- a/api/main.py +++ b/api/main.py @@ -1,5 +1,6 @@ import os import re +from contextlib import asynccontextmanager import httpx import sentry_sdk @@ -35,10 +36,28 @@ # Set up logger for this module logger = get_logger(__name__) +# Shared HTTP client for OSM proxy. Reuses connection pool across requests: +_osm_client: httpx.AsyncClient | None = None + + +@asynccontextmanager +async def lifespan(_app: FastAPI): + # Run before app bootstrap: + global _osm_client + _osm_client = httpx.AsyncClient(base_url=settings.WS_OSM_HOST) + + yield # App runs + + # Run after app cleanup: + await _osm_client.aclose() + _osm_client = None + + app = FastAPI( title=settings.PROJECT_NAME, debug=settings.DEBUG, swagger_ui_parameters={"syntaxHighlight": False}, + lifespan=lifespan, ) # Include routers @@ -117,8 +136,8 @@ async def catch_all( url = httpx.URL( path=request.url.path.strip(), query=request.url.query.encode("utf-8") ) - client = httpx.AsyncClient(base_url=settings.WS_OSM_HOST) + client = _osm_client new_headers = list() new_headers.append( (bytes("Authorization", "utf-8"), request.headers.get("Authorization")) @@ -126,24 +145,27 @@ async def catch_all( if authorizedWorkspace is not None: new_headers.append( - (bytes("X-Workspace", "utf-8"), bytes(str(authorizedWorkspace.id), "utf-8")) + ( + bytes("X-Workspace", "utf-8"), + bytes(str(authorizedWorkspace), "utf-8"), + ) ) - new_headers.append((bytes("Host", "utf-8"), bytes(client.base_url.host, "utf-8"))) + new_headers.append( + (bytes("Host", "utf-8"), bytes(client.base_url.host, "utf-8")) + ) rp_req = client.build_request( request.method, url, headers=new_headers, content=await request.body() ) - rp_resp = await client.send(rp_req, stream=True) if rp_resp.status_code >= 400 and rp_resp.status_code < 600: - sentry_sdk.capture_message( - f"Upstream request to {rp_req.url} returned status code {rp_resp.status_code}" - ) - - logger.warning( - f"Upstream request to {rp_req.url} returned status code {rp_resp.status_code}" + msg = ( + f"Upstream request to {rp_req.url} returned " + f"status code {rp_resp.status_code}" ) + sentry_sdk.capture_message(msg) + logger.warning(msg) return StreamingResponse( rp_resp.aiter_raw(), From d44a810f652ce24b6a02c401fc1ea6973c349c72 Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Sat, 10 Jan 2026 16:55:20 -0800 Subject: [PATCH 12/26] Avoid buffering entire request body in OSM proxy --- api/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/main.py b/api/main.py index 56127df..c07d22a 100644 --- a/api/main.py +++ b/api/main.py @@ -155,7 +155,7 @@ async def catch_all( (bytes("Host", "utf-8"), bytes(client.base_url.host, "utf-8")) ) rp_req = client.build_request( - request.method, url, headers=new_headers, content=await request.body() + request.method, url, headers=new_headers, content=request.stream() ) rp_resp = await client.send(rp_req, stream=True) From 07afd9ade8e582f3790beccaa2f7ca4212155ad9 Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Mon, 12 Jan 2026 12:10:31 -0800 Subject: [PATCH 13/26] Fix broken chunked encoding in OSM proxy by omitting HBHHs --- api/main.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/api/main.py b/api/main.py index c07d22a..1b3dd18 100644 --- a/api/main.py +++ b/api/main.py @@ -87,6 +87,20 @@ def get_workspace_repository( # h/t: https://stackoverflow.com/questions/70610266/proxy-an-external-website-using-python-fast-api-not-supporting-query-params # +# According to HTTP/1.1, a proxy must not forward these "hop-by-hop" headers: +HOP_BY_HOP_HEADERS = frozenset( + [ + "connection", + "keep-alive", + "proxy-authenticate", + "proxy-authorization", + "te", + "trailers", + "transfer-encoding", + "upgrade", + ] +) + # Define paths that do not require X-Workspace header AUTH_WHITELIST_PATHS = [ r"^/api/0\.6/user/.*$", # used during authentication @@ -151,9 +165,7 @@ async def catch_all( ) ) - new_headers.append( - (bytes("Host", "utf-8"), bytes(client.base_url.host, "utf-8")) - ) + new_headers.append((bytes("Host", "utf-8"), bytes(client.base_url.host, "utf-8"))) rp_req = client.build_request( request.method, url, headers=new_headers, content=request.stream() ) @@ -167,9 +179,13 @@ async def catch_all( sentry_sdk.capture_message(msg) logger.warning(msg) + forwarded_headers = { + k: v for k, v in rp_resp.headers.items() if k.lower() not in HOP_BY_HOP_HEADERS + } + return StreamingResponse( rp_resp.aiter_raw(), status_code=rp_resp.status_code, - headers=rp_resp.headers, + headers=forwarded_headers, background=BackgroundTask(rp_resp.aclose), ) From e9b705ed78b3f79ca1053af58c25f4118dee8519 Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Fri, 16 Jan 2026 11:15:01 -0800 Subject: [PATCH 14/26] Increase OSM proxy timeout for large imports --- api/main.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/api/main.py b/api/main.py index 1b3dd18..cd3be2a 100644 --- a/api/main.py +++ b/api/main.py @@ -44,7 +44,11 @@ async def lifespan(_app: FastAPI): # Run before app bootstrap: global _osm_client - _osm_client = httpx.AsyncClient(base_url=settings.WS_OSM_HOST) + _osm_client = httpx.AsyncClient( + base_url=settings.WS_OSM_HOST, + # 2 hour timeout for long-running OSM imports: + timeout=httpx.Timeout(connect=10, read=7200, write=7200, pool=10), + ) yield # App runs From 20fa5216d79926cb37ee2d16e1b91e8b11d35647 Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Sun, 25 Jan 2026 14:49:52 -0800 Subject: [PATCH 15/26] Refuse TRACE proxy requests to avoid cross-site tracing attacks --- api/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/main.py b/api/main.py index cd3be2a..c5b196b 100644 --- a/api/main.py +++ b/api/main.py @@ -114,7 +114,7 @@ def get_workspace_repository( @app.api_route( "/{full_path:path}", - methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "PATCH", "TRACE"], + methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "PATCH"], ) async def catch_all( request: Request, From a38e82e99b3ffd385606d8dcd83edb14d57a1fa8 Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Sun, 25 Jan 2026 15:11:23 -0800 Subject: [PATCH 16/26] Avoid compiling auth regex for every request --- api/main.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/api/main.py b/api/main.py index c5b196b..69ea5ce 100644 --- a/api/main.py +++ b/api/main.py @@ -106,9 +106,12 @@ def get_workspace_repository( ) # Define paths that do not require X-Workspace header -AUTH_WHITELIST_PATHS = [ - r"^/api/0\.6/user/.*$", # used during authentication - r"^/api/0\.6/workspaces/[0-9]+/bbox\.json$", # used to get workspace bbox without workspace header, to be removed +AUTH_WHITELIST_PATTERNS = [ + re.compile(p) + for p in [ + r"^/api/0\.6/user/.*$", # used during authentication + r"^/api/0\.6/workspaces/[0-9]+/bbox\.json$", # used to get workspace bbox without workspace header, to be removed + ] ] @@ -143,9 +146,7 @@ async def catch_all( authorizedWorkspace = workspace_id else: - if not any( - re.fullmatch(pattern, request.url.path) for pattern in AUTH_WHITELIST_PATHS - ): + if not any(p.fullmatch(request.url.path) for p in AUTH_WHITELIST_PATTERNS): raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail="No X-Workspace header supplied", From bc8c182521b97027475d832af36515c2a8ed3fb5 Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Mon, 2 Feb 2026 16:08:53 -0800 Subject: [PATCH 17/26] Surface OSM proxy gateway issues instead of generic 500 --- api/main.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/api/main.py b/api/main.py index 69ea5ce..5632f84 100644 --- a/api/main.py +++ b/api/main.py @@ -174,7 +174,18 @@ async def catch_all( rp_req = client.build_request( request.method, url, headers=new_headers, content=request.stream() ) - rp_resp = await client.send(rp_req, stream=True) + try: + rp_resp = await client.send(rp_req, stream=True) + except httpx.TimeoutException: + raise HTTPException( + status_code=status.HTTP_504_GATEWAY_TIMEOUT, + detail="Upstream OSM service timed out", + ) + except httpx.ConnectError: + raise HTTPException( + status_code=status.HTTP_502_BAD_GATEWAY, + detail="Could not connect to upstream OSM service", + ) if rp_resp.status_code >= 400 and rp_resp.status_code < 600: msg = ( From 508bd3ea61b33b5aad2f282321e2f12ea2cdae49 Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Tue, 17 Feb 2026 11:10:59 -0800 Subject: [PATCH 18/26] Fix OSM proxy for no-auth on capabilities.json --- api/main.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/api/main.py b/api/main.py index 5632f84..c169ad8 100644 --- a/api/main.py +++ b/api/main.py @@ -115,6 +115,38 @@ def get_workspace_repository( ] +@app.get("/api/capabilities.json") +async def capabilities(): + """Proxy OSM capabilities manifest without requiring authentication.""" + + url = httpx.URL(path="/api/capabilities.json") + rp_req = _osm_client.build_request("GET", url) + + try: + rp_resp = await _osm_client.send(rp_req, stream=True) + except httpx.TimeoutException: + raise HTTPException( + status_code=status.HTTP_504_GATEWAY_TIMEOUT, + detail="Upstream OSM service timed out", + ) + except httpx.ConnectError: + raise HTTPException( + status_code=status.HTTP_502_BAD_GATEWAY, + detail="Could not connect to upstream OSM service", + ) + + forwarded_headers = { + k: v for k, v in rp_resp.headers.items() if k.lower() not in HOP_BY_HOP_HEADERS + } + + return StreamingResponse( + rp_resp.aiter_raw(), + status_code=rp_resp.status_code, + headers=forwarded_headers, + background=BackgroundTask(rp_resp.aclose), + ) + + @app.api_route( "/{full_path:path}", methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "PATCH"], From b92d706e37c68f3d6662f37489d843bb72d9098e Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Wed, 18 Feb 2026 09:49:17 -0800 Subject: [PATCH 19/26] Fix OSM proxy response format by forwarding all headers --- api/main.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/api/main.py b/api/main.py index c169ad8..6567d70 100644 --- a/api/main.py +++ b/api/main.py @@ -189,20 +189,14 @@ async def catch_all( ) client = _osm_client - new_headers = list() - new_headers.append( - (bytes("Authorization", "utf-8"), request.headers.get("Authorization")) - ) - if authorizedWorkspace is not None: - new_headers.append( - ( - bytes("X-Workspace", "utf-8"), - bytes(str(authorizedWorkspace), "utf-8"), - ) - ) + # Forward all request headers except the hop-by-hops: + new_headers = [ + (k.encode(), v.encode()) + for k, v in request.headers.items() + if k.lower() not in HOP_BY_HOP_HEADERS + ] - new_headers.append((bytes("Host", "utf-8"), bytes(client.base_url.host, "utf-8"))) rp_req = client.build_request( request.method, url, headers=new_headers, content=request.stream() ) From 57716c692405e6de22495d84d067720c0cc237f1 Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Sun, 8 Feb 2026 15:28:36 -0800 Subject: [PATCH 20/26] Add CORS middleware for OSM proxy --- api/core/config.py | 8 ++++++++ api/main.py | 10 ++++++++++ 2 files changed, 18 insertions(+) diff --git a/api/core/config.py b/api/core/config.py index a861236..2163857 100644 --- a/api/core/config.py +++ b/api/core/config.py @@ -1,9 +1,17 @@ from pydantic_settings import BaseSettings, SettingsConfigDict + + class Settings(BaseSettings): """Application settings.""" PROJECT_NAME: str = "Workspaces API" + # JSON array of allowed CORS origins. For example: + # + # ["https://workspaces.example.com", "https://leaderboard.example.com"] + # + CORS_ORIGINS: list[str] = [] + TASK_DATABASE_URL: str = "postgresql+asyncpg://user:pass@localhost:5432/tasking_manager" OSM_DATABASE_URL: str = "postgresql+asyncpg://user:pass@localhost:5432/tasking_manager" diff --git a/api/main.py b/api/main.py index 6567d70..cfae2d4 100644 --- a/api/main.py +++ b/api/main.py @@ -5,6 +5,7 @@ import httpx import sentry_sdk from fastapi import Depends, FastAPI, HTTPException, Request, status +from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import RedirectResponse, StreamingResponse from sqlmodel.ext.asyncio.session import AsyncSession from starlette.background import BackgroundTask @@ -64,6 +65,15 @@ async def lifespan(_app: FastAPI): lifespan=lifespan, ) +app.add_middleware( + CORSMiddleware, + allow_origins=settings.CORS_ORIGINS, + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], + max_age=100, +) + # Include routers app.include_router(teams_router, prefix="/api/v1") app.include_router(workspaces_router, prefix="/api/v1") From 6227337b18922bb888211776f2226d0e7235d752 Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Thu, 5 Mar 2026 11:02:16 -0800 Subject: [PATCH 21/26] Fix potential resource exhaustion in TDEI HTTP client --- api/core/security.py | 64 ++++++++++++++++++++++++++++---------------- api/main.py | 10 ++++++- 2 files changed, 50 insertions(+), 24 deletions(-) diff --git a/api/core/security.py b/api/core/security.py index 4c36ce6..3fe3bb8 100644 --- a/api/core/security.py +++ b/api/core/security.py @@ -22,6 +22,25 @@ maxsize=1000, ttl=60 * 60 ) +# Shared HTTP client for TDEI backend calls. Initialized by main.py lifespan. +_tdei_client: httpx.AsyncClient | None = None + + +def init_tdei_client() -> None: + global _tdei_client + _tdei_client = httpx.AsyncClient( + base_url=settings.TDEI_BACKEND_URL, + timeout=httpx.Timeout(connect=10, read=30, write=30, pool=10), + ) + + +async def close_tdei_client() -> None: + global _tdei_client + if _tdei_client is not None: + await _tdei_client.aclose() + _tdei_client = None + + security = HTTPBearer() @@ -119,6 +138,7 @@ def get_task_db_session( ) -> AsyncSession: return session + async def validate_token( credentials: HTTPAuthorizationCredentials = Depends(security), osm_db_session: AsyncSession = Depends(get_osm_db_session), @@ -185,32 +205,30 @@ async def _validate_token_uncached( r.user_name = payload.get("preferred_username", "unknown") # get user's project groups and roles from TDEI - pg_base_url = f"{settings.TDEI_BACKEND_URL}/project-group-roles/{user_id}" pgs = [] - async with httpx.AsyncClient() as http_client: - response = await http_client.get( - pg_base_url, - headers=headers, - params={"page_no": 1, "page_size": 1000}, - ) + response = await _tdei_client.get( + f"project-group-roles/{user_id}", + headers=headers, + params={"page_no": 1, "page_size": 1000}, + ) + + # token is not valid or server unavailable + if response.status_code != 200: + raise credentials_exception + + try: + pg_data = response.json() + except Exception: + raise credentials_exception - # token is not valid or server unavailable - if response.status_code != 200: - raise credentials_exception - - try: - pg_data = response.json() - except Exception: - raise credentials_exception - - for i in pg_data: - pgs.append( - UserInfoPGMembership( - project_group_id=i["tdei_project_group_id"], - project_group_name=i["project_group_name"], - tdeiRoles=i["roles"], - ) + for i in pg_data: + pgs.append( + UserInfoPGMembership( + project_group_id=i["tdei_project_group_id"], + project_group_name=i["project_group_name"], + tdeiRoles=i["roles"], ) + ) r.projectGroups = pgs diff --git a/api/main.py b/api/main.py index cfae2d4..ef859f5 100644 --- a/api/main.py +++ b/api/main.py @@ -14,7 +14,12 @@ from api.core.config import settings from api.core.database import get_task_session from api.core.logging import get_logger, setup_logging -from api.core.security import UserInfo, validate_token +from api.core.security import ( + UserInfo, + close_tdei_client, + init_tdei_client, + validate_token, +) from api.src.teams.routes import router as teams_router from api.src.workspaces.repository import WorkspaceRepository from api.src.workspaces.routes import router as workspaces_router @@ -50,12 +55,14 @@ async def lifespan(_app: FastAPI): # 2 hour timeout for long-running OSM imports: timeout=httpx.Timeout(connect=10, read=7200, write=7200, pool=10), ) + init_tdei_client() yield # App runs # Run after app cleanup: await _osm_client.aclose() _osm_client = None + await close_tdei_client() app = FastAPI( @@ -78,6 +85,7 @@ async def lifespan(_app: FastAPI): app.include_router(teams_router, prefix="/api/v1") app.include_router(workspaces_router, prefix="/api/v1") + @app.get("/health") async def health_check(): """Health check endpoint. Used for Docker.""" From 34c0423fbd57beda97def936b21e539affac1b83 Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Thu, 5 Mar 2026 11:36:50 -0800 Subject: [PATCH 22/26] Harden JWKS URL construction to avoid malformed endpoints --- api/core/jwt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/core/jwt.py b/api/core/jwt.py index 1f2c268..46e04c8 100644 --- a/api/core/jwt.py +++ b/api/core/jwt.py @@ -11,8 +11,8 @@ def _get_jwks_client() -> jwt.PyJWKClient: if _jwks_client is None: _jwks_client = jwt.PyJWKClient( - f"{settings.TDEI_OIDC_URL}realms/{settings.TDEI_OIDC_REALM}" - f"/protocol/openid-connect/certs" + f"{settings.TDEI_OIDC_URL.rstrip("/")}/realms/" + f"{settings.TDEI_OIDC_REALM}/protocol/openid-connect/certs" ) return _jwks_client From 15c7436d1fa0ae456059732645ac909047e7740e Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Thu, 5 Mar 2026 12:12:10 -0800 Subject: [PATCH 23/26] Fix OSM proxy upstream logging with forwarding headers --- api/main.py | 42 +++++++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/api/main.py b/api/main.py index ef859f5..ba2e33c 100644 --- a/api/main.py +++ b/api/main.py @@ -123,6 +123,16 @@ def get_workspace_repository( ] ) +# Do not forward spoofed reverse-proxy informational headers: +STRIP_REQUEST_HEADERS = HOP_BY_HOP_HEADERS | { + "host", + "x-forwarded-for", + "x-forwarded-host", + "x-forwarded-proto", + "x-real-ip", + "forwarded", +} + # Define paths that do not require X-Workspace header AUTH_WHITELIST_PATTERNS = [ re.compile(p) @@ -134,11 +144,24 @@ def get_workspace_repository( @app.get("/api/capabilities.json") -async def capabilities(): +async def capabilities(request: Request): """Proxy OSM capabilities manifest without requiring authentication.""" + client_host = request.client.host if request.client else "unknown" + req_headers = [ + (k.encode(), v.encode()) + for k, v in request.headers.items() + if k.lower() not in STRIP_REQUEST_HEADERS + ] + [ + (b"Host", _osm_client.base_url.host.encode()), + (b"X-Real-IP", client_host.encode()), + (b"X-Forwarded-For", client_host.encode()), + (b"X-Forwarded-Host", (request.url.hostname or "").encode()), + (b"X-Forwarded-Proto", request.url.scheme.encode()), + ] + url = httpx.URL(path="/api/capabilities.json") - rp_req = _osm_client.build_request("GET", url) + rp_req = _osm_client.build_request("GET", url, headers=req_headers) try: rp_resp = await _osm_client.send(rp_req, stream=True) @@ -207,16 +230,21 @@ async def catch_all( ) client = _osm_client - - # Forward all request headers except the hop-by-hops: - new_headers = [ + client_host = request.client.host if request.client else "unknown" + req_headers = [ (k.encode(), v.encode()) for k, v in request.headers.items() - if k.lower() not in HOP_BY_HOP_HEADERS + if k.lower() not in STRIP_REQUEST_HEADERS + ] + [ + (b"Host", client.base_url.host.encode()), + (b"X-Real-IP", client_host.encode()), + (b"X-Forwarded-For", client_host.encode()), + (b"X-Forwarded-Host", (request.url.hostname or "").encode()), + (b"X-Forwarded-Proto", request.url.scheme.encode()), ] rp_req = client.build_request( - request.method, url, headers=new_headers, content=request.stream() + request.method, url, headers=req_headers, content=request.stream() ) try: rp_resp = await client.send(rp_req, stream=True) From 7011b63e2ca3cc85cd95d8a8e5ff31e86661528e Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Fri, 6 Mar 2026 09:59:39 -0800 Subject: [PATCH 24/26] Remove now unused authorizedWorkspace variable --- api/main.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/api/main.py b/api/main.py index ba2e33c..396b530 100644 --- a/api/main.py +++ b/api/main.py @@ -200,7 +200,6 @@ async def catch_all( """ Catch-all route to proxy requests to the OSM service. """ - authorizedWorkspace = None if request.headers.get("X-Workspace") is not None: try: @@ -216,8 +215,6 @@ async def catch_all( status_code=status.HTTP_403_FORBIDDEN, detail="You do not have access to this workspace", ) - - authorizedWorkspace = workspace_id else: if not any(p.fullmatch(request.url.path) for p in AUTH_WHITELIST_PATTERNS): raise HTTPException( From ca6fe12e167fd55e4c0d4942c993e45231a7b6f0 Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Sat, 7 Mar 2026 00:02:07 -0800 Subject: [PATCH 25/26] Handle TDEI transport failures explicitly --- api/core/security.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/api/core/security.py b/api/core/security.py index 3fe3bb8..58c1288 100644 --- a/api/core/security.py +++ b/api/core/security.py @@ -206,11 +206,18 @@ async def _validate_token_uncached( # get user's project groups and roles from TDEI pgs = [] - response = await _tdei_client.get( - f"project-group-roles/{user_id}", - headers=headers, - params={"page_no": 1, "page_size": 1000}, - ) + + try: + response = await _tdei_client.get( + f"project-group-roles/{user_id}", + headers=headers, + params={"page_no": 1, "page_size": 1000}, + ) + except httpx.RequestError: + raise HTTPException( + status_code=status.HTTP_502_BAD_GATEWAY, + detail="Could not reach TDEI backend", + ) from None # token is not valid or server unavailable if response.status_code != 200: From 3d0c0568d4434e3e562f24ab2f37d8adbc8fd08d Mon Sep 17 00:00:00 2001 From: Cy Rossignol Date: Sat, 7 Mar 2026 13:55:36 -0800 Subject: [PATCH 26/26] Guard UUID parsing to avoid 500s on malformed tokens --- api/core/security.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/api/core/security.py b/api/core/security.py index 58c1288..3e60de1 100644 --- a/api/core/security.py +++ b/api/core/security.py @@ -200,8 +200,13 @@ async def _validate_token_uncached( } r = UserInfo() + + try: + r.user_uuid = UUID(user_id) + except ValueError: + raise credentials_exception from None + r.credentials = token - r.user_uuid = UUID(user_id) r.user_name = payload.get("preferred_username", "unknown") # get user's project groups and roles from TDEI