diff --git a/CHANGELOG.md b/CHANGELOG.md index 33bb75a4..c52c2ed7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added -- Added catalogs route support to enable federated hierarchical catalog browsing and navigation in the STAC API. [#547](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/547) +- Added optional `/catalogs` route support to enable federated hierarchical catalog browsing and navigation. [#547](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/547) +- Added DELETE `/catalogs/{catalog_id}/collections/{collection_id}` endpoint to support removing collections from catalogs. When a collection belongs to multiple catalogs, the endpoint removes only the specified catalog from the collection's `parent_ids`. When a collection belongs to only one catalog, the collection is deleted entirely. [#554](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/554) +- Added `parent_ids` internal field to collections to support multi-catalog hierarchies. Collections can now belong to multiple catalogs, with parent catalog IDs stored in this field for efficient querying and management. 
[#554](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/554) ### Changed diff --git a/README.md b/README.md index 106b8d2d..53998be3 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ [![GitHub forks](https://img.shields.io/github/forks/stac-utils/stac-fastapi-elasticsearch-opensearch.svg?color=blue)](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/network/members) [![PyPI version](https://img.shields.io/pypi/v/stac-fastapi-elasticsearch.svg?color=blue)](https://pypi.org/project/stac-fastapi-elasticsearch/) [![STAC](https://img.shields.io/badge/STAC-1.1.0-blue.svg)](https://github.com/radiantearth/stac-spec/tree/v1.1.0) - [![stac-fastapi](https://img.shields.io/badge/stac--fastapi-6.0.0-blue.svg)](https://github.com/stac-utils/stac-fastapi) + [![stac-fastapi](https://img.shields.io/badge/stac--fastapi-6.1.1-blue.svg)](https://github.com/stac-utils/stac-fastapi) ## Sponsors & Supporters @@ -28,6 +28,7 @@ The following organizations have contributed time and/or funding to support the ## Latest News +- **12/09/2025:** **Feature Merge: Federated Catalogs.** The [`Catalogs Endpoint`](https://github.com/Healy-Hyperspatial/stac-api-extensions-catalogs-endpoint) extension is now in main! This enables a registry of catalogs and supports **poly-hierarchy** (collections belonging to multiple catalogs simultaneously). Enable it via `ENABLE_CATALOGS_EXTENSION`. _Coming next: Support for nested sub-catalogs._ - **11/07/2025:** 🌍 The SFEOS STAC Viewer is now available at: https://healy-hyperspatial.github.io/sfeos-web. Use this site to examine your data and test your STAC API! - **10/24/2025:** Added `previous_token` pagination using Redis for efficient navigation. This feature allows users to navigate backwards through large result sets by storing pagination state in Redis. To use this feature, ensure Redis is configured (see [Redis for navigation](#redis-for-navigation)) and set `REDIS_ENABLE=true` in your environment. 
- **10/23/2025:** The `EXCLUDED_FROM_QUERYABLES` environment variable was added to exclude fields from the `queryables` endpoint. See [docs](#excluding-fields-from-queryables). @@ -239,6 +240,7 @@ This implementation follows the [STAC API Catalogs Extension](https://github.com ### Features - **Hierarchical Navigation**: Browse catalogs and sub-catalogs in a parent-child relationship structure +- **Multi-Catalog Collections**: Collections can belong to multiple catalogs simultaneously, enabling flexible organizational hierarchies - **Collection Discovery**: Access collections within specific catalog contexts - **STAC API Compliance**: Follows STAC specification for catalog objects and linking - **Flexible Querying**: Support for standard STAC API query parameters when browsing collections within catalogs @@ -252,6 +254,7 @@ This implementation follows the [STAC API Catalogs Extension](https://github.com - **GET `/catalogs/{catalog_id}/collections`**: Retrieve collections within a specific catalog - **POST `/catalogs/{catalog_id}/collections`**: Create a new collection within a specific catalog - **GET `/catalogs/{catalog_id}/collections/{collection_id}`**: Retrieve a specific collection within a catalog +- **DELETE `/catalogs/{catalog_id}/collections/{collection_id}`**: Delete a collection from a catalog (removes this catalog from the collection's `parent_ids` if the collection has other parent catalogs; deletes the collection entirely if this catalog is its only parent) - **GET `/catalogs/{catalog_id}/collections/{collection_id}/items`**: Retrieve items within a collection in a catalog context - **GET `/catalogs/{catalog_id}/collections/{collection_id}/items/{item_id}`**: Retrieve a specific item within a catalog context @@ -292,6 +295,11 @@ curl "http://localhost:8081/catalogs/earth-observation/collections/sentinel-2/it # Get specific item within a catalog curl "http://localhost:8081/catalogs/earth-observation/collections/sentinel-2/items/S2A_20231015_123456" +# Delete a collection from a catalog +# If the collection has multiple parent 
catalogs, only removes this catalog from parent_ids +# If this is the only parent catalog, deletes the collection entirely +curl -X DELETE "http://localhost:8081/catalogs/earth-observation/collections/sentinel-2" + # Delete a catalog (collections remain intact) curl -X DELETE "http://localhost:8081/catalogs/earth-observation" diff --git a/stac_fastapi/core/stac_fastapi/core/extensions/catalogs.py b/stac_fastapi/core/stac_fastapi/core/extensions/catalogs.py index 872fc54d..0e0c13e6 100644 --- a/stac_fastapi/core/stac_fastapi/core/extensions/catalogs.py +++ b/stac_fastapi/core/stac_fastapi/core/extensions/catalogs.py @@ -2,7 +2,7 @@ import logging from typing import List, Optional, Type -from urllib.parse import urlencode, urlparse +from urllib.parse import urlencode import attr from fastapi import APIRouter, FastAPI, HTTPException, Query, Request @@ -140,6 +140,18 @@ def register(self, app: FastAPI, settings=None) -> None: tags=["Catalogs"], ) + # Add endpoint for deleting a collection from a catalog + self.router.add_api_route( + path="/catalogs/{catalog_id}/collections/{collection_id}", + endpoint=self.delete_catalog_collection, + methods=["DELETE"], + response_class=self.response_class, + status_code=204, + summary="Delete Catalog Collection", + description="Delete a collection from a catalog. If the collection has multiple parent catalogs, only removes this catalog from parent_ids. 
If this is the only parent, deletes the collection entirely.", + tags=["Catalogs"], + ) + # Add endpoint for getting items in a collection within a catalog self.router.add_api_route( path="/catalogs/{catalog_id}/collections/{collection_id}/items", @@ -348,52 +360,44 @@ async def delete_catalog( f"Failed to delete collection {coll_id}: {e}" ) else: - # Remove catalog link from each collection (orphan them) + # Remove catalog from each collection's parent_ids and links (orphan them) for coll_id in collection_ids: try: - collection = await self.client.get_collection( - coll_id, request=request + # Get the collection from database to access parent_ids + collection_db = await self.client.database.find_collection( + coll_id ) - # Remove the catalog link from the collection - if hasattr(collection, "links"): - collection.links = [ - link - for link in collection.links - if not ( - getattr(link, "rel", None) == "catalog" - and catalog_id in getattr(link, "href", "") - ) - ] - elif isinstance(collection, dict): - collection["links"] = [ - link - for link in collection.get("links", []) - if not ( - link.get("rel") == "catalog" - and catalog_id in link.get("href", "") - ) - ] - # Update the collection in the database - collection_dict = ( - collection.model_dump(mode="json") - if hasattr(collection, "model_dump") - else collection - ) - collection_db = ( - self.client.database.collection_serializer.stac_to_db( - collection_dict, request + # Remove catalog_id from parent_ids + parent_ids = collection_db.get("parent_ids", []) + if catalog_id in parent_ids: + parent_ids.remove(catalog_id) + collection_db["parent_ids"] = parent_ids + + # Also remove the catalog link from the collection's links + if "links" in collection_db: + collection_db["links"] = [ + link + for link in collection_db.get("links", []) + if not ( + link.get("rel") == "catalog" + and catalog_id in link.get("href", "") + ) + ] + + # Update the collection in the database + await 
self.client.database.update_collection( + collection_id=coll_id, + collection=collection_db, + refresh=True, + ) + logger.info( + f"Removed catalog {catalog_id} from collection {coll_id} parent_ids and links" + ) + else: + logger.debug( + f"Catalog {catalog_id} not in parent_ids for collection {coll_id}" ) - ) - await self.client.database.client.index( - index=COLLECTIONS_INDEX, - id=coll_id, - body=collection_db.model_dump() - if hasattr(collection_db, "model_dump") - else collection_db, - refresh=True, - ) - logger.info(f"Removed catalog link from collection {coll_id}") except Exception as e: error_msg = str(e) if "not found" in error_msg.lower(): @@ -402,7 +406,7 @@ async def delete_catalog( ) else: logger.warning( - f"Failed to remove catalog link from collection {coll_id}: {e}" + f"Failed to remove catalog {catalog_id} from collection {coll_id}: {e}" ) # Delete the catalog @@ -434,91 +438,30 @@ async def get_catalog_collections( Collections object containing collections linked from the catalog. """ try: - # Get the catalog from the database - db_catalog = await self.client.database.find_catalog(catalog_id) + # Verify the catalog exists + await self.client.database.find_catalog(catalog_id) - # Convert to STAC format to access links - catalog = self.client.catalog_serializer.db_to_stac(db_catalog, request) + # Query collections by parent_ids field using Elasticsearch directly + # This uses the parent_ids field in the collection mapping to find all + # collections that have this catalog as a parent + query_body = {"query": {"term": {"parent_ids": catalog_id}}} - # Extract collection IDs from catalog links - # - # FRAGILE IMPLEMENTATION WARNING: - # This approach relies on parsing URL patterns to determine catalog-collection relationships. 
- # This is fragile and will break if: - # - URLs don't follow the expected /collections/{id} pattern - # - Base URLs contain /collections/ in other segments - # - Relative links are used instead of absolute URLs - # - Links have trailing slashes or query parameters - # - # TODO: In a future version, this should be replaced with a proper database relationship - # (e.g., parent_catalog_id field on Collection documents) - # - collection_ids = [] - if hasattr(catalog, "links") and catalog.links: - base_url = str(request.base_url).rstrip("/") - base_path = urlparse(base_url).path.rstrip("/") + # Execute the search to get collection IDs + try: + search_result = await self.client.database.client.search( + index=COLLECTIONS_INDEX, body=query_body + ) + except Exception as e: + logger.error( + f"Error searching for collections with parent {catalog_id}: {e}" + ) + search_result = {"hits": {"hits": []}} - for link in catalog.links: - rel = ( - link.get("rel") - if hasattr(link, "get") - else getattr(link, "rel", None) - ) - if rel in ["child", "item"]: - # Extract collection ID from href using proper URL parsing - href = ( - link.get("href", "") - if hasattr(link, "get") - else getattr(link, "href", "") - ) - if href: - try: - parsed_url = urlparse(href) - path = parsed_url.path.rstrip("/") - - # Resolve relative URLs against base URL - if not href.startswith(("http://", "https://")): - full_path = ( - f"{base_path}{path}" if path else base_path - ) - else: - # For absolute URLs, ensure they belong to our base domain - if parsed_url.netloc != urlparse(base_url).netloc: - continue - full_path = path - - # Look for collections endpoint at the end of the path - # This prevents false positives when /collections/ appears in base URL - collections_pattern = "/collections/" - if collections_pattern in full_path: - # Find the LAST occurrence of /collections/ to avoid base URL conflicts - last_collections_pos = full_path.rfind( - collections_pattern - ) - if last_collections_pos != 
-1: - # Extract everything after the last /collections/ - after_collections = full_path[ - last_collections_pos - + len(collections_pattern) : - ] - - # Handle cases where there might be additional path segments - # We only want the immediate collection ID - collection_id = ( - after_collections.split("/")[0] - if after_collections - else None - ) - - if ( - collection_id - and collection_id not in collection_ids - ): - collection_ids.append(collection_id) - - except Exception: - # If URL parsing fails, skip this link - continue + # Extract collection IDs from search results + collection_ids = [] + hits = search_result.get("hits", {}).get("hits", []) + for hit in hits: + collection_ids.append(hit.get("_id")) # Fetch the collections collections = [] @@ -591,56 +534,115 @@ async def create_catalog_collection( # Verify the catalog exists await self.client.database.find_catalog(catalog_id) - # Create the collection using the same pattern as TransactionsClient.create_collection - # This handles the Collection model from stac_pydantic correctly - collection_dict = collection.model_dump(mode="json") + # Check if the collection already exists in the database + try: + existing_collection_db = await self.client.database.find_collection( + collection.id + ) + # Collection exists, just add the parent ID if not already present + existing_collection_dict = existing_collection_db + + # Ensure parent_ids field exists + if "parent_ids" not in existing_collection_dict: + existing_collection_dict["parent_ids"] = [] + + # Add catalog_id to parent_ids if not already present + if catalog_id not in existing_collection_dict["parent_ids"]: + existing_collection_dict["parent_ids"].append(catalog_id) + + # Update the collection in the database + await self.client.database.update_collection( + collection_id=collection.id, + collection=existing_collection_dict, + refresh=True, + ) + + # Convert back to STAC format for the response + updated_collection = ( + 
self.client.database.collection_serializer.db_to_stac( + existing_collection_dict, + request, + extensions=[ + type(ext).__name__ + for ext in self.client.database.extensions + ], + ) + ) - # Add a link from the collection back to its parent catalog BEFORE saving to database - base_url = str(request.base_url) - catalog_link = { - "rel": "catalog", - "type": "application/json", - "href": f"{base_url}catalogs/{catalog_id}", - "title": catalog_id, - } + # Update the catalog to include a link to the collection + await self._add_collection_to_catalog_links( + catalog_id, collection.id, request + ) - # Add the catalog link to the collection dict - if "links" not in collection_dict: - collection_dict["links"] = [] + return updated_collection - # Check if the catalog link already exists - catalog_href = catalog_link["href"] - link_exists = any( - link.get("href") == catalog_href and link.get("rel") == "catalog" - for link in collection_dict.get("links", []) - ) + except Exception as e: + # Only proceed to create if collection truly doesn't exist + error_msg = str(e) + if "not found" not in error_msg.lower(): + # Re-raise if it's a different error + raise + # Collection doesn't exist, create it + # Create the collection using the same pattern as TransactionsClient.create_collection + # This handles the Collection model from stac_pydantic correctly + collection_dict = collection.model_dump(mode="json") + + # Add the catalog ID to the parent_ids field + if "parent_ids" not in collection_dict: + collection_dict["parent_ids"] = [] + + if catalog_id not in collection_dict["parent_ids"]: + collection_dict["parent_ids"].append(catalog_id) + + # Add a link from the collection back to its parent catalog BEFORE saving to database + base_url = str(request.base_url) + catalog_link = { + "rel": "catalog", + "type": "application/json", + "href": f"{base_url}catalogs/{catalog_id}", + "title": catalog_id, + } + + # Add the catalog link to the collection dict + if "links" not in 
collection_dict: + collection_dict["links"] = [] + + # Check if the catalog link already exists + catalog_href = catalog_link["href"] + link_exists = any( + link.get("href") == catalog_href and link.get("rel") == "catalog" + for link in collection_dict.get("links", []) + ) - if not link_exists: - collection_dict["links"].append(catalog_link) + if not link_exists: + collection_dict["links"].append(catalog_link) - # Now convert to database format (this will process the links) - collection_db = self.client.database.collection_serializer.stac_to_db( - collection_dict, request - ) - await self.client.database.create_collection( - collection=collection_db, refresh=True - ) + # Now convert to database format (this will process the links) + collection_db = self.client.database.collection_serializer.stac_to_db( + collection_dict, request + ) + await self.client.database.create_collection( + collection=collection_db, refresh=True + ) - # Convert back to STAC format for the response - created_collection = self.client.database.collection_serializer.db_to_stac( - collection_db, - request, - extensions=[ - type(ext).__name__ for ext in self.client.database.extensions - ], - ) + # Convert back to STAC format for the response + created_collection = ( + self.client.database.collection_serializer.db_to_stac( + collection_db, + request, + extensions=[ + type(ext).__name__ + for ext in self.client.database.extensions + ], + ) + ) - # Update the catalog to include a link to the new collection - await self._add_collection_to_catalog_links( - catalog_id, collection.id, request - ) + # Update the catalog to include a link to the new collection + await self._add_collection_to_catalog_links( + catalog_id, collection.id, request + ) - return created_collection + return created_collection except HTTPException as e: # Re-raise HTTP exceptions (e.g., catalog not found, collection validation errors) @@ -765,7 +767,25 @@ async def get_catalog_collection( status_code=404, detail=f"Catalog 
{catalog_id} not found" ) - # Delegate to the core client's get_collection method + # Verify the collection exists and has the catalog as a parent + try: + collection_db = await self.client.database.find_collection(collection_id) + + # Check if the catalog_id is in the collection's parent_ids + parent_ids = collection_db.get("parent_ids", []) + if catalog_id not in parent_ids: + raise HTTPException( + status_code=404, + detail=f"Collection {collection_id} does not belong to catalog {catalog_id}", + ) + except HTTPException: + raise + except Exception: + raise HTTPException( + status_code=404, detail=f"Collection {collection_id} not found" + ) + + # Return the collection return await self.client.get_collection( collection_id=collection_id, request=request ) @@ -853,3 +873,159 @@ async def get_catalog_collection_item( return await self.client.get_item( item_id=item_id, collection_id=collection_id, request=request ) + + async def delete_catalog_collection( + self, catalog_id: str, collection_id: str, request: Request + ) -> None: + """Delete a collection from a catalog. + + If the collection has multiple parent catalogs, only removes this catalog + from the parent_ids. If this is the only parent catalog, deletes the + collection entirely. + + Args: + catalog_id: The ID of the catalog. + collection_id: The ID of the collection. + request: Request object. + + Raises: + HTTPException: If the catalog or collection is not found, or if the + collection does not belong to the catalog. 
+ """ + try: + # Verify the catalog exists + await self.client.database.find_catalog(catalog_id) + + # Get the collection + collection_db = await self.client.database.find_collection(collection_id) + + # Check if the catalog_id is in the collection's parent_ids + parent_ids = collection_db.get("parent_ids", []) + if catalog_id not in parent_ids: + raise HTTPException( + status_code=404, + detail=f"Collection {collection_id} does not belong to catalog {catalog_id}", + ) + + # If the collection has multiple parents, just remove this catalog from parent_ids + if len(parent_ids) > 1: + parent_ids.remove(catalog_id) + collection_db["parent_ids"] = parent_ids + + # Update the collection in the database + await self.client.database.update_collection( + collection_id=collection_id, collection=collection_db, refresh=True + ) + + logger.info( + f"Removed catalog {catalog_id} from collection {collection_id} parent_ids" + ) + else: + # If this is the only parent, delete the collection entirely + await self.client.database.delete_collection( + collection_id, refresh=True + ) + logger.info( + f"Deleted collection {collection_id} (only parent was catalog {catalog_id})" + ) + + # Remove the collection link from the catalog + await self._remove_collection_from_catalog_links( + catalog_id, collection_id, request + ) + + except HTTPException: + raise + except Exception as e: + logger.error( + f"Error deleting collection {collection_id} from catalog {catalog_id}: {e}", + exc_info=True, + ) + raise HTTPException( + status_code=500, + detail=f"Failed to delete collection from catalog: {str(e)}", + ) + + async def _remove_collection_from_catalog_links( + self, catalog_id: str, collection_id: str, request: Request + ) -> None: + """Remove a collection link from a catalog. + + This helper method updates a catalog's links to remove a reference + to a collection by reindexing the updated catalog document. + + Args: + catalog_id: The ID of the catalog to update. 
+ collection_id: The ID of the collection to unlink. + request: Request object for base URL construction. + """ + try: + # Get the current catalog + db_catalog = await self.client.database.find_catalog(catalog_id) + catalog = self.client.catalog_serializer.db_to_stac(db_catalog, request) + + # Get the catalog links + catalog_links = ( + catalog.get("links") + if isinstance(catalog, dict) + else getattr(catalog, "links", None) + ) + + if not catalog_links: + return + + # Find and remove the collection link + collection_href = ( + f"{str(request.base_url).rstrip('/')}/collections/{collection_id}" + ) + links_to_keep = [] + link_removed = False + + for link in catalog_links: + link_href = ( + link.get("href") + if hasattr(link, "get") + else getattr(link, "href", None) + ) + if link_href == collection_href and not link_removed: + # Skip this link (remove it) + link_removed = True + else: + links_to_keep.append(link) + + if link_removed: + # Update the catalog with the modified links + if isinstance(catalog, dict): + catalog["links"] = links_to_keep + else: + catalog.links = links_to_keep + + # Convert back to database format and update + updated_db_catalog = self.client.catalog_serializer.stac_to_db( + catalog, request + ) + updated_db_catalog_dict = ( + updated_db_catalog.model_dump() + if hasattr(updated_db_catalog, "model_dump") + else updated_db_catalog + ) + updated_db_catalog_dict["type"] = "Catalog" + + # Update the document + await self.client.database.client.index( + index=COLLECTIONS_INDEX, + id=catalog_id, + body=updated_db_catalog_dict, + refresh=True, + ) + + logger.info( + f"Removed collection {collection_id} link from catalog {catalog_id}" + ) + + except Exception as e: + logger.error( + f"Failed to remove collection link from catalog {catalog_id}: {e}", + exc_info=True, + ) + # Don't fail the entire operation if link removal fails diff --git a/stac_fastapi/core/stac_fastapi/core/serializers.py b/stac_fastapi/core/stac_fastapi/core/serializers.py index 
f935bff5..6cae58af 100644 --- a/stac_fastapi/core/stac_fastapi/core/serializers.py +++ b/stac_fastapi/core/stac_fastapi/core/serializers.py @@ -181,8 +181,9 @@ def db_to_stac( # Avoid modifying the input dict in-place ... doing so breaks some tests collection = deepcopy(collection) - # Remove internal bbox_shape field (not part of STAC spec) + # Remove internal fields (not part of STAC spec) collection.pop("bbox_shape", None) + collection.pop("parent_ids", None) # Set defaults collection_id = collection.get("id") diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py index 129194da..8cad42b5 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py @@ -160,6 +160,7 @@ class Geometry(Protocol): # noqa "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, "properties": { "id": {"type": "keyword"}, + "parent_ids": {"type": "keyword"}, "bbox_shape": {"type": "geo_shape"}, "extent.temporal.interval": { "type": "date", diff --git a/stac_fastapi/tests/api/test_api.py b/stac_fastapi/tests/api/test_api.py index 76830a06..fe10bfaa 100644 --- a/stac_fastapi/tests/api/test_api.py +++ b/stac_fastapi/tests/api/test_api.py @@ -57,6 +57,7 @@ "GET /catalogs/{catalog_id}/collections", "POST /catalogs/{catalog_id}/collections", "GET /catalogs/{catalog_id}/collections/{collection_id}", + "DELETE /catalogs/{catalog_id}/collections/{collection_id}", "GET /catalogs/{catalog_id}/collections/{collection_id}/items", "GET /catalogs/{catalog_id}/collections/{collection_id}/items/{item_id}", "", diff --git a/stac_fastapi/tests/extensions/test_catalogs.py b/stac_fastapi/tests/extensions/test_catalogs.py index 7c9960e7..62c46a71 100644 --- a/stac_fastapi/tests/extensions/test_catalogs.py +++ b/stac_fastapi/tests/extensions/test_catalogs.py @@ -67,18 +67,19 @@ async def 
test_get_nonexistent_catalog(catalogs_app_client): @pytest.mark.asyncio async def test_get_catalog_collections(catalogs_app_client, load_test_data, ctx): """Test getting collections linked from a catalog.""" - # First create a catalog with a link to the test collection + # First create a catalog test_catalog = load_test_data("test_catalog.json") test_catalog["id"] = f"test-catalog-{uuid.uuid4()}" - # Update the catalog links to point to the actual test collection - for link in test_catalog["links"]: - if link["rel"] == "child": - link["href"] = f"http://test-server/collections/{ctx.collection['id']}" - create_resp = await catalogs_app_client.post("/catalogs", json=test_catalog) assert create_resp.status_code == 201 + # Add the existing collection to the catalog + add_resp = await catalogs_app_client.post( + f"/catalogs/{test_catalog['id']}/collections", json=ctx.collection + ) + assert add_resp.status_code == 201 + # Now get collections from the catalog resp = await catalogs_app_client.get(f"/catalogs/{test_catalog['id']}/collections") assert resp.status_code == 200 @@ -136,6 +137,12 @@ async def test_get_catalog_collection(catalogs_app_client, load_test_data, ctx): create_resp = await catalogs_app_client.post("/catalogs", json=test_catalog) assert create_resp.status_code == 201 + # Add the existing collection to the catalog + add_resp = await catalogs_app_client.post( + f"/catalogs/{test_catalog['id']}/collections", json=ctx.collection + ) + assert add_resp.status_code == 201 + # Get a specific collection through the catalog route resp = await catalogs_app_client.get( f"/catalogs/{test_catalog['id']}/collections/{ctx.collection['id']}" @@ -596,3 +603,437 @@ async def test_delete_catalog_no_cascade(catalogs_app_client, load_test_data): assert ( catalog_link is None ), "Collection should not have catalog link after catalog deletion" + + +@pytest.mark.asyncio +async def test_delete_catalog_removes_parent_ids_from_collections( + catalogs_app_client, load_test_data +): 
+ """Test that deleting a catalog removes its ID from child collections' parent_ids.""" + # Create a catalog + test_catalog = load_test_data("test_catalog.json") + catalog_id = f"test-catalog-{uuid.uuid4()}" + test_catalog["id"] = catalog_id + test_catalog["links"] = [ + link for link in test_catalog.get("links", []) if link.get("rel") != "child" + ] + + catalog_resp = await catalogs_app_client.post("/catalogs", json=test_catalog) + assert catalog_resp.status_code == 201 + + # Create 3 collections in the catalog + collection_ids = [] + for i in range(3): + test_collection = load_test_data("test_collection.json") + collection_id = f"test-collection-{uuid.uuid4()}-{i}" + test_collection["id"] = collection_id + + create_resp = await catalogs_app_client.post( + f"/catalogs/{catalog_id}/collections", json=test_collection + ) + assert create_resp.status_code == 201 + collection_ids.append(collection_id) + + # Verify all collections have the catalog in their parent_ids + # (indirectly verified by checking they're accessible via the catalog endpoint) + get_collections_resp = await catalogs_app_client.get( + f"/catalogs/{catalog_id}/collections" + ) + assert get_collections_resp.status_code == 200 + collections_response = get_collections_resp.json() + returned_ids = [col["id"] for col in collections_response["collections"]] + for collection_id in collection_ids: + assert collection_id in returned_ids + + # Delete the catalog without cascade + delete_resp = await catalogs_app_client.delete(f"/catalogs/{catalog_id}") + assert delete_resp.status_code == 204 + + # Verify all collections still exist + for collection_id in collection_ids: + get_resp = await catalogs_app_client.get(f"/collections/{collection_id}") + assert get_resp.status_code == 200 + + # Verify collections are no longer accessible via the deleted catalog + # (This indirectly verifies parent_ids was updated) + for collection_id in collection_ids: + get_from_catalog_resp = await catalogs_app_client.get( + 
f"/catalogs/{catalog_id}/collections/{collection_id}" + ) + assert get_from_catalog_resp.status_code == 404
+
+
+async def _post_unique_catalog(
+    client, load_test_data, suffix="", drop_child_links=False
+):
+    """Create a uniquely named catalog via POST /catalogs and return its id.
+
+    Args:
+        client: Async test client used to issue the request.
+        load_test_data: Fixture callable that loads a JSON test document by name.
+        suffix: Text appended to the generated id (used to number catalogs
+            created in a loop).
+        drop_child_links: When true, remove every ``rel == "child"`` link from
+            the fixture document before posting it.
+
+    Returns:
+        The id of the created catalog. The POST is asserted to return 201.
+    """
+    catalog = load_test_data("test_catalog.json")
+    catalog_id = f"test-catalog-{uuid.uuid4()}{suffix}"
+    catalog["id"] = catalog_id
+    if drop_child_links:
+        catalog["links"] = [
+            link for link in catalog.get("links", []) if link.get("rel") != "child"
+        ]
+
+    resp = await client.post("/catalogs", json=catalog)
+    assert resp.status_code == 201
+    return catalog_id
+
+
+def _unique_collection(load_test_data, suffix=""):
+    """Load the collection fixture and assign it a unique, uuid-based id."""
+    collection = load_test_data("test_collection.json")
+    collection["id"] = f"test-collection-{uuid.uuid4()}{suffix}"
+    return collection
+
+
+async def _post_collection_to_catalog(client, catalog_id, collection):
+    """POST ``collection`` to the catalog's collections route and assert 201.
+
+    Returns:
+        The response object, so callers can inspect the returned body.
+    """
+    resp = await client.post(f"/catalogs/{catalog_id}/collections", json=collection)
+    assert resp.status_code == 201
+    return resp
+
+
+@pytest.mark.asyncio
+async def test_create_catalog_collection_adds_parent_id(
+    catalogs_app_client, load_test_data
+):
+    """Test that creating a collection in a catalog adds the catalog to parent_ids."""
+    catalog_id = await _post_unique_catalog(catalogs_app_client, load_test_data)
+
+    # Create a new collection through the catalog endpoint
+    test_collection = _unique_collection(load_test_data)
+    collection_id = test_collection["id"]
+    create_resp = await _post_collection_to_catalog(
+        catalogs_app_client, catalog_id, test_collection
+    )
+    assert create_resp.json()["id"] == collection_id
+
+    get_resp = await catalogs_app_client.get(f"/collections/{collection_id}")
+    assert get_resp.status_code == 200
+
+    # parent_ids itself is not returned to clients (see
+    # test_parent_ids_not_exposed_to_client), so the relationship is checked
+    # indirectly: the collection must carry a "catalog" link whose href
+    # mentions the parent catalog id.
+    has_catalog_link = any(
+        link.get("rel") == "catalog" and catalog_id in link.get("href", "")
+        for link in get_resp.json().get("links", [])
+    )
+    assert has_catalog_link, "Collection should have catalog link"
+
+
+@pytest.mark.asyncio
+async def test_add_existing_collection_to_catalog(
+    catalogs_app_client, load_test_data, ctx
+):
+    """Test adding an existing collection to a catalog adds parent_id."""
+    catalog_id = await _post_unique_catalog(catalogs_app_client, load_test_data)
+
+    # Add existing collection (provided by the ctx fixture) to the catalog
+    await _post_collection_to_catalog(catalogs_app_client, catalog_id, ctx.collection)
+
+    # Verify we can get the collection through the catalog endpoint
+    get_resp = await catalogs_app_client.get(
+        f"/catalogs/{catalog_id}/collections/{ctx.collection['id']}"
+    )
+    assert get_resp.status_code == 200
+
+
+@pytest.mark.asyncio
+async def test_collection_with_multiple_parent_catalogs(
+    catalogs_app_client, load_test_data
+):
+    """Test that a collection can have multiple parent catalogs."""
+    # Create two catalogs, one after the other
+    catalog_ids = []
+    for i in range(2):
+        catalog_ids.append(
+            await _post_unique_catalog(
+                catalogs_app_client, load_test_data, suffix=f"-{i}"
+            )
+        )
+
+    # Create the collection in the first catalog, then add it to the second
+    test_collection = _unique_collection(load_test_data)
+    collection_id = test_collection["id"]
+    for catalog_id in catalog_ids:
+        await _post_collection_to_catalog(
+            catalogs_app_client, catalog_id, test_collection
+        )
+
+    # Verify we can get the collection from both catalogs
+    for catalog_id in catalog_ids:
+        get_resp = await catalogs_app_client.get(
+            f"/catalogs/{catalog_id}/collections/{collection_id}"
+        )
+        assert get_resp.status_code == 200
+
+
+@pytest.mark.asyncio
+async def test_get_catalog_collections_uses_parent_ids(
+    catalogs_app_client, load_test_data
+):
+    """Test that get_catalog_collections queries by parent_ids."""
+    catalog_id = await _post_unique_catalog(catalogs_app_client, load_test_data)
+
+    # Create multiple collections in the catalog
+    collection_ids = []
+    for i in range(3):
+        test_collection = _unique_collection(load_test_data, suffix=f"-{i}")
+        await _post_collection_to_catalog(
+            catalogs_app_client, catalog_id, test_collection
+        )
+        collection_ids.append(test_collection["id"])
+
+    # Get all collections from the catalog
+    get_resp = await catalogs_app_client.get(f"/catalogs/{catalog_id}/collections")
+    assert get_resp.status_code == 200
+
+    returned_ids = {col["id"] for col in get_resp.json()["collections"]}
+
+    # All created collections should be returned
+    for collection_id in collection_ids:
+        assert collection_id in returned_ids
+
+
+@pytest.mark.asyncio
+async def test_delete_collection_from_catalog_single_parent(
+    catalogs_app_client, load_test_data
+):
+    """Test deleting a collection from a catalog when it's the only parent."""
+    catalog_id = await _post_unique_catalog(catalogs_app_client, load_test_data)
+
+    # Create a collection in the catalog
+    test_collection = _unique_collection(load_test_data)
+    collection_id = test_collection["id"]
+    await _post_collection_to_catalog(catalogs_app_client, catalog_id, test_collection)
+
+    # Delete the collection from the catalog
+    delete_resp = await catalogs_app_client.delete(
+        f"/catalogs/{catalog_id}/collections/{collection_id}"
+    )
+    assert delete_resp.status_code == 204
+
+    # With no remaining parent, the collection is expected to be gone entirely
+    get_resp = await catalogs_app_client.get(f"/collections/{collection_id}")
+    assert get_resp.status_code == 404
+
+
+@pytest.mark.asyncio
+async def test_delete_collection_from_catalog_multiple_parents(
+    catalogs_app_client, load_test_data
+):
+    """Test deleting a collection from a catalog when it has multiple parents."""
+    # Create two catalogs, one after the other
+    catalog_ids = []
+    for i in range(2):
+        catalog_ids.append(
+            await _post_unique_catalog(
+                catalogs_app_client, load_test_data, suffix=f"-{i}"
+            )
+        )
+    first_catalog_id, second_catalog_id = catalog_ids
+
+    # Create the collection in the first catalog, then add it to the second
+    test_collection = _unique_collection(load_test_data)
+    collection_id = test_collection["id"]
+    await _post_collection_to_catalog(
+        catalogs_app_client, first_catalog_id, test_collection
+    )
+    await _post_collection_to_catalog(
+        catalogs_app_client, second_catalog_id, test_collection
+    )
+
+    # Delete the collection from the first catalog
+    delete_resp = await catalogs_app_client.delete(
+        f"/catalogs/{first_catalog_id}/collections/{collection_id}"
+    )
+    assert delete_resp.status_code == 204
+
+    # Verify the collection still exists
+    get_resp = await catalogs_app_client.get(f"/collections/{collection_id}")
+    assert get_resp.status_code == 200
+
+    # Verify we can still get it from the second catalog
+    get_from_catalog_resp = await catalogs_app_client.get(
+        f"/catalogs/{second_catalog_id}/collections/{collection_id}"
+    )
+    assert get_from_catalog_resp.status_code == 200
+
+    # Verify we cannot get it from the first catalog anymore
+    get_from_deleted_catalog_resp = await catalogs_app_client.get(
+        f"/catalogs/{first_catalog_id}/collections/{collection_id}"
+    )
+    assert get_from_deleted_catalog_resp.status_code == 404
+
+
+@pytest.mark.asyncio
+async def test_get_collection_not_in_catalog_returns_404(
+    catalogs_app_client, load_test_data, ctx
+):
+    """Test that getting a collection from a catalog it doesn't belong to returns 404."""
+    catalog_id = await _post_unique_catalog(catalogs_app_client, load_test_data)
+
+    # Try to get a collection that's not in this catalog
+    get_resp = await catalogs_app_client.get(
+        f"/catalogs/{catalog_id}/collections/{ctx.collection['id']}"
+    )
+    assert get_resp.status_code == 404
+
+
+@pytest.mark.asyncio
+async def test_delete_collection_not_in_catalog_returns_404(
+    catalogs_app_client, load_test_data, ctx
+):
+    """Test that deleting a collection from a catalog it doesn't belong to returns 404."""
+    catalog_id = await _post_unique_catalog(catalogs_app_client, load_test_data)
+
+    # Try to delete a collection that's not in this catalog
+    delete_resp = await catalogs_app_client.delete(
+        f"/catalogs/{catalog_id}/collections/{ctx.collection['id']}"
+    )
+    assert delete_resp.status_code == 404
+
+
+@pytest.mark.asyncio
+async def test_catalog_links_contain_all_collections(
+    catalogs_app_client, load_test_data
+):
+    """Test that a catalog's links contain all 3 collections added to it."""
+    # Placeholder child links are stripped from the fixture so that only links
+    # for the collections created below are counted.
+    catalog_id = await _post_unique_catalog(
+        catalogs_app_client, load_test_data, drop_child_links=True
+    )
+
+    # Create 3 collections in the catalog
+    collection_ids = []
+    for i in range(3):
+        test_collection = _unique_collection(load_test_data, suffix=f"-{i}")
+        await _post_collection_to_catalog(
+            catalogs_app_client, catalog_id, test_collection
+        )
+        collection_ids.append(test_collection["id"])
+
+    # Get the catalog and verify all 3 collections are in its links
+    catalog_get_resp = await catalogs_app_client.get(f"/catalogs/{catalog_id}")
+    assert catalog_get_resp.status_code == 200
+
+    # Extract all child links (collection links)
+    child_links = [
+        link
+        for link in catalog_get_resp.json().get("links", [])
+        if link.get("rel") == "child"
+    ]
+
+    # Verify we have exactly 3 child links
+    assert (
+        len(child_links) == 3
+    ), f"Catalog should have 3 child links, but has {len(child_links)}"
+
+    # Verify each collection ID is in the child links
+    child_hrefs = [link.get("href", "") for link in child_links]
+    for collection_id in collection_ids:
+        collection_href = f"/collections/{collection_id}"
+        assert any(
+            collection_href in href for href in child_hrefs
+        ), f"Collection {collection_id} missing from catalog links. Found links: {child_hrefs}"
+
+
+@pytest.mark.asyncio
+async def test_parent_ids_not_exposed_to_client(catalogs_app_client, load_test_data):
+    """Test that parent_ids field is not exposed in API responses."""
+    catalog_id = await _post_unique_catalog(catalogs_app_client, load_test_data)
+
+    # Create a collection in the catalog
+    test_collection = _unique_collection(load_test_data)
+    collection_id = test_collection["id"]
+    create_resp = await _post_collection_to_catalog(
+        catalogs_app_client, catalog_id, test_collection
+    )
+
+    # Verify parent_ids is not in the creation response
+    assert (
+        "parent_ids" not in create_resp.json()
+    ), "parent_ids should not be exposed in API response"
+
+    # Verify parent_ids is not in the get response
+    get_resp = await catalogs_app_client.get(f"/collections/{collection_id}")
+    assert get_resp.status_code == 200
+    assert (
+        "parent_ids" not in get_resp.json()
+    ), "parent_ids should not be exposed in API response"
+
+    # Verify parent_ids is not in the catalog collection endpoint response
+    catalog_collections_resp = await catalogs_app_client.get(
+        f"/catalogs/{catalog_id}/collections"
+    )
+    assert catalog_collections_resp.status_code == 200
+    listed_collections = catalog_collections_resp.json().get("collections", [])
+
+    # Without this check the loop below would pass vacuously on an empty listing.
+    assert collection_id in {
+        col["id"] for col in listed_collections
+    }, "Created collection should be listed in its catalog"
+    for collection in listed_collections:
+        assert (
+            "parent_ids" not in collection
+        ), "parent_ids should not be exposed in API response"