diff --git a/backend/common-cdk/common_constructs/stack.py b/backend/common-cdk/common_constructs/stack.py index 652b0383f..74224427c 100644 --- a/backend/common-cdk/common_constructs/stack.py +++ b/backend/common-cdk/common_constructs/stack.py @@ -122,6 +122,12 @@ def state_api_domain_name(self) -> str | None: return f'state-api.{self.hosted_zone.zone_name}' return None + @property + def search_api_domain_name(self) -> str | None: + if self.hosted_zone is not None: + return f'search.{self.hosted_zone.zone_name}' + return None + @property def ui_domain_name(self) -> str | None: if self.hosted_zone is not None: diff --git a/backend/compact-connect/app_clients/README.md b/backend/compact-connect/app_clients/README.md index 4573eead0..d1e04dd9f 100644 --- a/backend/compact-connect/app_clients/README.md +++ b/backend/compact-connect/app_clients/README.md @@ -66,7 +66,7 @@ jurisdiction. ```bash -python3 bin/create_app_client.py -e -u +python3 bin/create_app_client.py -u ``` **Interactive Process:** diff --git a/backend/compact-connect/bin/compile_requirements.sh b/backend/compact-connect/bin/compile_requirements.sh index 0706a9062..1a843d80e 100755 --- a/backend/compact-connect/bin/compile_requirements.sh +++ b/backend/compact-connect/bin/compile_requirements.sh @@ -21,6 +21,8 @@ pip-compile --no-emit-index-url --upgrade --no-strip-extras lambdas/python/provi # avoid installation failures # pip-compile --no-emit-index-url --upgrade --no-strip-extras lambdas/python/purchases/requirements-dev.in # pip-compile --no-emit-index-url --upgrade --no-strip-extras lambdas/python/purchases/requirements.in +pip-compile --no-emit-index-url --upgrade --no-strip-extras lambdas/python/search/requirements-dev.in +pip-compile --no-emit-index-url --upgrade --no-strip-extras lambdas/python/search/requirements.in pip-compile --no-emit-index-url --upgrade --no-strip-extras lambdas/python/staff-user-pre-token/requirements-dev.in pip-compile --no-emit-index-url --upgrade --no-strip-extras lambdas/python/staff-user-pre-token/requirements.in pip-compile --no-emit-index-url --upgrade --no-strip-extras lambdas/python/staff-users/requirements-dev.in diff --git a/backend/compact-connect/bin/download_oas30.py b/backend/compact-connect/bin/download_oas30.py index 7c09f31f6..8869da8f0 100755 --- a/backend/compact-connect/bin/download_oas30.py +++ b/backend/compact-connect/bin/download_oas30.py @@ -106,6 +106,8 @@ def update_server_urls(spec: dict, api_name: str) -> None: base_url = 'https://state-api.beta.compactconnect.org' elif api_name == 'LicenseApi': base_url = 'https://api.beta.compactconnect.org' + elif api_name == 'SearchApi': + base_url = 'https://search.beta.compactconnect.org' else: # Keep original URL if API name is not recognized return @@ -155,6 +157,7 @@ def main(): parser = argparse.ArgumentParser(description='Download OpenAPI v3 specifications from AWS API Gateway') parser.add_argument('--state-api-only', action='store_true', help='Download only the StateApi specification') parser.add_argument('--license-api-only', action='store_true', help='Download only the LicenseApi specification') + parser.add_argument('--search-api-only', action='store_true', help='Download only the SearchApi specification') args = parser.parse_args() @@ -165,17 +168,23 @@ def main(): # Define output paths state_api_path = os.path.join(workspace_dir, 'docs', 'api-specification', 'latest-oas30.json') license_api_path = os.path.join(workspace_dir, 'docs', 'internal', 'api-specification', 'latest-oas30.json') + search_api_path = 
os.path.join(workspace_dir, 'docs', 'search-internal', 'api-specification', 'latest-oas30.json') # Download StateApi (external API) - if not args.license_api_only: + if not args.license_api_only and not args.search_api_only: sys.stdout.write('\n=== Downloading StateApi specification ===\n') download_api_spec('StateApi', state_api_path) # Download LicenseApi (internal API) - if not args.state_api_only: + if not args.state_api_only and not args.search_api_only: sys.stdout.write('\n=== Downloading LicenseApi specification ===\n') download_api_spec('LicenseApi', license_api_path) + # Download SearchApi (search internal API) + if not args.state_api_only and not args.license_api_only: + sys.stdout.write('\n=== Downloading SearchApi specification ===\n') + download_api_spec('SearchApi', search_api_path) + sys.stdout.write('\nAll specifications downloaded successfully!\n') sys.exit(0) diff --git a/backend/compact-connect/bin/sync_deps.sh b/backend/compact-connect/bin/sync_deps.sh index 1656a985a..d3bfea152 100755 --- a/backend/compact-connect/bin/sync_deps.sh +++ b/backend/compact-connect/bin/sync_deps.sh @@ -20,6 +20,8 @@ pip-sync \ lambdas/python/disaster-recovery/requirements.txt \ lambdas/python/provider-data-v1/requirements-dev.txt \ lambdas/python/provider-data-v1/requirements.txt \ + lambdas/python/search/requirements-dev.txt \ + lambdas/python/search/requirements.txt \ lambdas/python/staff-user-pre-token/requirements-dev.txt \ lambdas/python/staff-user-pre-token/requirements.txt \ lambdas/python/staff-users/requirements-dev.txt \ diff --git a/backend/compact-connect/bin/trim_oas30.py b/backend/compact-connect/bin/trim_oas30.py index e28158d16..53d80cc1a 100755 --- a/backend/compact-connect/bin/trim_oas30.py +++ b/backend/compact-connect/bin/trim_oas30.py @@ -35,6 +35,7 @@ def strip_options_endpoints(oas30: dict) -> dict: parser.add_argument( '-i', '--internal', action='store_true', help='Use internal API specification files instead of regular ones' ) + parser.add_argument('-s', '--search', action='store_true', help='Use search API specification files') args = parser.parse_args() @@ -42,12 +43,13 @@ def strip_options_endpoints(oas30: dict) -> dict: script_dir = os.path.dirname(os.path.abspath(__file__)) workspace_dir = os.path.dirname(script_dir) - # Determine the base directory based on the internal flag - base_dir = ( - os.path.join('docs', 'internal', 'api-specification') - if args.internal - else os.path.join('docs', 'api-specification') - ) + # Determine the base directory based on the flags + if args.search: + base_dir = os.path.join('docs', 'search-internal', 'api-specification') + elif args.internal: + base_dir = os.path.join('docs', 'internal', 'api-specification') + else: + base_dir = os.path.join('docs', 'api-specification') file_path = os.path.join(workspace_dir, base_dir, 'latest-oas30.json') with open(file_path) as f: diff --git a/backend/compact-connect/bin/update_api_docs.sh b/backend/compact-connect/bin/update_api_docs.sh index 7a38f63fa..5f8537b9e 100755 --- a/backend/compact-connect/bin/update_api_docs.sh +++ b/backend/compact-connect/bin/update_api_docs.sh @@ -1,7 +1,7 @@ #!/bin/bash # Update API documentation workflow -# Downloads, trims, and updates Postman collections for both StateApi and LicenseApi +# Downloads, trims, and updates Postman collections for StateApi, LicenseApi, and SearchApi set -e # Exit immediately if any command fails @@ -99,6 +99,14 @@ trim_specs() { exit 1 fi print_success "LicenseApi specification trimmed" + + # Trim search API spec + 
print_status "Trimming SearchApi specification..." + if ! python3 bin/trim_oas30.py --search; then + print_error "Failed to trim SearchApi specification" + exit 1 + fi + print_success "SearchApi specification trimmed" } # Function to update Postman collections @@ -120,6 +128,14 @@ update_postman() { exit 1 fi print_success "LicenseApi Postman collection updated" + + # Update search Postman collection + print_status "Updating SearchApi Postman collection..." + if ! python3 bin/update_postman_collection.py --search; then + print_error "Failed to update SearchApi Postman collection" + exit 1 + fi + print_success "SearchApi Postman collection updated" } # Function to verify files exist @@ -129,8 +145,10 @@ verify_files() { local files=( "docs/api-specification/latest-oas30.json" "docs/internal/api-specification/latest-oas30.json" + "docs/search-internal/api-specification/latest-oas30.json" "docs/postman/postman-collection.json" "docs/internal/postman/postman-collection.json" + "docs/search-internal/postman/postman-collection.json" ) for file in "${files[@]}"; do @@ -174,8 +192,10 @@ main() { print_status "Updated files:" echo " - docs/api-specification/latest-oas30.json" echo " - docs/internal/api-specification/latest-oas30.json" + echo " - docs/search-internal/api-specification/latest-oas30.json" echo " - docs/postman/postman-collection.json" echo " - docs/internal/postman/postman-collection.json" + echo " - docs/search-internal/postman/postman-collection.json" } # Handle script interruption diff --git a/backend/compact-connect/bin/update_postman_collection.py b/backend/compact-connect/bin/update_postman_collection.py index 492b8b724..1ffe060a2 100755 --- a/backend/compact-connect/bin/update_postman_collection.py +++ b/backend/compact-connect/bin/update_postman_collection.py @@ -196,6 +196,7 @@ def main(): parser.add_argument( '-i', '--internal', action='store_true', help='Use internal API specification files instead of regular ones' ) + parser.add_argument('-s', '--search', action='store_true', help='Use search API specification files') args = parser.parse_args() @@ -203,9 +204,16 @@ def main(): script_dir = os.path.dirname(os.path.abspath(__file__)) workspace_dir = os.path.dirname(script_dir) - # Determine the base directory based on the internal flag - base_dir = os.path.join('internal', 'api-specification') if args.internal else os.path.join('api-specification') - postman_dir = os.path.join('internal', 'postman') if args.internal else os.path.join('postman') + # Determine the base directory based on the flags + if args.search: + base_dir = os.path.join('search-internal', 'api-specification') + postman_dir = os.path.join('search-internal', 'postman') + elif args.internal: + base_dir = os.path.join('internal', 'api-specification') + postman_dir = os.path.join('internal', 'postman') + else: + base_dir = os.path.join('api-specification') + postman_dir = os.path.join('postman') openapi_path = os.path.join(workspace_dir, 'docs', base_dir, 'latest-oas30.json') tmp_path = os.path.join(workspace_dir, 'tmp.json') @@ -215,7 +223,7 @@ def main(): generate_postman_collection(openapi_path, tmp_path) try: - # Load the generated and existing collections + # Load the generated collection with open(tmp_path) as f: new_collection = json.load(f) with open(postman_path) as f: diff --git a/backend/compact-connect/common_constructs/constants.py b/backend/compact-connect/common_constructs/constants.py new file mode 100644 index 000000000..26201949e --- /dev/null +++ 
b/backend/compact-connect/common_constructs/constants.py @@ -0,0 +1,2 @@ +PROD_ENV_NAME = 'prod' +BETA_ENV_NAME = 'beta'
diff --git a/backend/compact-connect/common_constructs/python_function.py b/backend/compact-connect/common_constructs/python_function.py index 0dbcf7828..043e3da47 100644 --- a/backend/compact-connect/common_constructs/python_function.py +++ b/backend/compact-connect/common_constructs/python_function.py @@ -5,7 +5,7 @@ from aws_cdk import Duration from aws_cdk.aws_cloudwatch import Alarm, ComparisonOperator, Stats, TreatMissingData from aws_cdk.aws_cloudwatch_actions import SnsAction -from aws_cdk.aws_iam import IRole, Role, ServicePrincipal +from aws_cdk.aws_iam import IRole, ManagedPolicy, Role, ServicePrincipal from aws_cdk.aws_lambda import ILayerVersion, Runtime from aws_cdk.aws_lambda_python_alpha import PythonFunction as CdkPythonFunction from aws_cdk.aws_logs import ILogGroup, LogGroup, RetentionDays @@ -81,6 +81,25 @@ def __init__( assumed_by=ServicePrincipal('lambda.amazonaws.com'), ) log_group.grant_write(role) + + if 'vpc' in kwargs: + # if the function is being created in a VPC, add the AWSLambdaVPCAccessExecutionRole policy to the role + role.add_managed_policy( + ManagedPolicy.from_aws_managed_policy_name('service-role/AWSLambdaVPCAccessExecutionRole') + ) + NagSuppressions.add_resource_suppressions( + role, + suppressions=[ + { + 'id': 'AwsSolutions-IAM4', + 'appliesTo': [ + 'Policy::arn:<AWS::Partition>:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole' + ], + 'reason': 'Lambdas deployed within a VPC require this policy to access the VPC.', + }, + ], + ) + # We can't directly grant a provided role permission to log to our log group, since that could create a # circular dependency with the stack the role came from. The role creator will have to be responsible for # setting its permissions.
diff --git a/backend/compact-connect/common_constructs/queued_lambda_processor.py b/backend/compact-connect/common_constructs/queued_lambda_processor.py index d1deb4458..346a3c4fa 100644 --- a/backend/compact-connect/common_constructs/queued_lambda_processor.py +++ b/backend/compact-connect/common_constructs/queued_lambda_processor.py @@ -29,6 +29,7 @@ def __init__( encryption_key: IKey, alarm_topic: ITopic, dlq_count_alarm_threshold: int = 10, + dlq_retention_period: Duration | None = None, ): super().__init__(scope, construct_id) @@ -39,6 +40,7 @@ def __init__( encryption=QueueEncryption.KMS, encryption_master_key=encryption_key, enforce_ssl=True, + retention_period=dlq_retention_period, ) self.queue = Queue(
diff --git a/backend/compact-connect/common_constructs/user_pool.py b/backend/compact-connect/common_constructs/user_pool.py index 7b63501f9..b766143a4 100644 --- a/backend/compact-connect/common_constructs/user_pool.py +++ b/backend/compact-connect/common_constructs/user_pool.py @@ -216,7 +216,7 @@ def add_custom_app_client_domain( suppressions=[ { 'id': 'AwsSolutions-L1', - 'reason': 'We do not maintain this lambda runtime. It will be updated with future CDK versions' + 'reason': 'We do not maintain this lambda runtime. It will be updated with future CDK versions', }, { 'id': 'HIPAA.Security-LambdaDLQ',
diff --git a/backend/compact-connect/docs/design/README.md b/backend/compact-connect/docs/design/README.md index 84d89add1..1a64eadfb 100644 --- a/backend/compact-connect/docs/design/README.md +++ b/backend/compact-connect/docs/design/README.md @@ -10,6 +10,8 @@ Look here for continued documentation of the back-end design, as it progresses.
- **[Privileges](#privileges)** - **[Attestations](#attestations)** - **[Transaction History Reporting](#transaction-history-reporting)** +- **[Advanced Data Search](#advanced-data-search)** +- **[CI/CD Pipelines](#cicd-pipelines)** - **[Audit Logging](#audit-logging)** ## Compacts and Jurisdictions @@ -675,6 +677,120 @@ For this reason, we use the batch settlement time as the timestamp for the trans transaction history table. This ensures that any transactions that are in a batch which fails to settle will eventually be processed and stored in the transaction history table. + +## Advanced Data Search +[Back to top](#backend-design) + +To support advanced search capabilities for provider and privilege records, this project leverages +[AWS OpenSearch Service](https://docs.aws.amazon.com/opensearch-service/latest/developerguide/what-is.html). +Provider data from the provider DynamoDB table is indexed into an OpenSearch Domain (Cluster), enabling staff users to perform complex searches through the Search API (search.compactconnect.org). + +The OpenSearch resources are deployed within a Virtual Private Cloud (VPC) to provide network-level security and restrict outside access. Unlike DynamoDB, which is a fully managed and serverless AWS service that does not require (and does not support) VPC deployment, OpenSearch domains have data nodes that must be managed. Placing the OpenSearch domain in a VPC allows us to tightly control which resources and users can access it, reducing exposure to external threats. + +### Architecture Overview +![Advanced Search Diagram](./advanced-provider-search.pdf) + +The search infrastructure consists of several key components: + +1. **OpenSearch Domain**: A managed OpenSearch cluster deployed within a VPC +2. **Index Manager**: A CloudFormation custom resource that creates and manages domain indices +3. **Search API**: API Gateway endpoints backed by Lambda functions for querying the domain +4. **Populate Handler**: A Lambda function for bulk indexing all provider data from DynamoDB +5. **Provider Update Ingest Handler**: A Lambda function for updating documents in OpenSearch whenever provider records are updated in DynamoDB. + +### Index Structure + +Provider documents are stored in compact-specific indices with the naming convention: `compact_{compact}_providers_{version}` +(e.g., `compact_aslp_providers_v1`). We use index aliases to provide a stable reference to the current version of each index (e.g., `compact_aslp_providers`), allowing read and write operations to be transparently redirected during planned index migrations or upgrades. This enables seamless index schema changes without requiring app code changes, as applications and APIs can continue to reference the alias rather than a specific index name. See [OpenSearch index alias documentation](https://docs.opensearch.org/latest/im-plugin/index-alias/) for more information. + +#### Index Management + +The `IndexManagerCustomResource` is a CloudFormation custom resource that creates compact-specific indices when the +domain is first created. It ensures the indices/aliases exist with the correct mapping before any indexing operations begin. + +#### Index Mapping + +Each provider document contains all information you would see from the provider detail api endpoint with `readGeneral` permission. See the [application code](../../lambdas/python/search/handlers/manage_opensearch_indices.py) for the current mapping definition. 
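As a rough illustration of the naming and alias convention described above, the sketch below creates a versioned, compact-specific index behind its stable alias using the `opensearch-py` client. This is a minimal sketch only, not the project's actual `IndexManagerCustomResource` logic: the client setup, field mappings, and analyzer name are illustrative assumptions, and the real mapping lives in `manage_opensearch_indices.py`.

```python
import boto3
from opensearchpy import AWSV4SignerAuth, OpenSearch, RequestsHttpConnection


def make_client(host: str, region: str) -> OpenSearch:
    # Sign requests with the caller's IAM credentials; in the search lambdas the host
    # would come from the OPENSEARCH_HOST_ENDPOINT environment variable.
    auth = AWSV4SignerAuth(boto3.Session().get_credentials(), region)
    return OpenSearch(
        hosts=[{'host': host, 'port': 443}],
        http_auth=auth,
        use_ssl=True,
        verify_certs=True,
        connection_class=RequestsHttpConnection,
    )


def create_provider_index(client: OpenSearch, compact: str, version: str = 'v1') -> None:
    index_name = f'compact_{compact}_providers_{version}'
    alias_name = f'compact_{compact}_providers'
    body = {
        'settings': {
            'analysis': {
                'analyzer': {
                    # ASCII-folding analyzer so a search for "Jose" matches "José"
                    'name_analyzer': {
                        'type': 'custom',
                        'tokenizer': 'standard',
                        'filter': ['lowercase', 'asciifolding'],
                    }
                }
            }
        },
        'mappings': {
            'properties': {
                # Illustrative subset of fields only; see the application code for the real mapping
                'providerId': {'type': 'keyword'},
                'givenName': {'type': 'text', 'analyzer': 'name_analyzer'},
                'familyName': {'type': 'text', 'analyzer': 'name_analyzer'},
                'dateOfUpdate': {'type': 'date'},
            }
        },
        # Read and write through the alias so a future v2 index can take over without app changes
        'aliases': {alias_name: {}},
    }
    if not client.indices.exists(index=index_name):
        client.indices.create(index=index_name, body=body)
```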
+
+ The index uses a custom ASCII-folding analyzer for name fields, which allows searching for names with international
+ characters using their ASCII equivalents (e.g., searching "Jose" matches "José").
+
+ ### Search API Endpoints
+
+ The Search API provides two endpoints for querying the OpenSearch domain:
+
+ #### Provider Search
+ ```
+ POST /v1/compacts/{compact}/providers/search
+ ```
+
+ Returns provider records matching the query. Response includes the full provider document with licenses, privileges,
+ and military affiliations.
+
+ #### Privilege CSV Export
+ ```
+ POST /v1/compacts/{compact}/privileges/export
+ ```
+
+ Returns flattened privilege records. This endpoint queries the same provider index but extracts and flattens
+ privileges, combining privilege data with license data to provide a denormalized list of objects which are then exported to a CSV file for downloading.
+
+ ### Document Indexing
+
+ #### Initial Population / Re-indexing
+
+ The `populate_provider_documents` Lambda function handles bulk indexing of provider data from DynamoDB into
+ OpenSearch. This function is invoked manually through the AWS Console for:
+ - Initial data population when the search infrastructure is first deployed
+ - Full re-indexing if data becomes out of sync
+
+ The function:
+ 1. Scans the provider table using the `providerDateOfUpdate` GSI
+ 2. Retrieves complete provider records for each provider
+ 3. Sanitizes data using `ProviderGeneralResponseSchema`
+ 4. Bulk indexes documents
+
+ **Resumable Processing**: If the function approaches the 15-minute Lambda timeout, it returns pagination information in the
+ `resumeFrom` field that can be passed as Lambda input to continue processing:
+
+ ```json
+ {
+ "startingCompact": "aslp",
+ "startingLastKey": {"pk": "...", "sk": "..."}
+ }
+ ```
+
+ **Race Condition Consideration**: A potential race condition can occur when running this function while provider data is being actively updated:
+
+ 1. The `populate_provider_documents` Lambda function queries the current data from DynamoDB for a provider
+ 2. A change is made in DynamoDB for that same provider
+ 3. The DynamoDB stream handler queries the data and indexes the change into OpenSearch after the ~30 second delay of sitting in SQS
+ 4. The `populate_provider_documents` Lambda function finally indexes the stale data into OpenSearch, overwriting the change indexed by the DynamoDB stream handler
+
+ For this reason, it is recommended that this process be run during a period of low traffic. Given that it is a one-time process to initially populate the index, the risk is low and, if needed, the Lambda function can be run again to synchronize all the provider documents.
+
+ #### Updates via DynamoDB Streams
+
+ To keep the OpenSearch index synchronized with changes in the provider DynamoDB table, the system uses DynamoDB Streams to capture all modifications made to provider records (see [AWS documentation](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Streams.html)). This ensures that provider documents in OpenSearch are updated automatically whenever records are created, modified, or deleted in the provider table.
+
+ **Architecture Flow:**
+
+ 1. **DynamoDB Stream**: The provider table has a DynamoDB stream enabled with `NEW_AND_OLD_IMAGES` view type, which captures both the before and after state of any record modification.
+
+ 2. **EventBridge Pipe**: An EventBridge Pipe reads events from the DynamoDB stream and forwards them to an SQS queue.
+
+ 3. 
**Provider Update Ingest Lambda**: The Lambda function processes SQS message batches, determines the providers that were modified, and upserts their latest information into the appropriate OpenSearch index. + +### Monitoring and Alarms + +The search infrastructure includes CloudWatch alarms for capacity monitoring. If these alarms get triggered, review +usage metrics to determine if the Domain needs to be scaled up: + +- **CPU Utilization**: Alerts when CPU exceeds threshold +- **Memory Pressure**: Monitors JVM memory pressure +- **Storage Space**: Alerts on low disk space +- **Cluster Health**: Monitors yellow/red cluster status + ## CI/CD Pipelines This project leverages AWS CodePipeline to deploy the backend and frontend infrastructure. See the diff --git a/backend/compact-connect/docs/design/advanced-provider-search.pdf b/backend/compact-connect/docs/design/advanced-provider-search.pdf new file mode 100644 index 000000000..9c0841b2f Binary files /dev/null and b/backend/compact-connect/docs/design/advanced-provider-search.pdf differ diff --git a/backend/compact-connect/docs/it_staff_onboarding_instructions.md b/backend/compact-connect/docs/it_staff_onboarding_instructions.md index 45fe9642d..184d99b40 100644 --- a/backend/compact-connect/docs/it_staff_onboarding_instructions.md +++ b/backend/compact-connect/docs/it_staff_onboarding_instructions.md @@ -1,4 +1,4 @@ -# CompactConnect Automated License Data Upload Instructions (Beta Release) +# CompactConnect Automated License Data Upload Instructions ## Overview @@ -61,8 +61,7 @@ Follow these steps to obtain an access token and make requests to the CompactCon ### Step 1: Generate an Access Token You must first obtain an access token to authenticate your API requests. The access token will be used in the -Authorization header of subsequent API calls. While the following curl command demonstrates how to generate a token for -the **beta** environment, you should implement this authentication flow in your application's programming language using +Authorization header of subsequent API calls. While the following curl command demonstrates how to generate a token, you should implement this authentication flow in your application's programming language using appropriate HTTPS request libraries: > **Note**: When copying commands, be careful of line breaks. You may need to remove any extra spaces or @@ -104,10 +103,10 @@ AWS documentation: https://docs.aws.amazon.com/cognito/latest/developerguide/tok - Your application should request a new token before the current one expires - Store the `access_token` value for use in API requests -### Step 2: Upload License Data to the Beta Environment (JSON POST Endpoint) +### Step 2: Upload License Data (JSON POST Endpoint) The CompactConnect License API can be called through a POST REST endpoint which takes in a list of license record -objects. The following curl command example demonstrates how to upload license data into the **beta** environment, but +objects. The following curl command example demonstrates how to upload license data, but you should implement this API call in your application's programming language using appropriate HTTPS request libraries. You will need to replace the example payload with valid license data that includes the correct license types for your specific compact. 
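For reference, the token and upload flow described in the onboarding instructions above might look like the following in Python. This is an illustrative sketch only: the token URL, API base URL, request path, and credentials are placeholders rather than real environment values, so substitute the endpoint details and license schema provided for your compact and jurisdiction.

```python
import requests

# Placeholder values -- use the token endpoint, API host, and credentials issued for your app client
TOKEN_URL = 'https://example-auth-domain.auth.us-east-1.amazoncognito.com/oauth2/token'
API_BASE_URL = 'https://api.example.compactconnect.org'
CLIENT_ID = 'your-app-client-id'
CLIENT_SECRET = 'your-app-client-secret'


def get_access_token() -> str:
    # OAuth2 client_credentials grant against the Cognito token endpoint
    response = requests.post(
        TOKEN_URL,
        data={
            'grant_type': 'client_credentials',
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
        },
        headers={'Content-Type': 'application/x-www-form-urlencoded'},
        timeout=30,
    )
    response.raise_for_status()
    return response.json()['access_token']


def upload_licenses(compact: str, jurisdiction: str, licenses: list[dict]) -> None:
    # The path below is a placeholder; POST to the license endpoint documented for your integration
    response = requests.post(
        f'{API_BASE_URL}/v1/compacts/{compact}/jurisdictions/{jurisdiction}/licenses',
        json=licenses,
        headers={'Authorization': f'Bearer {get_access_token()}'},
        timeout=30,
    )
    response.raise_for_status()
```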
See the diff --git a/backend/compact-connect/docs/search-internal/api-specification/latest-oas30.json b/backend/compact-connect/docs/search-internal/api-specification/latest-oas30.json new file mode 100644 index 000000000..c1c006228 --- /dev/null +++ b/backend/compact-connect/docs/search-internal/api-specification/latest-oas30.json @@ -0,0 +1,1440 @@ +{ + "openapi": "3.0.1", + "info": { + "title": "SearchApi", + "version": "2025-12-02T19:49:45Z" + }, + "servers": [ + { + "url": "https://search.beta.compactconnect.org" + } + ], + "paths": { + "/v1/compacts/{compact}/providers/search": { + "post": { + "parameters": [ + { + "name": "compact", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SandboSearctZ4sfzliddmr" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "200 response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SandboSearcRcmFGOzNZ5TZ" + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "SandboSearcRcmFGOzNZ5TZ": { + "required": [ + "providers", + "total" + ], + "type": "object", + "properties": { + "total": { + "type": "object", + "properties": { + "value": { + "type": "integer" + }, + "relation": { + "type": "string", + "enum": [ + "eq", + "gte" + ] + } + }, + "description": "Total hits information from OpenSearch" + }, + "lastSort": { + "type": "array", + "description": "Sort values from the last hit to use with search_after for the next page" + }, + "providers": { + "type": "array", + "items": { + "required": [ + "birthMonthDay", + "compact", + "compactEligibility", + "dateOfExpiration", + "dateOfUpdate", + "familyName", + "givenName", + "jurisdictionUploadedCompactEligibility", + "jurisdictionUploadedLicenseStatus", + "licenseJurisdiction", + "licenseStatus", + "privilegeJurisdictions", + "providerId", + "type" + ], + "type": "object", + "properties": { + "privileges": { + "type": "array", + "items": { + "required": [ + "administratorSetStatus", + "compact", + "dateOfExpiration", + "dateOfIssuance", + "dateOfRenewal", + "dateOfUpdate", + "jurisdiction", + "licenseJurisdiction", + "licenseType", + "privilegeId", + "providerId", + "status", + "type" + ], + "type": "object", + "properties": { + "investigationStatus": { + "type": "string", + "enum": [ + "underInvestigation" + ] + }, + "licenseJurisdiction": { + "type": "string", + "enum": [ + "al", + "ak", + "az", + "ar", + "ca", + "co", + "ct", + "de", + "dc", + "fl", + "ga", + "hi", + "id", + "il", + "in", + "ia", + "ks", + "ky", + "la", + "me", + "md", + "ma", + "mi", + "mn", + "ms", + "mo", + "mt", + "ne", + "nv", + "nh", + "nj", + "nm", + "ny", + "nc", + "nd", + "oh", + "ok", + "or", + "pa", + "pr", + "ri", + "sc", + "sd", + "tn", + "tx", + "ut", + "vt", + "va", + "vi", + "wa", + "wv", + "wi", + "wy" + ] + }, + "compact": { + "type": "string", + "enum": [ + "aslp", + "octp", + "coun" + ] + }, + "jurisdiction": { + "type": "string", + "enum": [ + "al", + "ak", + "az", + "ar", + "ca", + "co", + "ct", + "de", + "dc", + "fl", + "ga", + "hi", + "id", + "il", + "in", + "ia", + "ks", + "ky", + "la", + "me", + "md", + "ma", + "mi", + "mn", + "ms", + "mo", + "mt", + "ne", + "nv", + "nh", + "nj", + "nm", + "ny", + "nc", + "nd", + "oh", + "ok", + "or", + "pa", + "pr", + "ri", + "sc", + "sd", + "tn", + "tx", + "ut", + "vt", + "va", + "vi", + "wa", + "wv", + "wi", + "wy" + ] + }, + "attestations": { + "type": 
"array", + "items": { + "required": [ + "attestationId", + "version" + ], + "type": "object", + "properties": { + "attestationId": { + "maxLength": 100, + "type": "string" + }, + "version": { + "maxLength": 100, + "type": "string" + } + } + } + }, + "investigations": { + "type": "array", + "items": { + "required": [ + "compact", + "creationDate", + "dateOfUpdate", + "investigationId", + "jurisdiction", + "licenseType", + "providerId", + "submittingUser", + "type" + ], + "type": "object", + "properties": { + "licenseType": { + "type": "string" + }, + "investigationId": { + "type": "string" + }, + "compact": { + "type": "string", + "enum": [ + "aslp", + "octp", + "coun" + ] + }, + "providerId": { + "pattern": "[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab]{1}[0-9a-f]{3}-[0-9a-f]{12}", + "type": "string" + }, + "jurisdiction": { + "type": "string", + "enum": [ + "al", + "ak", + "az", + "ar", + "ca", + "co", + "ct", + "de", + "dc", + "fl", + "ga", + "hi", + "id", + "il", + "in", + "ia", + "ks", + "ky", + "la", + "me", + "md", + "ma", + "mi", + "mn", + "ms", + "mo", + "mt", + "ne", + "nv", + "nh", + "nj", + "nm", + "ny", + "nc", + "nd", + "oh", + "ok", + "or", + "pa", + "pr", + "ri", + "sc", + "sd", + "tn", + "tx", + "ut", + "vt", + "va", + "vi", + "wa", + "wv", + "wi", + "wy" + ] + }, + "submittingUser": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "investigation" + ] + }, + "creationDate": { + "type": "string", + "format": "date-time" + }, + "dateOfUpdate": { + "type": "string", + "format": "date-time" + } + } + } + }, + "type": { + "type": "string", + "enum": [ + "privilege" + ] + }, + "compactTransactionId": { + "type": "string" + }, + "dateOfIssuance": { + "pattern": "^[12]{1}[0-9]{3}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$", + "type": "string", + "format": "date" + }, + "licenseType": { + "type": "string" + }, + "administratorSetStatus": { + "type": "string", + "enum": [ + "active", + "inactive" + ] + }, + "dateOfExpiration": { + "pattern": "^[12]{1}[0-9]{3}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$", + "type": "string", + "format": "date" + }, + "activeSince": { + "pattern": "^[12]{1}[0-9]{3}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$", + "type": "string", + "format": "date" + }, + "privilegeId": { + "type": "string" + }, + "providerId": { + "pattern": "[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab]{1}[0-9a-f]{3}-[0-9a-f]{12}", + "type": "string" + }, + "dateOfRenewal": { + "pattern": "^[12]{1}[0-9]{3}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$", + "type": "string", + "format": "date" + }, + "adverseActions": { + "type": "array", + "items": { + "required": [ + "actionAgainst", + "adverseActionId", + "compact", + "creationDate", + "dateOfUpdate", + "effectiveStartDate", + "encumbranceType", + "jurisdiction", + "licenseType", + "licenseTypeAbbreviation", + "providerId", + "submittingUser", + "type" + ], + "type": "object", + "properties": { + "clinicalPrivilegeActionCategories": { + "type": "array", + "items": { + "type": "string" + } + }, + "compact": { + "type": "string", + "enum": [ + "aslp", + "octp", + "coun" + ] + }, + "jurisdiction": { + "type": "string", + "enum": [ + "al", + "ak", + "az", + "ar", + "ca", + "co", + "ct", + "de", + "dc", + "fl", + "ga", + "hi", + "id", + "il", + "in", + "ia", + "ks", + "ky", + "la", + "me", + "md", + "ma", + "mi", + "mn", + "ms", + "mo", + "mt", + "ne", + "nv", + "nh", + "nj", + "nm", + "ny", + "nc", + "nd", + "oh", + "ok", + "or", + "pa", + "pr", + "ri", + "sc", + "sd", + "tn", + "tx", + "ut", + "vt", + "va", + "vi", + "wa", + 
"wv", + "wi", + "wy" + ] + }, + "licenseTypeAbbreviation": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "adverseAction" + ] + }, + "creationDate": { + "pattern": "^[12]{1}[0-9]{3}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$", + "type": "string", + "format": "date" + }, + "actionAgainst": { + "type": "string", + "enum": [ + "license", + "privilege" + ] + }, + "licenseType": { + "type": "string" + }, + "providerId": { + "pattern": "[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab]{1}[0-9a-f]{3}-[0-9a-f]{12}", + "type": "string" + }, + "submittingUser": { + "type": "string" + }, + "effectiveStartDate": { + "pattern": "^[12]{1}[0-9]{3}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$", + "type": "string", + "format": "date" + }, + "adverseActionId": { + "type": "string" + }, + "effectiveLiftDate": { + "pattern": "^[12]{1}[0-9]{3}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$", + "type": "string", + "format": "date" + }, + "encumbranceType": { + "type": "string" + }, + "liftingUser": { + "type": "string" + }, + "dateOfUpdate": { + "type": "string", + "format": "date-time" + } + } + } + }, + "dateOfUpdate": { + "type": "string", + "format": "date-time" + }, + "status": { + "type": "string", + "enum": [ + "active", + "inactive" + ] + } + } + } + }, + "licenseJurisdiction": { + "type": "string", + "enum": [ + "al", + "ak", + "az", + "ar", + "ca", + "co", + "ct", + "de", + "dc", + "fl", + "ga", + "hi", + "id", + "il", + "in", + "ia", + "ks", + "ky", + "la", + "me", + "md", + "ma", + "mi", + "mn", + "ms", + "mo", + "mt", + "ne", + "nv", + "nh", + "nj", + "nm", + "ny", + "nc", + "nd", + "oh", + "ok", + "or", + "pa", + "pr", + "ri", + "sc", + "sd", + "tn", + "tx", + "ut", + "vt", + "va", + "vi", + "wa", + "wv", + "wi", + "wy" + ] + }, + "compact": { + "type": "string", + "enum": [ + "aslp", + "octp", + "coun" + ] + }, + "npi": { + "pattern": "^[0-9]{10}$", + "type": "string" + }, + "givenName": { + "maxLength": 100, + "type": "string" + }, + "compactEligibility": { + "type": "string", + "enum": [ + "eligible", + "ineligible" + ] + }, + "jurisdictionUploadedCompactEligibility": { + "type": "string", + "enum": [ + "eligible", + "ineligible" + ] + }, + "jurisdictionUploadedLicenseStatus": { + "type": "string", + "enum": [ + "active", + "inactive" + ] + }, + "privilegeJurisdictions": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "al", + "ak", + "az", + "ar", + "ca", + "co", + "ct", + "de", + "dc", + "fl", + "ga", + "hi", + "id", + "il", + "in", + "ia", + "ks", + "ky", + "la", + "me", + "md", + "ma", + "mi", + "mn", + "ms", + "mo", + "mt", + "ne", + "nv", + "nh", + "nj", + "nm", + "ny", + "nc", + "nd", + "oh", + "ok", + "or", + "pa", + "pr", + "ri", + "sc", + "sd", + "tn", + "tx", + "ut", + "vt", + "va", + "vi", + "wa", + "wv", + "wi", + "wy" + ] + } + }, + "type": { + "type": "string", + "enum": [ + "provider" + ] + }, + "suffix": { + "maxLength": 100, + "type": "string" + }, + "currentHomeJurisdiction": { + "type": "string", + "enum": [ + "al", + "ak", + "az", + "ar", + "ca", + "co", + "ct", + "de", + "dc", + "fl", + "ga", + "hi", + "id", + "il", + "in", + "ia", + "ks", + "ky", + "la", + "me", + "md", + "ma", + "mi", + "mn", + "ms", + "mo", + "mt", + "ne", + "nv", + "nh", + "nj", + "nm", + "ny", + "nc", + "nd", + "oh", + "ok", + "or", + "pa", + "pr", + "ri", + "sc", + "sd", + "tn", + "tx", + "ut", + "vt", + "va", + "vi", + "wa", + "wv", + "wi", + "wy" + ] + }, + "licenses": { + "type": "array", + "items": { + "required": [ + "compact", + "compactEligibility", + 
"dateOfExpiration", + "dateOfIssuance", + "dateOfUpdate", + "familyName", + "givenName", + "homeAddressCity", + "homeAddressPostalCode", + "homeAddressState", + "homeAddressStreet1", + "jurisdiction", + "jurisdictionUploadedCompactEligibility", + "jurisdictionUploadedLicenseStatus", + "licenseStatus", + "licenseType", + "providerId", + "type" + ], + "type": "object", + "properties": { + "compact": { + "type": "string", + "enum": [ + "aslp", + "octp", + "coun" + ] + }, + "homeAddressStreet2": { + "maxLength": 100, + "minLength": 1, + "type": "string" + }, + "jurisdiction": { + "type": "string", + "enum": [ + "al", + "ak", + "az", + "ar", + "ca", + "co", + "ct", + "de", + "dc", + "fl", + "ga", + "hi", + "id", + "il", + "in", + "ia", + "ks", + "ky", + "la", + "me", + "md", + "ma", + "mi", + "mn", + "ms", + "mo", + "mt", + "ne", + "nv", + "nh", + "nj", + "nm", + "ny", + "nc", + "nd", + "oh", + "ok", + "or", + "pa", + "pr", + "ri", + "sc", + "sd", + "tn", + "tx", + "ut", + "vt", + "va", + "vi", + "wa", + "wv", + "wi", + "wy" + ] + }, + "homeAddressStreet1": { + "maxLength": 100, + "minLength": 2, + "type": "string" + }, + "investigations": { + "type": "array", + "items": { + "required": [ + "compact", + "creationDate", + "dateOfUpdate", + "investigationId", + "jurisdiction", + "licenseType", + "providerId", + "submittingUser", + "type" + ], + "type": "object", + "properties": { + "licenseType": { + "type": "string" + }, + "investigationId": { + "type": "string" + }, + "compact": { + "type": "string", + "enum": [ + "aslp", + "octp", + "coun" + ] + }, + "providerId": { + "pattern": "[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab]{1}[0-9a-f]{3}-[0-9a-f]{12}", + "type": "string" + }, + "jurisdiction": { + "type": "string", + "enum": [ + "al", + "ak", + "az", + "ar", + "ca", + "co", + "ct", + "de", + "dc", + "fl", + "ga", + "hi", + "id", + "il", + "in", + "ia", + "ks", + "ky", + "la", + "me", + "md", + "ma", + "mi", + "mn", + "ms", + "mo", + "mt", + "ne", + "nv", + "nh", + "nj", + "nm", + "ny", + "nc", + "nd", + "oh", + "ok", + "or", + "pa", + "pr", + "ri", + "sc", + "sd", + "tn", + "tx", + "ut", + "vt", + "va", + "vi", + "wa", + "wv", + "wi", + "wy" + ] + }, + "submittingUser": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "investigation" + ] + }, + "creationDate": { + "type": "string", + "format": "date-time" + }, + "dateOfUpdate": { + "type": "string", + "format": "date-time" + } + } + } + }, + "type": { + "type": "string", + "enum": [ + "license-home" + ] + }, + "suffix": { + "maxLength": 100, + "type": "string" + }, + "dateOfIssuance": { + "pattern": "^[12]{1}[0-9]{3}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$", + "type": "string", + "format": "date" + }, + "licenseType": { + "type": "string" + }, + "emailAddress": { + "type": "string", + "format": "email" + }, + "dateOfExpiration": { + "pattern": "^[12]{1}[0-9]{3}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$", + "type": "string", + "format": "date" + }, + "homeAddressState": { + "maxLength": 100, + "minLength": 2, + "type": "string" + }, + "providerId": { + "pattern": "[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab]{1}[0-9a-f]{3}-[0-9a-f]{12}", + "type": "string" + }, + "dateOfRenewal": { + "pattern": "^[12]{1}[0-9]{3}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$", + "type": "string", + "format": "date" + }, + "familyName": { + "maxLength": 100, + "type": "string" + }, + "homeAddressCity": { + "maxLength": 100, + "minLength": 2, + "type": "string" + }, + "licenseNumber": { + "maxLength": 100, + "type": "string" + }, + 
"investigationStatus": { + "type": "string", + "enum": [ + "underInvestigation" + ] + }, + "npi": { + "pattern": "^[0-9]{10}$", + "type": "string" + }, + "homeAddressPostalCode": { + "maxLength": 7, + "minLength": 5, + "type": "string" + }, + "compactEligibility": { + "type": "string", + "enum": [ + "eligible", + "ineligible" + ] + }, + "givenName": { + "maxLength": 100, + "type": "string" + }, + "jurisdictionUploadedCompactEligibility": { + "type": "string", + "enum": [ + "eligible", + "ineligible" + ] + }, + "jurisdictionUploadedLicenseStatus": { + "type": "string", + "enum": [ + "active", + "inactive" + ] + }, + "phoneNumber": { + "pattern": "^\\+[0-9]{8,15}$", + "type": "string" + }, + "licenseStatus": { + "type": "string", + "enum": [ + "active", + "inactive" + ] + }, + "licenseStatusName": { + "maxLength": 100, + "type": "string" + }, + "middleName": { + "maxLength": 100, + "type": "string" + }, + "adverseActions": { + "type": "array", + "items": { + "required": [ + "actionAgainst", + "adverseActionId", + "compact", + "creationDate", + "dateOfUpdate", + "effectiveStartDate", + "encumbranceType", + "jurisdiction", + "licenseType", + "licenseTypeAbbreviation", + "providerId", + "submittingUser", + "type" + ], + "type": "object", + "properties": { + "clinicalPrivilegeActionCategories": { + "type": "array", + "items": { + "type": "string" + } + }, + "compact": { + "type": "string", + "enum": [ + "aslp", + "octp", + "coun" + ] + }, + "jurisdiction": { + "type": "string", + "enum": [ + "al", + "ak", + "az", + "ar", + "ca", + "co", + "ct", + "de", + "dc", + "fl", + "ga", + "hi", + "id", + "il", + "in", + "ia", + "ks", + "ky", + "la", + "me", + "md", + "ma", + "mi", + "mn", + "ms", + "mo", + "mt", + "ne", + "nv", + "nh", + "nj", + "nm", + "ny", + "nc", + "nd", + "oh", + "ok", + "or", + "pa", + "pr", + "ri", + "sc", + "sd", + "tn", + "tx", + "ut", + "vt", + "va", + "vi", + "wa", + "wv", + "wi", + "wy" + ] + }, + "licenseTypeAbbreviation": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "adverseAction" + ] + }, + "creationDate": { + "pattern": "^[12]{1}[0-9]{3}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$", + "type": "string", + "format": "date" + }, + "actionAgainst": { + "type": "string", + "enum": [ + "license", + "privilege" + ] + }, + "licenseType": { + "type": "string" + }, + "providerId": { + "pattern": "[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab]{1}[0-9a-f]{3}-[0-9a-f]{12}", + "type": "string" + }, + "submittingUser": { + "type": "string" + }, + "effectiveStartDate": { + "pattern": "^[12]{1}[0-9]{3}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$", + "type": "string", + "format": "date" + }, + "adverseActionId": { + "type": "string" + }, + "effectiveLiftDate": { + "pattern": "^[12]{1}[0-9]{3}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$", + "type": "string", + "format": "date" + }, + "encumbranceType": { + "type": "string" + }, + "liftingUser": { + "type": "string" + }, + "dateOfUpdate": { + "type": "string", + "format": "date-time" + } + } + } + }, + "dateOfUpdate": { + "type": "string", + "format": "date-time" + } + } + } + }, + "dateOfExpiration": { + "type": "string", + "format": "date" + }, + "militaryAffiliations": { + "type": "array", + "items": { + "required": [ + "affiliationType", + "compact", + "dateOfUpdate", + "dateOfUpload", + "fileNames", + "providerId", + "status", + "type" + ], + "type": "object", + "properties": { + "dateOfUpload": { + "pattern": "^[12]{1}[0-9]{3}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$", + "type": "string", + "format": "date" + }, + 
"compact": { + "type": "string", + "enum": [ + "aslp", + "octp", + "coun" + ] + }, + "providerId": { + "pattern": "[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab]{1}[0-9a-f]{3}-[0-9a-f]{12}", + "type": "string" + }, + "affiliationType": { + "type": "string", + "enum": [ + "militaryMember", + "militaryMemberSpouse" + ] + }, + "type": { + "type": "string", + "enum": [ + "militaryAffiliation" + ] + }, + "dateOfUpdate": { + "type": "string", + "format": "date-time" + }, + "fileNames": { + "type": "array", + "items": { + "type": "string" + } + }, + "status": { + "type": "string", + "enum": [ + "active", + "inactive" + ] + } + } + } + }, + "providerId": { + "pattern": "[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab]{1}[0-9a-f]{3}-[0-9a-f]{12}", + "type": "string" + }, + "licenseStatus": { + "type": "string", + "enum": [ + "active", + "inactive" + ] + }, + "familyName": { + "maxLength": 100, + "type": "string" + }, + "middleName": { + "maxLength": 100, + "type": "string" + }, + "birthMonthDay": { + "pattern": "^[0-1]{1}[0-9]{1}-[0-3]{1}[0-9]{1}", + "type": "string" + }, + "compactConnectRegisteredEmailAddress": { + "type": "string", + "format": "email" + }, + "dateOfUpdate": { + "type": "string", + "format": "date-time" + } + } + } + } + } + }, + "SandboSearctZ4sfzliddmr": { + "required": [ + "query" + ], + "type": "object", + "properties": { + "search_after": { + "type": "array", + "description": "Sort values from the last hit of the previous page for cursor-based pagination" + }, + "size": { + "maximum": 100, + "minimum": 1, + "type": "integer", + "description": "Number of results to return" + }, + "query": { + "type": "object", + "description": "The OpenSearch query body" + }, + "from": { + "minimum": 0, + "type": "integer", + "description": "Starting document offset for pagination" + }, + "sort": { + "type": "array", + "description": "Sort order for results (required for search_after pagination)", + "items": { + "type": "object" + } + } + }, + "additionalProperties": false + } + } + }, + "x-amazon-apigateway-security-policy": "TLS_1_0" +} diff --git a/backend/compact-connect/docs/search-internal/api-specification/swagger.html b/backend/compact-connect/docs/search-internal/api-specification/swagger.html new file mode 100644 index 000000000..44396776c --- /dev/null +++ b/backend/compact-connect/docs/search-internal/api-specification/swagger.html @@ -0,0 +1,22 @@ + + + + + + + SwaggerUI + + + +
+ + + + diff --git a/backend/compact-connect/lambdas/nodejs/package.json b/backend/compact-connect/lambdas/nodejs/package.json index 7276357dc..a4e7e7fa4 100644 --- a/backend/compact-connect/lambdas/nodejs/package.json +++ b/backend/compact-connect/lambdas/nodejs/package.json @@ -46,7 +46,7 @@ "@aws-sdk/client-sesv2": "^3.901.0", "@aws-sdk/util-dynamodb": "^3.901.0", "@jusdino-ia/email-builder": "^0.0.9-alpha.3", - "nodemailer": "^7.0.7", + "nodemailer": "^7.0.11", "zod": "^3.23.8" } } diff --git a/backend/compact-connect/lambdas/nodejs/yarn.lock b/backend/compact-connect/lambdas/nodejs/yarn.lock index 998a714d5..9121698ca 100644 --- a/backend/compact-connect/lambdas/nodejs/yarn.lock +++ b/backend/compact-connect/lambdas/nodejs/yarn.lock @@ -4510,10 +4510,10 @@ node-releases@^2.0.18: resolved "https://registry.npmjs.org/node-releases/-/node-releases-2.0.18.tgz" integrity sha512-d9VeXT4SJ7ZeOqGX6R5EM022wpL+eWPooLI+5UpWn2jCT1aosUQEhQP214x33Wkwx3JQMvIm+tIoVOdodFS40g== -nodemailer@^7.0.7: - version "7.0.9" - resolved "https://registry.yarnpkg.com/nodemailer/-/nodemailer-7.0.9.tgz#fe5abd4173e08e01aa243c7cddd612ad8c6ccc18" - integrity sha512-9/Qm0qXIByEP8lEV2qOqcAW7bRpL8CR9jcTwk3NBnHJNmP9fIJ86g2fgmIXqHY+nj55ZEMwWqYAT2QTDpRUYiQ== +nodemailer@^7.0.11: + version "7.0.11" + resolved "https://registry.yarnpkg.com/nodemailer/-/nodemailer-7.0.11.tgz#5f7b06afaec20073cff36bea92d1c7395cc3e512" + integrity sha512-gnXhNRE0FNhD7wPSCGhdNh46Hs6nm+uTyg+Kq0cZukNQiYdnCsoQjodNP9BQVG9XrcK/v6/MgpAPBUFyzh9pvw== normalize-path@^3.0.0, normalize-path@~3.0.0: version "3.0.0" diff --git a/backend/compact-connect/lambdas/python/cognito-backup/requirements-dev.txt b/backend/compact-connect/lambdas/python/cognito-backup/requirements-dev.txt index f94e5cec9..184c65492 100644 --- a/backend/compact-connect/lambdas/python/cognito-backup/requirements-dev.txt +++ b/backend/compact-connect/lambdas/python/cognito-backup/requirements-dev.txt @@ -6,11 +6,11 @@ # aws-lambda-powertools==3.23.0 # via -r lambdas/python/cognito-backup/requirements-dev.in -boto3==1.41.0 +boto3==1.42.11 # via # -r lambdas/python/cognito-backup/requirements-dev.in # moto -botocore==1.41.0 +botocore==1.42.11 # via # -r lambdas/python/cognito-backup/requirements-dev.in # boto3 @@ -37,13 +37,13 @@ jmespath==1.0.1 # aws-lambda-powertools # boto3 # botocore -joserfc==1.4.3 +joserfc==1.6.0 # via moto markupsafe==3.0.3 # via # jinja2 # werkzeug -moto[cognitoidp,s3]==5.1.17 +moto[cognitoidp,s3]==5.1.18 # via -r lambdas/python/cognito-backup/requirements-dev.in packaging==25.0 # via pytest @@ -55,7 +55,7 @@ pycparser==2.23 # via cffi pygments==2.19.2 # via pytest -pytest==9.0.1 +pytest==9.0.2 # via -r lambdas/python/cognito-backup/requirements-dev.in python-dateutil==2.9.0.post0 # via @@ -71,18 +71,18 @@ requests==2.32.5 # responses responses==0.25.8 # via moto -s3transfer==0.14.0 +s3transfer==0.16.0 # via boto3 six==1.17.0 # via python-dateutil typing-extensions==4.15.0 # via aws-lambda-powertools -urllib3==2.5.0 +urllib3==2.6.2 # via # botocore # requests # responses -werkzeug==3.1.3 +werkzeug==3.1.4 # via moto xmltodict==1.0.2 # via moto diff --git a/backend/compact-connect/lambdas/python/cognito-backup/requirements.txt b/backend/compact-connect/lambdas/python/cognito-backup/requirements.txt index a58094bd4..b9089934d 100644 --- a/backend/compact-connect/lambdas/python/cognito-backup/requirements.txt +++ b/backend/compact-connect/lambdas/python/cognito-backup/requirements.txt @@ -6,9 +6,9 @@ # aws-lambda-powertools==3.23.0 # via -r 
lambdas/python/cognito-backup/requirements.in -boto3==1.41.0 +boto3==1.42.11 # via -r lambdas/python/cognito-backup/requirements.in -botocore==1.41.0 +botocore==1.42.11 # via # -r lambdas/python/cognito-backup/requirements.in # boto3 @@ -20,11 +20,11 @@ jmespath==1.0.1 # botocore python-dateutil==2.9.0.post0 # via botocore -s3transfer==0.14.0 +s3transfer==0.16.0 # via boto3 six==1.17.0 # via python-dateutil typing-extensions==4.15.0 # via aws-lambda-powertools -urllib3==2.5.0 +urllib3==2.6.2 # via botocore diff --git a/backend/compact-connect/lambdas/python/common/cc_common/config.py b/backend/compact-connect/lambdas/python/common/cc_common/config.py index 79fd69b75..e952271fa 100644 --- a/backend/compact-connect/lambdas/python/common/cc_common/config.py +++ b/backend/compact-connect/lambdas/python/common/cc_common/config.py @@ -22,6 +22,20 @@ class _Config: presigned_post_ttl_seconds = 3600 default_page_size = 100 + @property + def environment_region(self): + """ + Returns the region name of the region the lambda is running in. + """ + return os.environ['AWS_REGION'] + + @property + def opensearch_host_endpoint(self): + """ + Returns the OpenSearch host endpoint for the domain. + """ + return os.environ['OPENSEARCH_HOST_ENDPOINT'] + @cached_property def cognito_client(self): return boto3.client('cognito-idp') @@ -274,6 +288,10 @@ def transaction_client(self): def transaction_reports_bucket_name(self): return os.environ['TRANSACTION_REPORTS_BUCKET_NAME'] + @property + def export_results_bucket_name(self): + return os.environ['EXPORT_RESULTS_BUCKET_NAME'] + @property def transaction_history_table_name(self): return os.environ['TRANSACTION_HISTORY_TABLE_NAME'] diff --git a/backend/compact-connect/lambdas/python/common/cc_common/data_model/schema/provider/api.py b/backend/compact-connect/lambdas/python/common/cc_common/data_model/schema/provider/api.py index 40deefa06..36d464caa 100644 --- a/backend/compact-connect/lambdas/python/common/cc_common/data_model/schema/provider/api.py +++ b/backend/compact-connect/lambdas/python/common/cc_common/data_model/schema/provider/api.py @@ -3,7 +3,7 @@ from marshmallow import ValidationError, validates_schema from marshmallow.fields import UUID, Date, DateTime, Email, Integer, List, Nested, Raw, String -from marshmallow.validate import Length, OneOf, Regexp +from marshmallow.validate import Length, OneOf, Range, Regexp from cc_common.data_model.schema.base_record import ForgivingSchema from cc_common.data_model.schema.common import CCRequestSchema @@ -31,6 +31,37 @@ PrivilegeReadPrivateResponseSchema, ) +# Keys that indicate cross-index query attempts in OpenSearch DSL +# These are used by terms lookup, more_like_this, and other queries to reference external indices +_CROSS_INDEX_KEYS = frozenset({'index', '_index'}) + + +def _validate_no_cross_index_keys(obj, path: str = 'query') -> None: + """ + Recursively validate that an object does not contain cross-index lookup keys. + + This function traverses the query structure looking for keys that would indicate + an attempt to access data from other indices: + - 'index': Used in terms lookup queries to specify an external index + - '_index': Used in more_like_this queries to reference documents from other indices + + These keys should never appear in legitimate single-index queries against the + provider search index. 
+ + :param obj: The object to validate (dict, list, or scalar) + :param path: The current path in the object for error messages + :raises ValidationError: If a cross-index key is found + """ + if isinstance(obj, dict): + for key, value in obj.items(): + if key in _CROSS_INDEX_KEYS: + raise ValidationError(f"Cross-index queries are not allowed. Found '{key}' at {path}.{key}") + _validate_no_cross_index_keys(value, path=f'{path}.{key}') + elif isinstance(obj, list): + for i, item in enumerate(obj): + _validate_no_cross_index_keys(item, path=f'{path}[{i}]') + # Scalar values (str, int, bool, None) are safe - we only check keys + class ProviderSSNResponseSchema(ForgivingSchema): """ @@ -131,7 +162,6 @@ class ProviderGeneralResponseSchema(ForgivingSchema): familyName = String(required=True, allow_none=False, validate=Length(1, 100)) suffix = String(required=False, allow_none=False, validate=Length(1, 100)) # This date is determined by the license records uploaded by a state - # they do not include a timestamp, so we use the Date field type dateOfExpiration = Raw(required=True, allow_none=False) compactConnectRegisteredEmailAddress = Email(required=False, allow_none=False) @@ -449,3 +479,75 @@ class StateProviderDetailGeneralResponseSchema(ForgivingSchema): privileges = List(Nested(StatePrivilegeGeneralResponseSchema, required=True, allow_none=False)) providerUIUrl = String(required=True, allow_none=False) + + +class SearchProvidersRequestSchema(CCRequestSchema): + """ + Schema for advanced search providers requests. + + This schema is used to validate incoming requests to the advanced search providers API endpoint. + It accepts an OpenSearch DSL query body for flexible querying of the provider index. + + The request body closely mirrors OpenSearch DSL for pagination using `search_after`. + See: https://docs.opensearch.org/latest/search-plugins/searching-data/paginate/#the-search_after-parameter + + Serialization direction: + API -> load() -> Python + """ + + # The OpenSearch query body - we use Raw to allow the full flexibility of OpenSearch queries + query = Raw(required=True, allow_none=False) + + # Pagination parameters following OpenSearch DSL + # 'from' is a reserved word in Python, so we use 'from_' with data_key='from' + from_ = Integer(required=False, allow_none=False, data_key='from', validate=Range(min=0, max=9900)) + size = Integer(required=False, allow_none=False, validate=Range(min=1, max=100)) + + # Sort order - required when using search_after pagination + # Example: [{"providerId": "asc"}, {"dateOfUpdate": "desc"}] + sort = Raw(required=False, allow_none=False) + + # The search_after parameter for cursor-based pagination + # This should be the 'sort' values from the last hit of the previous page + # Example: ["provider-uuid-123", "2024-01-15T10:30:00Z"] + search_after = Raw(required=False, allow_none=False) + + @validates_schema + def validate_no_cross_index_queries(self, data, **kwargs): + """ + Validate that the query does not contain cross-index lookup attempts. + + This is a defense-in-depth security measure to prevent queries that attempt to access + data from other compact indices. The primary protection is the OpenSearch domain setting + `rest.action.multi.allow_explicit_index: false`, but this validation provides an + additional application-layer check. 
+ + Dangerous patterns blocked: + - Terms lookup with external index: {"terms": {"field": {"index": "other_index", ...}}} + - More like this with external docs: {"more_like_this": {"like": [{"_index": "other_index"}]}} + """ + _validate_no_cross_index_keys(data.get('query', {})) + + +class ExportPrivilegesRequestSchema(CCRequestSchema): + """ + Schema for Exporting list of privileges into CSV file. + + This schema is used to validate incoming requests to the advanced search providers API endpoint. + It accepts an OpenSearch DSL query body for flexible querying of the provider index. + + Serialization direction: + API -> load() -> Python + """ + + # The OpenSearch query body - we use Raw to allow the full flexibility of OpenSearch queries + query = Raw(required=True, allow_none=False) + + @validates_schema + def validate_no_cross_index_queries(self, data, **kwargs): + """ + Validate that the query does not contain cross-index lookup attempts. + + This is a defense-in-depth security measure. See SearchProvidersRequestSchema for details. + """ + _validate_no_cross_index_keys(data.get('query', {})) diff --git a/backend/compact-connect/lambdas/python/common/cc_common/utils.py b/backend/compact-connect/lambdas/python/common/cc_common/utils.py index df351444e..8c6771ced 100644 --- a/backend/compact-connect/lambdas/python/common/cc_common/utils.py +++ b/backend/compact-connect/lambdas/python/common/cc_common/utils.py @@ -468,6 +468,54 @@ def process_messages(event, context: LambdaContext): # noqa: ARG001 unused-argu return process_messages +def sqs_batch_handler(fn: Callable): + """Process a batch of messages from an SQS queue, passing all messages to the handler at once. + + This handler is similar to sqs_handler but passes ALL messages to the decorated function + at once, allowing for batch processing, deduplication, and bulk operations. The decorated + function is responsible for returning the batchItemFailures response directly. 
+ + This handler uses batch item failure reporting: + https://docs.aws.amazon.com/lambda/latest/dg/example_serverless_SQS_Lambda_batch_item_failures_section.html + + The decorated function receives a list of records, where each record contains: + - 'messageId': The SQS message ID (used for batch item failure reporting) + - 'body': The parsed JSON body of the SQS message + + The decorated function must return: {'batchItemFailures': [{'itemIdentifier': messageId}, ...]} + """ + + @wraps(fn) + @metrics.log_metrics + @logger.inject_lambda_context + def process_messages(event, context: LambdaContext): # noqa: ARG001 unused-argument + sqs_records = event.get('Records', []) + logger.info('Starting batch processing', batch_count=len(sqs_records)) + + if not sqs_records: + logger.info('No records to process') + return {'batchItemFailures': []} + + # Parse all SQS message bodies and create records with messageId for failure tracking + records = [] + for sqs_record in sqs_records: + message_id = sqs_record['messageId'] + try: + body = json.loads(sqs_record['body']) + records.append({'messageId': message_id, 'body': body}) + except json.JSONDecodeError as e: + # If we can't parse the message body, log error but don't fail the whole batch + logger.error('Failed to parse SQS message body', message_id=message_id, exc_info=e) + # We can't process this message, but we also shouldn't retry it since it's malformed + # So we don't add it to failures - it will be deleted from the queue + + # Call the decorated function with all parsed records + # The function is responsible for returning {'batchItemFailures': [...]} + return fn(records) + + return process_messages + + def sqs_handler_with_notification_tracking(fn: Callable): """ Process messages from SQS with notification tracking capabilities. 
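A hypothetical consumer of this new decorator might look like the sketch below. The handler name and the upsert step are illustrative; only the record shape (`messageId` plus parsed `body`) and the `batchItemFailures` return contract come from the decorator's docstring above.

```python
from cc_common.utils import sqs_batch_handler


@sqs_batch_handler
def handle_provider_updates(records: list[dict]) -> dict:
    """Upsert the latest OpenSearch document for every provider referenced in this batch."""
    # Deduplicate across the batch so each provider is looked up and indexed only once
    provider_ids = {r['body'].get('providerId') for r in records if r['body'].get('providerId')}
    try:
        for provider_id in provider_ids:
            # ... fetch the provider's current records and upsert its document here ...
            print(f'would upsert provider {provider_id}')
    except Exception:  # illustrative catch-all; real code would be more selective
        # Report every message id so SQS redelivers the whole batch for another attempt
        return {'batchItemFailures': [{'itemIdentifier': r['messageId']} for r in records]}
    return {'batchItemFailures': []}
```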
diff --git a/backend/compact-connect/lambdas/python/common/requirements-dev.txt b/backend/compact-connect/lambdas/python/common/requirements-dev.txt index 4dbfbec7e..a59a86d41 100644 --- a/backend/compact-connect/lambdas/python/common/requirements-dev.txt +++ b/backend/compact-connect/lambdas/python/common/requirements-dev.txt @@ -12,31 +12,31 @@ attrs==25.4.0 # via # jsonschema # referencing -aws-sam-translator==1.102.0 +aws-sam-translator==1.105.0 # via cfn-lint aws-xray-sdk==2.15.0 # via moto -boto3==1.41.0 +boto3==1.42.11 # via # aws-sam-translator # moto -boto3-stubs[full]==1.40.76 +boto3-stubs[full]==1.42.11 # via -r lambdas/python/common/requirements-dev.in -boto3-stubs-full==1.40.76 +boto3-stubs-full==1.42.10 # via boto3-stubs -botocore==1.41.0 +botocore==1.42.11 # via # aws-xray-sdk # boto3 # moto # s3transfer -botocore-stubs==1.40.76 +botocore-stubs==1.42.11 # via boto3-stubs certifi==2025.11.12 # via requests cffi==2.0.0 # via cryptography -cfn-lint==1.41.0 +cfn-lint==1.43.0 # via moto charset-normalizer==3.4.4 # via requests @@ -59,7 +59,7 @@ jmespath==1.0.1 # via # boto3 # botocore -joserfc==1.4.3 +joserfc==1.6.0 # via moto jsonpatch==1.33 # via cfn-lint @@ -85,13 +85,13 @@ markupsafe==3.0.3 # via # jinja2 # werkzeug -moto[all]==5.1.17 +moto[all]==5.1.18 # via -r lambdas/python/common/requirements-dev.in mpmath==1.3.0 # via sympy multipart==1.3.0 # via moto -networkx==3.5 +networkx==3.6.1 # via cfn-lint openapi-schema-validator==0.6.3 # via openapi-spec-validator @@ -105,7 +105,7 @@ py-partiql-parser==0.6.3 # via moto pycparser==2.23 # via cffi -pydantic==2.12.4 +pydantic==2.12.5 # via aws-sam-translator pydantic-core==2.41.5 # via pydantic @@ -138,11 +138,11 @@ responses==0.25.8 # via moto rfc3339-validator==0.1.4 # via openapi-schema-validator -rpds-py==0.29.0 +rpds-py==0.30.0 # via # jsonschema # referencing -s3transfer==0.14.0 +s3transfer==0.16.0 # via boto3 six==1.17.0 # via @@ -150,9 +150,9 @@ six==1.17.0 # rfc3339-validator sympy==1.14.0 # via cfn-lint -types-awscrt==0.28.4 +types-awscrt==0.30.0 # via botocore-stubs -types-s3transfer==0.14.0 +types-s3transfer==0.16.0 # via boto3-stubs typing-extensions==4.15.0 # via @@ -163,15 +163,15 @@ typing-extensions==4.15.0 # typing-inspection typing-inspection==0.4.2 # via pydantic -tzdata==2025.2 +tzdata==2025.3 # via faker -urllib3==2.5.0 +urllib3==2.6.2 # via # botocore # docker # requests # responses -werkzeug==3.1.3 +werkzeug==3.1.4 # via moto wrapt==2.0.1 # via aws-xray-sdk diff --git a/backend/compact-connect/lambdas/python/common/requirements.txt b/backend/compact-connect/lambdas/python/common/requirements.txt index 6f49c3833..632ab69b1 100644 --- a/backend/compact-connect/lambdas/python/common/requirements.txt +++ b/backend/compact-connect/lambdas/python/common/requirements.txt @@ -10,9 +10,9 @@ argon2-cffi-bindings==25.1.0 # via argon2-cffi aws-lambda-powertools==3.23.0 # via -r lambdas/python/common/requirements.in -boto3==1.41.0 +boto3==1.42.11 # via -r lambdas/python/common/requirements.in -botocore==1.41.0 +botocore==1.42.11 # via # boto3 # s3transfer @@ -43,13 +43,13 @@ python-dateutil==2.9.0.post0 # via botocore requests==2.32.5 # via -r lambdas/python/common/requirements.in -s3transfer==0.14.0 +s3transfer==0.16.0 # via boto3 six==1.17.0 # via python-dateutil typing-extensions==4.15.0 # via aws-lambda-powertools -urllib3==2.5.0 +urllib3==2.6.2 # via # botocore # requests diff --git a/backend/compact-connect/lambdas/python/compact-configuration/requirements-dev.txt 
b/backend/compact-connect/lambdas/python/compact-configuration/requirements-dev.txt index 043ab6de2..1f5e19463 100644 --- a/backend/compact-connect/lambdas/python/compact-configuration/requirements-dev.txt +++ b/backend/compact-connect/lambdas/python/compact-configuration/requirements-dev.txt @@ -4,9 +4,9 @@ # # pip-compile --no-emit-index-url --no-strip-extras lambdas/python/compact-configuration/requirements-dev.in # -boto3==1.41.0 +boto3==1.42.11 # via moto -botocore==1.41.0 +botocore==1.42.11 # via # boto3 # moto @@ -33,7 +33,7 @@ markupsafe==3.0.3 # via # jinja2 # werkzeug -moto[dynamodb,s3]==5.1.17 +moto[dynamodb,s3]==5.1.18 # via -r lambdas/python/compact-configuration/requirements-dev.in py-partiql-parser==0.6.3 # via moto @@ -54,17 +54,17 @@ requests==2.32.5 # responses responses==0.25.8 # via moto -s3transfer==0.14.0 +s3transfer==0.16.0 # via boto3 six==1.17.0 # via python-dateutil -urllib3==2.5.0 +urllib3==2.6.2 # via # botocore # docker # requests # responses -werkzeug==3.1.3 +werkzeug==3.1.4 # via moto xmltodict==1.0.2 # via moto diff --git a/backend/compact-connect/lambdas/python/custom-resources/requirements-dev.txt b/backend/compact-connect/lambdas/python/custom-resources/requirements-dev.txt index db56d974e..a751a4574 100644 --- a/backend/compact-connect/lambdas/python/custom-resources/requirements-dev.txt +++ b/backend/compact-connect/lambdas/python/custom-resources/requirements-dev.txt @@ -4,9 +4,9 @@ # # pip-compile --no-emit-index-url --no-strip-extras lambdas/python/custom-resources/requirements-dev.in # -boto3==1.41.0 +boto3==1.42.11 # via moto -botocore==1.41.0 +botocore==1.42.11 # via # boto3 # moto @@ -33,7 +33,7 @@ markupsafe==3.0.3 # via # jinja2 # werkzeug -moto[dynamodb,s3]==5.1.17 +moto[dynamodb,s3]==5.1.18 # via -r lambdas/python/custom-resources/requirements-dev.in py-partiql-parser==0.6.3 # via moto @@ -54,17 +54,17 @@ requests==2.32.5 # responses responses==0.25.8 # via moto -s3transfer==0.14.0 +s3transfer==0.16.0 # via boto3 six==1.17.0 # via python-dateutil -urllib3==2.5.0 +urllib3==2.6.2 # via # botocore # docker # requests # responses -werkzeug==3.1.3 +werkzeug==3.1.4 # via moto xmltodict==1.0.2 # via moto diff --git a/backend/compact-connect/lambdas/python/data-events/requirements-dev.txt b/backend/compact-connect/lambdas/python/data-events/requirements-dev.txt index 22a836bd7..a31fe9b7a 100644 --- a/backend/compact-connect/lambdas/python/data-events/requirements-dev.txt +++ b/backend/compact-connect/lambdas/python/data-events/requirements-dev.txt @@ -4,9 +4,9 @@ # # pip-compile --no-emit-index-url --no-strip-extras lambdas/python/data-events/requirements-dev.in # -boto3==1.41.0 +boto3==1.42.11 # via moto -botocore==1.41.0 +botocore==1.42.11 # via # boto3 # moto @@ -33,7 +33,7 @@ markupsafe==3.0.3 # via # jinja2 # werkzeug -moto[dynamodb,s3]==5.1.17 +moto[dynamodb,s3]==5.1.18 # via -r lambdas/python/data-events/requirements-dev.in py-partiql-parser==0.6.3 # via moto @@ -54,17 +54,17 @@ requests==2.32.5 # responses responses==0.25.8 # via moto -s3transfer==0.14.0 +s3transfer==0.16.0 # via boto3 six==1.17.0 # via python-dateutil -urllib3==2.5.0 +urllib3==2.6.2 # via # botocore # docker # requests # responses -werkzeug==3.1.3 +werkzeug==3.1.4 # via moto xmltodict==1.0.2 # via moto diff --git a/backend/compact-connect/lambdas/python/data-events/tests/function/test_encumbrance_events.py b/backend/compact-connect/lambdas/python/data-events/tests/function/test_encumbrance_events.py index 48250af94..7600f8011 100644 --- 
a/backend/compact-connect/lambdas/python/data-events/tests/function/test_encumbrance_events.py +++ b/backend/compact-connect/lambdas/python/data-events/tests/function/test_encumbrance_events.py @@ -3074,9 +3074,7 @@ def test_license_encumbrance_listener_does_not_create_duplicate_update_records_f # Verify STILL only one update record exists (no duplicate created) update_records_after_retry = ( - self.test_data_generator.query_privilege_update_records_for_given_record_from_database( - privilege - ) + self.test_data_generator.query_privilege_update_records_for_given_record_from_database(privilege) ) matching_updates_after_retry = [ update @@ -3147,9 +3145,7 @@ def test_license_encumbrance_listener_does_not_create_duplicate_update_records_f # Verify STILL only one update record exists (no duplicate created) update_records_after_retry = ( - self.test_data_generator.query_privilege_update_records_for_given_record_from_database( - privilege - ) + self.test_data_generator.query_privilege_update_records_for_given_record_from_database(privilege) ) matching_updates_after_retry = [ update @@ -3241,9 +3237,7 @@ def test_license_encumbrance_lifted_listener_does_not_create_duplicate_update_re # license_encumbrance_lifted_listener will skip creating privilege updates because it only # does so on LICENSE_ENCUMBERED privileges and none of those would remain update_records_after_retry = ( - self.test_data_generator.query_privilege_update_records_for_given_record_from_database( - privilege - ) + self.test_data_generator.query_privilege_update_records_for_given_record_from_database(privilege) ) matching_updates_after_retry = [ update for update in update_records_after_retry if update.updateType == UpdateCategory.LIFTING_ENCUMBRANCE @@ -3369,4 +3363,3 @@ def test_license_encumbrance_notification_listener_creates_notification_events_t self.assertEqual(expected_sks, list(notification_records.keys())) for sk in expected_sks: self.assertEqual(NotificationStatus.SUCCESS, notification_records.get(sk).get('status')) - diff --git a/backend/compact-connect/lambdas/python/disaster-recovery/requirements-dev.txt b/backend/compact-connect/lambdas/python/disaster-recovery/requirements-dev.txt index 44d9a0022..834cf4a1c 100644 --- a/backend/compact-connect/lambdas/python/disaster-recovery/requirements-dev.txt +++ b/backend/compact-connect/lambdas/python/disaster-recovery/requirements-dev.txt @@ -4,9 +4,9 @@ # # pip-compile --no-emit-index-url --no-strip-extras lambdas/python/disaster-recovery/requirements-dev.in # -boto3==1.41.0 +boto3==1.42.11 # via moto -botocore==1.41.0 +botocore==1.42.11 # via # boto3 # moto @@ -33,7 +33,7 @@ markupsafe==3.0.3 # via # jinja2 # werkzeug -moto[dynamodb,s3]==5.1.17 +moto[dynamodb,s3]==5.1.18 # via -r lambdas/python/disaster-recovery/requirements-dev.in py-partiql-parser==0.6.3 # via moto @@ -54,17 +54,17 @@ requests==2.32.5 # responses responses==0.25.8 # via moto -s3transfer==0.14.0 +s3transfer==0.16.0 # via boto3 six==1.17.0 # via python-dateutil -urllib3==2.5.0 +urllib3==2.6.2 # via # botocore # docker # requests # responses -werkzeug==3.1.3 +werkzeug==3.1.4 # via moto xmltodict==1.0.2 # via moto diff --git a/backend/compact-connect/lambdas/python/provider-data-v1/handlers/licenses.py b/backend/compact-connect/lambdas/python/provider-data-v1/handlers/licenses.py index 18ef1af65..59d8bd64f 100644 --- a/backend/compact-connect/lambdas/python/provider-data-v1/handlers/licenses.py +++ b/backend/compact-connect/lambdas/python/provider-data-v1/handlers/licenses.py @@ -75,6 +75,7 @@ def 
post_licenses(event: dict, context: LambdaContext): # noqa: ARG001 unused-a # verify that none of the SSN+LicenseType combinations are repeats within the same batch license_keys = [(license_record['ssn'], license_record['licenseType']) for license_record in licenses] if len(set(license_keys)) < len(license_keys): + logger.info('Duplicate SSNs detected in same request.', compact=compact, jurisdiction=jurisdiction) raise CCInvalidRequestCustomResponseException( response_body={ 'message': 'Invalid license records in request. See errors for more detail.', diff --git a/backend/compact-connect/lambdas/python/provider-data-v1/requirements-dev.txt b/backend/compact-connect/lambdas/python/provider-data-v1/requirements-dev.txt index 35825ad72..178f7bf50 100644 --- a/backend/compact-connect/lambdas/python/provider-data-v1/requirements-dev.txt +++ b/backend/compact-connect/lambdas/python/provider-data-v1/requirements-dev.txt @@ -4,9 +4,9 @@ # # pip-compile --no-emit-index-url --no-strip-extras lambdas/python/provider-data-v1/requirements-dev.in # -boto3==1.41.0 +boto3==1.42.11 # via moto -botocore==1.41.0 +botocore==1.42.11 # via # boto3 # moto @@ -35,7 +35,7 @@ markupsafe==3.0.3 # via # jinja2 # werkzeug -moto[dynamodb,s3]==5.1.17 +moto[dynamodb,s3]==5.1.18 # via -r lambdas/python/provider-data-v1/requirements-dev.in py-partiql-parser==0.6.3 # via moto @@ -56,19 +56,19 @@ requests==2.32.5 # responses responses==0.25.8 # via moto -s3transfer==0.14.0 +s3transfer==0.16.0 # via boto3 six==1.17.0 # via python-dateutil -tzdata==2025.2 +tzdata==2025.3 # via faker -urllib3==2.5.0 +urllib3==2.6.2 # via # botocore # docker # requests # responses -werkzeug==3.1.3 +werkzeug==3.1.4 # via moto xmltodict==1.0.2 # via moto diff --git a/backend/compact-connect/lambdas/python/provider-data-v1/tests/__init__.py b/backend/compact-connect/lambdas/python/provider-data-v1/tests/__init__.py index a6e1f1cc9..08a9c0b79 100644 --- a/backend/compact-connect/lambdas/python/provider-data-v1/tests/__init__.py +++ b/backend/compact-connect/lambdas/python/provider-data-v1/tests/__init__.py @@ -97,6 +97,11 @@ def setUpClass(cls): {'name': 'audiologist', 'abbreviation': 'aud'}, {'name': 'speech-language pathologist', 'abbreviation': 'slp'}, ], + 'octp': [ + {'name': 'occupational therapist', 'abbreviation': 'ot'}, + {'name': 'occupational therapy assistant', 'abbreviation': 'ota'}, + ], + 'coun': [{'name': 'licensed professional counselor', 'abbreviation': 'lpc'}], }, ), }, diff --git a/backend/compact-connect/lambdas/python/purchases/requirements-dev.in b/backend/compact-connect/lambdas/python/purchases/requirements-dev.in index 11892d0b0..c4c8851a6 100644 --- a/backend/compact-connect/lambdas/python/purchases/requirements-dev.in +++ b/backend/compact-connect/lambdas/python/purchases/requirements-dev.in @@ -14,3 +14,4 @@ boto3>=1.34.33, <2 cryptography>=46, <47 marshmallow>=3.21.3, <4.0.0 requests>=2.31.0, <3.0.0 +urllib3>=2.6.0, <3 diff --git a/backend/compact-connect/lambdas/python/purchases/requirements-dev.txt b/backend/compact-connect/lambdas/python/purchases/requirements-dev.txt index 7c95681ce..d74008ce4 100644 --- a/backend/compact-connect/lambdas/python/purchases/requirements-dev.txt +++ b/backend/compact-connect/lambdas/python/purchases/requirements-dev.txt @@ -2,21 +2,21 @@ # This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# pip-compile requirements-dev.in +# pip-compile --no-emit-index-url --no-strip-extras lambdas/python/purchases/requirements-dev.in # argon2-cffi==25.1.0 - # via 
-r requirements-dev.in + # via -r lambdas/python/purchases/requirements-dev.in argon2-cffi-bindings==25.1.0 # via argon2-cffi aws-lambda-powertools==3.23.0 - # via -r requirements-dev.in + # via -r lambdas/python/purchases/requirements-dev.in boolean-py==5.0 # via license-expression -boto3==1.40.76 +boto3==1.42.11 # via - # -r requirements-dev.in + # -r lambdas/python/purchases/requirements-dev.in # moto -botocore==1.40.76 +botocore==1.42.11 # via # boto3 # moto @@ -37,23 +37,23 @@ charset-normalizer==3.4.4 # via requests click==8.3.1 # via pip-tools -coverage[toml]==7.12.0 +coverage[toml]==7.13.0 # via - # -r requirements-dev.in + # -r lambdas/python/purchases/requirements-dev.in # pytest-cov cryptography==46.0.3 # via - # -r requirements-dev.in + # -r lambdas/python/purchases/requirements-dev.in # moto -cyclonedx-python-lib==9.1.0 +cyclonedx-python-lib==11.6.0 # via pip-audit defusedxml==0.7.1 # via py-serializable docker==7.1.0 # via moto faker==37.12.0 - # via -r requirements-dev.in -filelock==3.20.0 + # via -r lambdas/python/purchases/requirements-dev.in +filelock==3.20.1 # via cachecontrol idna==3.11 # via requests @@ -74,15 +74,15 @@ markupsafe==3.0.3 # via # jinja2 # werkzeug -marshmallow==3.26.1 - # via -r requirements-dev.in +marshmallow==3.26.2 + # via -r lambdas/python/purchases/requirements-dev.in mdurl==0.1.2 # via markdown-it-py -moto[dynamodb,s3]==5.1.17 - # via -r requirements-dev.in +moto[dynamodb,s3]==5.1.18 + # via -r lambdas/python/purchases/requirements-dev.in msgpack==1.1.2 # via cachecontrol -packageurl-python==0.17.5 +packageurl-python==0.17.6 # via cyclonedx-python-lib packaging==25.0 # via @@ -93,13 +93,13 @@ packaging==25.0 # pytest pip-api==0.0.34 # via pip-audit -pip-audit==2.9.0 - # via -r requirements-dev.in +pip-audit==2.10.0 + # via -r lambdas/python/purchases/requirements-dev.in pip-requirements-parser==32.0.1 # via pip-audit pip-tools==7.5.2 - # via -r requirements-dev.in -platformdirs==4.5.0 + # via -r lambdas/python/purchases/requirements-dev.in +platformdirs==4.5.1 # via pip-audit pluggy==1.6.0 # via @@ -121,12 +121,12 @@ pyproject-hooks==1.2.0 # via # build # pip-tools -pytest==9.0.1 +pytest==9.0.2 # via - # -r requirements-dev.in + # -r lambdas/python/purchases/requirements-dev.in # pytest-cov pytest-cov==7.0.0 - # via -r requirements-dev.in + # via -r lambdas/python/purchases/requirements-dev.in python-dateutil==2.9.0.post0 # via # botocore @@ -137,7 +137,7 @@ pyyaml==6.0.3 # responses requests==2.32.5 # via - # -r requirements-dev.in + # -r lambdas/python/purchases/requirements-dev.in # cachecontrol # docker # moto @@ -147,27 +147,31 @@ responses==0.25.8 # via moto rich==14.2.0 # via pip-audit -ruff==0.14.5 - # via -r requirements-dev.in -s3transfer==0.14.0 +ruff==0.14.9 + # via -r lambdas/python/purchases/requirements-dev.in +s3transfer==0.16.0 # via boto3 six==1.17.0 # via python-dateutil sortedcontainers==2.4.0 # via cyclonedx-python-lib -toml==0.10.2 +tomli==2.3.0 + # via pip-audit +tomli-w==1.2.0 # via pip-audit typing-extensions==4.15.0 - # via aws-lambda-powertools -tzdata==2025.2 + # via + # aws-lambda-powertools + # cyclonedx-python-lib +tzdata==2025.3 # via faker -urllib3==2.5.0 +urllib3==2.6.2 # via # botocore # docker # requests # responses -werkzeug==3.1.3 +werkzeug==3.1.4 # via moto wheel==0.45.1 # via pip-tools diff --git a/backend/compact-connect/lambdas/python/purchases/requirements.in b/backend/compact-connect/lambdas/python/purchases/requirements.in index e5e0becf4..3ddeb3993 100644 --- 
a/backend/compact-connect/lambdas/python/purchases/requirements.in +++ b/backend/compact-connect/lambdas/python/purchases/requirements.in @@ -1,2 +1,4 @@ # common requirements are managed in the common-python requirements.in file authorizenet>=1.1.6, <2 +# explicitly setting this transitive dependency to pick vulnerability patch +urllib3>=2.6.0, <3 diff --git a/backend/compact-connect/lambdas/python/purchases/requirements.txt b/backend/compact-connect/lambdas/python/purchases/requirements.txt index 6141bed67..4bbd97e5b 100644 --- a/backend/compact-connect/lambdas/python/purchases/requirements.txt +++ b/backend/compact-connect/lambdas/python/purchases/requirements.txt @@ -18,5 +18,5 @@ pyxb-x==1.2.6.3 # via authorizenet requests==2.32.5 # via authorizenet -urllib3==2.5.0 +urllib3==2.6.2 # via requests diff --git a/backend/compact-connect/lambdas/python/search/custom_resource_handler.py b/backend/compact-connect/lambdas/python/search/custom_resource_handler.py new file mode 100644 index 000000000..cb46deb96 --- /dev/null +++ b/backend/compact-connect/lambdas/python/search/custom_resource_handler.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 +from abc import ABC, abstractmethod +from typing import TypedDict + +from aws_lambda_powertools.logging.lambda_context import build_lambda_context_model +from aws_lambda_powertools.utilities.typing import LambdaContext +from cc_common.config import logger + + +class CustomResourceResponse(TypedDict, total=False): + """Return body for the custom resource handler.""" + + PhysicalResourceId: str + Data: dict + NoEcho: bool + + +class CustomResourceHandler(ABC): + """Base class for custom resource migrations. + + This class provides a framework for implementing CloudFormation custom resources. + It handles the routing of CloudFormation events to appropriate methods and provides a consistent + logging pattern. + + Subclasses must implement the on_create, on_update, and on_delete methods. + + Instances of this class are callable and can be used directly as Lambda handlers. + """ + + def __init__(self, handler_name: str): + """Initialize the custom resource handler. + + :type handler_name: str + """ + self.handler_name = handler_name + + def __call__(self, event: dict, _context: LambdaContext) -> CustomResourceResponse | None: + return self._on_event(event, _context) + + def _on_event(self, event: dict, _context: LambdaContext) -> CustomResourceResponse | None: + """CloudFormation event handler using the CDK provider framework. + See: https://docs.aws.amazon.com/cdk/api/v2/python/aws_cdk.custom_resources/README.html + + This method routes the event to the appropriate handler method based on the request type. 
+ + :param event: The lambda event with properties in ResourceProperties + :type event: dict + :param _context: Lambda context + :type _context: LambdaContext + :return: Optional result from the handler method + :rtype: Optional[CustomResourceResponse] + :raises ValueError: If the request type is not supported + """ + + # @logger.inject_lambda_context doesn't work on instance methods, so we'll build the context manually + lambda_context = build_lambda_context_model(_context) + logger.structure_logs(**lambda_context.__dict__) + + logger.info(f'{self.handler_name} handler started') + + properties = event.get('ResourceProperties', {}) + request_type = event['RequestType'] + + match request_type: + case 'Create': + try: + resp = self.on_create(properties) + except Exception as e: + logger.error(f'Error in {self.handler_name} creation', exc_info=e) + raise + case 'Update': + try: + resp = self.on_update(properties) + except Exception as e: + logger.error(f'Error in {self.handler_name} update', exc_info=e) + raise + case 'Delete': + try: + resp = self.on_delete(properties) + except Exception as e: + logger.error(f'Error in {self.handler_name} delete', exc_info=e) + raise + case _: + raise ValueError(f'Unexpected request type: {request_type}') + + logger.info(f'{self.handler_name} handler complete') + return resp + + @abstractmethod + def on_create(self, properties: dict) -> CustomResourceResponse | None: + """Handle Create events. + + This method should be implemented by subclasses to perform the migration when a resource is being created. + + :param properties: The ResourceProperties from the CloudFormation event + :type properties: dict + :return: Any result to be returned to CloudFormation + :rtype: Optional[CustomResourceResponse] + """ + + @abstractmethod + def on_update(self, properties: dict) -> CustomResourceResponse | None: + """Handle Update events. + + This method should be implemented by subclasses to perform the migration when a resource is being updated. + + :param properties: The ResourceProperties from the CloudFormation event + :type properties: dict + :return: Any result to be returned to CloudFormation + :rtype: Optional[CustomResourceResponse] + """ + + @abstractmethod + def on_delete(self, properties: dict) -> CustomResourceResponse | None: + """Handle Delete events. + + This method should be implemented by subclasses to handle deletion of the migration. In many cases, this can + be a no-op as the migration is temporary and deletion should have no effect. 
+ + :param properties: The ResourceProperties from the CloudFormation event + :type properties: dict + :return: Any result to be returned to CloudFormation + :rtype: Optional[CustomResourceResponse] + """ diff --git a/backend/compact-connect/lambdas/python/search/handlers/__init__.py b/backend/compact-connect/lambdas/python/search/handlers/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/compact-connect/lambdas/python/search/handlers/manage_opensearch_indices.py b/backend/compact-connect/lambdas/python/search/handlers/manage_opensearch_indices.py new file mode 100644 index 000000000..c6a56d9d0 --- /dev/null +++ b/backend/compact-connect/lambdas/python/search/handlers/manage_opensearch_indices.py @@ -0,0 +1,388 @@ +import time + +from cc_common.config import config, logger +from cc_common.exceptions import CCInternalException +from custom_resource_handler import CustomResourceHandler, CustomResourceResponse +from opensearch_client import OpenSearchClient + +# Initial index version for new deployments +INITIAL_INDEX_VERSION = 'v1' + +# Readiness check configuration +# OpenSearch domains may take time to become responsive after CloudFormation reports them as created. +DOMAIN_READINESS_CHECK_INTERVAL_SECONDS = 10 +DOMAIN_READINESS_MAX_ATTEMPTS = 30 # 30 attempts * 10 seconds = 5 minutes max wait + + +class OpenSearchIndexManager(CustomResourceHandler): + """ + Custom resource handler to create OpenSearch indices for compacts. + + Creates versioned indices (e.g., compact_aslp_providers_v1) with aliases + (e.g., compact_aslp_providers) to enable safe blue-green migrations for + future mapping changes. Queries use the alias, allowing the underlying + index to be swapped without application changes. + See https://docs.opensearch.org/latest/im-plugin/index-alias/ + """ + + def on_create(self, properties: dict) -> CustomResourceResponse | None: + """ + Create the versioned indices and aliases on creation. + """ + logger.info( + 'Starting OpenSearch index creation', + opensearch_host=config.opensearch_host_endpoint, + ) + + # Wait for domain to become responsive + client = self._wait_for_domain_ready() + + # Get index configuration from custom resource properties + number_of_shards = int(properties['numberOfShards']) + number_of_replicas = int(properties['numberOfReplicas']) + + logger.info( + 'Index configuration', + number_of_shards=number_of_shards, + number_of_replicas=number_of_replicas, + ) + + compacts = config.compacts + for compact in compacts: + # Create versioned index name (e.g., compact_aslp_providers_v1) + index_name = f'compact_{compact}_providers_{INITIAL_INDEX_VERSION}' + # Create alias name (e.g., compact_aslp_providers) + alias_name = f'compact_{compact}_providers' + self._create_provider_index_with_alias( + client=client, + index_name=index_name, + alias_name=alias_name, + number_of_shards=number_of_shards, + number_of_replicas=number_of_replicas, + ) + + def on_update(self, properties: dict) -> CustomResourceResponse | None: + """ + No-op on update. + """ + + def on_delete(self, _properties: dict) -> CustomResourceResponse | None: + """ + No-op on delete. + """ + + def _wait_for_domain_ready(self) -> OpenSearchClient: + """ + Wait for the OpenSearch domain to become responsive. + + Newly created OpenSearch domains may not be immediately responsive even after + CloudFormation reports them as created. This method attempts to create a client + and verify connectivity with retries before proceeding with index creation. 
+ + :return: A connected OpenSearchClient instance + :raises CCInternalException: If the domain is not responsive after max attempts + """ + last_exception = None + + for attempt in range(1, DOMAIN_READINESS_MAX_ATTEMPTS + 1): + try: + logger.info( + 'Attempting to connect to OpenSearch domain', + attempt=attempt, + max_attempts=DOMAIN_READINESS_MAX_ATTEMPTS, + ) + client = OpenSearchClient() + # Perform a lightweight health check to verify connectivity + # This will use the client's internal retry logic + cluster_health = client.cluster_health() + logger.info( + 'Successfully connected to OpenSearch domain', + cluster_status=cluster_health.get('status'), + number_of_nodes=cluster_health.get('number_of_nodes'), + ) + return client + except CCInternalException as e: + # CCInternalException is raised by OpenSearchClient after its internal retries are exhausted + last_exception = e + if attempt < DOMAIN_READINESS_MAX_ATTEMPTS: + logger.warning( + 'Domain not yet responsive, waiting before retry', + attempt=attempt, + max_attempts=DOMAIN_READINESS_MAX_ATTEMPTS, + wait_seconds=DOMAIN_READINESS_CHECK_INTERVAL_SECONDS, + error=str(e), + ) + time.sleep(DOMAIN_READINESS_CHECK_INTERVAL_SECONDS) + else: + logger.error( + 'Domain did not become responsive within timeout', + attempts=DOMAIN_READINESS_MAX_ATTEMPTS, + error=str(e), + ) + except Exception as e: # noqa BLE001 + # Handle unexpected exceptions (e.g., connection errors during client initialization) + last_exception = e + if attempt < DOMAIN_READINESS_MAX_ATTEMPTS: + logger.warning( + 'Connection attempt failed, waiting before retry', + attempt=attempt, + max_attempts=DOMAIN_READINESS_MAX_ATTEMPTS, + wait_seconds=DOMAIN_READINESS_CHECK_INTERVAL_SECONDS, + error=str(e), + ) + time.sleep(DOMAIN_READINESS_CHECK_INTERVAL_SECONDS) + else: + logger.error( + 'Failed to connect to OpenSearch domain after max attempts', + attempts=DOMAIN_READINESS_MAX_ATTEMPTS, + error=str(e), + ) + + raise CCInternalException( + f'OpenSearch domain did not become responsive after {DOMAIN_READINESS_MAX_ATTEMPTS} attempts ' + f'({DOMAIN_READINESS_MAX_ATTEMPTS * DOMAIN_READINESS_CHECK_INTERVAL_SECONDS} seconds). ' + f'Last error: {last_exception}' + ) + + def _create_provider_index_with_alias( + self, + client: OpenSearchClient, + index_name: str, + alias_name: str, + number_of_shards: int, + number_of_replicas: int, + ) -> None: + """ + Create the provider index and alias in OpenSearch if they don't exist. + + :param client: The OpenSearch client + :param index_name: The versioned index name (e.g., compact_aslp_providers_v1) + :param alias_name: The alias name (e.g., compact_aslp_providers) + :param number_of_shards: Number of primary shards for the index + :param number_of_replicas: Number of replica shards for the index + """ + # Check if the alias already exists (meaning an index version is already set up) + if client.alias_exists(alias_name): + logger.info(f"Alias '{alias_name}' already exists. Skipping index and alias creation.") + return + + # Check if the index already exists (edge case: index exists but alias doesn't) + if client.index_exists(index_name): + logger.info(f"Index '{index_name}' already exists. 
Creating alias only.") + client.create_alias(index_name, alias_name) + logger.info(f"Alias '{alias_name}' -> '{index_name}' created successfully.") + return + + # Create the index with the specified configuration + logger.info(f"Creating index '{index_name}'...") + index_mapping = self._get_provider_index_mapping(number_of_shards, number_of_replicas) + client.create_index(index_name, index_mapping) + logger.info(f"Index '{index_name}' created successfully.") + + # Create the alias pointing to the new index + logger.info(f"Creating alias '{alias_name}' -> '{index_name}'...") + client.create_alias(index_name, alias_name) + logger.info(f"Alias '{alias_name}' -> '{index_name}' created successfully.") + + def _get_provider_index_mapping(self, number_of_shards: int, number_of_replicas: int) -> dict: + """ + Define the index mapping for provider documents. + + :param number_of_shards: Number of primary shards for the index + :param number_of_replicas: Number of replica shards for the index + :return: The index mapping dictionary + """ + # Nested schema for AttestationVersion + attestation_version_properties = { + 'attestationId': {'type': 'keyword'}, + 'version': {'type': 'keyword'}, + } + + # Nested schema for AdverseAction + adverse_action_properties = { + 'type': {'type': 'keyword'}, + 'adverseActionId': {'type': 'keyword'}, + 'compact': {'type': 'keyword'}, + 'jurisdiction': {'type': 'keyword'}, + 'providerId': {'type': 'keyword'}, + 'licenseType': {'type': 'keyword'}, + 'licenseTypeAbbreviation': {'type': 'keyword'}, + 'actionAgainst': {'type': 'keyword'}, + 'effectiveStartDate': {'type': 'date'}, + 'creationDate': {'type': 'date'}, + 'effectiveLiftDate': {'type': 'date'}, + 'dateOfUpdate': {'type': 'date'}, + 'encumbranceType': {'type': 'keyword'}, + 'clinicalPrivilegeActionCategories': {'type': 'keyword'}, + 'clinicalPrivilegeActionCategory': {'type': 'keyword'}, + 'submittingUser': {'type': 'keyword'}, + 'liftingUser': {'type': 'keyword'}, + } + + # Nested schema for Investigation + investigation_properties = { + 'type': {'type': 'keyword'}, + 'investigationId': {'type': 'keyword'}, + 'compact': {'type': 'keyword'}, + 'jurisdiction': {'type': 'keyword'}, + 'licenseType': {'type': 'keyword'}, + 'status': {'type': 'keyword'}, + 'dateOfUpdate': {'type': 'date'}, + } + + # Nested schema for License + license_properties = { + 'providerId': {'type': 'keyword'}, + 'type': {'type': 'keyword'}, + 'dateOfUpdate': {'type': 'date'}, + 'compact': {'type': 'keyword'}, + 'jurisdiction': {'type': 'keyword'}, + 'licenseType': {'type': 'keyword'}, + 'licenseStatusName': {'type': 'keyword'}, + 'licenseStatus': {'type': 'keyword'}, + 'jurisdictionUploadedLicenseStatus': {'type': 'keyword'}, + 'compactEligibility': {'type': 'keyword'}, + 'jurisdictionUploadedCompactEligibility': {'type': 'keyword'}, + 'npi': {'type': 'keyword'}, + 'licenseNumber': {'type': 'keyword'}, + 'givenName': { + 'type': 'text', + 'analyzer': 'custom_ascii_analyzer', + 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}, + }, + 'middleName': { + 'type': 'text', + 'analyzer': 'custom_ascii_analyzer', + 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}, + }, + 'familyName': { + 'type': 'text', + 'analyzer': 'custom_ascii_analyzer', + 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}, + }, + 'suffix': {'type': 'keyword'}, + 'dateOfIssuance': {'type': 'date'}, + 'dateOfRenewal': {'type': 'date'}, + 'dateOfExpiration': {'type': 'date'}, + 'homeAddressStreet1': {'type': 'text'}, + 'homeAddressStreet2': 
{'type': 'text'}, + 'homeAddressCity': { + 'type': 'text', + 'analyzer': 'custom_ascii_analyzer', + 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}, + }, + 'homeAddressState': {'type': 'keyword'}, + 'homeAddressPostalCode': {'type': 'keyword'}, + 'emailAddress': {'type': 'keyword'}, + 'phoneNumber': {'type': 'keyword'}, + 'adverseActions': {'type': 'nested', 'properties': adverse_action_properties}, + 'investigations': {'type': 'nested', 'properties': investigation_properties}, + 'investigationStatus': {'type': 'keyword'}, + } + + # Nested schema for Privilege + privilege_properties = { + 'type': {'type': 'keyword'}, + 'providerId': {'type': 'keyword'}, + 'compact': {'type': 'keyword'}, + 'jurisdiction': {'type': 'keyword'}, + 'licenseJurisdiction': {'type': 'keyword'}, + 'licenseType': {'type': 'keyword'}, + 'dateOfIssuance': {'type': 'date'}, + 'dateOfRenewal': {'type': 'date'}, + 'dateOfExpiration': {'type': 'date'}, + 'dateOfUpdate': {'type': 'date'}, + 'adverseActions': {'type': 'nested', 'properties': adverse_action_properties}, + 'investigations': {'type': 'nested', 'properties': investigation_properties}, + 'administratorSetStatus': {'type': 'keyword'}, + 'compactTransactionId': {'type': 'keyword'}, + 'attestations': {'type': 'nested', 'properties': attestation_version_properties}, + 'privilegeId': {'type': 'keyword'}, + 'status': {'type': 'keyword'}, + 'activeSince': {'type': 'date'}, + 'investigationStatus': {'type': 'keyword'}, + } + + # Nested schema for MilitaryAffiliation + military_affiliation_properties = { + 'type': {'type': 'keyword'}, + 'dateOfUpdate': {'type': 'date'}, + 'providerId': {'type': 'keyword'}, + 'compact': {'type': 'keyword'}, + 'fileNames': {'type': 'keyword'}, + 'affiliationType': {'type': 'keyword'}, + 'dateOfUpload': {'type': 'date'}, + 'status': {'type': 'keyword'}, + } + + return { + 'settings': { + 'index': { + 'number_of_shards': number_of_shards, + 'number_of_replicas': number_of_replicas, + }, + 'analysis': { + # this custom analyzer is recommended by Opensearch when you have international character + # sets, and you want to support searching by their closest ASCII equivalents. 
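+                    # e.g. 'José' is indexed as both 'josé' and 'jose' (preserve_original keeps the accented token), so a search with or without accents matches.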
+ # See https://docs.opensearch.org/latest/analyzers/token-filters/asciifolding/ + 'filter': {'custom_ascii_folding': {'type': 'asciifolding', 'preserve_original': True}}, + 'analyzer': { + 'custom_ascii_analyzer': { + 'type': 'custom', + 'tokenizer': 'standard', + 'filter': ['lowercase', 'custom_ascii_folding'], + } + }, + }, + }, + 'mappings': { + 'properties': { + # Top-level provider fields + 'providerId': {'type': 'keyword'}, + 'type': {'type': 'keyword'}, + 'dateOfUpdate': {'type': 'date'}, + 'compact': {'type': 'keyword'}, + 'licenseJurisdiction': {'type': 'keyword'}, + 'currentHomeJurisdiction': {'type': 'keyword'}, + 'licenseStatus': {'type': 'keyword'}, + 'compactEligibility': {'type': 'keyword'}, + 'npi': {'type': 'keyword'}, + 'givenName': { + 'type': 'text', + 'analyzer': 'custom_ascii_analyzer', + 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}, + }, + 'middleName': { + 'type': 'text', + 'analyzer': 'custom_ascii_analyzer', + 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}, + }, + 'familyName': { + 'type': 'text', + 'analyzer': 'custom_ascii_analyzer', + 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}, + }, + 'suffix': {'type': 'keyword'}, + 'dateOfExpiration': {'type': 'date'}, + 'compactConnectRegisteredEmailAddress': {'type': 'keyword'}, + 'jurisdictionUploadedLicenseStatus': {'type': 'keyword'}, + 'jurisdictionUploadedCompactEligibility': {'type': 'keyword'}, + 'privilegeJurisdictions': {'type': 'keyword'}, + 'providerFamGivMid': {'type': 'keyword'}, + 'providerDateOfUpdate': {'type': 'date'}, + 'birthMonthDay': {'type': 'keyword'}, + 'militaryStatus': {'type': 'keyword'}, + 'militaryStatusNote': {'type': 'text'}, + # Nested arrays + 'licenses': {'type': 'nested', 'properties': license_properties}, + 'privileges': {'type': 'nested', 'properties': privilege_properties}, + 'militaryAffiliations': { + 'type': 'nested', + 'properties': military_affiliation_properties, + }, + } + }, + } + + +on_event = OpenSearchIndexManager('opensearch-index-manager') diff --git a/backend/compact-connect/lambdas/python/search/handlers/populate_provider_documents.py b/backend/compact-connect/lambdas/python/search/handlers/populate_provider_documents.py new file mode 100644 index 000000000..855016fec --- /dev/null +++ b/backend/compact-connect/lambdas/python/search/handlers/populate_provider_documents.py @@ -0,0 +1,397 @@ +""" +Lambda handler to populate OpenSearch with provider documents. + +This Lambda scans the provider table using the providerDateOfUpdate GSI, +retrieves complete provider records, sanitizes them, and bulk indexes them +into OpenSearch. + +This Lambda is intended to be invoked manually through the AWS console for +initial data population or re-indexing operations. + +The Lambda supports pagination across multiple invocations. If processing +cannot complete within 12 minutes, it will return the current compact and +last pagination key. The developer can then re-invoke the Lambda with this +output as input to continue processing. + +Example input for resumption: +{ + "startingCompact": "aslp", + "startingLastKey": {"pk": "...", "sk": "..."} +} + +Race Condition Consideration: +A potential race condition can occur when running this function while provider +data is being actively updated: +1. This Lambda queries the current data from DynamoDB for a provider +2. A change is made in DynamoDB for that same provider +3. 
The DynamoDB stream handler queries the data and indexes the change into + OpenSearch after the ~30 second delay of sitting in SQS +4. This Lambda finally indexes the stale data into OpenSearch, overwriting + the change indexed by the DynamoDB stream handler + +For this reason, it is recommended that this process be run during a period of +low traffic. Given that it is a one-time process to initially populate the +table, the risk is low and if needed, this Lambda function can be run again to +synchronize all the provider documents. +""" + +from aws_lambda_powertools.utilities.typing import LambdaContext +from cc_common.config import config, logger +from cc_common.exceptions import CCInternalException +from marshmallow import ValidationError +from opensearch_client import OpenSearchClient +from utils import generate_provider_opensearch_document + +# Batch size for DynamoDB pagination +DYNAMODB_PAGE_SIZE = 1000 +# Batch size for OpenSearch bulk indexing (1 provider averages ~2KB, 1000 * 2KB = 2MB) +OPENSEARCH_BULK_SIZE = 1000 +# Time threshold in milliseconds - stop when less than 3 minutes remain +# This leaves a 3-minute buffer before the 15-minute Lambda timeout +TIME_THRESHOLD_MS = 60 * 3000 + + +def populate_provider_documents(event: dict, context: LambdaContext): + """ + Populate OpenSearch indices with provider documents. + + Retrieves complete provider records, sanitizes them using ProviderGeneralResponseSchema, + and bulk indexes them into the appropriate OpenSearch indices. + + If processing cannot complete within 12 minutes, the function returns pagination + information that can be passed as input to continue processing. + + :param event: Lambda event with optional parameters: + - startingCompact: The compact to start/resume processing from + - startingLastKey: The DynamoDB pagination key to resume from + :param context: Lambda context + :return: Summary of indexing operation, including pagination info if incomplete + """ + data_client = config.data_client + opensearch_client = OpenSearchClient() + + # Get optional pagination parameters from event for resumption (normal mode) + starting_compact = event.get('startingCompact') + starting_last_key = event.get('startingLastKey') + + # Track statistics + stats = { + 'total_providers_processed': 0, + 'total_providers_indexed': 0, + 'total_providers_failed': 0, + 'compacts_processed': [], + 'errors': [], + 'completed': True, # Will be set to False if we need to paginate + } + + # Determine which compacts to process + compacts_to_process = config.compacts + + # If resuming, skip compacts before the starting compact + if starting_compact: + if starting_compact in compacts_to_process: + start_index = compacts_to_process.index(starting_compact) + compacts_to_process = compacts_to_process[start_index:] + logger.info( + 'Resuming from compact', + starting_compact=starting_compact, + starting_last_key=starting_last_key, + ) + else: + logger.warning( + 'Starting compact not found, processing all compacts', + starting_compact=starting_compact, + ) + starting_last_key = None # Reset last key if compact not found + + for compact_index, compact in enumerate(compacts_to_process): + logger.info('Processing compact', compact=compact) + + documents_to_index = [] + compact_stats = { + 'providers_processed': 0, + 'providers_indexed': 0, + 'providers_failed': 0, + } + + # Track pagination state + # Use starting_last_key only for the first compact being processed (resumption case). 
+ # The starting_last_key is specific to the compact that was being processed when we timed out, + # so it's only valid for that compact (which is now the first in compacts_to_process). + # For all subsequent compacts, we start from the beginning with last_key = None. + last_key = starting_last_key if compact_index == 0 else None + # Track the key used to fetch the current batch (needed for retry on indexing failure) + batch_start_key = last_key + has_more = True + + while has_more: + # Check if we're running out of time before starting a new batch + remaining_time_ms = context.get_remaining_time_in_millis() + if remaining_time_ms < TIME_THRESHOLD_MS: + # We need to stop and return pagination info for resumption + logger.info( + 'Approaching time limit, returning pagination info', + remaining_time_ms=remaining_time_ms, + current_compact=compact, + last_key=last_key, + ) + + # Index any remaining documents before returning + try: + _index_records_and_track_stats(documents_to_index, compact, opensearch_client, compact_stats) + except CCInternalException as e: + # Indexing failed after retries, return pagination info for manual retry + return _build_error_response( + stats, + compact_stats, + compact, + batch_start_key, + str(e), + ) + + # Update stats for current compact + stats['total_providers_processed'] += compact_stats['providers_processed'] + stats['total_providers_indexed'] += compact_stats['providers_indexed'] + stats['total_providers_failed'] += compact_stats['providers_failed'] + if compact_stats['providers_processed'] > 0: + stats['compacts_processed'].append( + { + 'compact': compact, + **compact_stats, + } + ) + + # Return pagination info for resumption + stats['completed'] = False + stats['resumeFrom'] = { + 'startingCompact': compact, + 'startingLastKey': last_key, + } + + logger.info( + 'Returning for pagination', + total_providers_processed=stats['total_providers_processed'], + total_providers_indexed=stats['total_providers_indexed'], + resume_from=stats['resumeFrom'], + ) + + return stats + + # Build pagination parameters + dynamo_pagination = {'pageSize': DYNAMODB_PAGE_SIZE} + if last_key: + dynamo_pagination['lastKey'] = last_key + + # Save the key used to fetch this batch (for retry if indexing fails) + batch_start_key = last_key + + # Query providers from the GSI + result = data_client.get_providers_sorted_by_updated( + compact=compact, + scan_forward=True, + pagination=dynamo_pagination, + ) + + providers = result.get('items', []) + last_key = result.get('pagination', {}).get('lastKey') + has_more = last_key is not None + + logger.info( + 'Retrieved providers batch', + compact=compact, + batch_size=len(providers), + has_more=has_more, + ) + + # Process each provider in the batch + for provider_record in providers: + compact_stats['providers_processed'] += 1 + provider_id = provider_record.get('providerId') + + if not provider_id: + logger.warning('Provider record missing providerId', record=provider_record) + compact_stats['providers_failed'] += 1 + continue + + try: + # Use the shared utility to process the provider + serializable_document = generate_provider_opensearch_document(compact, provider_id) + documents_to_index.append(serializable_document) + + except ValidationError as e: + logger.warning( + 'Failed to process provider record', + provider_id=provider_id, + compact=compact, + errors=e.messages, + ) + compact_stats['providers_failed'] += 1 + continue + + # Bulk index when batch is full + if len(documents_to_index) >= OPENSEARCH_BULK_SIZE: + try: + 
_index_records_and_track_stats(documents_to_index, compact, opensearch_client, compact_stats) + documents_to_index = [] + except CCInternalException as e: + # Indexing failed after retries, return pagination info for manual retry + return _build_error_response( + stats, + compact_stats, + compact, + batch_start_key, + str(e), + ) + + # Index any remaining documents for this compact + if documents_to_index: + try: + _index_records_and_track_stats(documents_to_index, compact, opensearch_client, compact_stats) + except CCInternalException as e: + # Indexing failed after retries, return pagination info for manual retry + return _build_error_response( + stats, + compact_stats, + compact, + batch_start_key, + str(e), + ) + + # Update overall stats + stats['total_providers_processed'] += compact_stats['providers_processed'] + stats['total_providers_indexed'] += compact_stats['providers_indexed'] + stats['total_providers_failed'] += compact_stats['providers_failed'] + stats['compacts_processed'].append( + { + 'compact': compact, + **compact_stats, + } + ) + + logger.info( + 'Completed processing compact', + compact=compact, + providers_processed=compact_stats['providers_processed'], + providers_indexed=compact_stats['providers_indexed'], + providers_failed=compact_stats['providers_failed'], + ) + + logger.info( + 'Completed populating provider documents', + total_providers_processed=stats['total_providers_processed'], + total_providers_indexed=stats['total_providers_indexed'], + total_providers_failed=stats['total_providers_failed'], + ) + + return stats + + +def _index_records_and_track_stats( + documents_to_index: list[dict], compact: str, opensearch_client: OpenSearchClient, compact_stats: dict +): + index_name = f'compact_{compact}_providers' + if documents_to_index: + failed_ids = _bulk_index_documents(opensearch_client, index_name, documents_to_index) + compact_stats['providers_indexed'] += len(documents_to_index) - len(failed_ids) + if failed_ids: + compact_stats['providers_failed'] += len(failed_ids) + logger.warning( + 'Some documents failed to index in batch', + compact=compact, + failed_count=len(failed_ids), + failed_document_ids=list(failed_ids), + ) + + +def _build_error_response( + stats: dict, compact_stats: dict, compact: str, batch_start_key: dict | None, error_message: str +) -> dict: + """ + Build an error response with pagination info for retry after indexing failure. 
+ + :param stats: The overall statistics dictionary + :param compact_stats: The current compact's statistics + :param compact: The compact being processed when the error occurred + :param batch_start_key: The pagination key used to fetch the batch that failed to index + :param error_message: The error message from the failed indexing attempt + :return: Response dictionary with error info and pagination for retry + """ + logger.error( + 'Bulk indexing failed after retries, returning pagination info for retry', + compact=compact, + batch_start_key=batch_start_key, + error=error_message, + ) + + # Update stats for current compact + stats['total_providers_processed'] += compact_stats['providers_processed'] + stats['total_providers_indexed'] += compact_stats['providers_indexed'] + stats['total_providers_failed'] += compact_stats['providers_failed'] + if compact_stats['providers_processed'] > 0: + stats['compacts_processed'].append( + { + 'compact': compact, + **compact_stats, + } + ) + + # Return pagination info for retry - use batch_start_key so the failed batch is re-fetched + stats['completed'] = False + stats['resumeFrom'] = { + 'startingCompact': compact, + 'startingLastKey': batch_start_key, + } + stats['errors'].append( + { + 'compact': compact, + 'error': error_message, + } + ) + + return stats + + +def _bulk_index_documents(opensearch_client: OpenSearchClient, index_name: str, documents: list[dict]) -> set[str]: + """ + Bulk index documents into OpenSearch. + + :param opensearch_client: The OpenSearch client + :param index_name: The index to write to + :param documents: List of documents to index + :return: Set of failed document IDs (empty set if all documents succeeded) + :raises CCInternalException: If bulk indexing fails after max retry attempts + """ + if not documents: + return set() + + # This will raise CCInternalException if all retries fail + response = opensearch_client.bulk_index(index_name=index_name, documents=documents) + + # Check for errors in the bulk response (individual document failures, not connection issues) + if response.get('errors'): + failed_ids = set() + for item in response.get('items', []): + index_result = item.get('index', {}) + if index_result.get('error'): + doc_id = index_result.get('_id') + failed_ids.add(doc_id) + logger.warning( + 'Bulk index item error', + document_id=doc_id, + error=index_result.get('error'), + ) + logger.warning( + 'Bulk index completed with errors', + index_name=index_name, + total_documents=len(documents), + error_count=len(failed_ids), + failed_document_ids=list(failed_ids), + ) + return failed_ids + + logger.info( + 'Indexed documents', + index_name=index_name, + document_count=len(documents), + ) + return set() diff --git a/backend/compact-connect/lambdas/python/search/handlers/provider_update_ingest.py b/backend/compact-connect/lambdas/python/search/handlers/provider_update_ingest.py new file mode 100644 index 000000000..442dfe4d9 --- /dev/null +++ b/backend/compact-connect/lambdas/python/search/handlers/provider_update_ingest.py @@ -0,0 +1,208 @@ +""" +Lambda handler to process SQS messages containing DynamoDB stream events and index +provider documents into OpenSearch. + +This Lambda is triggered by SQS (via EventBridge Pipe from DynamoDB streams) from +the provider table. It processes events in batches, deduplicates provider IDs by +compact, and bulk indexes the sanitized provider documents into the appropriate +OpenSearch indices. 
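+A simplified sketch of the message body, showing only the fields this handler reads (illustrative values): +{'dynamodb': {'NewImage': {'compact': {'S': 'aslp'}, 'providerId': {'S': '...'}, ...}}}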
+ +The handler uses the @sqs_batch_handler decorator which passes all SQS messages +to the handler at once, enabling batch processing and deduplication. The handler +returns batchItemFailures directly for partial success handling. +""" + +from boto3.dynamodb.types import TypeDeserializer +from cc_common.config import config, logger +from cc_common.exceptions import CCInternalException, CCNotFoundException +from cc_common.utils import sqs_batch_handler +from marshmallow import ValidationError +from opensearch_client import OpenSearchClient +from utils import generate_provider_opensearch_document + +# Instantiate the OpenSearch client outside of the handler to cache connection between invocations +opensearch_client = OpenSearchClient(timeout=30) + + +@sqs_batch_handler +def provider_update_ingest_handler(records: list[dict]) -> dict: + """ + Process DynamoDB stream events from SQS and index provider documents into OpenSearch. + + This function: + 1. Creates a set for each compact to deduplicate provider IDs + 2. Extracts compact and providerId from each stream record (old or new image) + 3. Processes each unique provider by compact using the shared utility + 4. Bulk indexes the documents into the appropriate OpenSearch index + + :param records: List of SQS records, each containing 'messageId' and 'body' (DynamoDB stream record) + :return: Response with batch item failures for partial success handling + """ + if not records: + logger.info('No records to process') + return {'batchItemFailures': []} + + logger.info('Processing SQS batch with DynamoDB stream records', record_count=len(records)) + + # Create a set for each compact to deduplicate provider IDs + providers_by_compact: dict[str, set[str]] = {compact: set() for compact in config.compacts} + + # Track which message IDs correspond to which compact/provider for failure reporting + record_mapping: dict[str, tuple[str, str]] = {} # message_id -> (compact, provider_id) + + # Extract compact and providerId from each record + for record in records: + message_id = record['messageId'] + # The body contains the DynamoDB stream record sent via EventBridge Pipe + stream_record = record['body'] + + # Try to get the data from NewImage first, fall back to OldImage for deletes + image = stream_record.get('dynamodb', {}).get('NewImage') or stream_record.get('dynamodb', {}).get('OldImage') + + if not image: + logger.error('Record has no image data', message_id=message_id) + continue + + # Extract compact and providerId from the DynamoDB image + # The format is {'S': 'value'} for string attributes + deserialized_image = TypeDeserializer().deserialize(value={'M': image}) + compact = deserialized_image.get('compact') + provider_id = deserialized_image.get('providerId') + record_type = deserialized_image.get('type') + + if not compact or not provider_id: + logger.error( + 'Record missing required fields', + record_type=record_type, + message_id=message_id, + ) + continue + + # Add to the appropriate compact's set to dedup provider ids + if compact in providers_by_compact: + providers_by_compact[compact].add(provider_id) + record_mapping[message_id] = (compact, provider_id) + else: + logger.warning('Unknown compact in record', compact=compact, provider_id=provider_id) + + # Process providers and bulk index by compact + batch_item_failures = [] + failed_providers: dict[str, set] = {compact: set() for compact in config.compacts} + + for compact, provider_ids in providers_by_compact.items(): + index_name = f'compact_{compact}_providers' + 
logger.info('Processing providers for compact', compact=compact, provider_count=len(provider_ids)) + + documents_to_index = [] + providers_to_delete = [] # Provider IDs that no longer exist and need to be deleted from the index + + for provider_id in provider_ids: + try: + document = generate_provider_opensearch_document(compact, provider_id) + documents_to_index.append(document) + except CCNotFoundException as e: + # if no provider records are found, the provider needs to be deleted from the index + logger.warning( + 'No provider records found. This may occur if a license upload rollback was performed or if records' + ' were manually deleted. Will delete provider document from index.', + provider_id=provider_id, + compact=compact, + error=str(e), + ) + providers_to_delete.append(provider_id) + except ValidationError as e: + logger.warning( + 'Failed to process provider for indexing', + provider_id=provider_id, + compact=compact, + error=str(e), + ) + failed_providers[compact].add(provider_id) + + if failed_providers[compact]: + logger.warning( + 'Some providers failed serialization', + compact=compact, + failed_provider_ids=failed_providers[compact], + successful_count=len(documents_to_index), + ) + + # Bulk index the documents + if documents_to_index: + try: + response = opensearch_client.bulk_index(index_name=index_name, documents=documents_to_index) + + # Check for individual document failures + if response.get('errors'): + for item in response.get('items', []): + index_result = item.get('index', {}) + if index_result.get('error'): + doc_id = index_result.get('_id') + logger.error( + 'Document indexing failed', + provider_id=doc_id, + error=index_result.get('error'), + ) + failed_providers[compact].add(doc_id) + + logger.info( + 'Bulk indexed documents', + index_name=index_name, + document_count=len(documents_to_index), + had_errors=response.get('errors', False), + ) + except CCInternalException as e: + # All documents for this compact failed to index + logger.error( + 'Failed to bulk index documents after retries', + index_name=index_name, + document_count=len(documents_to_index), + error=str(e), + ) + # Mark all providers in this compact as failed + document_provider_ids = [document['providerId'] for document in documents_to_index] + for provider_id in document_provider_ids: + failed_providers[compact].add(provider_id) + + # Bulk delete providers that no longer exist + if providers_to_delete: + try: + failed_provider_ids = opensearch_client.bulk_delete( + index_name=index_name, document_ids=providers_to_delete + ) + failed_providers[compact].update(failed_provider_ids) + + logger.info( + 'Bulk deleted documents', + index_name=index_name, + document_count=len(providers_to_delete), + failed_provider_ids=list(failed_provider_ids), + ) + except CCInternalException as e: + # All deletes for this compact failed + logger.error( + 'Failed to bulk delete documents after retries', + index_name=index_name, + document_count=len(providers_to_delete), + error=str(e), + ) + # Mark all providers to delete as failed + for provider_id in providers_to_delete: + failed_providers[compact].add(provider_id) + + # Build batch item failures response for failed providers + # Map back from failed providers to their SQS message IDs + for message_id, (compact, provider_id) in record_mapping.items(): + if provider_id in failed_providers[compact]: + logger.info( + 'Returning message id in batch item failures for failed provider', + compact=compact, + provider_id=provider_id, + message_id=message_id, + ) + 
batch_item_failures.append({'itemIdentifier': message_id}) + + if batch_item_failures: + logger.warning('Reporting batch item failures', failure_count=len(batch_item_failures)) + + return {'batchItemFailures': batch_item_failures} diff --git a/backend/compact-connect/lambdas/python/search/handlers/search.py b/backend/compact-connect/lambdas/python/search/handlers/search.py new file mode 100644 index 000000000..bdc5bed74 --- /dev/null +++ b/backend/compact-connect/lambdas/python/search/handlers/search.py @@ -0,0 +1,593 @@ +import csv +import io + +from aws_lambda_powertools.utilities.typing import LambdaContext +from cc_common.config import config, logger +from cc_common.data_model.schema.common import CCPermissionsAction +from cc_common.data_model.schema.provider.api import ( + ExportPrivilegesRequestSchema, + ProviderGeneralResponseSchema, + SearchProvidersRequestSchema, + StatePrivilegeGeneralResponseSchema, +) +from cc_common.exceptions import ( + CCInternalException, + CCInvalidRequestCustomResponseException, + CCInvalidRequestException, + CCNotFoundException, +) +from cc_common.utils import api_handler, authorize_compact_level_only_action +from marshmallow import ValidationError +from opensearch_client import OpenSearchClient + +# Default and maximum page sizes for search results +MAX_PROVIDER_PAGE_SIZE = 100 +MAX_MATCH_TOTAL_ALLOWED = 10000 + +# Presigned URL expiration time in seconds (1 minute) +PRESIGNED_URL_EXPIRATION_SECONDS = 60 + +# CSV field names for privilege export +PRIVILEGE_CSV_FIELDS = [ + 'type', + 'providerId', + 'compact', + 'jurisdiction', + 'licenseType', + 'privilegeId', + 'status', + 'compactEligibility', + 'dateOfExpiration', + 'dateOfIssuance', + 'dateOfRenewal', + 'familyName', + 'givenName', + 'middleName', + 'suffix', + 'licenseJurisdiction', + 'licenseStatus', + 'licenseStatusName', + 'licenseNumber', + 'npi', +] + + +# Instantiate the OpenSearch client outside of the handler to cache connection between invocations +# Set timeout to 20 seconds to give API gateway time to respond with response +opensearch_client = OpenSearchClient(timeout=20) + + +@api_handler +@authorize_compact_level_only_action(action=CCPermissionsAction.READ_GENERAL) +def search_api_handler(event: dict, context: LambdaContext): + """ + Main entry point for search API. + Routes to the appropriate handler based on the HTTP method and resource path. + + :param event: Standard API Gateway event, API schema documented in the CDK ApiStack + :param context: Lambda context + """ + # Extract the HTTP method and resource path + http_method = event.get('httpMethod') + resource_path = event.get('resource') + + # Route to the appropriate handler + api_method = (http_method, resource_path) + match api_method: + case ('POST', '/v1/compacts/{compact}/providers/search'): + return _search_providers(event, context) + case ('POST', '/v1/compacts/{compact}/privileges/export'): + return _export_privileges(event, context) + + # If we get here, the method/resource combination is not supported + raise CCInvalidRequestException(f'Unsupported method or resource: {http_method} {resource_path}') + + +def _search_providers(event: dict, context: LambdaContext): # noqa: ARG001 unused-argument + """ + Search providers using OpenSearch. + + This endpoint accepts an OpenSearch DSL query body and returns sanitized provider records. + Pagination follows OpenSearch DSL using `from`/`size` or `search_after` with `sort`. 
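+
+    Example request body using search_after pagination (illustrative values only;
+    the sort field and match value below are assumptions, not a documented schema):
+        {
+            "query": {"match": {"familyName": "smith"}},
+            "sort": [{"providerId": "asc"}],
+            "search_after": ["<lastSort value returned by the previous page>"]
+        }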
+ + See: https://docs.opensearch.org/latest/search-plugins/searching-data/paginate/ + + :param event: Standard API Gateway event, API schema documented in the CDK ApiStack + :param LambdaContext context: + :return: Dictionary with providers array and pagination metadata + """ + compact = event['pathParameters']['compact'] + + # Parse and validate the request body using the schema + body = _parse_and_validate_request_body(event) + + # Build the OpenSearch search body + search_body = _build_opensearch_search_body(body, size_override=MAX_PROVIDER_PAGE_SIZE) + + # Build the index name for this compact + index_name = f'compact_{compact}_providers' + + logger.info('Executing OpenSearch provider search', compact=compact, index_name=index_name) + + # Execute the search + response = opensearch_client.search(index_name=index_name, body=search_body) + + # Extract hits from the response + hits_data = response.get('hits', {}) + hits = hits_data.get('hits', []) + total = hits_data.get('total', {}) + + # Sanitize the provider records using ProviderGeneralResponseSchema + general_schema = ProviderGeneralResponseSchema() + sanitized_providers = [] + last_sort = None + + for hit in hits: + source = hit.get('_source', {}) + try: + sanitized_provider = general_schema.load(source) + # Verify compact matches path parameter + if sanitized_provider.get('compact') != compact: + logger.error( + 'Provider compact field does not match path parameter', + # This case is most likely the result of abuse or misconfiguration. + # We log the request body for triaging purposes. We redact the leaf values + # from the request body to obscure PII. + request_body=_redact_leaf_values(body), + provider_id=source.get('providerId'), + provider_compact=sanitized_provider.get('compact'), + path_compact=compact, + ) + # do not include the provider in the results + total['value'] -= 1 + continue + sanitized_providers.append(sanitized_provider) + # Track the sort values from the last hit for search_after pagination + last_sort = hit.get('sort') + except ValidationError as e: + # Log the error but continue processing other records + logger.error( + 'Failed to sanitize provider record', + provider_id=source.get('providerId'), + errors=e.messages, + ) + + # Build response + response_body = { + 'providers': sanitized_providers, + 'total': total, + } + + # Include sort values from last hit to enable search_after pagination + if last_sort is not None: + response_body['lastSort'] = last_sort + + return response_body + + +def _export_privileges(event: dict, context: LambdaContext): # noqa: ARG001 unused-argument + """ + Export privileges to a CSV file in S3 and return a presigned URL for download. + + This endpoint accepts an OpenSearch DSL query body, retrieves all matching privilege records, + converts them to CSV format, stores the file in S3, and returns a presigned URL for download. + + If the query includes a nested query on privileges with `inner_hits`, only the matched + privileges will be returned. Otherwise, all privileges for matching providers are returned. + See https://docs.opensearch.org/latest/search-plugins/searching-data/inner-hits/ for more information + about inner_hits. 
+ + Example nested query with inner_hits: + { + "query": { + "nested": { + "path": "privileges", + "query": { "term": { "privileges.jurisdiction": "ky" } }, + "inner_hits": {} + } + } + } + + :param event: Standard API Gateway event, API schema documented in the CDK ApiStack + :param LambdaContext context: + :return: Dictionary with fileUrl containing presigned URL to download the CSV file + """ + compact = event['pathParameters']['compact'] + + # Get the caller's cognito user id + caller_user_id = _get_caller_user_id(event) + + # Parse and validate the request body using the schema + body = _parse_and_validate_export_request_body(event) + + # Build the OpenSearch search body + search_body = _build_export_search_body(body) + + # Build the index name for this compact + index_name = f'compact_{compact}_providers' + + logger.info('Executing OpenSearch privilege export', compact=compact, index_name=index_name) + + # Execute the search + response = opensearch_client.search(index_name=index_name, body=search_body) + + # Extract hits from the response + hits_data = response.get('hits', {}) + hits = hits_data.get('hits', []) + total = hits_data['total'] + + if total['value'] >= MAX_MATCH_TOTAL_ALLOWED: + logger.info('request scope too large for current implementation, returning 400 with custom response') + raise CCInvalidRequestCustomResponseException( + response_body={ + 'message': 'Search scope too broad. Please narrow your search.', + } + ) + + # Extract and flatten privileges from provider records + flattened_privileges = [] + privilege_schema = StatePrivilegeGeneralResponseSchema() + + for hit in hits: + provider = hit.get('_source', {}) + # Check if inner_hits are present for privileges + # If so, use only the matched privileges; otherwise, use all privileges + # see https://docs.opensearch.org/latest/search-plugins/searching-data/inner-hits/ for more information + # about inner_hits. + inner_hits = hit.get('inner_hits', {}) + privileges_inner_hits = inner_hits.get('privileges', {}).get('hits', {}).get('hits', []) + + if privileges_inner_hits: + # Use only the privileges that matched the nested query + matched_privileges = [ih.get('_source', {}) for ih in privileges_inner_hits] + provider_privileges = _extract_flattened_privileges_from_list( + privileges=matched_privileges, + licenses=provider.get('licenses', []), + provider=provider, + ) + else: + # No inner_hits, return all privileges for this provider + provider_privileges = _extract_flattened_privileges(provider) + + for flattened_privilege in provider_privileges: + try: + # Sanitize using StatePrivilegeGeneralResponseSchema + sanitized_privilege = privilege_schema.load(flattened_privilege) + # Verify compact matches path parameter + if sanitized_privilege.get('compact') != compact: + logger.error( + 'Privilege compact field does not match path parameter', + # This case is most likely the result of abuse or misconfiguration. + # We log the request body for triaging purposes. We redact the leaf values + # from the request body to obscure PII. 
+ request_body=_redact_leaf_values(body), + provider_id=provider.get('providerId'), + privilege_id=flattened_privilege.get('privilegeId'), + privilege_compact=sanitized_privilege.get('compact'), + path_compact=compact, + ) + # do not include the privilege in the results + continue + flattened_privileges.append(sanitized_privilege) + except ValidationError as e: + logger.error( + 'Failed to sanitize flattened privilege record', + provider_id=provider.get('providerId'), + privilege_id=flattened_privilege.get('privilegeId'), + errors=e.messages, + ) + # We don't want to return partial privilege reports + # If we experience a failure here we need to exit + raise CCInternalException('Failed to process privilege results') from e + + logger.info('Found privileges to export', count=len(flattened_privileges)) + + # If no privileges were found, return 404 + if not flattened_privileges: + raise CCNotFoundException('The search parameters did not match any privileges.') + + # Generate CSV content from the flattened privileges + csv_content = _generate_csv_content(flattened_privileges) + + # Generate S3 key path + request_datetime = config.current_standard_datetime.isoformat() + s3_key = f'compact/{compact}/privilegeSearch/caller/{caller_user_id}/time/{request_datetime}/export.csv' + + # Upload CSV to S3 + logger.info('Uploading CSV to S3', bucket=config.export_results_bucket_name, key=s3_key) + config.s3_client.put_object( + Bucket=config.export_results_bucket_name, + Key=s3_key, + Body=csv_content.encode('utf-8'), + ContentType='text/csv', + ) + + # Generate presigned URL for download + presigned_url = config.s3_client.generate_presigned_url( + 'get_object', + Params={ + 'Bucket': config.export_results_bucket_name, + 'Key': s3_key, + }, + ExpiresIn=PRESIGNED_URL_EXPIRATION_SECONDS, + ) + + logger.info('Generated presigned URL for export', url_expires_in=PRESIGNED_URL_EXPIRATION_SECONDS) + + return {'fileUrl': presigned_url} + + +def _parse_and_validate_request_body(event: dict) -> dict: + """ + Parse and validate the request body using the SearchProvidersRequestSchema. + + :param event: API Gateway event + :return: Validated request body + :raises CCInvalidRequestException: If the request body is invalid + """ + try: + schema = SearchProvidersRequestSchema() + return schema.loads(event.get('body', '{}')) + except ValidationError as e: + logger.warning('Invalid request body', errors=e.messages) + raise CCInvalidRequestException(f'Invalid request: {e.messages}') from e + + +def _parse_and_validate_export_request_body(event: dict) -> dict: + """ + Parse and validate the request body for export endpoints. + + Export endpoints only accept the query parameter, no pagination. + + :param event: API Gateway event + :return: Validated request body with query + :raises CCInvalidRequestException: If the request body is invalid + """ + try: + schema = ExportPrivilegesRequestSchema() + return schema.loads(event.get('body', '{}')) + except ValidationError as e: + logger.warning('Invalid request body', errors=e.messages) + raise CCInvalidRequestException(f'Invalid request: {e.messages}') from e + + +def _get_caller_user_id(event: dict) -> str: + """ + Get the caller's cognito user id from the event. 
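+
+    The id is read from the Cognito authorizer claims on the request context,
+    i.e. event['requestContext']['authorizer']['claims']['sub'].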
+ + :param event: API Gateway event + :return: The caller's user id (sub claim from cognito token) + :raises CCInvalidRequestException: If user id cannot be extracted + """ + try: + return event['requestContext']['authorizer']['claims']['sub'] + except (KeyError, TypeError) as e: + # the api auth wrapper should have detected this earlier, so if get here there is an issue with the + # setup. Raise an internal exception + logger.error('Could not extract user id from event', error=str(e)) + raise CCInternalException('Could not determine caller id for privilege report export') from e + + +def _redact_leaf_values(data: dict | list | str | int | bool | None) -> dict | list | str: + """ + Recursively redact all leaf field values in a data structure. + + This function preserves the structure of nested dictionaries + and lists while replacing all leaf values with "". + + :param data: The data structure to redact (dict, list, or primitive value) + :return: A copy of the data structure with all leaf values redacted + """ + if isinstance(data, dict): + return {key: _redact_leaf_values(value) for key, value in data.items()} + if isinstance(data, list): + return [_redact_leaf_values(item) for item in data] + + # Primitive value (str, int, float, bool, None) - this is a leaf, redact it + return '' + + +def _build_opensearch_search_body(body: dict, size_override: int) -> dict: + """ + Build the OpenSearch search body from the validated request. + + :param body: Validated request body + :return: OpenSearch search body + :raises CCInvalidRequestException: If search_after is used without sort + """ + search_body = { + 'query': body['query'], + } + + # Add pagination parameters following OpenSearch DSL + # 'from_' in Python maps to 'from' in the JSON (due to data_key in schema) + from_param = body.get('from_') + if from_param is not None: + search_body['from'] = from_param + + search_body['size'] = body.get('size', size_override) + + # Add sort if provided - required for search_after pagination + sort = body.get('sort') + if sort is not None: + search_body['sort'] = sort + + # Add search_after for cursor-based pagination + search_after = body.get('search_after') + if search_after is not None: + search_body['search_after'] = search_after + # search_after requires sort to be specified + if 'sort' not in search_body: + raise CCInvalidRequestException('sort is required when using search_after pagination') + + return search_body + + +def _build_export_search_body(body: dict) -> dict: + """ + Build the OpenSearch search body for export requests. + + Export requests retrieve all matching results in a single request, up to MAX_MATCH_TOTAL_ALLOWED. + OpenSearch's default index.max_result_window is 10,000, which aligns with our limit. + If there are more results than the limit, the export will fail with a 400 error. + + :param body: Validated request body + :return: OpenSearch search body + """ + return { + 'query': body.get('query', {'match_all': {}}), + 'size': MAX_MATCH_TOTAL_ALLOWED, + } + + +def _generate_csv_content(privileges: list[dict]) -> str: + """ + Generate CSV content from a list of privilege records. 
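+
+    Columns follow PRIVILEGE_CSV_FIELDS; any extra keys on a record are ignored
+    because the writer is created with extrasaction='ignore'.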
+ + :param privileges: List of flattened privilege records + :return: CSV content as a string + """ + output = io.StringIO() + writer = csv.DictWriter(output, fieldnames=PRIVILEGE_CSV_FIELDS, extrasaction='ignore') + + # Write header row + writer.writeheader() + + # Write data rows + for privilege in privileges: + writer.writerow(privilege) + + return output.getvalue() + + +def _extract_flattened_privileges(provider: dict) -> list[dict]: + """ + Extract and flatten all privileges from a provider document. + + This function combines privilege data with license data to create flattened + privilege records similar to what the state API returns. + + :param provider: Provider document from OpenSearch + :return: List of flattened privilege records + """ + privileges = provider.get('privileges', []) + licenses = provider.get('licenses', []) + + return _extract_flattened_privileges_from_list( + privileges=privileges, + licenses=licenses, + provider=provider, + ) + + +def _extract_flattened_privileges_from_list( + privileges: list[dict], + licenses: list[dict], + provider: dict, +) -> list[dict]: + """ + Flatten a list of privileges by combining with license data. + + This function is used both for extracting all privileges from a provider document + and for processing only the matched privileges from inner_hits. + + :param privileges: List of privilege records to flatten + :param licenses: List of license records from the provider + :param provider: Provider document (for email and provider_id logging) + :return: List of flattened privilege records + """ + if not privileges: + return [] + + flattened_privileges = [] + + for privilege in privileges: + # Find matching license based on licenseJurisdiction and licenseType + matching_license = _find_matching_license( + licenses=licenses, + license_jurisdiction=privilege.get('licenseJurisdiction'), + license_type=privilege.get('licenseType'), + ) + + if matching_license is None: + logger.warning( + 'No matching license found for privilege', + provider_id=provider.get('providerId'), + privilege_id=privilege.get('privilegeId'), + license_jurisdiction=privilege.get('licenseJurisdiction'), + license_type=privilege.get('licenseType'), + ) + # Skip this privilege if no matching license is found + continue + + flattened_privilege = _create_flattened_privilege(privilege, matching_license, provider) + flattened_privileges.append(flattened_privilege) + + return flattened_privileges + + +def _find_matching_license(licenses: list[dict], license_jurisdiction: str, license_type: str) -> dict | None: + """ + Find a license that matches the given jurisdiction and license type. + + :param licenses: List of license records + :param license_jurisdiction: The jurisdiction to match + :param license_type: The license type to match + :return: The matching license or None if not found + """ + for license_record in licenses: + if ( + license_record.get('jurisdiction') == license_jurisdiction + and license_record.get('licenseType') == license_type + ): + return license_record + return None + + +def _create_flattened_privilege(privilege: dict, license_record: dict, provider: dict) -> dict: + """ + Create a flattened privilege record by combining privilege and license data. + + This mirrors the logic in state_api.py _create_flattened_privilege function. 
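+
+    Illustrative sketch (values are made up for the example): a privilege record is
+    copied, 'type' is set to 'statePrivilege', and the matching license contributes
+    fields such as givenName, familyName, npi and licenseNumber, while conflicting
+    fields like status and dateOfExpiration are kept from the privilege.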
+ + :param privilege: Privilege record + :param license_record: Matching license record + :param provider: Provider record (for email if registered) + :return: Flattened privilege record with combined data + """ + # Start with privilege data and set type + flattened = dict(privilege) + flattened['type'] = 'statePrivilege' + + # Add compactConnectRegisteredEmailAddress if present + if provider.get('compactConnectRegisteredEmailAddress') is not None: + flattened['compactConnectRegisteredEmailAddress'] = provider.get('compactConnectRegisteredEmailAddress') + + # Remove fields from license that would conflict with privilege fields + license_copy = dict(license_record) + conflicting_fields = { + 'providerId', + 'compact', + 'jurisdiction', + 'licenseType', + 'type', + 'pk', + 'sk', + 'dateOfIssuance', + 'dateOfRenewal', + 'dateOfUpdate', + 'dateOfExpiration', + 'status', + 'administratorSetStatus', + # Also remove nested objects that don't belong in flattened output + 'adverseActions', + 'investigations', + } + for field in conflicting_fields: + license_copy.pop(field, None) + + # Merge license data into flattened record + # License fields like givenName, familyName, npi, etc. get added + flattened.update(license_copy) + + return flattened diff --git a/backend/compact-connect/lambdas/python/search/opensearch_client.py b/backend/compact-connect/lambdas/python/search/opensearch_client.py new file mode 100644 index 000000000..0fc2214b0 --- /dev/null +++ b/backend/compact-connect/lambdas/python/search/opensearch_client.py @@ -0,0 +1,338 @@ +import time + +import boto3 +from cc_common.config import config, logger +from cc_common.exceptions import CCInternalException, CCInvalidRequestException +from opensearchpy import AWSV4SignerAuth, OpenSearch, RequestsHttpConnection +from opensearchpy.exceptions import ConnectionTimeout, RequestError, TransportError + +# Retry configuration for operations +MAX_RETRY_ATTEMPTS = 5 +INITIAL_BACKOFF_SECONDS = 2 +MAX_BACKOFF_SECONDS = 32 + +DEFAULT_TIMEOUT = 30 + + +class OpenSearchClient: + def __init__(self, timeout: int = DEFAULT_TIMEOUT): + lambda_credentials = boto3.Session().get_credentials() + auth = AWSV4SignerAuth(credentials=lambda_credentials, region=config.environment_region, service='es') + self._client = OpenSearch( + hosts=[{'host': config.opensearch_host_endpoint, 'port': 443}], + http_auth=auth, + use_ssl=True, + verify_certs=True, + connection_class=RequestsHttpConnection, + timeout=timeout, + pool_maxsize=20, + ) + + def create_index(self, index_name: str, index_mapping: dict) -> None: + """ + Create an index with the specified mapping. + + :param index_name: The name of the index to create + :param index_mapping: The index configuration including settings and mappings + :raises CCInternalException: If all retry attempts fail + """ + self._execute_with_retry( + operation=lambda: self._client.indices.create(index=index_name, body=index_mapping), + operation_name=f'create_index({index_name})', + ) + + def index_exists(self, index_name: str) -> bool: + """ + Check if an index exists. + + :param index_name: The name of the index to check + :return: True if the index exists, False otherwise + :raises CCInternalException: If all retry attempts fail + """ + return self._execute_with_retry( + operation=lambda: self._client.indices.exists(index=index_name), + operation_name=f'index_exists({index_name})', + ) + + def alias_exists(self, alias_name: str) -> bool: + """ + Check if an alias exists. 
+ + :param alias_name: The name of the alias to check + :return: True if the alias exists, False otherwise + :raises CCInternalException: If all retry attempts fail + """ + return self._execute_with_retry( + operation=lambda: self._client.indices.exists_alias(name=alias_name), + operation_name=f'alias_exists({alias_name})', + ) + + def create_alias(self, index_name: str, alias_name: str) -> None: + """ + Create an alias pointing to the specified index. + + :param index_name: The index to create the alias for + :param alias_name: The name of the alias to create + :raises CCInternalException: If all retry attempts fail + """ + self._execute_with_retry( + operation=lambda: self._client.indices.put_alias(index=index_name, name=alias_name), + operation_name=f'create_alias({alias_name} -> {index_name})', + ) + + def cluster_health(self) -> dict: + """ + Get the cluster health status. + + Implements retry logic with exponential backoff for transient connection issues. + This is useful for checking if the cluster is responsive, especially after + a new domain is created. + + :return: The cluster health response from OpenSearch + :raises CCInternalException: If all retry attempts fail + """ + return self._execute_with_retry( + operation=lambda: self._client.cluster.health(), + operation_name='cluster_health', + ) + + def _execute_with_retry(self, operation: callable, operation_name: str): + """ + Execute an operation with retry logic and exponential backoff. + + This handles transient connection issues that can occur when: + - OpenSearch domain was just created and is still warming up + - Network connectivity issues within the VPC + - Temporary high load on the OpenSearch cluster + + :param operation: A callable that performs the operation + :param operation_name: A descriptive name for the operation (for logging) + :return: The result of the operation + :raises CCInternalException: If all retry attempts fail + """ + last_exception = None + backoff_seconds = INITIAL_BACKOFF_SECONDS + + for attempt in range(1, MAX_RETRY_ATTEMPTS + 1): + try: + return operation() + except (ConnectionTimeout, TransportError) as e: + last_exception = e + if attempt < MAX_RETRY_ATTEMPTS: + logger.warning( + 'Operation failed, retrying with backoff', + operation=operation_name, + attempt=attempt, + max_attempts=MAX_RETRY_ATTEMPTS, + backoff_seconds=backoff_seconds, + error=str(e), + ) + time.sleep(backoff_seconds) + # Exponential backoff with cap + backoff_seconds = min(backoff_seconds * 2, MAX_BACKOFF_SECONDS) + else: + logger.error( + 'Operation failed after max retry attempts', + operation=operation_name, + attempts=MAX_RETRY_ATTEMPTS, + error=str(e), + ) + + # All retry attempts failed + raise CCInternalException( + f'{operation_name} failed after {MAX_RETRY_ATTEMPTS} attempts. Last error: {last_exception}' + ) + + def search(self, index_name: str, body: dict) -> dict: + """ + Execute a search query against the specified index. 
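+
+        Example usage (illustrative only; index name and query are assumptions):
+            client = OpenSearchClient(timeout=20)
+            response = client.search(
+                index_name='compact_aslp_providers',
+                body={'query': {'match_all': {}}, 'size': 10},
+            )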
+ + :param index_name: The name of the index to search + :param body: The OpenSearch query body + :return: The search response from OpenSearch + :raises CCInvalidRequestException: If the query is invalid (400 error) or times out + """ + try: + return self._client.search(index=index_name, body=body) + except ConnectionTimeout as e: + logger.warning( + 'OpenSearch search request timed out', + index_name=index_name, + error=str(e), + ) + # We are returning this as an invalid request exception so the UI client picks it up as + # a 400 and displays the message to the client + raise CCInvalidRequestException( + 'Search request timed out. Please try again or narrow your search criteria.' + ) from e + except RequestError as e: + if e.status_code == 400: + # Extract the error message from the RequestError + error_message = self._extract_opensearch_error_reason(e) + logger.warning( + 'OpenSearch search request failed', + index_name=index_name, + status_code=e.status_code, + error_message=error_message, + ) + raise CCInvalidRequestException(f'Invalid search query: {error_message}') from e + # Re-raise non-400 RequestErrors + raise + + @staticmethod + def _extract_opensearch_error_reason(e: RequestError) -> str: + """ + Extract a human-readable error reason from an OpenSearch RequestError. + + The error info structure is typically: + {"error": {"root_cause": [{"type": "...", "reason": "..."}], ...}, "status": 400} + + :param e: The RequestError exception + :return: The extracted error reason, or a fallback string representation + """ + if not e.info: + return str(e.error) + + try: + # Navigate to error.root_cause[0].reason + root_causes = e.info.get('error', {}).get('root_cause', []) + if root_causes and isinstance(root_causes, list) and len(root_causes) > 0: + reason = root_causes[0].get('reason') + if reason: + return str(reason) + except (AttributeError, TypeError, KeyError): + # If navigation fails, fall back to string representation + logger.warning( + 'Failed to extract error reason from OpenSearch RequestError', + error=str(e), + ) + return str(e.error) + + def bulk_index(self, index_name: str, documents: list[dict], id_field: str = 'providerId') -> dict: + """ + Bulk index multiple documents into the specified index. + + This method implements retry logic with exponential backoff to handle transient + connection issues (e.g., ConnectionTimeout, TransportError). If all retry attempts + fail, a CCInternalException is raised to signal the caller to handle the failure. + + :param index_name: The name of the index to write to + :param documents: List of documents to index + :param id_field: The field name to use as the document ID (default: 'providerId') + :return: The bulk response from OpenSearch + :raises CCInternalException: If all retry attempts fail due to connection issues + """ + if not documents: + return {'items': [], 'errors': False} + + actions = [] + for doc in documents: + actions.append({'index': {'_id': doc[id_field]}}) + actions.append(doc) + + return self._bulk_index_with_retry(actions=actions, index_name=index_name, document_count=len(documents)) + + def bulk_delete(self, index_name: str, document_ids: list[str]) -> set[str]: + """ + Bulk delete multiple documents from the specified index. + + This method implements retry logic with exponential backoff to handle transient + connection issues (e.g., ConnectionTimeout, TransportError). If all retry attempts + fail, a CCInternalException is raised to signal the caller to handle the failure. 
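+
+        Example usage (illustrative only; the document id is a placeholder):
+            failed_ids = client.bulk_delete(
+                index_name='compact_aslp_providers',
+                document_ids=['<providerId of a provider removed from DynamoDB>'],
+            )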
+ + :param index_name: The name of the index to delete from + :param document_ids: List of document IDs to delete + :return: A list of document ids that failed to delete (if any) + :raises CCInternalException: If all retry attempts fail due to connection issues + """ + failed_document_ids = set() + if not document_ids: + return failed_document_ids + + actions = [] + for doc_id in document_ids: + actions.append({'delete': {'_id': doc_id}}) + + response = self._bulk_operation_with_retry( + actions=actions, index_name=index_name, operation_count=len(document_ids), operation_type='delete' + ) + + # Check for individual delete failures + if response.get('errors'): + for item in response.get('items', []): + delete_result = item.get('delete', {}) + if delete_result.get('error'): + doc_id = delete_result.get('_id') + # 404 (not_found) is not an error for delete - the document was already gone + if delete_result.get('status') != 404: + logger.error( + 'Document deletion failed', + provider_id=doc_id, + error=delete_result.get('error'), + ) + failed_document_ids.add(doc_id) + + return failed_document_ids + + def _bulk_index_with_retry(self, actions: list, index_name: str, document_count: int) -> dict: + """ + Execute bulk index with retry logic and exponential backoff. + + :param actions: The bulk actions to execute + :param index_name: The name of the index to write to + :param document_count: Number of documents being indexed (for logging) + :return: The bulk response from OpenSearch + :raises CCInternalException: If all retry attempts fail + """ + return self._bulk_operation_with_retry( + actions=actions, index_name=index_name, operation_count=document_count, operation_type='index' + ) + + def _bulk_operation_with_retry( + self, actions: list, index_name: str, operation_count: int, operation_type: str + ) -> dict: + """ + Execute bulk operation with retry logic and exponential backoff. + + :param actions: The bulk actions to execute + :param index_name: The name of the index to operate on + :param operation_count: Number of operations being performed (for logging) + :param operation_type: Type of operation ('index' or 'delete') for logging + :return: The bulk response from OpenSearch + :raises CCInternalException: If all retry attempts fail + """ + last_exception = None + backoff_seconds = INITIAL_BACKOFF_SECONDS + + for attempt in range(1, MAX_RETRY_ATTEMPTS + 1): + try: + return self._client.bulk(body=actions, index=index_name) + except (ConnectionTimeout, TransportError) as e: + last_exception = e + if attempt < MAX_RETRY_ATTEMPTS: + logger.warning( + f'Bulk {operation_type} attempt failed, retrying with backoff', + attempt=attempt, + max_attempts=MAX_RETRY_ATTEMPTS, + backoff_seconds=backoff_seconds, + index_name=index_name, + operation_count=operation_count, + error=str(e), + ) + time.sleep(backoff_seconds) + # Exponential backoff with cap + backoff_seconds = min(backoff_seconds * 2, MAX_BACKOFF_SECONDS) + else: + logger.error( + f'Bulk {operation_type} failed after max retry attempts', + attempts=MAX_RETRY_ATTEMPTS, + index_name=index_name, + operation_count=operation_count, + error=str(e), + ) + + # All retry attempts failed + raise CCInternalException( + f'Failed to bulk {operation_type} {operation_count} documents to {index_name} ' + f'after {MAX_RETRY_ATTEMPTS} attempts. 
Last error: {last_exception}' + ) diff --git a/backend/compact-connect/lambdas/python/search/requirements-dev.in b/backend/compact-connect/lambdas/python/search/requirements-dev.in new file mode 100644 index 000000000..e0c3124af --- /dev/null +++ b/backend/compact-connect/lambdas/python/search/requirements-dev.in @@ -0,0 +1 @@ +moto[dynamodb]>=5.0.12, <6 diff --git a/backend/compact-connect/lambdas/python/search/requirements-dev.txt b/backend/compact-connect/lambdas/python/search/requirements-dev.txt new file mode 100644 index 000000000..1d8dccf31 --- /dev/null +++ b/backend/compact-connect/lambdas/python/search/requirements-dev.txt @@ -0,0 +1,68 @@ +# +# This file is autogenerated by pip-compile with Python 3.14 +# by the following command: +# +# pip-compile --no-emit-index-url --no-strip-extras lambdas/python/search/requirements-dev.in +# +boto3==1.42.11 + # via moto +botocore==1.42.11 + # via + # boto3 + # moto + # s3transfer +certifi==2025.11.12 + # via requests +cffi==2.0.0 + # via cryptography +charset-normalizer==3.4.4 + # via requests +cryptography==46.0.3 + # via moto +docker==7.1.0 + # via moto +idna==3.11 + # via requests +jinja2==3.1.6 + # via moto +jmespath==1.0.1 + # via + # boto3 + # botocore +markupsafe==3.0.3 + # via + # jinja2 + # werkzeug +moto[dynamodb]==5.1.18 + # via -r lambdas/python/search/requirements-dev.in +py-partiql-parser==0.6.3 + # via moto +pycparser==2.23 + # via cffi +python-dateutil==2.9.0.post0 + # via + # botocore + # moto +pyyaml==6.0.3 + # via responses +requests==2.32.5 + # via + # docker + # moto + # responses +responses==0.25.8 + # via moto +s3transfer==0.16.0 + # via boto3 +six==1.17.0 + # via python-dateutil +urllib3==2.6.2 + # via + # botocore + # docker + # requests + # responses +werkzeug==3.1.4 + # via moto +xmltodict==1.0.2 + # via moto diff --git a/backend/compact-connect/lambdas/python/search/requirements.in b/backend/compact-connect/lambdas/python/search/requirements.in new file mode 100644 index 000000000..0c9e7c499 --- /dev/null +++ b/backend/compact-connect/lambdas/python/search/requirements.in @@ -0,0 +1,2 @@ +# common requirements are managed in the common requirements.in file +opensearch-py>=3.1.0, <4.0.0 diff --git a/backend/compact-connect/lambdas/python/search/requirements.txt b/backend/compact-connect/lambdas/python/search/requirements.txt new file mode 100644 index 000000000..2352dd4f0 --- /dev/null +++ b/backend/compact-connect/lambdas/python/search/requirements.txt @@ -0,0 +1,36 @@ +# +# This file is autogenerated by pip-compile with Python 3.14 +# by the following command: +# +# pip-compile --no-emit-index-url --no-strip-extras lambdas/python/search/requirements.in +# +certifi==2025.11.12 + # via + # opensearch-py + # requests +charset-normalizer==3.4.4 + # via requests +events==0.5 + # via opensearch-py +grpcio==1.76.0 + # via opensearch-protobufs +idna==3.11 + # via requests +opensearch-protobufs==0.19.0 + # via opensearch-py +opensearch-py==3.1.0 + # via -r lambdas/python/search/requirements.in +protobuf==6.33.2 + # via opensearch-protobufs +python-dateutil==2.9.0.post0 + # via opensearch-py +requests==2.32.5 + # via opensearch-py +six==1.17.0 + # via python-dateutil +typing-extensions==4.15.0 + # via grpcio +urllib3==2.6.2 + # via + # opensearch-py + # requests diff --git a/backend/compact-connect/lambdas/python/search/tests/__init__.py b/backend/compact-connect/lambdas/python/search/tests/__init__.py new file mode 100644 index 000000000..d00640792 --- /dev/null +++ 
b/backend/compact-connect/lambdas/python/search/tests/__init__.py @@ -0,0 +1,106 @@ +import json +import os +from unittest import TestCase +from unittest.mock import MagicMock + +from aws_lambda_powertools.utilities.typing import LambdaContext + + +class TstLambdas(TestCase): + @classmethod + def setUpClass(cls): + os.environ.update( + { + # Set to 'true' to enable debug logging + 'DEBUG': 'true', + 'ALLOWED_ORIGINS': '["https://example.org"]', + 'AWS_DEFAULT_REGION': 'us-east-1', + 'AWS_REGION': 'us-east-1', + 'ENVIRONMENT_NAME': 'test', + 'COMPACTS': '["aslp", "octp", "coun"]', + 'PROVIDER_TABLE_NAME': 'provider-table', + 'PROV_DATE_OF_UPDATE_INDEX_NAME': 'providerDateOfUpdate', + 'PROV_FAM_GIV_MID_INDEX_NAME': 'providerFamGivMid', + 'LICENSE_GSI_NAME': 'licenseGSI', + 'LICENSE_UPLOAD_DATE_INDEX_NAME': 'licenseUploadDateGSI', + 'OPENSEARCH_HOST_ENDPOINT': 'vpc-providersearchd-5bzuqxhpxffk-w6dkpddu.us-east-1.es.amazonaws.com', + 'EXPORT_RESULTS_BUCKET_NAME': 'test-export-results-bucket', + 'JURISDICTIONS': json.dumps( + [ + 'al', + 'ak', + 'az', + 'ar', + 'ca', + 'co', + 'ct', + 'de', + 'dc', + 'fl', + 'ga', + 'hi', + 'id', + 'il', + 'in', + 'ia', + 'ks', + 'ky', + 'la', + 'me', + 'md', + 'ma', + 'mi', + 'mn', + 'ms', + 'mo', + 'mt', + 'ne', + 'nv', + 'nh', + 'nj', + 'nm', + 'ny', + 'nc', + 'nd', + 'oh', + 'ok', + 'or', + 'pa', + 'pr', + 'ri', + 'sc', + 'sd', + 'tn', + 'tx', + 'ut', + 'vt', + 'va', + 'vi', + 'wa', + 'wv', + 'wi', + 'wy', + ] + ), + 'LICENSE_TYPES': json.dumps( + { + 'aslp': [ + {'name': 'audiologist', 'abbreviation': 'aud'}, + {'name': 'speech-language pathologist', 'abbreviation': 'slp'}, + ], + 'octp': [ + {'name': 'occupational therapist', 'abbreviation': 'ot'}, + {'name': 'occupational therapy assistant', 'abbreviation': 'ota'}, + ], + 'coun': [{'name': 'licensed professional counselor', 'abbreviation': 'lpc'}], + }, + ), + }, + ) + # Monkey-patch config object to be sure we have it based + # on the env vars we set above + import cc_common.config + + cls.config = cc_common.config._Config() # noqa: SLF001 protected-access + cc_common.config.config = cls.config + cls.mock_context = MagicMock(name='MockLambdaContext', spec=LambdaContext) diff --git a/backend/compact-connect/lambdas/python/search/tests/function/__init__.py b/backend/compact-connect/lambdas/python/search/tests/function/__init__.py new file mode 100644 index 000000000..3d3f139f9 --- /dev/null +++ b/backend/compact-connect/lambdas/python/search/tests/function/__init__.py @@ -0,0 +1,93 @@ +import os + +import boto3 +from moto import mock_aws + +from tests import TstLambdas + + +@mock_aws +class TstFunction(TstLambdas): + """Base class to set up Moto mocking and create mock AWS resources for functional testing""" + + def setUp(self): # noqa: N801 invalid-name + super().setUp() + # we want to see any diffs in failed tests, regardless of how large the object is + self.maxDiff = None + + self.build_resources() + # This must be imported within the tests, since they import modules which require + # environment variables that are not set until the TstLambdas class is initialized + from common_test.test_data_generator import TestDataGenerator + + self.test_data_generator = TestDataGenerator + + self.addCleanup(self.delete_resources) + + def build_resources(self): + self.create_provider_table() + self.create_export_results_bucket() + + def delete_resources(self): + self._provider_table.delete() + # must delete all objects in the bucket before deleting the bucket + self._bucket.objects.delete() + self._bucket.delete() 
+ + def create_export_results_bucket(self): + """Create the mock S3 bucket for export results""" + self._bucket = boto3.resource('s3').create_bucket(Bucket=os.environ['EXPORT_RESULTS_BUCKET_NAME']) + + def create_provider_table(self): + self._provider_table = boto3.resource('dynamodb').create_table( + AttributeDefinitions=[ + {'AttributeName': 'pk', 'AttributeType': 'S'}, + {'AttributeName': 'sk', 'AttributeType': 'S'}, + {'AttributeName': 'providerFamGivMid', 'AttributeType': 'S'}, + {'AttributeName': 'providerDateOfUpdate', 'AttributeType': 'S'}, + {'AttributeName': 'licenseGSIPK', 'AttributeType': 'S'}, + {'AttributeName': 'licenseGSISK', 'AttributeType': 'S'}, + {'AttributeName': 'licenseUploadDateGSIPK', 'AttributeType': 'S'}, + {'AttributeName': 'licenseUploadDateGSISK', 'AttributeType': 'S'}, + ], + TableName=os.environ['PROVIDER_TABLE_NAME'], + KeySchema=[{'AttributeName': 'pk', 'KeyType': 'HASH'}, {'AttributeName': 'sk', 'KeyType': 'RANGE'}], + BillingMode='PAY_PER_REQUEST', + GlobalSecondaryIndexes=[ + { + 'IndexName': os.environ['PROV_FAM_GIV_MID_INDEX_NAME'], + 'KeySchema': [ + {'AttributeName': 'sk', 'KeyType': 'HASH'}, + {'AttributeName': 'providerFamGivMid', 'KeyType': 'RANGE'}, + ], + 'Projection': {'ProjectionType': 'ALL'}, + }, + { + 'IndexName': os.environ['PROV_DATE_OF_UPDATE_INDEX_NAME'], + 'KeySchema': [ + {'AttributeName': 'sk', 'KeyType': 'HASH'}, + {'AttributeName': 'providerDateOfUpdate', 'KeyType': 'RANGE'}, + ], + 'Projection': {'ProjectionType': 'ALL'}, + }, + { + 'IndexName': os.environ['LICENSE_GSI_NAME'], + 'KeySchema': [ + {'AttributeName': 'licenseGSIPK', 'KeyType': 'HASH'}, + {'AttributeName': 'licenseGSISK', 'KeyType': 'RANGE'}, + ], + 'Projection': {'ProjectionType': 'ALL'}, + }, + { + 'IndexName': os.environ['LICENSE_UPLOAD_DATE_INDEX_NAME'], + 'KeySchema': [ + {'AttributeName': 'licenseUploadDateGSIPK', 'KeyType': 'HASH'}, + {'AttributeName': 'licenseUploadDateGSISK', 'KeyType': 'RANGE'}, + ], + 'Projection': { + 'ProjectionType': 'INCLUDE', + 'NonKeyAttributes': ['providerId'], + }, + }, + ], + ) diff --git a/backend/compact-connect/lambdas/python/search/tests/function/test_manage_opensearch_indices.py b/backend/compact-connect/lambdas/python/search/tests/function/test_manage_opensearch_indices.py new file mode 100644 index 000000000..2ae84681d --- /dev/null +++ b/backend/compact-connect/lambdas/python/search/tests/function/test_manage_opensearch_indices.py @@ -0,0 +1,559 @@ +from unittest.mock import Mock, call, patch + +from moto import mock_aws + +from . import TstFunction + + +@mock_aws +class TestOpenSearchIndexManager(TstFunction): + """Test suite for OpenSearchIndexManager custom resource.""" + + def setUp(self): + super().setUp() + + def _create_event(self, request_type: str, properties: dict = None) -> dict: + """Create a CloudFormation custom resource event.""" + default_properties = { + 'numberOfShards': 1, + 'numberOfReplicas': 0, + } + if properties: + default_properties.update(properties) + return { + 'RequestType': request_type, + 'ResourceProperties': default_properties, + } + + def _when_testing_mock_opensearch_client( + self, + mock_opensearch_client, + alias_exists_return_value: bool | dict = False, + index_exists_return_value: bool | dict = False, + ): + """ + Configure the mock OpenSearchClient for testing. 
+ + :param mock_opensearch_client: The patched OpenSearchClient class + :param alias_exists_return_value: Either a boolean (applied to all aliases) + or a dict mapping alias names to booleans + :param index_exists_return_value: Either a boolean (applied to all indices) + or a dict mapping index names to booleans + :return: The mock client instance + """ + mock_client_instance = Mock() + mock_opensearch_client.return_value = mock_client_instance + + # Configure cluster_health mock (used by _wait_for_domain_ready) + mock_client_instance.cluster_health.return_value = { + 'status': 'green', + 'number_of_nodes': 1, + 'cluster_name': 'test-cluster', + } + + # Configure alias_exists mock + if isinstance(alias_exists_return_value, dict): + mock_client_instance.alias_exists.side_effect = lambda alias_name: alias_exists_return_value.get( + alias_name, False + ) + else: + mock_client_instance.alias_exists.return_value = alias_exists_return_value + + # Configure index_exists mock + if isinstance(index_exists_return_value, dict): + mock_client_instance.index_exists.side_effect = lambda index_name: index_exists_return_value.get( + index_name, False + ) + else: + mock_client_instance.index_exists.return_value = index_exists_return_value + + return mock_client_instance + + @patch('handlers.manage_opensearch_indices.OpenSearchClient') + def test_on_create_creates_versioned_indices_and_aliases_for_all_compacts_when_none_exist( + self, mock_opensearch_client + ): + """Test that on_create creates versioned indices and aliases for all compacts when they don't exist.""" + from handlers.manage_opensearch_indices import on_event + + # Set up the mock opensearch client - no aliases or indices exist + mock_client_instance = self._when_testing_mock_opensearch_client( + mock_opensearch_client, alias_exists_return_value=False, index_exists_return_value=False + ) + + # Create the event for a 'Create' request with explicit shard/replica configuration + event = self._create_event('Create', {'numberOfShards': 2, 'numberOfReplicas': 1}) + + # Call the handler + on_event(event, self.mock_context) + + # Assert that the OpenSearchClient was instantiated + mock_opensearch_client.assert_called_once() + + # Assert that alias_exists was called for each compact + expected_alias_exists_calls = [ + call('compact_aslp_providers'), + call('compact_octp_providers'), + call('compact_coun_providers'), + ] + mock_client_instance.alias_exists.assert_has_calls(expected_alias_exists_calls, any_order=False) + self.assertEqual(3, mock_client_instance.alias_exists.call_count) + + # Assert that create_index was called for each compact with versioned names + self.assertEqual(3, mock_client_instance.create_index.call_count) + + # Verify the versioned index names in create_index calls + create_index_calls = mock_client_instance.create_index.call_args_list + index_names_created = [call_args[0][0] for call_args in create_index_calls] + self.assertEqual( + ['compact_aslp_providers_v1', 'compact_octp_providers_v1', 'compact_coun_providers_v1'], + index_names_created, + ) + + # Assert that create_alias was called for each compact + self.assertEqual(3, mock_client_instance.create_alias.call_count) + expected_alias_calls = [ + call('compact_aslp_providers_v1', 'compact_aslp_providers'), + call('compact_octp_providers_v1', 'compact_octp_providers'), + call('compact_coun_providers_v1', 'compact_coun_providers'), + ] + mock_client_instance.create_alias.assert_has_calls(expected_alias_calls, any_order=False) + + # Verify the mapping was passed to create_index 
with correct shard/replica configuration + for call_args in create_index_calls: + index_mapping = call_args[0][1] + # Verify the index settings use the provided shard/replica values + self.assertEqual(2, index_mapping['settings']['index']['number_of_shards']) + self.assertEqual(1, index_mapping['settings']['index']['number_of_replicas']) + # Verify the mapping has the expected structure + self.assertEqual( + { + 'mappings': { + 'properties': { + 'birthMonthDay': {'type': 'keyword'}, + 'compact': {'type': 'keyword'}, + 'compactConnectRegisteredEmailAddress': {'type': 'keyword'}, + 'compactEligibility': {'type': 'keyword'}, + 'currentHomeJurisdiction': {'type': 'keyword'}, + 'dateOfExpiration': {'type': 'date'}, + 'dateOfUpdate': {'type': 'date'}, + 'familyName': { + 'analyzer': 'custom_ascii_analyzer', + 'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}}, + 'type': 'text', + }, + 'givenName': { + 'analyzer': 'custom_ascii_analyzer', + 'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}}, + 'type': 'text', + }, + 'jurisdictionUploadedCompactEligibility': {'type': 'keyword'}, + 'jurisdictionUploadedLicenseStatus': {'type': 'keyword'}, + 'licenseJurisdiction': {'type': 'keyword'}, + 'licenseStatus': {'type': 'keyword'}, + 'licenses': { + 'properties': { + 'adverseActions': { + 'properties': { + 'actionAgainst': {'type': 'keyword'}, + 'adverseActionId': {'type': 'keyword'}, + 'clinicalPrivilegeActionCategories': {'type': 'keyword'}, + 'clinicalPrivilegeActionCategory': {'type': 'keyword'}, + 'compact': {'type': 'keyword'}, + 'creationDate': {'type': 'date'}, + 'dateOfUpdate': {'type': 'date'}, + 'effectiveLiftDate': {'type': 'date'}, + 'effectiveStartDate': {'type': 'date'}, + 'encumbranceType': {'type': 'keyword'}, + 'jurisdiction': {'type': 'keyword'}, + 'licenseType': {'type': 'keyword'}, + 'licenseTypeAbbreviation': {'type': 'keyword'}, + 'liftingUser': {'type': 'keyword'}, + 'providerId': {'type': 'keyword'}, + 'submittingUser': {'type': 'keyword'}, + 'type': {'type': 'keyword'}, + }, + 'type': 'nested', + }, + 'compact': {'type': 'keyword'}, + 'compactEligibility': {'type': 'keyword'}, + 'dateOfExpiration': {'type': 'date'}, + 'dateOfIssuance': {'type': 'date'}, + 'dateOfRenewal': {'type': 'date'}, + 'dateOfUpdate': {'type': 'date'}, + 'emailAddress': {'type': 'keyword'}, + 'familyName': { + 'analyzer': 'custom_ascii_analyzer', + 'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}}, + 'type': 'text', + }, + 'givenName': { + 'analyzer': 'custom_ascii_analyzer', + 'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}}, + 'type': 'text', + }, + 'homeAddressCity': { + 'analyzer': 'custom_ascii_analyzer', + 'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}}, + 'type': 'text', + }, + 'homeAddressPostalCode': {'type': 'keyword'}, + 'homeAddressState': {'type': 'keyword'}, + 'homeAddressStreet1': {'type': 'text'}, + 'homeAddressStreet2': {'type': 'text'}, + 'investigationStatus': {'type': 'keyword'}, + 'investigations': { + 'properties': { + 'compact': {'type': 'keyword'}, + 'dateOfUpdate': {'type': 'date'}, + 'investigationId': {'type': 'keyword'}, + 'jurisdiction': {'type': 'keyword'}, + 'licenseType': {'type': 'keyword'}, + 'status': {'type': 'keyword'}, + 'type': {'type': 'keyword'}, + }, + 'type': 'nested', + }, + 'jurisdiction': {'type': 'keyword'}, + 'jurisdictionUploadedCompactEligibility': {'type': 'keyword'}, + 'jurisdictionUploadedLicenseStatus': {'type': 'keyword'}, + 'licenseNumber': {'type': 'keyword'}, + 'licenseStatus': 
{'type': 'keyword'}, + 'licenseStatusName': {'type': 'keyword'}, + 'licenseType': {'type': 'keyword'}, + 'middleName': { + 'analyzer': 'custom_ascii_analyzer', + 'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}}, + 'type': 'text', + }, + 'npi': {'type': 'keyword'}, + 'phoneNumber': {'type': 'keyword'}, + 'providerId': {'type': 'keyword'}, + 'suffix': {'type': 'keyword'}, + 'type': {'type': 'keyword'}, + }, + 'type': 'nested', + }, + 'middleName': { + 'analyzer': 'custom_ascii_analyzer', + 'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}}, + 'type': 'text', + }, + 'militaryAffiliations': { + 'properties': { + 'affiliationType': {'type': 'keyword'}, + 'compact': {'type': 'keyword'}, + 'dateOfUpdate': {'type': 'date'}, + 'dateOfUpload': {'type': 'date'}, + 'fileNames': {'type': 'keyword'}, + 'providerId': {'type': 'keyword'}, + 'status': {'type': 'keyword'}, + 'type': {'type': 'keyword'}, + }, + 'type': 'nested', + }, + 'militaryStatus': {'type': 'keyword'}, + 'militaryStatusNote': {'type': 'text'}, + 'npi': {'type': 'keyword'}, + 'privilegeJurisdictions': {'type': 'keyword'}, + 'privileges': { + 'properties': { + 'activeSince': {'type': 'date'}, + 'administratorSetStatus': {'type': 'keyword'}, + 'adverseActions': { + 'properties': { + 'actionAgainst': {'type': 'keyword'}, + 'adverseActionId': {'type': 'keyword'}, + 'clinicalPrivilegeActionCategories': {'type': 'keyword'}, + 'clinicalPrivilegeActionCategory': {'type': 'keyword'}, + 'compact': {'type': 'keyword'}, + 'creationDate': {'type': 'date'}, + 'dateOfUpdate': {'type': 'date'}, + 'effectiveLiftDate': {'type': 'date'}, + 'effectiveStartDate': {'type': 'date'}, + 'encumbranceType': {'type': 'keyword'}, + 'jurisdiction': {'type': 'keyword'}, + 'licenseType': {'type': 'keyword'}, + 'licenseTypeAbbreviation': {'type': 'keyword'}, + 'liftingUser': {'type': 'keyword'}, + 'providerId': {'type': 'keyword'}, + 'submittingUser': {'type': 'keyword'}, + 'type': {'type': 'keyword'}, + }, + 'type': 'nested', + }, + 'attestations': { + 'properties': { + 'attestationId': {'type': 'keyword'}, + 'version': {'type': 'keyword'}, + }, + 'type': 'nested', + }, + 'compact': {'type': 'keyword'}, + 'compactTransactionId': {'type': 'keyword'}, + 'dateOfExpiration': {'type': 'date'}, + 'dateOfIssuance': {'type': 'date'}, + 'dateOfRenewal': {'type': 'date'}, + 'dateOfUpdate': {'type': 'date'}, + 'investigationStatus': {'type': 'keyword'}, + 'investigations': { + 'properties': { + 'compact': {'type': 'keyword'}, + 'dateOfUpdate': {'type': 'date'}, + 'investigationId': {'type': 'keyword'}, + 'jurisdiction': {'type': 'keyword'}, + 'licenseType': {'type': 'keyword'}, + 'status': {'type': 'keyword'}, + 'type': {'type': 'keyword'}, + }, + 'type': 'nested', + }, + 'jurisdiction': {'type': 'keyword'}, + 'licenseJurisdiction': {'type': 'keyword'}, + 'licenseType': {'type': 'keyword'}, + 'privilegeId': {'type': 'keyword'}, + 'providerId': {'type': 'keyword'}, + 'status': {'type': 'keyword'}, + 'type': {'type': 'keyword'}, + }, + 'type': 'nested', + }, + 'providerDateOfUpdate': {'type': 'date'}, + 'providerFamGivMid': {'type': 'keyword'}, + 'providerId': {'type': 'keyword'}, + 'suffix': {'type': 'keyword'}, + 'type': {'type': 'keyword'}, + } + }, + 'settings': { + 'analysis': { + 'analyzer': { + 'custom_ascii_analyzer': { + 'filter': ['lowercase', 'custom_ascii_folding'], + 'tokenizer': 'standard', + 'type': 'custom', + } + }, + 'filter': {'custom_ascii_folding': {'preserve_original': True, 'type': 'asciifolding'}}, + }, + 'index': 
{'number_of_replicas': 1, 'number_of_shards': 2}, + }, + }, + index_mapping, + ) + + @patch('handlers.manage_opensearch_indices.OpenSearchClient') + def test_on_create_skips_index_and_alias_creation_when_all_aliases_exist(self, mock_opensearch_client): + """Test that on_create skips index and alias creation when aliases already exist.""" + from handlers.manage_opensearch_indices import on_event + + # Set up the mock opensearch client - all aliases exist (meaning indices are already set up) + mock_client_instance = self._when_testing_mock_opensearch_client( + mock_opensearch_client, alias_exists_return_value=True + ) + + # Create the event for a 'Create' request + event = self._create_event('Create') + + # Call the handler + on_event(event, self.mock_context) + + # Assert that the OpenSearchClient was instantiated + mock_opensearch_client.assert_called_once() + + # Assert that alias_exists was called for each compact + self.assertEqual(3, mock_client_instance.alias_exists.call_count) + + # Assert that index_exists was NOT called since aliases already exist + mock_client_instance.index_exists.assert_not_called() + + # Assert that create_index was NOT called since aliases already exist + mock_client_instance.create_index.assert_not_called() + + # Assert that create_alias was NOT called since aliases already exist + mock_client_instance.create_alias.assert_not_called() + + @patch('handlers.manage_opensearch_indices.OpenSearchClient') + def test_on_create_only_creates_missing_indices_and_aliases(self, mock_opensearch_client): + """Test that on_create only creates indices and aliases that don't exist.""" + from handlers.manage_opensearch_indices import on_event + + # Set up the mock opensearch client - only aslp alias exists + mock_client_instance = self._when_testing_mock_opensearch_client( + mock_opensearch_client, + alias_exists_return_value={ + 'compact_aslp_providers': True, + 'compact_octp_providers': False, + 'compact_coun_providers': False, + }, + index_exists_return_value=False, + ) + + # Create the event for a 'Create' request + event = self._create_event('Create') + + # Call the handler + on_event(event, self.mock_context) + + # Assert that alias_exists was called for each compact + self.assertEqual(3, mock_client_instance.alias_exists.call_count) + + # Assert that index_exists was called only for missing aliases (octp and coun) + self.assertEqual(2, mock_client_instance.index_exists.call_count) + + # Assert that create_index was called only for missing indices (octp and coun) + self.assertEqual(2, mock_client_instance.create_index.call_count) + + # Verify the correct versioned indices were created + create_index_calls = mock_client_instance.create_index.call_args_list + index_names_created = [call_args[0][0] for call_args in create_index_calls] + self.assertEqual(['compact_octp_providers_v1', 'compact_coun_providers_v1'], index_names_created) + + # Assert that create_alias was called only for missing aliases (octp and coun) + self.assertEqual(2, mock_client_instance.create_alias.call_count) + expected_alias_calls = [ + call('compact_octp_providers_v1', 'compact_octp_providers'), + call('compact_coun_providers_v1', 'compact_coun_providers'), + ] + mock_client_instance.create_alias.assert_has_calls(expected_alias_calls, any_order=False) + + @patch('handlers.manage_opensearch_indices.OpenSearchClient') + def test_on_create_creates_alias_only_when_index_exists_but_alias_does_not(self, mock_opensearch_client): + """Test that on_create creates only the alias when the index exists but the 
alias doesn't.""" + from handlers.manage_opensearch_indices import on_event + + # Set up the mock opensearch client - index exists but alias doesn't (edge case) + mock_client_instance = self._when_testing_mock_opensearch_client( + mock_opensearch_client, + alias_exists_return_value=False, + index_exists_return_value={ + 'compact_aslp_providers_v1': True, + 'compact_octp_providers_v1': True, + 'compact_coun_providers_v1': True, + }, + ) + + # Create the event for a 'Create' request + event = self._create_event('Create') + + # Call the handler + on_event(event, self.mock_context) + + # Assert that alias_exists was called for each compact + self.assertEqual(3, mock_client_instance.alias_exists.call_count) + + # Assert that index_exists was called for each compact + self.assertEqual(3, mock_client_instance.index_exists.call_count) + + # Assert that create_index was NOT called since indices already exist + mock_client_instance.create_index.assert_not_called() + + # Assert that create_alias was called for each compact (to create the missing aliases) + self.assertEqual(3, mock_client_instance.create_alias.call_count) + expected_alias_calls = [ + call('compact_aslp_providers_v1', 'compact_aslp_providers'), + call('compact_octp_providers_v1', 'compact_octp_providers'), + call('compact_coun_providers_v1', 'compact_coun_providers'), + ] + mock_client_instance.create_alias.assert_has_calls(expected_alias_calls, any_order=False) + + @patch('handlers.manage_opensearch_indices.OpenSearchClient') + def test_on_update_is_noop(self, mock_opensearch_client): + """Test that on_update does not create or modify indices.""" + from handlers.manage_opensearch_indices import on_event + + # Create the event for an 'Update' request + event = self._create_event('Update') + + # Call the handler + result = on_event(event, self.mock_context) + + # Assert that the OpenSearchClient was NOT instantiated + mock_opensearch_client.assert_not_called() + + # Result should be None (no-op) + self.assertIsNone(result) + + @patch('handlers.manage_opensearch_indices.OpenSearchClient') + def test_on_delete_is_noop(self, mock_opensearch_client): + """Test that on_delete does not delete indices.""" + from handlers.manage_opensearch_indices import on_event + + # Create the event for a 'Delete' request + event = self._create_event('Delete') + + # Call the handler + result = on_event(event, self.mock_context) + + # Assert that the OpenSearchClient was NOT instantiated + mock_opensearch_client.assert_not_called() + + # Result should be None (no-op) + self.assertIsNone(result) + + @patch('handlers.manage_opensearch_indices.time.sleep') + @patch('handlers.manage_opensearch_indices.OpenSearchClient') + def test_on_create_retries_when_domain_not_immediately_responsive(self, mock_opensearch_client, mock_sleep): + """Test that on_create retries connecting to the domain when it's not immediately responsive.""" + from cc_common.exceptions import CCInternalException + from handlers.manage_opensearch_indices import on_event + + # First two calls fail, third succeeds + mock_client_instance = Mock() + mock_client_instance.cluster_health.return_value = { + 'status': 'green', + 'number_of_nodes': 1, + } + mock_client_instance.alias_exists.return_value = True # Skip index creation for simplicity + + call_count = 0 + + def side_effect(): + nonlocal call_count + call_count += 1 + if call_count <= 2: + raise CCInternalException('cluster_health failed after 5 attempts. 
Last error: ConnectionTimeout') + return mock_client_instance + + mock_opensearch_client.side_effect = side_effect + + # Create the event for a 'Create' request + event = self._create_event('Create') + + # Call the handler + on_event(event, self.mock_context) + + # Assert that OpenSearchClient was instantiated 3 times (2 failures + 1 success) + self.assertEqual(3, mock_opensearch_client.call_count) + + # Assert that sleep was called twice (once between each retry) + self.assertEqual(2, mock_sleep.call_count) + + @patch('handlers.manage_opensearch_indices.time.sleep') + @patch('handlers.manage_opensearch_indices.OpenSearchClient') + def test_on_create_raises_after_max_retries(self, mock_opensearch_client, mock_sleep): # noqa ARG002 unused-argument + """Test that on_create raises CCInternalException after max retries are exhausted.""" + from cc_common.exceptions import CCInternalException + from handlers.manage_opensearch_indices import ( + DOMAIN_READINESS_MAX_ATTEMPTS, + on_event, + ) + + # All calls fail + mock_opensearch_client.side_effect = CCInternalException( + 'cluster_health failed after 5 attempts. Last error: ConnectionTimeout' + ) + + # Create the event for a 'Create' request + event = self._create_event('Create') + + # Call the handler and expect an exception + with self.assertRaises(CCInternalException) as context: + on_event(event, self.mock_context) + + # Verify the error message mentions the number of attempts + self.assertIn(str(DOMAIN_READINESS_MAX_ATTEMPTS), str(context.exception)) + self.assertIn('did not become responsive', str(context.exception)) + + # Assert that OpenSearchClient was instantiated max attempts times + self.assertEqual(DOMAIN_READINESS_MAX_ATTEMPTS, mock_opensearch_client.call_count) diff --git a/backend/compact-connect/lambdas/python/search/tests/function/test_populate_provider_documents.py b/backend/compact-connect/lambdas/python/search/tests/function/test_populate_provider_documents.py new file mode 100644 index 000000000..932f79cb3 --- /dev/null +++ b/backend/compact-connect/lambdas/python/search/tests/function/test_populate_provider_documents.py @@ -0,0 +1,348 @@ +from unittest.mock import MagicMock, Mock, call, patch + +from common_test.test_constants import ( + DEFAULT_LICENSE_EXPIRATION_DATE, + DEFAULT_LICENSE_ISSUANCE_DATE, + DEFAULT_LICENSE_RENEWAL_DATE, + DEFAULT_LICENSE_UPDATE_DATE_OF_UPDATE, + DEFAULT_PROVIDER_UPDATE_DATETIME, + DEFAULT_REGISTERED_EMAIL_ADDRESS, +) +from moto import mock_aws + +from . 
import TstFunction + +MOCK_ASLP_PROVIDER_ID = '00000000-0000-0000-0000-000000000001' +MOCK_OCTP_PROVIDER_ID = '00000000-0000-0000-0000-000000000002' +MOCK_COUN_PROVIDER_ID = '00000000-0000-0000-0000-000000000003' + +test_license_type_mapping = { + 'aslp': 'audiologist', + 'octp': 'occupational therapist', + 'coun': 'licensed professional counselor', +} +test_provider_id_mapping = { + 'aslp': MOCK_ASLP_PROVIDER_ID, + 'octp': MOCK_OCTP_PROVIDER_ID, + 'coun': MOCK_COUN_PROVIDER_ID, +} + + +@mock_aws +class TestPopulateProviderDocuments(TstFunction): + """Test suite for populate provider documents handler.""" + + def setUp(self): + super().setUp() + + def _put_test_provider_and_license_record_in_dynamodb_table(self, compact): + self.test_data_generator.put_default_provider_record_in_provider_table( + value_overrides={ + 'compact': compact, + 'providerId': test_provider_id_mapping[compact], + 'givenName': f'test{compact}GivenName', + 'familyName': f'test{compact}FamilyName', + }, + date_of_update_override=DEFAULT_PROVIDER_UPDATE_DATETIME, + ) + self.test_data_generator.put_default_license_record_in_provider_table( + value_overrides={ + 'compact': compact, + 'providerId': test_provider_id_mapping[compact], + 'givenName': f'test{compact}GivenName', + 'familyName': f'test{compact}FamilyName', + 'licenseType': test_license_type_mapping[compact], + }, + date_of_update_override=DEFAULT_LICENSE_UPDATE_DATE_OF_UPDATE, + ) + + def _put_failed_ingest_record_in_search_event_state_table( + self, compact: str, provider_id: str, sequence_number: str + ): + """Put a failed ingest record in the search event state table for testing.""" + import time + from datetime import timedelta + + pk = f'COMPACT#{compact}#FAILED_INGEST' + sk = f'PROVIDER#{provider_id}#SEQUENCE#{sequence_number}' + ttl = int(time.time()) + int(timedelta(days=7).total_seconds()) + + self.config.search_event_state_table.put_item( + Item={ + 'pk': pk, + 'sk': sk, + 'compact': compact, + 'providerId': provider_id, + 'sequenceNumber': sequence_number, + 'ttl': ttl, + } + ) + + def _when_testing_mock_opensearch_client(self, mock_opensearch_client, bulk_index_response: dict = None): + if not bulk_index_response: + bulk_index_response = {'items': [], 'errors': False} + + # Create a mock instance that will be returned by the OpenSearchClient constructor + mock_client_instance = Mock() + mock_opensearch_client.return_value = mock_client_instance + mock_client_instance.bulk_index.return_value = bulk_index_response + return mock_client_instance + + def _generate_expected_call_for_document(self, compact): + # Use timezone(timedelta(0), '+0000') to match how the code creates UTC timezone + return call( + index_name=f'compact_{compact}_providers', + documents=[ + { + 'providerId': test_provider_id_mapping[compact], + 'type': 'provider', + 'dateOfUpdate': DEFAULT_PROVIDER_UPDATE_DATETIME, + 'compact': compact, + 'licenseJurisdiction': 'oh', + 'currentHomeJurisdiction': 'oh', + 'licenseStatus': 'inactive', + 'compactEligibility': 'ineligible', + 'npi': '0608337260', + 'givenName': f'test{compact}GivenName', + 'middleName': 'Gunnar', + 'familyName': f'test{compact}FamilyName', + 'dateOfExpiration': DEFAULT_LICENSE_EXPIRATION_DATE, + 'compactConnectRegisteredEmailAddress': DEFAULT_REGISTERED_EMAIL_ADDRESS, + 'jurisdictionUploadedLicenseStatus': 'active', + 'jurisdictionUploadedCompactEligibility': 'eligible', + 'privilegeJurisdictions': ['ne'], + 'birthMonthDay': '06-06', + 'licenses': [ + { + 'providerId': test_provider_id_mapping[compact], + 'type': 
'license', + 'dateOfUpdate': DEFAULT_LICENSE_UPDATE_DATE_OF_UPDATE, + 'compact': compact, + 'jurisdiction': 'oh', + 'licenseType': test_license_type_mapping[compact], + 'licenseStatusName': 'DEFINITELY_A_HUMAN', + 'licenseStatus': 'inactive', + 'jurisdictionUploadedLicenseStatus': 'active', + 'compactEligibility': 'ineligible', + 'jurisdictionUploadedCompactEligibility': 'eligible', + 'npi': '0608337260', + 'licenseNumber': 'A0608337260', + 'givenName': f'test{compact}GivenName', + 'middleName': 'Gunnar', + 'familyName': f'test{compact}FamilyName', + 'dateOfIssuance': DEFAULT_LICENSE_ISSUANCE_DATE, + 'dateOfRenewal': DEFAULT_LICENSE_RENEWAL_DATE, + 'dateOfExpiration': DEFAULT_LICENSE_EXPIRATION_DATE, + 'homeAddressStreet1': '123 A St.', + 'homeAddressStreet2': 'Apt 321', + 'homeAddressCity': 'Columbus', + 'homeAddressState': 'oh', + 'homeAddressPostalCode': '43004', + 'emailAddress': 'björk@example.com', + 'phoneNumber': '+13213214321', + 'adverseActions': [], + 'investigations': [], + } + ], + 'privileges': [], + 'militaryAffiliations': [], + } + ], + ) + + @patch('handlers.populate_provider_documents.OpenSearchClient') + def test_provider_records_from_all_three_compacts_are_indexed_in_expected_index(self, mock_opensearch_client): + from handlers.populate_provider_documents import TIME_THRESHOLD_MS, populate_provider_documents + + # Set up the mock opensearch client + mock_client_instance = self._when_testing_mock_opensearch_client(mock_opensearch_client) + + compacts = ['aslp', 'octp', 'coun'] + # add a provider and license record for each of the three compacts + for compact in compacts: + self._put_test_provider_and_license_record_in_dynamodb_table(compact) + + # mock the context to always return time above the cutoff threshold + mock_context = MagicMock() + mock_context.get_remaining_time_in_millis.return_value = TIME_THRESHOLD_MS + 60000 + + # now run the handler + result = populate_provider_documents({}, mock_context) + + # Assert that the OpenSearchClient was instantiated + mock_opensearch_client.assert_called_once() + + # Assert that bulk indexing was called once for each compact (3 times total) + self.assertEqual(3, mock_client_instance.bulk_index.call_count) + + # Get all calls to bulk_index and verify each compact was indexed + bulk_index_calls = mock_client_instance.bulk_index.call_args_list + self.assertEqual(self._generate_expected_call_for_document('aslp'), bulk_index_calls[0]) + self.assertEqual(self._generate_expected_call_for_document('octp'), bulk_index_calls[1]) + self.assertEqual(self._generate_expected_call_for_document('coun'), bulk_index_calls[2]) + + # Verify the result statistics + self.assertEqual( + { + 'compacts_processed': [ + {'compact': 'aslp', 'providers_failed': 0, 'providers_indexed': 1, 'providers_processed': 1}, + {'compact': 'octp', 'providers_failed': 0, 'providers_indexed': 1, 'providers_processed': 1}, + {'compact': 'coun', 'providers_failed': 0, 'providers_indexed': 1, 'providers_processed': 1}, + ], + 'completed': True, + 'errors': [], + 'total_providers_failed': 0, + 'total_providers_indexed': 3, + 'total_providers_processed': 3, + }, + result, + ) + + @patch('handlers.populate_provider_documents.OpenSearchClient') + def test_pagination_across_invocations_when_time_limit_reached(self, mock_opensearch_client): + """Test that the handler properly paginates across multiple invocations when approaching time limit. + + This test verifies: + 1. When the time limit is reached, the handler returns pagination info + 2. 
The pagination info can be passed to the next invocation to resume processing + 3. All records are eventually indexed across multiple invocations + """ + from handlers.populate_provider_documents import TIME_THRESHOLD_MS, populate_provider_documents + + # Time values for mocking (in milliseconds) + time_before_cutoff = TIME_THRESHOLD_MS + 60000 # before cutoff time, continue processing + time_after_cutoff = TIME_THRESHOLD_MS - 1000 # after cutoff time, trigger timeout + + # Set up the mock opensearch client + mock_client_instance = self._when_testing_mock_opensearch_client(mock_opensearch_client) + + compacts = ['aslp', 'octp', 'coun'] + # Add a provider and license record for each of the three compacts + for compact in compacts: + self._put_test_provider_and_license_record_in_dynamodb_table(compact) + + # First invocation: Mock time to trigger timeout after processing first compact (aslp) + # The time check happens at the START of each while loop iteration: + # - Call 1: Processing aslp, plenty of time -> continue + # - Call 2: About to process octp, low time -> timeout and return + mock_context = MagicMock() + mock_context.get_remaining_time_in_millis.side_effect = [time_before_cutoff, time_after_cutoff] + + # Run the first invocation + first_result = populate_provider_documents({}, mock_context) + + # Verify first invocation returned incomplete with pagination info + self.assertFalse(first_result['completed']) + self.assertIn('resumeFrom', first_result) + self.assertEqual('octp', first_result['resumeFrom']['startingCompact']) + # startingLastKey should be None since we haven't started processing octp yet + self.assertIsNone(first_result['resumeFrom']['startingLastKey']) + + # Verify only aslp was indexed in first invocation + self.assertEqual(1, first_result['total_providers_indexed']) + self.assertEqual(1, mock_client_instance.bulk_index.call_count) + + # Second invocation: Use the resumeFrom values as input + # Reset the mock for the second invocation + mock_opensearch_client.reset_mock() + mock_client_instance = self._when_testing_mock_opensearch_client(mock_opensearch_client) + + # Mock time to allow completion - needs enough calls for both octp and coun + # - Call 1: Processing octp, plenty of time -> continue + # - Call 2: Processing coun, plenty of time -> continue + mock_context.get_remaining_time_in_millis.side_effect = [time_before_cutoff, time_before_cutoff] + + # Build the resume event from the first result + resume_event = { + 'startingCompact': first_result['resumeFrom']['startingCompact'], + 'startingLastKey': first_result['resumeFrom']['startingLastKey'], + } + + # Run the second invocation with pagination info + second_result = populate_provider_documents(resume_event, mock_context) + + # Verify second invocation completed successfully + self.assertTrue(second_result['completed']) + self.assertNotIn('resumeFrom', second_result) + + # Verify octp and coun were indexed in second invocation + self.assertEqual(2, second_result['total_providers_indexed']) + self.assertEqual(2, mock_client_instance.bulk_index.call_count) + + # Verify the correct indices were called + bulk_index_calls = mock_client_instance.bulk_index.call_args_list + self.assertEqual(self._generate_expected_call_for_document('octp'), bulk_index_calls[0]) + self.assertEqual(self._generate_expected_call_for_document('coun'), bulk_index_calls[1]) + + @patch('handlers.populate_provider_documents.OpenSearchClient') + def test_returns_pagination_info_when_bulk_indexing_fails_after_retries(self, 
mock_opensearch_client): + """Test that the handler returns pagination info when bulk indexing fails after max retries. + + This test verifies: + 1. When CCInternalException is raised by bulk_index, the handler catches it + 2. The response includes resumeFrom with the batch_start_key for retry + 3. The developer can use this info to retry from the exact point of failure + """ + from cc_common.exceptions import CCInternalException + from handlers.populate_provider_documents import TIME_THRESHOLD_MS, populate_provider_documents + + # Set up the mock opensearch client to raise CCInternalException on second compact + mock_client_instance = Mock() + mock_opensearch_client.return_value = mock_client_instance + + # First compact (aslp) succeeds, second compact (octp) fails with CCInternalException + mock_client_instance.bulk_index.side_effect = [ + {'items': [], 'errors': False}, # aslp succeeds + CCInternalException('Connection timeout after 5 retries'), # octp fails + ] + + compacts = ['aslp', 'octp', 'coun'] + # Add a provider and license record for each compact + for compact in compacts: + self._put_test_provider_and_license_record_in_dynamodb_table(compact) + + # Mock the context to always return time above the cutoff threshold + mock_context = MagicMock() + mock_context.get_remaining_time_in_millis.return_value = TIME_THRESHOLD_MS + 60000 + + # Run the handler + result = populate_provider_documents({}, mock_context) + + # Verify the result indicates incomplete processing + self.assertFalse(result['completed']) + self.assertIn('resumeFrom', result) + + # Verify resumeFrom points to octp with the batch_start_key (None since it's the first batch) + self.assertEqual('octp', result['resumeFrom']['startingCompact']) + # startingLastKey should be None since it was the first batch of octp + self.assertIsNone(result['resumeFrom']['startingLastKey']) + + # Verify aslp was indexed but octp was not + self.assertEqual(1, result['total_providers_indexed']) + + # Verify errors list contains the failure info + self.assertEqual(1, len(result['errors'])) + self.assertEqual('octp', result['errors'][0]['compact']) + self.assertIn('Connection timeout', result['errors'][0]['error']) + + # Now verify that using resumeFrom allows completing the indexing + mock_opensearch_client.reset_mock() + mock_client_instance = Mock() + mock_opensearch_client.return_value = mock_client_instance + mock_client_instance.bulk_index.return_value = {'items': [], 'errors': False} + + # Build the resume event from the first result + resume_event = { + 'startingCompact': result['resumeFrom']['startingCompact'], + } + + # Run the second invocation + second_result = populate_provider_documents(resume_event, mock_context) + + # Verify second invocation completed successfully + self.assertTrue(second_result['completed']) + self.assertNotIn('resumeFrom', second_result) + + # Verify octp and coun were indexed + self.assertEqual(2, second_result['total_providers_indexed']) + self.assertEqual(2, mock_client_instance.bulk_index.call_count) diff --git a/backend/compact-connect/lambdas/python/search/tests/function/test_provider_update_ingest.py b/backend/compact-connect/lambdas/python/search/tests/function/test_provider_update_ingest.py new file mode 100644 index 000000000..f61add67b --- /dev/null +++ b/backend/compact-connect/lambdas/python/search/tests/function/test_provider_update_ingest.py @@ -0,0 +1,791 @@ +import json +from unittest.mock import MagicMock, patch + +from common_test.test_constants import ( + DEFAULT_LICENSE_EXPIRATION_DATE, + 
DEFAULT_LICENSE_ISSUANCE_DATE, + DEFAULT_LICENSE_RENEWAL_DATE, + DEFAULT_LICENSE_UPDATE_DATE_OF_UPDATE, + DEFAULT_PROVIDER_UPDATE_DATETIME, + DEFAULT_REGISTERED_EMAIL_ADDRESS, +) +from moto import mock_aws + +from . import TstFunction + +MOCK_ASLP_PROVIDER_ID = '00000000-0000-0000-0000-000000000001' +MOCK_OCTP_PROVIDER_ID = '00000000-0000-0000-0000-000000000002' + +TEST_LICENSE_TYPE_MAPPING = { + 'aslp': 'audiologist', + 'octp': 'occupational therapist', +} +TEST_PROVIDER_ID_MAPPING = { + 'aslp': MOCK_ASLP_PROVIDER_ID, + 'octp': MOCK_OCTP_PROVIDER_ID, +} + + +@mock_aws +class TestProviderUpdateIngest(TstFunction): + """Test suite for provider update ingest handler.""" + + def setUp(self): + super().setUp() + + def _put_test_provider_and_license_record_in_dynamodb_table(self, compact: str, provider_id: str = None): + """Helper to create test provider and license records in DynamoDB.""" + if provider_id is None: + provider_id = TEST_PROVIDER_ID_MAPPING[compact] + + self.test_data_generator.put_default_provider_record_in_provider_table( + value_overrides={ + 'compact': compact, + 'providerId': provider_id, + 'givenName': f'test{compact}GivenName', + 'familyName': f'test{compact}FamilyName', + }, + date_of_update_override=DEFAULT_PROVIDER_UPDATE_DATETIME, + ) + self.test_data_generator.put_default_license_record_in_provider_table( + value_overrides={ + 'compact': compact, + 'providerId': provider_id, + 'givenName': f'test{compact}GivenName', + 'familyName': f'test{compact}FamilyName', + 'licenseType': TEST_LICENSE_TYPE_MAPPING[compact], + }, + date_of_update_override=DEFAULT_LICENSE_UPDATE_DATE_OF_UPDATE, + ) + + def _create_dynamodb_stream_record( + self, + compact: str, + provider_id: str, + sequence_number: str, + event_name: str = 'MODIFY', + include_old_image: bool = True, + ) -> dict: + """ + Create a DynamoDB stream record in the format received by Lambda. + + DynamoDB stream records contain the image data in a specific format where + each attribute is wrapped with its type indicator (e.g., {'S': 'value'} for strings). 
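+        In these tests, each stream record is additionally serialized with json.dumps and placed
+        in an SQS message 'body', mirroring how the ingest handler receives stream events wrapped
+        in SQS messages.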
+ + :param compact: The compact abbreviation + :param provider_id: The provider ID + :param sequence_number: The stream sequence number + :param event_name: The event type (INSERT, MODIFY, REMOVE) + :param include_old_image: Whether to include OldImage (False for INSERT events) + """ + image_data = { + 'pk': {'S': f'{compact}#PROVIDER#{provider_id}'}, + 'sk': {'S': f'{compact}#PROVIDER'}, + 'compact': {'S': compact}, + 'providerId': {'S': provider_id}, + 'type': {'S': 'provider'}, + 'givenName': {'S': f'test{compact}GivenName'}, + 'familyName': {'S': f'test{compact}FamilyName'}, + } + + dynamodb_data = { + 'ApproximateCreationDateTime': 1234567890, + 'Keys': { + 'pk': {'S': f'{compact}#PROVIDER#{provider_id}'}, + 'sk': {'S': f'{compact}#PROVIDER'}, + }, + 'NewImage': image_data, + 'SequenceNumber': sequence_number, + 'SizeBytes': 256, + 'StreamViewType': 'NEW_AND_OLD_IMAGES', + } + + # Include OldImage only if requested (MODIFY events have both, INSERT events only have NewImage) + if include_old_image: + dynamodb_data['OldImage'] = image_data + + return { + 'eventID': f'event-{sequence_number}', + 'eventName': event_name, + 'eventVersion': '1.1', + 'eventSource': 'aws:dynamodb', + 'awsRegion': 'us-east-1', + 'dynamodb': dynamodb_data, + 'eventSourceARN': 'arn:aws:dynamodb:us-east-1:123456789012:table/provider-table/stream/1234', + } + + def _when_testing_mock_opensearch_client(self, mock_opensearch_client, bulk_index_response: dict = None): + """Helper to configure the mock OpenSearch client.""" + if not bulk_index_response: + bulk_index_response = {'items': [], 'errors': False} + + # mock_opensearch_client is the patched instance, not the class + mock_opensearch_client.bulk_index.return_value = bulk_index_response + return mock_opensearch_client + + def _generate_expected_document(self, compact: str, provider_id: str = None) -> dict: + """Generate the expected document that should be indexed into OpenSearch.""" + if provider_id is None: + provider_id = TEST_PROVIDER_ID_MAPPING[compact] + + return { + 'providerId': provider_id, + 'type': 'provider', + 'dateOfUpdate': DEFAULT_PROVIDER_UPDATE_DATETIME, + 'compact': compact, + 'licenseJurisdiction': 'oh', + 'currentHomeJurisdiction': 'oh', + 'licenseStatus': 'inactive', + 'compactEligibility': 'ineligible', + 'npi': '0608337260', + 'givenName': f'test{compact}GivenName', + 'middleName': 'Gunnar', + 'familyName': f'test{compact}FamilyName', + 'dateOfExpiration': DEFAULT_LICENSE_EXPIRATION_DATE, + 'compactConnectRegisteredEmailAddress': DEFAULT_REGISTERED_EMAIL_ADDRESS, + 'jurisdictionUploadedLicenseStatus': 'active', + 'jurisdictionUploadedCompactEligibility': 'eligible', + 'privilegeJurisdictions': ['ne'], + 'birthMonthDay': '06-06', + 'licenses': [ + { + 'providerId': provider_id, + 'type': 'license', + 'dateOfUpdate': DEFAULT_LICENSE_UPDATE_DATE_OF_UPDATE, + 'compact': compact, + 'jurisdiction': 'oh', + 'licenseType': TEST_LICENSE_TYPE_MAPPING[compact], + 'licenseStatusName': 'DEFINITELY_A_HUMAN', + 'licenseStatus': 'inactive', + 'jurisdictionUploadedLicenseStatus': 'active', + 'compactEligibility': 'ineligible', + 'jurisdictionUploadedCompactEligibility': 'eligible', + 'npi': '0608337260', + 'licenseNumber': 'A0608337260', + 'givenName': f'test{compact}GivenName', + 'middleName': 'Gunnar', + 'familyName': f'test{compact}FamilyName', + 'dateOfIssuance': DEFAULT_LICENSE_ISSUANCE_DATE, + 'dateOfRenewal': DEFAULT_LICENSE_RENEWAL_DATE, + 'dateOfExpiration': DEFAULT_LICENSE_EXPIRATION_DATE, + 'homeAddressStreet1': '123 A St.', + 
'homeAddressStreet2': 'Apt 321', + 'homeAddressCity': 'Columbus', + 'homeAddressState': 'oh', + 'homeAddressPostalCode': '43004', + 'emailAddress': 'björk@example.com', + 'phoneNumber': '+13213214321', + 'adverseActions': [], + 'investigations': [], + } + ], + 'privileges': [], + 'militaryAffiliations': [], + } + + @patch('handlers.provider_update_ingest.opensearch_client') + def test_opensearch_client_called_with_expected_parameters(self, mock_opensearch_client): + """Test that OpenSearch client is called with expected parameters when indexing a record.""" + from handlers.provider_update_ingest import provider_update_ingest_handler + + # Set up mock OpenSearch client + self._when_testing_mock_opensearch_client(mock_opensearch_client) + + # Create provider and license records in DynamoDB + self._put_test_provider_and_license_record_in_dynamodb_table('aslp') + + # Create an SQS event with DynamoDB stream record in the body + event = { + 'Records': [ + { + 'messageId': '12345', + 'body': json.dumps( + self._create_dynamodb_stream_record( + compact='aslp', + provider_id=MOCK_ASLP_PROVIDER_ID, + sequence_number='some-sequence-number', + ) + ), + } + ] + } + + # Run the handler + mock_context = MagicMock() + result = provider_update_ingest_handler(event, mock_context) + + # Assert that bulk_index was called once with expected parameters + self.assertEqual(1, mock_opensearch_client.bulk_index.call_count) + + # Verify the call arguments + call_args = mock_opensearch_client.bulk_index.call_args + self.assertEqual('compact_aslp_providers', call_args.kwargs['index_name']) + self.assertEqual([self._generate_expected_document('aslp')], call_args.kwargs['documents']) + + # Verify no batch item failures + self.assertEqual({'batchItemFailures': []}, result) + + @patch('handlers.provider_update_ingest.opensearch_client') + def test_provider_ids_are_deduped_only_one_document_indexed(self, mock_opensearch_client): + """Test that duplicate provider IDs in the batch are deduplicated.""" + from handlers.provider_update_ingest import provider_update_ingest_handler + + # Set up mock OpenSearch client + self._when_testing_mock_opensearch_client(mock_opensearch_client) + + # Create provider and license records in DynamoDB + self._put_test_provider_and_license_record_in_dynamodb_table('aslp') + + # Create multiple SQS records for the SAME provider (simulating multiple updates) + event = { + 'Records': [ + { + 'messageId': '12345', + 'body': json.dumps( + self._create_dynamodb_stream_record( + compact='aslp', + provider_id=MOCK_ASLP_PROVIDER_ID, + sequence_number='some-sequence-number-1', + event_name='INSERT', + ) + ), + }, + { + 'messageId': '12346', + 'body': json.dumps( + self._create_dynamodb_stream_record( + compact='aslp', + provider_id=MOCK_ASLP_PROVIDER_ID, + sequence_number='some-sequence-number-2', + event_name='MODIFY', + ) + ), + }, + { + 'messageId': '12347', + 'body': json.dumps( + self._create_dynamodb_stream_record( + compact='aslp', + provider_id=MOCK_ASLP_PROVIDER_ID, + sequence_number='some-sequence-number-3', + event_name='MODIFY', + ) + ), + }, + ] + } + + # Run the handler + mock_context = MagicMock() + result = provider_update_ingest_handler(event, mock_context) + + # Assert that bulk_index was called only once despite 3 records + self.assertEqual(1, mock_opensearch_client.bulk_index.call_count) + + # Verify only ONE document was indexed (deduplication worked) + call_args = mock_opensearch_client.bulk_index.call_args + self.assertEqual(1, len(call_args.kwargs['documents'])) + 
self.assertEqual(MOCK_ASLP_PROVIDER_ID, call_args.kwargs['documents'][0]['providerId']) + + # Verify no batch item failures + self.assertEqual({'batchItemFailures': []}, result) + + @patch('handlers.provider_update_ingest.opensearch_client') + def test_validation_failure_returns_batch_item_failure(self, mock_opensearch_client): + """Test that a record that fails validation is returned in batchItemFailures.""" + from handlers.provider_update_ingest import provider_update_ingest_handler + + # Set up mock OpenSearch client + self._when_testing_mock_opensearch_client(mock_opensearch_client) + + provider = self.test_data_generator.generate_default_provider( + value_overrides={ + 'compact': 'aslp', + 'providerId': MOCK_ASLP_PROVIDER_ID, + 'givenName': 'testGivenName', + 'familyName': 'testFamilyName', + } + ) + serialized_provider = provider.serialize_to_database_record() + # put invalid compact to fail validation + serialized_provider['compact'] = 'foo' + self.config.provider_table.put_item(Item=serialized_provider) + + # Create SQS event with DynamoDB stream record in the body + event = { + 'Records': [ + { + 'messageId': '12345', + 'body': json.dumps( + self._create_dynamodb_stream_record( + compact='aslp', + provider_id=MOCK_ASLP_PROVIDER_ID, + sequence_number='some-sequence-number', + ) + ), + } + ] + } + + # Run the handler + mock_context = MagicMock() + result = provider_update_ingest_handler(event, mock_context) + + # Verify that the batch item failure is returned with the message ID + self.assertEqual(1, len(result['batchItemFailures'])) + self.assertEqual('12345', result['batchItemFailures'][0]['itemIdentifier']) + + @patch('handlers.provider_update_ingest.opensearch_client') + def test_opensearch_indexing_failure_returns_batch_item_failure(self, mock_opensearch_client): + """Test that a record which fails to be indexed by OpenSearch is in batchItemFailures.""" + from handlers.provider_update_ingest import provider_update_ingest_handler + + # Simulate OpenSearch returning an error for one document + mock_opensearch_client.bulk_index.return_value = { + 'errors': True, + 'items': [ + { + 'index': { + '_id': MOCK_ASLP_PROVIDER_ID, + '_index': 'compact_aslp_providers', + 'status': 201, + 'result': 'created', + } + }, + { + 'index': { + '_id': MOCK_OCTP_PROVIDER_ID, + '_index': 'compact_octp_providers', + 'status': 400, + 'error': { + 'type': 'mapper_parsing_exception', + 'reason': 'failed to parse field', + }, + } + }, + ], + } + + # Create provider and license records in DynamoDB for both compacts + self._put_test_provider_and_license_record_in_dynamodb_table('aslp') + self._put_test_provider_and_license_record_in_dynamodb_table('octp') + + # Create SQS events with DynamoDB stream records in the body for both providers + event = { + 'Records': [ + { + 'messageId': '12345', + 'body': json.dumps( + self._create_dynamodb_stream_record( + compact='aslp', + provider_id=MOCK_ASLP_PROVIDER_ID, + sequence_number='some-sequence-number-1', + ) + ), + }, + { + 'messageId': '12346', + 'body': json.dumps( + self._create_dynamodb_stream_record( + compact='octp', + provider_id=MOCK_OCTP_PROVIDER_ID, + sequence_number='some-sequence-number-2', + ) + ), + }, + ] + } + + # Run the handler + mock_context = MagicMock() + result = provider_update_ingest_handler(event, mock_context) + + # Verify that only the failed document's message ID is in batchItemFailures + self.assertEqual(1, len(result['batchItemFailures'])) + self.assertEqual('12346', result['batchItemFailures'][0]['itemIdentifier']) + + 
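+    # Note (illustrative): the handler under test follows the Lambda/SQS partial batch response
+    # contract exercised throughout this suite. When every record is processed it returns
+    #     {'batchItemFailures': []}
+    # and when specific records fail it returns only their SQS message IDs, e.g.
+    #     {'batchItemFailures': [{'itemIdentifier': '12346'}]}
+    # so that SQS re-delivers just the failed messages.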
@patch('handlers.provider_update_ingest.opensearch_client') + def test_bulk_index_exception_returns_all_batch_item_failures(self, mock_opensearch_client): + """Test that when bulk_index raises an exception, all providers are marked as failed.""" + from cc_common.exceptions import CCInternalException + from handlers.provider_update_ingest import provider_update_ingest_handler + + # Set up mock OpenSearch client to raise an exception + mock_opensearch_client.bulk_index.side_effect = CCInternalException('Connection timeout after 5 retries') + + # Create provider and license records in DynamoDB for both compacts + self._put_test_provider_and_license_record_in_dynamodb_table('aslp') + self._put_test_provider_and_license_record_in_dynamodb_table('octp') + + # Create SQS events with DynamoDB stream records in the body for both providers + event = { + 'Records': [ + { + 'messageId': '12345', + 'body': json.dumps( + self._create_dynamodb_stream_record( + compact='aslp', + provider_id=MOCK_ASLP_PROVIDER_ID, + sequence_number='some-sequence-number-1', + ) + ), + }, + { + 'messageId': '12346', + 'body': json.dumps( + self._create_dynamodb_stream_record( + compact='octp', + provider_id=MOCK_OCTP_PROVIDER_ID, + sequence_number='some-sequence-number-2', + ) + ), + }, + ] + } + + # Run the handler + mock_context = MagicMock() + result = provider_update_ingest_handler(event, mock_context) + + # Verify that both records were returned in batch failures + self.assertEqual(2, len(result['batchItemFailures'])) + self.assertEqual('12345', result['batchItemFailures'][0]['itemIdentifier']) + self.assertEqual('12346', result['batchItemFailures'][1]['itemIdentifier']) + + @patch('handlers.provider_update_ingest.opensearch_client') + def test_multiple_compacts_indexed_separately(self, mock_opensearch_client): + """Test that providers from different compacts are indexed in their respective indices.""" + from handlers.provider_update_ingest import provider_update_ingest_handler + + # Set up mock OpenSearch client + self._when_testing_mock_opensearch_client(mock_opensearch_client) + + # Create provider and license records for two different compacts + self._put_test_provider_and_license_record_in_dynamodb_table('aslp') + self._put_test_provider_and_license_record_in_dynamodb_table('octp') + + # Create SQS events with DynamoDB stream records in the body for both compacts + event = { + 'Records': [ + { + 'messageId': '12345', + 'body': json.dumps( + self._create_dynamodb_stream_record( + compact='aslp', + provider_id=MOCK_ASLP_PROVIDER_ID, + sequence_number='some-sequence-number-1', + ) + ), + }, + { + 'messageId': '12346', + 'body': json.dumps( + self._create_dynamodb_stream_record( + compact='octp', + provider_id=MOCK_OCTP_PROVIDER_ID, + sequence_number='some-sequence-number-2', + ) + ), + }, + ] + } + + # Run the handler + mock_context = MagicMock() + result = provider_update_ingest_handler(event, mock_context) + + # Assert that bulk_index was called for each compact that had providers + # Note: The handler iterates over all compacts, but only calls bulk_index if there are documents + call_args_list = mock_opensearch_client.bulk_index.call_args_list + + # Find the calls for aslp and octp + aslp_calls = [c for c in call_args_list if c.kwargs['index_name'] == 'compact_aslp_providers'] + octp_calls = [c for c in call_args_list if c.kwargs['index_name'] == 'compact_octp_providers'] + + self.assertEqual(1, len(aslp_calls)) + self.assertEqual(1, len(octp_calls)) + + # Verify each call has the correct document + 
self.assertEqual([self._generate_expected_document('aslp')], aslp_calls[0].kwargs['documents']) + self.assertEqual([self._generate_expected_document('octp')], octp_calls[0].kwargs['documents']) + + # Verify no batch item failures + self.assertEqual({'batchItemFailures': []}, result) + + @patch('handlers.provider_update_ingest.opensearch_client') + def test_empty_records_returns_empty_batch_failures(self, mock_opensearch_client): + """Test that an empty Records list returns empty batchItemFailures.""" + from handlers.provider_update_ingest import provider_update_ingest_handler + + event = {'Records': []} + + mock_context = MagicMock() + result = provider_update_ingest_handler(event, mock_context) + + # Verify empty response + self.assertEqual({'batchItemFailures': []}, result) + + # Verify OpenSearch client was never called + mock_opensearch_client.bulk_index.assert_not_called() + + @patch('handlers.provider_update_ingest.opensearch_client') + def test_insert_event_without_old_image_indexes_successfully(self, mock_opensearch_client): + """Test that INSERT events (newly created records) without OldImage are processed correctly. + + When a new record is created in DynamoDB, the stream event contains only NewImage + and no OldImage. The handler should extract the compact and providerId from NewImage + and successfully index the document. + """ + from handlers.provider_update_ingest import provider_update_ingest_handler + + # Set up mock OpenSearch client + self._when_testing_mock_opensearch_client(mock_opensearch_client) + + # Create provider and license records in DynamoDB + self._put_test_provider_and_license_record_in_dynamodb_table('aslp') + + # Create an SQS event with DynamoDB stream record in the body for INSERT (no OldImage) + event = { + 'Records': [ + { + 'messageId': '12345', + 'body': json.dumps( + self._create_dynamodb_stream_record( + compact='aslp', + provider_id=MOCK_ASLP_PROVIDER_ID, + sequence_number='some-sequence-number', + event_name='INSERT', + include_old_image=False, # INSERT events don't have OldImage + ) + ), + } + ] + } + + # Run the handler + mock_context = MagicMock() + result = provider_update_ingest_handler(event, mock_context) + + # Assert that bulk_index was called with the correct parameters + self.assertEqual(1, mock_opensearch_client.bulk_index.call_count) + + # Verify the call arguments + call_args = mock_opensearch_client.bulk_index.call_args + self.assertEqual('compact_aslp_providers', call_args.kwargs['index_name']) + self.assertEqual([self._generate_expected_document('aslp')], call_args.kwargs['documents']) + + # Verify no batch item failures for INSERT event + self.assertEqual({'batchItemFailures': []}, result) + + def _create_dynamodb_stream_record_with_old_image_only( + self, compact: str, provider_id: str, sequence_number: str + ) -> dict: + """Create a DynamoDB stream record for REMOVE events (only OldImage, no NewImage).""" + image_data = { + 'pk': {'S': f'{compact}#PROVIDER#{provider_id}'}, + 'sk': {'S': f'{compact}#PROVIDER'}, + 'compact': {'S': compact}, + 'providerId': {'S': provider_id}, + 'type': {'S': 'provider'}, + 'givenName': {'S': f'test{compact}GivenName'}, + 'familyName': {'S': f'test{compact}FamilyName'}, + } + + return { + 'eventID': f'event-{sequence_number}', + 'eventName': 'REMOVE', + 'eventVersion': '1.1', + 'eventSource': 'aws:dynamodb', + 'awsRegion': 'us-east-1', + 'dynamodb': { + 'ApproximateCreationDateTime': 1234567890, + 'Keys': { + 'pk': {'S': f'{compact}#PROVIDER#{provider_id}'}, + 'sk': {'S': f'{compact}#PROVIDER'}, + 
}, + 'OldImage': image_data, # REMOVE events only have OldImage + 'SequenceNumber': sequence_number, + 'SizeBytes': 256, + 'StreamViewType': 'NEW_AND_OLD_IMAGES', + }, + 'eventSourceARN': 'arn:aws:dynamodb:us-east-1:123456789012:table/provider-table/stream/1234', + } + + @patch('handlers.provider_update_ingest.opensearch_client') + def test_remove_event_with_only_old_image_indexes_successfully(self, mock_opensearch_client): + """Test that REMOVE events (deleted records) with only OldImage are processed correctly. + + When a record is deleted from DynamoDB, the stream event contains only OldImage + and no NewImage. The handler should extract the compact and providerId from OldImage + and still index/update the document (to reflect the latest state of the provider). + """ + from handlers.provider_update_ingest import provider_update_ingest_handler + + # Set up mock OpenSearch client + self._when_testing_mock_opensearch_client(mock_opensearch_client) + + # Create provider and license records in DynamoDB + self._put_test_provider_and_license_record_in_dynamodb_table('aslp') + + # Create an SQS event with DynamoDB stream record in the body for REMOVE (only OldImage, no NewImage) + event = { + 'Records': [ + { + 'messageId': '12345', + 'body': json.dumps( + self._create_dynamodb_stream_record_with_old_image_only( + compact='aslp', + provider_id=MOCK_ASLP_PROVIDER_ID, + sequence_number='some-sequence-number', + ) + ), + } + ] + } + + # Run the handler + mock_context = MagicMock() + result = provider_update_ingest_handler(event, mock_context) + + # Assert that bulk_index was called with the correct parameters + self.assertEqual(1, mock_opensearch_client.bulk_index.call_count) + + # Verify the call arguments + call_args = mock_opensearch_client.bulk_index.call_args + self.assertEqual('compact_aslp_providers', call_args.kwargs['index_name']) + self.assertEqual([self._generate_expected_document('aslp')], call_args.kwargs['documents']) + + # Verify no batch item failures for REMOVE event + self.assertEqual({'batchItemFailures': []}, result) + + @patch('handlers.provider_update_ingest.opensearch_client') + def test_provider_deleted_from_index_when_no_records_found(self, mock_opensearch_client): + """Test that when no provider records are found (CCNotFoundException), bulk_delete is called. + + This scenario occurs when a provider is completely removed from the system, + such as during a license upload rollback. The handler should call bulk_delete + to remove the provider document from the OpenSearch index. 
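+        The assertions below verify the expected call shape:
+        bulk_delete(index_name='compact_aslp_providers', document_ids=[provider_id]).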
+ """ + from handlers.provider_update_ingest import provider_update_ingest_handler + + # Set up mock OpenSearch client + mock_opensearch_client.bulk_index.return_value = {'items': [], 'errors': False} + mock_opensearch_client.bulk_delete.return_value = set() # bulk_delete returns a set of failed IDs + + # Do NOT create any provider records in DynamoDB - this simulates the provider being deleted + + # Create an SQS event with DynamoDB stream record in the body for a provider that no longer exists + event = { + 'Records': [ + { + 'messageId': '12345', + 'body': json.dumps( + self._create_dynamodb_stream_record( + compact='aslp', + provider_id=MOCK_ASLP_PROVIDER_ID, + sequence_number='some-sequence-number', + event_name='REMOVE', + include_old_image=False, + ) + ), + } + ] + } + + # Run the handler + mock_context = MagicMock() + result = provider_update_ingest_handler(event, mock_context) + + # Assert that bulk_index was NOT called (no documents to index) + mock_opensearch_client.bulk_index.assert_not_called() + + # Assert that bulk_delete WAS called with the correct parameters + self.assertEqual(1, mock_opensearch_client.bulk_delete.call_count) + call_args = mock_opensearch_client.bulk_delete.call_args + self.assertEqual('compact_aslp_providers', call_args.kwargs['index_name']) + self.assertEqual([MOCK_ASLP_PROVIDER_ID], call_args.kwargs['document_ids']) + + # Verify no batch item failures (deletion is expected behavior, not a failure) + self.assertEqual({'batchItemFailures': []}, result) + + @patch('handlers.provider_update_ingest.opensearch_client') + def test_bulk_delete_failure_returns_batch_item_failure(self, mock_opensearch_client): + """Test that when bulk_delete fails, the provider is returned in batchItemFailures.""" + from cc_common.exceptions import CCInternalException + from handlers.provider_update_ingest import provider_update_ingest_handler + + # Set up mock OpenSearch client - bulk_delete raises exception + mock_opensearch_client.bulk_delete.side_effect = CCInternalException('Connection timeout after 5 retries') + + # Do NOT create any provider records in DynamoDB - this simulates the provider being deleted + + # Create an SQS event with DynamoDB stream record in the body for a provider that no longer exists + event = { + 'Records': [ + { + 'messageId': '12345', + 'body': json.dumps( + self._create_dynamodb_stream_record( + compact='aslp', + provider_id=MOCK_ASLP_PROVIDER_ID, + sequence_number='some-sequence-number', + event_name='REMOVE', + include_old_image=False, + ) + ), + } + ] + } + + # Run the handler + mock_context = MagicMock() + result = provider_update_ingest_handler(event, mock_context) + + # Verify that the batch item failure is returned with the message ID + self.assertEqual(1, len(result['batchItemFailures'])) + self.assertEqual('12345', result['batchItemFailures'][0]['itemIdentifier']) + + @patch('handlers.provider_update_ingest.opensearch_client') + def test_bulk_delete_404_not_found_does_not_return_batch_item_failure(self, mock_opensearch_client): + """Test that when bulk_delete returns 404 (document not found), it is NOT treated as a failure. + + This scenario occurs when a provider document has already been deleted from OpenSearch + (e.g., a previous delete succeeded, or the document never existed in the index). + The 404 response should be ignored since the desired end state (document not in index) + has been achieved. 
+ """ + from handlers.provider_update_ingest import provider_update_ingest_handler + + # Simulate OpenSearch bulk delete response when document doesn't exist + # bulk_delete returns a set of failed document IDs, empty set means no failures (404 is ignored) + mock_opensearch_client.bulk_delete.return_value = set() + + # Do NOT create any provider records in DynamoDB - this simulates the provider being deleted + + # Create a DynamoDB stream event for a provider that no longer exists + event = { + 'Records': [ + { + 'messageId': '12345', + 'body': json.dumps( + self._create_dynamodb_stream_record( + compact='aslp', + provider_id=MOCK_ASLP_PROVIDER_ID, + sequence_number='some-sequence-number', + event_name='REMOVE', + include_old_image=False, + ) + ), + } + ] + } + + # Run the handler + mock_context = MagicMock() + result = provider_update_ingest_handler(event, mock_context) + + # Assert that bulk_delete was called + self.assertEqual(1, mock_opensearch_client.bulk_delete.call_count) + + # Verify NO batch item failures - 404 is not treated as an error + self.assertEqual({'batchItemFailures': []}, result) diff --git a/backend/compact-connect/lambdas/python/search/tests/function/test_search_privileges.py b/backend/compact-connect/lambdas/python/search/tests/function/test_search_privileges.py new file mode 100644 index 000000000..3ada04c72 --- /dev/null +++ b/backend/compact-connect/lambdas/python/search/tests/function/test_search_privileges.py @@ -0,0 +1,833 @@ +import json +from unittest.mock import patch + +from moto import mock_aws + +from . import TstFunction + + +@mock_aws +class TestExportPrivileges(TstFunction): + """Test suite for search_api_handler - privilege export functionality.""" + + def setUp(self): + super().setUp() + + def _create_api_event( + self, + compact: str, + body: dict = None, + resource_override: str = None, + scopes_override: str = None, + ) -> dict: + """Create a standard API Gateway event for export_privileges.""" + return { + 'resource': '/v1/compacts/{compact}/privileges/export' if not resource_override else resource_override, + 'path': f'/v1/compacts/{compact}/privileges/export', + 'httpMethod': 'POST', + 'headers': { + 'accept': 'application/json', + 'content-type': 'application/json', + 'Content-Type': 'application/json', + 'origin': 'https://example.org', + 'Host': 'api.test.example.com', + }, + 'multiValueHeaders': {}, + 'queryStringParameters': None, + 'pathParameters': {'compact': compact}, + 'requestContext': { + 'resourcePath': '/v1/compacts/{compact}/privileges/export', + 'httpMethod': 'POST', + 'authorizer': { + 'claims': { + 'sub': 'test-user-id', + 'cognito:username': 'test-user', + 'scope': f'openid email {compact}/readGeneral' if not scopes_override else scopes_override, + } + }, + }, + 'body': json.dumps(body) if body else None, + 'isBase64Encoded': False, + } + + def _when_testing_mock_opensearch_client(self, mock_opensearch_client, search_response: dict = None): + """ + Configure the mock OpenSearchClient for testing. 
+ + :param mock_opensearch_client: The patched opensearch_client instance + :param search_response: The response to return from the search method + :return: The mock client instance + """ + if not search_response: + search_response = { + 'hits': { + 'total': {'value': 0, 'relation': 'eq'}, + 'hits': [], + } + } + + # mock_opensearch_client is the patched instance, not the class + mock_opensearch_client.search.return_value = search_response + return mock_opensearch_client + + def _create_mock_provider_hit_with_privileges( + self, + provider_id: str = '00000000-0000-0000-0000-000000000001', + compact: str = 'aslp', + sort_values: list = None, + ) -> dict: + """Create a mock OpenSearch hit for a provider document with privileges and licenses.""" + hit = { + '_index': f'compact_{compact}_providers', + '_id': provider_id, + '_score': 1.0, + '_source': { + 'providerId': provider_id, + 'type': 'provider', + 'dateOfUpdate': '2024-01-15T10:30:00+00:00', + 'compact': compact, + 'licenseJurisdiction': 'oh', + 'licenseStatus': 'active', + 'compactEligibility': 'eligible', + 'givenName': 'John', + 'familyName': 'Doe', + 'dateOfExpiration': '2025-12-31', + 'jurisdictionUploadedLicenseStatus': 'active', + 'jurisdictionUploadedCompactEligibility': 'eligible', + 'birthMonthDay': '06-15', + 'licenses': [ + { + 'providerId': provider_id, + 'type': 'license-home', + 'dateOfUpdate': '2024-01-15T10:30:00+00:00', + 'compact': compact, + 'jurisdiction': 'oh', + 'licenseType': 'audiologist', + 'licenseStatus': 'active', + 'compactEligibility': 'eligible', + 'jurisdictionUploadedLicenseStatus': 'active', + 'jurisdictionUploadedCompactEligibility': 'eligible', + 'givenName': 'John', + 'familyName': 'Doe', + 'dateOfIssuance': '2020-01-01', + 'dateOfRenewal': '2024-01-01', + 'dateOfExpiration': '2025-12-31', + 'npi': '1234567890', + 'licenseNumber': 'AUD-12345', + } + ], + 'privileges': [ + { + 'type': 'privilege', + 'providerId': provider_id, + 'compact': compact, + 'jurisdiction': 'ky', + 'licenseJurisdiction': 'oh', + 'licenseType': 'audiologist', + 'dateOfIssuance': '2024-01-15', + 'dateOfRenewal': '2024-01-15', + 'dateOfExpiration': '2025-01-15', + 'dateOfUpdate': '2024-01-15T10:30:00+00:00', + 'administratorSetStatus': 'active', + 'privilegeId': 'PRIV-001', + 'status': 'active', + } + ], + }, + } + if sort_values: + hit['sort'] = sort_values + return hit + + @patch('handlers.search.opensearch_client') + def test_privilege_export_returns_presigned_url(self, mock_opensearch_client): + """Test that privilege export returns a presigned URL to a CSV file.""" + from handlers.search import search_api_handler + + # Create a mock response with provider hits containing privileges + mock_hit = self._create_mock_provider_hit_with_privileges() + search_response = { + 'hits': { + 'total': {'value': 1, 'relation': 'eq'}, + 'hits': [mock_hit], + } + } + self._when_testing_mock_opensearch_client(mock_opensearch_client, search_response=search_response) + + event = self._create_api_event('aslp', body={'query': {'match_all': {}}}) + + response = search_api_handler(event, self.mock_context) + + self.assertEqual(200, response['statusCode']) + body = json.loads(response['body']) + + # Verify response contains fileUrl + self.assertIn('fileUrl', body) + self.assertIsInstance(body['fileUrl'], str) + # Verify the URL contains expected parts + self.assertIn('test-export-results-bucket', body['fileUrl']) + self.assertIn('compact/aslp/privilegeSearch', body['fileUrl']) + self.assertIn('test-user-id', body['fileUrl']) # caller user id from 
event + self.assertIn('export.csv', body['fileUrl']) + + # Verify the CSV file was uploaded to S3 by checking the bucket + import boto3 + + s3_client = boto3.client('s3') + response = s3_client.list_objects_v2( + Bucket='test-export-results-bucket', Prefix='compact/aslp/privilegeSearch/caller/test-user-id' + ) + self.assertEqual(1, response['KeyCount']) + + # Get the CSV content and verify it contains the expected data + key = response['Contents'][0]['Key'] + csv_obj = s3_client.get_object(Bucket='test-export-results-bucket', Key=key) + csv_content = csv_obj['Body'].read().decode('utf-8') + + # Verify CSV contains header and data + self.assertIn('type,providerId,compact,jurisdiction', csv_content) + self.assertIn('statePrivilege', csv_content) + self.assertIn('00000000-0000-0000-0000-000000000001', csv_content) + self.assertIn('PRIV-001', csv_content) + + @patch('handlers.search.opensearch_client') + def test_privilege_export_with_empty_results_returns_404(self, mock_opensearch_client): + """Test that privilege export with no results returns a 404 error.""" + from handlers.search import search_api_handler + + search_response = { + 'hits': { + 'total': {'value': 0, 'relation': 'eq'}, + 'hits': [], + } + } + self._when_testing_mock_opensearch_client(mock_opensearch_client, search_response=search_response) + + event = self._create_api_event('aslp', body={'query': {'match_all': {}}}) + + response = search_api_handler(event, self.mock_context) + + self.assertEqual(404, response['statusCode']) + body = json.loads(response['body']) + + # Verify response contains error message + self.assertIn('message', body) + self.assertEqual('The search parameters did not match any privileges.', body['message']) + + # Verify no CSV file was uploaded to S3 + import boto3 + + s3_client = boto3.client('s3') + response = s3_client.list_objects_v2( + Bucket='test-export-results-bucket', Prefix='compact/aslp/privilegeSearch/caller/test-user-id' + ) + # Should have no objects + self.assertEqual(0, response.get('KeyCount', 0)) + + @patch('handlers.search.opensearch_client') + def test_privilege_export_skips_provider_without_privileges_returns_404(self, mock_opensearch_client): + """Test that providers without privileges result in a 404 error.""" + from handlers.search import search_api_handler + + # Create a provider hit without privileges + hit = { + '_index': 'compact_aslp_providers', + '_id': 'provider-1', + '_score': 1.0, + '_source': { + 'providerId': 'provider-1', + 'type': 'provider', + 'dateOfUpdate': '2024-01-15T10:30:00+00:00', + 'compact': 'aslp', + 'licenseJurisdiction': 'oh', + 'licenseStatus': 'active', + 'compactEligibility': 'eligible', + 'givenName': 'Jane', + 'familyName': 'Smith', + 'dateOfExpiration': '2025-12-31', + 'jurisdictionUploadedLicenseStatus': 'active', + 'jurisdictionUploadedCompactEligibility': 'eligible', + 'birthMonthDay': '03-20', + 'licenses': [], + 'privileges': [], + }, + } + search_response = { + 'hits': { + 'total': {'value': 1, 'relation': 'eq'}, + 'hits': [hit], + } + } + self._when_testing_mock_opensearch_client(mock_opensearch_client, search_response=search_response) + + event = self._create_api_event('aslp', body={'query': {'match_all': {}}}) + + response = search_api_handler(event, self.mock_context) + + self.assertEqual(404, response['statusCode']) + body = json.loads(response['body']) + + # Verify response contains error message + self.assertEqual('The search parameters did not match any privileges.', body['message']) + + # Verify no CSV file was uploaded to S3 + import 
boto3 + + s3_client = boto3.client('s3') + response = s3_client.list_objects_v2( + Bucket='test-export-results-bucket', Prefix='compact/aslp/privilegeSearch/caller/test-user-id' + ) + # Should have no objects + self.assertEqual(0, response.get('KeyCount', 0)) + + @patch('handlers.search.opensearch_client') + def test_privilege_export_with_multiple_inner_hits_exports_all_matched(self, mock_opensearch_client): + """Test that when inner_hits contains multiple matches, all are exported to CSV. + see https://docs.opensearch.org/latest/search-plugins/searching-data/inner-hits/ for more information + about inner_hits. + """ + from handlers.search import search_api_handler + + provider_id = '00000000-0000-0000-0000-000000000001' + compact = 'aslp' + + # Create a provider with multiple privileges, inner_hits matches two of them + hit = { + '_index': f'compact_{compact}_providers', + '_id': provider_id, + '_score': 1.0, + '_source': { + 'providerId': provider_id, + 'type': 'provider', + 'dateOfUpdate': '2024-01-15T10:30:00+00:00', + 'compact': compact, + 'licenseJurisdiction': 'oh', + 'licenseStatus': 'active', + 'compactEligibility': 'eligible', + 'givenName': 'John', + 'familyName': 'Doe', + 'dateOfExpiration': '2025-12-31', + 'jurisdictionUploadedLicenseStatus': 'active', + 'jurisdictionUploadedCompactEligibility': 'eligible', + 'birthMonthDay': '06-15', + 'licenses': [ + { + 'providerId': provider_id, + 'type': 'license-home', + 'dateOfUpdate': '2024-01-15T10:30:00+00:00', + 'compact': compact, + 'jurisdiction': 'oh', + 'licenseType': 'audiologist', + 'licenseStatus': 'active', + 'compactEligibility': 'eligible', + 'jurisdictionUploadedLicenseStatus': 'active', + 'jurisdictionUploadedCompactEligibility': 'eligible', + 'givenName': 'John', + 'familyName': 'Doe', + 'dateOfIssuance': '2020-01-01', + 'dateOfRenewal': '2024-01-01', + 'dateOfExpiration': '2025-12-31', + 'npi': '1234567890', + 'licenseNumber': 'AUD-12345', + } + ], + # Provider has THREE privileges + 'privileges': [ + { + 'type': 'privilege', + 'providerId': provider_id, + 'compact': compact, + 'jurisdiction': 'ky', + 'licenseJurisdiction': 'oh', + 'licenseType': 'audiologist', + 'dateOfIssuance': '2024-01-15', + 'dateOfRenewal': '2024-01-15', + 'dateOfExpiration': '2025-01-15', + 'dateOfUpdate': '2024-01-15T10:30:00+00:00', + 'administratorSetStatus': 'active', + 'privilegeId': 'PRIV-KY-001', + 'status': 'active', + }, + { + 'type': 'privilege', + 'providerId': provider_id, + 'compact': compact, + 'jurisdiction': 'ne', + 'licenseJurisdiction': 'oh', + 'licenseType': 'audiologist', + 'dateOfIssuance': '2024-02-01', + 'dateOfRenewal': '2024-02-01', + 'dateOfExpiration': '2025-02-01', + 'dateOfUpdate': '2024-02-01T10:30:00+00:00', + 'administratorSetStatus': 'active', + 'privilegeId': 'PRIV-NE-001', + 'status': 'active', + }, + { + 'type': 'privilege', + 'providerId': provider_id, + 'compact': compact, + 'jurisdiction': 'co', + 'licenseJurisdiction': 'oh', + 'licenseType': 'audiologist', + 'dateOfIssuance': '2024-03-01', + 'dateOfRenewal': '2024-03-01', + 'dateOfExpiration': '2025-03-01', + 'dateOfUpdate': '2024-03-01T10:30:00+00:00', + 'administratorSetStatus': 'inactive', + 'privilegeId': 'PRIV-CO-001', + 'status': 'inactive', + }, + ], + }, + # inner_hits contains TWO active privileges (simulating nested query for status: active) + 'inner_hits': { + 'privileges': { + 'hits': { + 'total': {'value': 2, 'relation': 'eq'}, + 'hits': [ + { + '_index': f'compact_{compact}_providers', + '_id': provider_id, + '_nested': {'field': 
'privileges', 'offset': 0}, + '_score': 1.0, + '_source': { + 'type': 'privilege', + 'providerId': provider_id, + 'compact': compact, + 'jurisdiction': 'ky', + 'licenseJurisdiction': 'oh', + 'licenseType': 'audiologist', + 'dateOfIssuance': '2024-01-15', + 'dateOfRenewal': '2024-01-15', + 'dateOfExpiration': '2025-01-15', + 'dateOfUpdate': '2024-01-15T10:30:00+00:00', + 'administratorSetStatus': 'active', + 'privilegeId': 'PRIV-KY-001', + 'status': 'active', + }, + }, + { + '_index': f'compact_{compact}_providers', + '_id': provider_id, + '_nested': {'field': 'privileges', 'offset': 1}, + '_score': 1.0, + '_source': { + 'type': 'privilege', + 'providerId': provider_id, + 'compact': compact, + 'jurisdiction': 'ne', + 'licenseJurisdiction': 'oh', + 'licenseType': 'audiologist', + 'dateOfIssuance': '2024-02-01', + 'dateOfRenewal': '2024-02-01', + 'dateOfExpiration': '2025-02-01', + 'dateOfUpdate': '2024-02-01T10:30:00+00:00', + 'administratorSetStatus': 'active', + 'privilegeId': 'PRIV-NE-001', + 'status': 'active', + }, + }, + ], + } + } + }, + } + + search_response = { + 'hits': { + 'total': {'value': 1, 'relation': 'eq'}, + 'hits': [hit], + } + } + self._when_testing_mock_opensearch_client(mock_opensearch_client, search_response=search_response) + + event = self._create_api_event('aslp', body={'query': {'match_all': {}}}) + + response = search_api_handler(event, self.mock_context) + + self.assertEqual(200, response['statusCode']) + body = json.loads(response['body']) + + # Verify response contains fileUrl + self.assertIn('fileUrl', body) + + # Verify the CSV contains only the 2 matched privileges + import boto3 + + s3_client = boto3.client('s3') + response = s3_client.list_objects_v2( + Bucket='test-export-results-bucket', Prefix='compact/aslp/privilegeSearch/caller/test-user-id' + ) + key = response['Contents'][0]['Key'] + csv_obj = s3_client.get_object(Bucket='test-export-results-bucket', Key=key) + csv_content = csv_obj['Body'].read().decode('utf-8') + + lines = csv_content.strip().split('\n') + self.assertEqual(3, len(lines)) # Header + 2 data rows + self.assertEqual( + 'type,providerId,compact,jurisdiction,licenseType,privilegeId,status,compactEligibility,dateOfExpiration,dateOfIssuance,dateOfRenewal,familyName,givenName,middleName,suffix,licenseJurisdiction,licenseStatus,licenseStatusName,licenseNumber,npi\r', + lines[0], + ) + self.assertEqual( + 'statePrivilege,00000000-0000-0000-0000-000000000001,aslp,ky,audiologist,PRIV-KY-001,active,eligible,2025-01-15,2024-01-15,2024-01-15,Doe,John,,,oh,active,,AUD-12345,1234567890\r', + lines[1], + ) + self.assertEqual( + 'statePrivilege,00000000-0000-0000-0000-000000000001,aslp,ne,audiologist,PRIV-NE-001,active,eligible,2025-02-01,2024-02-01,2024-02-01,Doe,John,,,oh,active,,AUD-12345,1234567890', + lines[2], + ) + + @patch('handlers.search.opensearch_client') + def test_privilege_export_without_inner_hits_exports_all_privileges(self, mock_opensearch_client): + """Test that without inner_hits, all privileges for matching providers are exported.""" + from handlers.search import search_api_handler + + provider_id = '00000000-0000-0000-0000-000000000001' + compact = 'aslp' + + # Create a provider with multiple privileges and NO inner_hits + hit = { + '_index': f'compact_{compact}_providers', + '_id': provider_id, + '_score': 1.0, + '_source': { + 'providerId': provider_id, + 'type': 'provider', + 'dateOfUpdate': '2024-01-15T10:30:00+00:00', + 'compact': compact, + 'licenseJurisdiction': 'oh', + 'licenseStatus': 'active', + 'compactEligibility': 
'eligible', + 'givenName': 'John', + 'familyName': 'Doe', + 'dateOfExpiration': '2025-12-31', + 'jurisdictionUploadedLicenseStatus': 'active', + 'jurisdictionUploadedCompactEligibility': 'eligible', + 'birthMonthDay': '06-15', + 'licenses': [ + { + 'providerId': provider_id, + 'type': 'license-home', + 'dateOfUpdate': '2024-01-15T10:30:00+00:00', + 'compact': compact, + 'jurisdiction': 'oh', + 'licenseType': 'audiologist', + 'licenseStatus': 'active', + 'compactEligibility': 'eligible', + 'jurisdictionUploadedLicenseStatus': 'active', + 'jurisdictionUploadedCompactEligibility': 'eligible', + 'givenName': 'John', + 'familyName': 'Doe', + 'dateOfIssuance': '2020-01-01', + 'dateOfRenewal': '2024-01-01', + 'dateOfExpiration': '2025-12-31', + 'npi': '1234567890', + 'licenseNumber': 'AUD-12345', + } + ], + # Provider has THREE privileges + 'privileges': [ + { + 'type': 'privilege', + 'providerId': provider_id, + 'compact': compact, + 'jurisdiction': 'ky', + 'licenseJurisdiction': 'oh', + 'licenseType': 'audiologist', + 'dateOfIssuance': '2024-01-15', + 'dateOfRenewal': '2024-01-15', + 'dateOfExpiration': '2025-01-15', + 'dateOfUpdate': '2024-01-15T10:30:00+00:00', + 'administratorSetStatus': 'active', + 'privilegeId': 'PRIV-KY-001', + 'status': 'active', + }, + { + 'type': 'privilege', + 'providerId': provider_id, + 'compact': compact, + 'jurisdiction': 'ne', + 'licenseJurisdiction': 'oh', + 'licenseType': 'audiologist', + 'dateOfIssuance': '2024-02-01', + 'dateOfRenewal': '2024-02-01', + 'dateOfExpiration': '2025-02-01', + 'dateOfUpdate': '2024-02-01T10:30:00+00:00', + 'administratorSetStatus': 'active', + 'privilegeId': 'PRIV-NE-001', + 'status': 'active', + }, + { + 'type': 'privilege', + 'providerId': provider_id, + 'compact': compact, + 'jurisdiction': 'co', + 'licenseJurisdiction': 'oh', + 'licenseType': 'audiologist', + 'dateOfIssuance': '2024-03-01', + 'dateOfRenewal': '2024-03-01', + 'dateOfExpiration': '2025-03-01', + 'dateOfUpdate': '2024-03-01T10:30:00+00:00', + 'administratorSetStatus': 'inactive', + 'privilegeId': 'PRIV-CO-001', + 'status': 'inactive', + }, + ], + }, + # No inner_hits - regular query without nested + } + + search_response = { + 'hits': { + 'total': {'value': 1, 'relation': 'eq'}, + 'hits': [hit], + } + } + self._when_testing_mock_opensearch_client(mock_opensearch_client, search_response=search_response) + + event = self._create_api_event('aslp', body={'query': {'match_all': {}}}) + + response = search_api_handler(event, self.mock_context) + + self.assertEqual(200, response['statusCode']) + body = json.loads(response['body']) + + # Verify response contains fileUrl + self.assertIn('fileUrl', body) + + # Verify the CSV contains all 3 privileges + import boto3 + + s3_client = boto3.client('s3') + response = s3_client.list_objects_v2( + Bucket='test-export-results-bucket', Prefix='compact/aslp/privilegeSearch/caller/test-user-id' + ) + key = response['Contents'][0]['Key'] + csv_obj = s3_client.get_object(Bucket='test-export-results-bucket', Key=key) + csv_content = csv_obj['Body'].read().decode('utf-8') + + lines = csv_content.strip().split('\n') + self.assertEqual(4, len(lines)) # Header + 3 data rows + self.assertEqual( + 'type,providerId,compact,jurisdiction,licenseType,privilegeId,status,compactEligibility,dateOfExpiration,dateOfIssuance,dateOfRenewal,familyName,givenName,middleName,suffix,licenseJurisdiction,licenseStatus,licenseStatusName,licenseNumber,npi\r', + lines[0], + ) + self.assertEqual( + 
'statePrivilege,00000000-0000-0000-0000-000000000001,aslp,ky,audiologist,PRIV-KY-001,active,eligible,2025-01-15,2024-01-15,2024-01-15,Doe,John,,,oh,active,,AUD-12345,1234567890\r', + lines[1], + ) + self.assertEqual( + 'statePrivilege,00000000-0000-0000-0000-000000000001,aslp,ne,audiologist,PRIV-NE-001,active,eligible,2025-02-01,2024-02-01,2024-02-01,Doe,John,,,oh,active,,AUD-12345,1234567890\r', + lines[2], + ) + self.assertEqual( + 'statePrivilege,00000000-0000-0000-0000-000000000001,aslp,co,audiologist,PRIV-CO-001,inactive,eligible,2025-03-01,2024-03-01,2024-03-01,Doe,John,,,oh,active,,AUD-12345,1234567890', + lines[3], + ) + + def test_unsupported_route_returns_400(self): + """Test that unsupported routes return a 400 error.""" + from handlers.search import search_api_handler + + # Create event with unsupported route + event = self._create_api_event(compact='aslp', resource_override='/v1/compacts/aslp/unknown/search') + + response = search_api_handler(event, self.mock_context) + + self.assertEqual(400, response['statusCode']) + body = json.loads(response['body']) + self.assertIn('Unsupported method or resource', body['message']) + + def test_missing_scopes_returns_403(self): + """Test that missing auth scope returns a 403 error.""" + from handlers.search import search_api_handler + + # Create event with unsupported route + event = self._create_api_event(compact='aslp', scopes_override='openid email') + + response = search_api_handler(event, self.mock_context) + + self.assertEqual(403, response['statusCode']) + body = json.loads(response['body']) + self.assertIn('Access denied', body['message']) + + def test_export_query_with_index_key_returns_400(self): + """Test that export queries containing 'index' key are rejected with 400 error.""" + from handlers.search import search_api_handler + + # Test with 'index' key (terms lookup attack pattern) + event = self._create_api_event( + 'aslp', + body={ + 'query': { + 'terms': { + 'providerId': { + 'index': 'compact_octp_providers', + 'id': 'some-uuid', + 'path': 'providerId', + } + } + } + }, + ) + + response = search_api_handler(event, self.mock_context) + + self.assertEqual(400, response['statusCode']) + body = json.loads(response['body']) + self.assertIn('Cross-index queries are not allowed', body['message']) + self.assertIn("'index'", body['message']) + + def test_export_query_with_underscore_index_key_returns_400(self): + """Test that export queries containing '_index' key are rejected with 400 error.""" + from handlers.search import search_api_handler + + # Test with '_index' key (more_like_this attack pattern) + event = self._create_api_event( + 'aslp', + body={ + 'query': { + 'more_like_this': { + 'fields': ['familyName', 'givenName'], + 'like': [ + { + '_index': 'compact_octp_providers', + '_id': 'target-provider-uuid', + } + ], + } + } + }, + ) + + response = search_api_handler(event, self.mock_context) + + self.assertEqual(400, response['statusCode']) + body = json.loads(response['body']) + self.assertIn('Cross-index queries are not allowed', body['message']) + self.assertIn("'_index'", body['message']) + + def test_export_query_with_nested_index_key_returns_400(self): + """Test that export queries with nested 'index' key at any level are rejected.""" + from handlers.search import search_api_handler + + # Test with 'index' key nested deep in the query structure + event = self._create_api_event( + 'aslp', + body={ + 'query': { + 'bool': { + 'should': [ + { + 'terms': { + 'familyName.keyword': { + 'index': 'compact_octp_providers', + 
'id': 'target-uuid', + 'path': 'familyName.keyword', + } + } + } + ] + } + } + }, + ) + + response = search_api_handler(event, self.mock_context) + + self.assertEqual(400, response['statusCode']) + body = json.loads(response['body']) + self.assertIn('Cross-index queries are not allowed', body['message']) + self.assertIn("'index'", body['message']) + + @patch('handlers.search.opensearch_client') + def test_privilege_with_mismatched_compact_is_filtered_from_response(self, mock_opensearch_client): + """Test that a privilege with a compact field that doesn't match the path parameter is filtered from results.""" + from handlers.search import search_api_handler + + provider_id = '00000000-0000-0000-0000-000000000001' + # Create a provider hit with a privilege that has a different compact than the path parameter + hit = { + '_index': 'compact_aslp_providers', + '_id': provider_id, + '_score': 1.0, + '_source': { + 'providerId': provider_id, + 'type': 'provider', + 'dateOfUpdate': '2024-01-15T10:30:00+00:00', + 'compact': 'aslp', + 'licenseJurisdiction': 'oh', + 'licenseStatus': 'active', + 'compactEligibility': 'eligible', + 'givenName': 'John', + 'familyName': 'Doe', + 'dateOfExpiration': '2025-12-31', + 'jurisdictionUploadedLicenseStatus': 'active', + 'jurisdictionUploadedCompactEligibility': 'eligible', + 'birthMonthDay': '06-15', + 'licenses': [ + { + 'providerId': provider_id, + 'type': 'license-home', + 'dateOfUpdate': '2024-01-15T10:30:00+00:00', + 'compact': 'aslp', + 'jurisdiction': 'oh', + 'licenseType': 'audiologist', + 'licenseStatus': 'active', + 'compactEligibility': 'eligible', + 'jurisdictionUploadedLicenseStatus': 'active', + 'jurisdictionUploadedCompactEligibility': 'eligible', + 'givenName': 'John', + 'familyName': 'Doe', + 'dateOfIssuance': '2020-01-01', + 'dateOfRenewal': '2024-01-01', + 'dateOfExpiration': '2025-12-31', + 'npi': '1234567890', + 'licenseNumber': 'AUD-12345', + } + ], + 'privileges': [ + { + 'type': 'privilege', + 'providerId': provider_id, + 'compact': 'octp', # Different from path parameter 'aslp' + 'jurisdiction': 'ky', + 'licenseJurisdiction': 'oh', + 'licenseType': 'audiologist', + 'dateOfIssuance': '2024-01-15', + 'dateOfRenewal': '2024-01-15', + 'dateOfExpiration': '2025-01-15', + 'dateOfUpdate': '2024-01-15T10:30:00+00:00', + 'administratorSetStatus': 'active', + 'privilegeId': 'PRIV-001', + 'status': 'active', + } + ], + }, + } + search_response = { + 'hits': { + 'total': {'value': 1, 'relation': 'eq'}, + 'hits': [hit], + } + } + self._when_testing_mock_opensearch_client(mock_opensearch_client, search_response=search_response) + + # Currently, with our safeguards in place, it is not possible for a bad actor to reach across + # indices when searching. This may change in the future with new OpenSearch features that are added + # over time. Because we don't have a valid query to trigger this branch of logic, we're just using a + # generic query here in place of some future query that can get past our safeguards and search provider + # data across compact indices. 
The mock above is returning a provider from a different compact to + # trigger the branch of logic where we catch this discrepancy, log the error so an alert fires, and + # filter the document from the response + custom_query = {'match_all': {}} + + # Request for 'aslp' compact but privilege has 'octp' compact + event = self._create_api_event('aslp', body={'query': custom_query}) + + response = search_api_handler(event, self.mock_context) + + self.assertEqual(404, response['statusCode']) + body = json.loads(response['body']) + self.assertEqual('The search parameters did not match any privileges.', body['message']) diff --git a/backend/compact-connect/lambdas/python/search/tests/function/test_search_providers.py b/backend/compact-connect/lambdas/python/search/tests/function/test_search_providers.py new file mode 100644 index 000000000..63860110b --- /dev/null +++ b/backend/compact-connect/lambdas/python/search/tests/function/test_search_providers.py @@ -0,0 +1,524 @@ +import json +from unittest.mock import patch + +from cc_common.exceptions import CCInvalidRequestException +from moto import mock_aws + +from . import TstFunction + + +@mock_aws +class TestSearchProviders(TstFunction): + """Test suite for search_api_handler - provider search functionality.""" + + def setUp(self): + super().setUp() + + def _create_api_event( + self, + compact: str, + body: dict = None, + scopes_override: str = None, + ) -> dict: + """Create a standard API Gateway event for search_providers.""" + return { + 'resource': '/v1/compacts/{compact}/providers/search', + 'path': f'/v1/compacts/{compact}/providers/search', + 'httpMethod': 'POST', + 'headers': { + 'accept': 'application/json', + 'content-type': 'application/json', + 'Content-Type': 'application/json', + 'origin': 'https://example.org', + 'Host': 'api.test.example.com', + }, + 'multiValueHeaders': {}, + 'queryStringParameters': None, + 'pathParameters': {'compact': compact}, + 'requestContext': { + 'resourcePath': '/v1/compacts/{compact}/providers/search', + 'httpMethod': 'POST', + 'authorizer': { + 'claims': { + 'sub': 'test-user-id', + 'cognito:username': 'test-user', + 'scope': f'openid email {compact}/readGeneral' if not scopes_override else scopes_override, + } + }, + }, + 'body': json.dumps(body) if body else None, + 'isBase64Encoded': False, + } + + def _when_testing_mock_opensearch_client(self, mock_opensearch_client, search_response: dict = None): + """ + Configure the mock OpenSearchClient for testing. 
+ + :param mock_opensearch_client: The patched opensearch_client instance + :param search_response: The response to return from the search method + :return: The mock client instance + """ + if not search_response: + search_response = { + 'hits': { + 'total': {'value': 0, 'relation': 'eq'}, + 'hits': [], + } + } + + # mock_opensearch_client is the patched instance, not the class + mock_opensearch_client.search.return_value = search_response + return mock_opensearch_client + + def _create_mock_provider_hit( + self, + provider_id: str = '00000000-0000-0000-0000-000000000001', + compact: str = 'aslp', + sort_values: list = None, + ) -> dict: + """Create a mock OpenSearch hit for a provider document.""" + hit = { + '_index': f'compact_{compact}_providers', + '_id': provider_id, + '_score': 1.0, + '_source': { + 'providerId': provider_id, + 'type': 'provider', + 'dateOfUpdate': '2024-01-15T10:30:00+00:00', + 'compact': compact, + 'licenseJurisdiction': 'oh', + 'licenseStatus': 'active', + 'compactEligibility': 'eligible', + 'givenName': 'John', + 'familyName': 'Doe', + 'dateOfExpiration': '2025-12-31', + 'jurisdictionUploadedLicenseStatus': 'active', + 'jurisdictionUploadedCompactEligibility': 'eligible', + 'birthMonthDay': '06-15', + # adding a couple of fields that are not recognized in the + # ProviderGeneralResponseSchema. Although these are not currently + # stored in OpenSearch, this mock data ensures we are sanitizing + # these private fields by the search serialization logic + 'someNewField': 'somePrivateValue', + 'ssnLastFour': '1234', + 'emailAddress': 'someemail@address.com', + 'dateOfBirth': '1984-12-11', + }, + } + if sort_values: + hit['sort'] = sort_values + return hit + + @patch('handlers.search.opensearch_client') + def test_basic_search_with_match_all_query(self, mock_opensearch_client): + """Test that a basic search with no query uses match_all.""" + from handlers.search import search_api_handler + + self._when_testing_mock_opensearch_client(mock_opensearch_client) + + # Create event with minimal body - just the required query field + event = self._create_api_event(compact='aslp', body={'query': {'match_all': {}}}) + + response = search_api_handler(event, self.mock_context) + + # Verify search was called + mock_opensearch_client.search.assert_called_once() + + # Verify the search was called with correct parameters + mock_opensearch_client.search.assert_called_once_with( + index_name='compact_aslp_providers', body={'query': {'match_all': {}}, 'size': 100} + ) + + # Verify response structure + self.assertEqual(200, response['statusCode']) + body = json.loads(response['body']) + self.assertEqual({'providers': [], 'total': {'relation': 'eq', 'value': 0}}, body) + + @patch('handlers.search.opensearch_client') + def test_search_with_custom_query(self, mock_opensearch_client): + """Test that a custom OpenSearch query is passed through correctly.""" + from handlers.search import search_api_handler + + self._when_testing_mock_opensearch_client(mock_opensearch_client) + + # Create a custom bool query + custom_query = { + 'bool': { + 'must': [ + {'match': {'givenName': 'John'}}, + {'term': {'licenseStatus': 'active'}}, + ] + } + } + event = self._create_api_event('aslp', body={'query': custom_query, 'from': 20}) + + search_api_handler(event, self.mock_context) + + # Verify the custom query was passed through + mock_opensearch_client.search.assert_called_once_with( + index_name='compact_aslp_providers', + body={ + 'query': {'bool': {'must': [{'match': {'givenName': 'John'}}, {'term': 
{'licenseStatus': 'active'}}]}}, + 'size': 100, + 'from': 20, + }, + ) + + @patch('handlers.search.opensearch_client') + def test_search_size_capped_at_max(self, mock_opensearch_client): + """Test that size parameter is capped at MAX_SIZE (100).""" + from handlers.search import search_api_handler + + # Request size larger than MAX_SIZE + event = self._create_api_event('aslp', body={'query': {'match_all': {}}, 'size': 500}) + + result = search_api_handler(event, self.mock_context) + self.assertEqual(400, result['statusCode']) + self.assertEqual( + { + 'message': 'Invalid request: ' + "{'size': ['Must be greater than or equal to 1 and less than or equal to 100.']}" + }, + json.loads(result['body']), + ) + mock_opensearch_client.search.assert_not_called() + + @patch('handlers.search.opensearch_client') + def test_search_with_sort_parameter(self, mock_opensearch_client): + """Test that sort parameter is included in the search body.""" + from handlers.search import search_api_handler + + self._when_testing_mock_opensearch_client(mock_opensearch_client) + + sort_config = [{'providerId': 'asc'}, {'dateOfUpdate': 'desc'}] + search_after_values = ['provider-uuid-123'] + event = self._create_api_event( + 'aslp', + body={ + 'query': {'match_all': {}}, + 'sort': sort_config, + 'search_after': search_after_values, + }, + ) + + search_api_handler(event, self.mock_context) + + mock_opensearch_client.search.assert_called_once_with( + index_name='compact_aslp_providers', + body={ + 'query': {'match_all': {}}, + 'size': 100, + 'sort': sort_config, + 'search_after': search_after_values, + }, + ) + + @patch('handlers.search.opensearch_client') + def test_search_after_without_sort_returns_400(self, mock_opensearch_client): + """Test that search_after without sort raises an error.""" + from handlers.search import search_api_handler + + self._when_testing_mock_opensearch_client(mock_opensearch_client) + + # search_after without sort should fail + event = self._create_api_event( + 'aslp', + body={ + 'query': {'match_all': {}}, + 'search_after': ['provider-uuid-123'], + }, + ) + + response = search_api_handler(event, self.mock_context) + + self.assertEqual(400, response['statusCode']) + body = json.loads(response['body']) + self.assertIn('sort is required when using search_after pagination', body['message']) + + def test_invalid_request_body_returns_400(self): + """Test that an invalid request body returns a 400 error.""" + from handlers.search import search_api_handler + + # Create event with missing required 'query' field + event = self._create_api_event('aslp', body={'size': 10}) + + response = search_api_handler(event, self.mock_context) + + self.assertEqual(400, response['statusCode']) + body = json.loads(response['body']) + self.assertIn('Invalid request', body['message']) + + @patch('handlers.search.opensearch_client') + def test_search_returns_sanitized_providers(self, mock_opensearch_client): + """Test that provider records are sanitized through ProviderGeneralResponseSchema.""" + from handlers.search import search_api_handler + + # Create a mock response with provider hits + mock_hit = self._create_mock_provider_hit() + search_response = { + 'hits': { + 'total': {'value': 1, 'relation': 'eq'}, + 'hits': [mock_hit], + } + } + self._when_testing_mock_opensearch_client(mock_opensearch_client, search_response=search_response) + + event = self._create_api_event('aslp', body={'query': {'match_all': {}}}) + + response = search_api_handler(event, self.mock_context) + + self.assertEqual(200, 
response['statusCode']) + body = json.loads(response['body']) + self.assertEqual( + { + 'providers': [ + { + 'birthMonthDay': '06-15', + 'compact': 'aslp', + 'compactEligibility': 'eligible', + 'dateOfExpiration': '2025-12-31', + 'dateOfUpdate': '2024-01-15T10:30:00+00:00', + 'familyName': 'Doe', + 'givenName': 'John', + 'jurisdictionUploadedCompactEligibility': 'eligible', + 'jurisdictionUploadedLicenseStatus': 'active', + 'licenseJurisdiction': 'oh', + 'licenseStatus': 'active', + 'privilegeJurisdictions': [], + 'providerId': '00000000-0000-0000-0000-000000000001', + 'type': 'provider', + } + ], + 'total': {'relation': 'eq', 'value': 1}, + }, + body, + ) + + @patch('handlers.search.opensearch_client') + def test_search_response_includes_last_sort_for_pagination(self, mock_opensearch_client): + """Test that lastSort is included in response for search_after pagination.""" + from handlers.search import search_api_handler + + # Create hits with sort values + mock_hit = self._create_mock_provider_hit(sort_values=['provider-uuid-123', '2024-01-15T10:30:00+00:00']) + search_response = { + 'hits': { + 'total': {'value': 1, 'relation': 'eq'}, + 'hits': [mock_hit], + } + } + self._when_testing_mock_opensearch_client(mock_opensearch_client, search_response=search_response) + + event = self._create_api_event( + 'aslp', + body={ + 'query': {'match_all': {}}, + 'sort': [{'providerId': 'asc'}, {'dateOfUpdate': 'asc'}], + }, + ) + + response = search_api_handler(event, self.mock_context) + + body = json.loads(response['body']) + self.assertIn('lastSort', body) + self.assertEqual(['provider-uuid-123', '2024-01-15T10:30:00+00:00'], body['lastSort']) + + @patch('handlers.search.opensearch_client') + def test_search_uses_correct_index_for_compact(self, mock_opensearch_client): + """Test that the correct index name is used based on the compact parameter.""" + from handlers.search import search_api_handler + + self._when_testing_mock_opensearch_client(mock_opensearch_client) + + # Test with different compacts + for compact in ['aslp', 'octp', 'coun']: + mock_opensearch_client.reset_mock() + + event = self._create_api_event(compact, body={'query': {'match_all': {}}}) + search_api_handler(event, self.mock_context) + + call_args = mock_opensearch_client.search.call_args + self.assertEqual(f'compact_{compact}_providers', call_args.kwargs['index_name']) + + def test_missing_scopes_returns_403(self): + """Test that missing auth scope returns a 403 error.""" + from handlers.search import search_api_handler + + # Create event with unsupported route + event = self._create_api_event(compact='aslp', scopes_override='openid email') + + response = search_api_handler(event, self.mock_context) + + self.assertEqual(403, response['statusCode']) + body = json.loads(response['body']) + self.assertIn('Access denied', body['message']) + + def test_query_with_index_key_returns_400(self): + """Test that queries containing 'index' key are rejected with 400 error.""" + from handlers.search import search_api_handler + + # Test with 'index' key (terms lookup attack pattern) + event = self._create_api_event( + 'aslp', + body={ + 'query': { + 'terms': { + 'providerId': { + 'index': 'compact_octp_providers', + 'id': 'some-uuid', + 'path': 'providerId', + } + } + } + }, + ) + + response = search_api_handler(event, self.mock_context) + + self.assertEqual(400, response['statusCode']) + body = json.loads(response['body']) + self.assertIn('Cross-index queries are not allowed', body['message']) + self.assertIn("'index'", body['message']) + + 
def test_query_with_underscore_index_key_returns_400(self): + """Test that queries containing '_index' key are rejected with 400 error.""" + from handlers.search import search_api_handler + + # Test with '_index' key (more_like_this attack pattern) + event = self._create_api_event( + 'aslp', + body={ + 'query': { + 'more_like_this': { + 'fields': ['familyName', 'givenName'], + 'like': [ + { + '_index': 'compact_octp_providers', + '_id': 'target-provider-uuid', + } + ], + } + } + }, + ) + + response = search_api_handler(event, self.mock_context) + + self.assertEqual(400, response['statusCode']) + body = json.loads(response['body']) + self.assertIn('Cross-index queries are not allowed', body['message']) + self.assertIn("'_index'", body['message']) + + def test_query_with_nested_index_key_returns_400(self): + """Test that queries with nested 'index' key at any level are rejected.""" + from handlers.search import search_api_handler + + # Test with 'index' key nested deep in the query structure + event = self._create_api_event( + 'aslp', + body={ + 'query': { + 'bool': { + 'should': [ + { + 'terms': { + 'familyName.keyword': { + 'index': 'compact_octp_providers', + 'id': 'target-uuid', + 'path': 'familyName.keyword', + } + } + } + ] + } + } + }, + ) + + response = search_api_handler(event, self.mock_context) + + self.assertEqual(400, response['statusCode']) + body = json.loads(response['body']) + self.assertIn('Cross-index queries are not allowed', body['message']) + self.assertIn("'index'", body['message']) + + @patch('handlers.search.opensearch_client') + def test_opensearch_request_error_returns_400_with_error_message(self, mock_opensearch_client): + """Test that OpenSearch RequestError with status 400 returns error message to caller.""" + from handlers.search import search_api_handler + + # Create a RequestError with realistic OpenSearch error structure + error_reason = ( + 'Invalid search query: Text fields are not optimised for operations that require per-document field data ' + 'like aggregations and sorting, so these operations are disabled by default. ' + 'Please use a keyword field instead.' 
+ ) + mock_opensearch_client.search.side_effect = CCInvalidRequestException(error_reason) + + event = self._create_api_event( + 'aslp', + body={ + 'query': {'match_all': {}}, + 'sort': [{'familyName': 'asc'}], # Sorting on text field causes this error + }, + ) + + response = search_api_handler(event, self.mock_context) + + self.assertEqual(400, response['statusCode']) + body = json.loads(response['body']) + self.assertEqual(error_reason, body['message']) + + @patch('handlers.search.opensearch_client') + def test_provider_with_mismatched_compact_is_filtered_from_response(self, mock_opensearch_client): + """Test that a provider with a compact field that doesn't match the path parameter is filtered from results.""" + from handlers.search import search_api_handler + + # Create a provider hit with a different compact than the path parameter + provider_id = '00000000-0000-0000-0000-000000000001' + hit = { + '_index': 'compact_aslp_providers', + '_id': provider_id, + '_score': 1.0, + '_source': { + 'providerId': provider_id, + 'type': 'provider', + 'dateOfUpdate': '2024-01-15T10:30:00+00:00', + 'compact': 'octp', # Different from path parameter 'aslp' + 'licenseJurisdiction': 'oh', + 'licenseStatus': 'active', + 'compactEligibility': 'eligible', + 'givenName': 'John', + 'familyName': 'Doe', + 'dateOfExpiration': '2025-12-31', + 'jurisdictionUploadedLicenseStatus': 'active', + 'jurisdictionUploadedCompactEligibility': 'eligible', + 'birthMonthDay': '06-15', + }, + } + search_response = { + 'hits': { + 'total': {'value': 1, 'relation': 'eq'}, + 'hits': [hit], + } + } + self._when_testing_mock_opensearch_client(mock_opensearch_client, search_response=search_response) + + # Currently, with our safeguards in place, it is not possible for a bad actor to reach across + # indices when searching. This may change in the future with new OpenSearch features that are added + # over time. Because we don't have a valid query to trigger this branch of logic, we're just using a + # generic query here in place of some future query that can get past our safeguards and search provider + # data across compact indices. 
The mock above is returning a provider from a different compact to + # trigger the branch of logic where we catch this discrepancy, log the error so an alert fires, and + # filter the document from the response + custom_query = {'match_all': {}} + + # Request for 'aslp' compact but provider has 'octp' compact + event = self._create_api_event('aslp', body={'query': custom_query}) + + response = search_api_handler(event, self.mock_context) + + self.assertEqual(200, response['statusCode']) + body = json.loads(response['body']) + # should be empty list with total value of 0 + self.assertEqual({'providers': [], 'total': {'relation': 'eq', 'value': 0}}, body) diff --git a/backend/compact-connect/lambdas/python/search/tests/unit/__init__.py b/backend/compact-connect/lambdas/python/search/tests/unit/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/compact-connect/lambdas/python/search/tests/unit/test_opensearch_client.py b/backend/compact-connect/lambdas/python/search/tests/unit/test_opensearch_client.py new file mode 100644 index 000000000..62e5eb630 --- /dev/null +++ b/backend/compact-connect/lambdas/python/search/tests/unit/test_opensearch_client.py @@ -0,0 +1,498 @@ +# ruff: noqa ARG002 unused-argument +from unittest import TestCase +from unittest.mock import MagicMock, patch + +from cc_common.exceptions import CCInternalException, CCInvalidRequestException +from opensearchpy.exceptions import ConnectionTimeout, RequestError, TransportError + + +class TestOpenSearchClient(TestCase): + """Test suite for OpenSearchClient to verify internal client calls.""" + + def _create_client_with_mock(self): + """Create an OpenSearchClient with a mocked internal client.""" + with ( + patch('opensearch_client.boto3'), + patch('opensearch_client.config'), + patch('opensearch_client.OpenSearch') as mock_opensearch_class, + ): + mock_internal_client = MagicMock() + mock_opensearch_class.return_value = mock_internal_client + + from opensearch_client import OpenSearchClient + + client = OpenSearchClient() + return client, mock_internal_client + + def test_create_index_calls_internal_client_with_expected_arguments(self): + """Test that create_index calls the internal client's indices.create method correctly.""" + client, mock_internal_client = self._create_client_with_mock() + + index_name = 'test_index' + index_mapping = { + 'settings': {'number_of_shards': 1}, + 'mappings': {'properties': {'field1': {'type': 'text'}}}, + } + + client.create_index(index_name=index_name, index_mapping=index_mapping) + + mock_internal_client.indices.create.assert_called_once_with( + index=index_name, + body=index_mapping, + ) + + def test_index_exists_calls_internal_client_with_expected_arguments(self): + """Test that index_exists calls the internal client's indices.exists method correctly.""" + client, mock_internal_client = self._create_client_with_mock() + + index_name = 'test_index' + mock_internal_client.indices.exists.return_value = True + + result = client.index_exists(index_name=index_name) + + mock_internal_client.indices.exists.assert_called_once_with(index=index_name) + self.assertTrue(result) + + def test_alias_exists_calls_internal_client_with_expected_arguments(self): + """Test that alias_exists calls the internal client's indices.exists_alias method correctly.""" + client, mock_internal_client = self._create_client_with_mock() + + alias_name = 'test_alias' + mock_internal_client.indices.exists_alias.return_value = True + + result = client.alias_exists(alias_name=alias_name) + + 
mock_internal_client.indices.exists_alias.assert_called_once_with(name=alias_name) + self.assertTrue(result) + + def test_alias_exists_returns_false_when_alias_does_not_exist(self): + """Test that alias_exists returns False when the alias does not exist.""" + client, mock_internal_client = self._create_client_with_mock() + + alias_name = 'nonexistent_alias' + mock_internal_client.indices.exists_alias.return_value = False + + result = client.alias_exists(alias_name=alias_name) + + mock_internal_client.indices.exists_alias.assert_called_once_with(name=alias_name) + self.assertFalse(result) + + def test_create_alias_calls_internal_client_with_expected_arguments(self): + """Test that create_alias calls the internal client's indices.put_alias method correctly.""" + client, mock_internal_client = self._create_client_with_mock() + + index_name = 'test_index_v1' + alias_name = 'test_alias' + + client.create_alias(index_name=index_name, alias_name=alias_name) + + mock_internal_client.indices.put_alias.assert_called_once_with(index=index_name, name=alias_name) + + def test_search_calls_internal_client_with_expected_arguments(self): + """Test that search calls the internal client's search method correctly.""" + client, mock_internal_client = self._create_client_with_mock() + + index_name = 'test_index' + query_body = { + 'query': { + 'match': {'givenName': 'John'}, + }, + } + expected_response = { + 'hits': { + 'total': {'value': 1}, + 'hits': [{'_source': {'givenName': 'John', 'familyName': 'Doe'}}], + }, + } + mock_internal_client.search.return_value = expected_response + + result = client.search(index_name=index_name, body=query_body) + + mock_internal_client.search.assert_called_once_with(index=index_name, body=query_body) + self.assertEqual(expected_response, result) + + def test_search_raises_cc_invalid_request_exception_on_400_request_error(self): + """Test that search raises CCInvalidRequestException when OpenSearch returns a 400 RequestError.""" + client, mock_internal_client = self._create_client_with_mock() + + index_name = 'test_index' + query_body = {'query': {'match_all': {}}, 'sort': [{'familyName': 'asc'}]} + + # Simulate OpenSearch returning a 400 error with realistic error structure + error_reason = ( + 'Text fields are not optimised for operations that require per-document field data ' + 'like aggregations and sorting, so these operations are disabled by default.' 
+ ) + error_info = { + 'error': { + 'root_cause': [ + { + 'type': 'illegal_argument_exception', + 'reason': error_reason, + } + ], + 'type': 'search_phase_execution_exception', + 'reason': 'all shards failed', + }, + 'status': 400, + } + mock_internal_client.search.side_effect = RequestError(400, 'search_phase_execution_exception', error_info) + + with self.assertRaises(CCInvalidRequestException) as context: + client.search(index_name=index_name, body=query_body) + + # Verify the exception message extracts the reason from root_cause + self.assertEqual( + f'Invalid search query: {error_reason}', + str(context.exception), + ) + + def test_search_raises_cc_invalid_request_exception_with_fallback_on_missing_root_cause(self): + """Test that search falls back to error type when root_cause is missing.""" + client, mock_internal_client = self._create_client_with_mock() + + index_name = 'test_index' + query_body = {'query': {'match_all': {}}} + + # Simulate OpenSearch returning a 400 error without root_cause structure + mock_internal_client.search.side_effect = RequestError(400, 'parsing_exception', None) + + with self.assertRaises(CCInvalidRequestException) as context: + client.search(index_name=index_name, body=query_body) + + # Verify the exception falls back to the error type + self.assertEqual( + 'Invalid search query: parsing_exception', + str(context.exception), + ) + + def test_search_reraises_non_400_request_error(self): + """Test that search re-raises RequestError for non-400 status codes.""" + client, mock_internal_client = self._create_client_with_mock() + + index_name = 'test_index' + query_body = {'query': {'match_all': {}}} + + # Simulate OpenSearch returning a 500 error + mock_internal_client.search.side_effect = RequestError(500, 'internal_error', 'Something went wrong') + + with self.assertRaises(RequestError) as context: + client.search(index_name=index_name, body=query_body) + + self.assertEqual(500, context.exception.status_code) + + def test_search_raises_cc_invalid_request_exception_on_timeout(self): + """Test that search raises CCInvalidRequestException when the request times out.""" + client, mock_internal_client = self._create_client_with_mock() + + index_name = 'test_index' + query_body = {'query': {'match_all': {}}} + + # Simulate OpenSearch timing out + mock_internal_client.search.side_effect = ConnectionTimeout('Connection timed out', 503, 'Read timed out') + + with self.assertRaises(CCInvalidRequestException) as context: + client.search(index_name=index_name, body=query_body) + + # Verify the exception message tells the user to try again + self.assertEqual( + 'Search request timed out. 
Please try again or narrow your search criteria.', + str(context.exception), + ) + + def test_bulk_index_calls_internal_client_with_expected_arguments(self): + """Test that bulk_index calls the internal client's bulk method correctly.""" + client, mock_internal_client = self._create_client_with_mock() + + index_name = 'test_index' + documents = [ + {'providerId': 'provider-1', 'givenName': 'John', 'familyName': 'Doe'}, + {'providerId': 'provider-2', 'givenName': 'Jane', 'familyName': 'Smith'}, + ] + expected_response = { + 'errors': False, + 'items': [{'index': {'_id': 'provider-1'}}, {'index': {'_id': 'provider-2'}}], + } + mock_internal_client.bulk.return_value = expected_response + + result = client.bulk_index(index_name=index_name, documents=documents) + + expected_actions = [ + {'index': {'_id': 'provider-1'}}, + {'providerId': 'provider-1', 'givenName': 'John', 'familyName': 'Doe'}, + {'index': {'_id': 'provider-2'}}, + {'providerId': 'provider-2', 'givenName': 'Jane', 'familyName': 'Smith'}, + ] + mock_internal_client.bulk.assert_called_once_with(body=expected_actions, index=index_name) + self.assertEqual(expected_response, result) + + def test_bulk_index_uses_custom_id_field(self): + """Test that bulk_index uses a custom id_field when specified.""" + client, mock_internal_client = self._create_client_with_mock() + + index_name = 'test_index' + documents = [ + {'customId': 'custom-1', 'name': 'Document 1'}, + {'customId': 'custom-2', 'name': 'Document 2'}, + ] + mock_internal_client.bulk.return_value = {'errors': False, 'items': []} + + client.bulk_index(index_name=index_name, documents=documents, id_field='customId') + + expected_actions = [ + {'index': {'_id': 'custom-1'}}, + {'customId': 'custom-1', 'name': 'Document 1'}, + {'index': {'_id': 'custom-2'}}, + {'customId': 'custom-2', 'name': 'Document 2'}, + ] + mock_internal_client.bulk.assert_called_once_with(body=expected_actions, index=index_name) + + def test_bulk_index_returns_early_for_empty_documents(self): + """Test that bulk_index returns early without calling the internal client for empty documents.""" + client, mock_internal_client = self._create_client_with_mock() + + result = client.bulk_index(index_name='test_index', documents=[]) + + mock_internal_client.bulk.assert_not_called() + self.assertEqual({'items': [], 'errors': False}, result) + + @patch('opensearch_client.time.sleep') + def test_bulk_index_retries_on_connection_timeout_and_succeeds(self, mock_sleep): + """Test that bulk_index retries on ConnectionTimeout and eventually succeeds.""" + from opensearch_client import INITIAL_BACKOFF_SECONDS + + client, mock_internal_client = self._create_client_with_mock() + + index_name = 'test_index' + documents = [{'providerId': 'provider-1', 'givenName': 'John'}] + expected_response = {'errors': False, 'items': [{'index': {'_id': 'provider-1'}}]} + + # First two calls fail with ConnectionTimeout, third succeeds + mock_internal_client.bulk.side_effect = [ + ConnectionTimeout('Connection timed out', 503, 'some error'), + ConnectionTimeout('Connection timed out', 503, 'some error'), + expected_response, + ] + + result = client.bulk_index(index_name=index_name, documents=documents) + + # Verify bulk was called 3 times + self.assertEqual(3, mock_internal_client.bulk.call_count) + # Verify sleep was called with exponential backoff + self.assertEqual(2, mock_sleep.call_count) + mock_sleep.assert_any_call(INITIAL_BACKOFF_SECONDS) + mock_sleep.assert_any_call(INITIAL_BACKOFF_SECONDS * 2) + # Verify we got the successful response + 
self.assertEqual(expected_response, result) + + @patch('opensearch_client.time.sleep') + def test_bulk_index_retries_on_transport_error_and_succeeds(self, mock_sleep): + """Test that bulk_index retries on TransportError and eventually succeeds.""" + client, mock_internal_client = self._create_client_with_mock() + + index_name = 'test_index' + documents = [{'providerId': 'provider-1', 'givenName': 'John'}] + expected_response = {'errors': False, 'items': [{'index': {'_id': 'provider-1'}}]} + + # First call fails with TransportError, second succeeds + mock_internal_client.bulk.side_effect = [ + TransportError(503, 'ReadTimeout'), + expected_response, + ] + + result = client.bulk_index(index_name=index_name, documents=documents) + + # Verify bulk was called 2 times + self.assertEqual(2, mock_internal_client.bulk.call_count) + # Verify sleep was called once + self.assertEqual(1, mock_sleep.call_count) + self.assertEqual(expected_response, result) + + @patch('opensearch_client.time.sleep') + def test_bulk_index_raises_cc_internal_exception_after_max_retries(self, mock_sleep): + """Test that bulk_index raises CCInternalException after all retry attempts fail.""" + from cc_common.exceptions import CCInternalException + from opensearch_client import MAX_RETRY_ATTEMPTS + + client, mock_internal_client = self._create_client_with_mock() + + index_name = 'test_index' + documents = [{'providerId': 'provider-1', 'givenName': 'John'}] + + # All calls fail with ConnectionTimeout + mock_internal_client.bulk.side_effect = ConnectionTimeout('Connection timed out', 503, 'some error') + + with self.assertRaises(CCInternalException) as context: + client.bulk_index(index_name=index_name, documents=documents) + + # Verify bulk was called MAX_RETRY_ATTEMPTS times + self.assertEqual(MAX_RETRY_ATTEMPTS, mock_internal_client.bulk.call_count) + # Verify sleep was called MAX_RETRY_ATTEMPTS - 1 times (no sleep after last failure) + self.assertEqual(MAX_RETRY_ATTEMPTS - 1, mock_sleep.call_count) + # Verify the exception message contains useful info + self.assertIn('Failed to bulk index', str(context.exception)) + self.assertIn(index_name, str(context.exception)) + self.assertIn(str(MAX_RETRY_ATTEMPTS), str(context.exception)) + + @patch('opensearch_client.time.sleep') + def test_bulk_index_exponential_backoff_caps_at_max(self, mock_sleep): + """Test that exponential backoff is capped at MAX_BACKOFF_SECONDS.""" + from opensearch_client import MAX_BACKOFF_SECONDS + + client, mock_internal_client = self._create_client_with_mock() + + index_name = 'test_index' + documents = [{'providerId': 'provider-1', 'givenName': 'John'}] + + # All calls fail + mock_internal_client.bulk.side_effect = ConnectionTimeout('Connection timed out', 503, 'some error') + + with self.assertRaises(CCInternalException): + client.bulk_index(index_name=index_name, documents=documents) + + # Verify backoff values: 2, 4, 8, 16 (all should be <= MAX_BACKOFF_SECONDS) + # With MAX_RETRY_ATTEMPTS = 5, we have 4 sleeps + sleep_calls = [call[0][0] for call in mock_sleep.call_args_list] + for sleep_value in sleep_calls: + self.assertLessEqual(sleep_value, MAX_BACKOFF_SECONDS) + + +class TestOpenSearchClientIndexManagementRetry(TestCase): + """Test suite for OpenSearchClient index management operations with retry logic.""" + + def _create_client_with_mock(self): + """Create an OpenSearchClient with a mocked internal client.""" + with ( + patch('opensearch_client.boto3'), + patch('opensearch_client.config'), + patch('opensearch_client.OpenSearch') as 
mock_opensearch_class, + ): + mock_internal_client = MagicMock() + mock_opensearch_class.return_value = mock_internal_client + + from opensearch_client import OpenSearchClient + + client = OpenSearchClient() + return client, mock_internal_client + + @patch('opensearch_client.time.sleep') + def test_create_index_retries_on_connection_timeout_and_succeeds(self, mock_sleep): + """Test that create_index retries on ConnectionTimeout and eventually succeeds.""" + from opensearch_client import INITIAL_BACKOFF_SECONDS + + client, mock_internal_client = self._create_client_with_mock() + + # First call fails, second succeeds + mock_internal_client.indices.create.side_effect = [ + ConnectionTimeout('Connection timed out', 503, 'some error'), + {'acknowledged': True}, + ] + + # Should not raise + client.create_index(index_name='test_index', index_mapping={'settings': {}}) + + # Verify create was called 2 times + self.assertEqual(2, mock_internal_client.indices.create.call_count) + # Verify sleep was called once + self.assertEqual(1, mock_sleep.call_count) + mock_sleep.assert_called_with(INITIAL_BACKOFF_SECONDS) + + @patch('opensearch_client.time.sleep') + def test_create_index_raises_after_max_retries(self, mock_sleep): + """Test that create_index raises CCInternalException after max retries.""" + from opensearch_client import MAX_RETRY_ATTEMPTS + + client, mock_internal_client = self._create_client_with_mock() + + # All calls fail + mock_internal_client.indices.create.side_effect = ConnectionTimeout('Connection timed out', 503, 'some error') + + with self.assertRaises(CCInternalException) as context: + client.create_index(index_name='test_index', index_mapping={'settings': {}}) + + # Verify create was called MAX_RETRY_ATTEMPTS times + self.assertEqual(MAX_RETRY_ATTEMPTS, mock_internal_client.indices.create.call_count) + self.assertIn('create_index', str(context.exception)) + + @patch('opensearch_client.time.sleep') + def test_index_exists_retries_on_transport_error_and_succeeds(self, mock_sleep): + """Test that index_exists retries on TransportError and eventually succeeds.""" + client, mock_internal_client = self._create_client_with_mock() + + # First call fails, second succeeds + mock_internal_client.indices.exists.side_effect = [ + TransportError(503, 'ReadTimeout'), + True, + ] + + result = client.index_exists(index_name='test_index') + + self.assertTrue(result) + self.assertEqual(2, mock_internal_client.indices.exists.call_count) + + @patch('opensearch_client.time.sleep') + def test_alias_exists_retries_on_connection_timeout_and_succeeds(self, mock_sleep): + """Test that alias_exists retries on ConnectionTimeout and eventually succeeds.""" + client, mock_internal_client = self._create_client_with_mock() + + # First call fails, second succeeds + mock_internal_client.indices.exists_alias.side_effect = [ + ConnectionTimeout('Connection timed out', 503, 'some error'), + True, + ] + + result = client.alias_exists(alias_name='test_alias') + + self.assertTrue(result) + self.assertEqual(2, mock_internal_client.indices.exists_alias.call_count) + + @patch('opensearch_client.time.sleep') + def test_create_alias_retries_on_connection_timeout_and_succeeds(self, mock_sleep): + """Test that create_alias retries on ConnectionTimeout and eventually succeeds.""" + client, mock_internal_client = self._create_client_with_mock() + + # First call fails, second succeeds + mock_internal_client.indices.put_alias.side_effect = [ + ConnectionTimeout('Connection timed out', 503, 'some error'), + {'acknowledged': True}, + ] 
+ + # Should not raise + client.create_alias(index_name='test_index', alias_name='test_alias') + + self.assertEqual(2, mock_internal_client.indices.put_alias.call_count) + + @patch('opensearch_client.time.sleep') + def test_cluster_health_retries_on_connection_timeout_and_succeeds(self, mock_sleep): + """Test that cluster_health retries on ConnectionTimeout and eventually succeeds.""" + client, mock_internal_client = self._create_client_with_mock() + + expected_response = {'status': 'green', 'number_of_nodes': 3} + + # First call fails, second succeeds + mock_internal_client.cluster.health.side_effect = [ + ConnectionTimeout('Connection timed out', 503, 'some error'), + expected_response, + ] + + result = client.cluster_health() + + self.assertEqual(expected_response, result) + self.assertEqual(2, mock_internal_client.cluster.health.call_count) + + @patch('opensearch_client.time.sleep') + def test_cluster_health_raises_after_max_retries(self, mock_sleep): + """Test that cluster_health raises CCInternalException after max retries.""" + from opensearch_client import MAX_RETRY_ATTEMPTS + + client, mock_internal_client = self._create_client_with_mock() + + # All calls fail + mock_internal_client.cluster.health.side_effect = ConnectionTimeout('Connection timed out', 503, 'some error') + + with self.assertRaises(CCInternalException) as context: + client.cluster_health() + + # Verify health was called MAX_RETRY_ATTEMPTS times + self.assertEqual(MAX_RETRY_ATTEMPTS, mock_internal_client.cluster.health.call_count) + self.assertIn('cluster_health', str(context.exception)) diff --git a/backend/compact-connect/lambdas/python/search/utils.py b/backend/compact-connect/lambdas/python/search/utils.py new file mode 100644 index 000000000..04fd50aa5 --- /dev/null +++ b/backend/compact-connect/lambdas/python/search/utils.py @@ -0,0 +1,41 @@ +""" +Utility functions for provider document processing and OpenSearch indexing. + +This module contains shared logic for processing provider records and preparing +them for OpenSearch indexing. It is used by both the populate_provider_documents +and provider_update_ingest handlers. +""" + +import json + +from cc_common.config import config +from cc_common.data_model.schema.provider.api import ProviderGeneralResponseSchema +from cc_common.utils import ResponseEncoder + + +def generate_provider_opensearch_document(compact: str, provider_id: str) -> dict: + """ + Process a single provider and return the sanitized document ready for indexing. 
+ + :param compact: The compact abbreviation + :param provider_id: The provider ID to process + :return: Sanitized document ready for indexing + :raises CCNotFoundException: If the provider is not found + :raises ValidationError: If the provider data fails schema validation + """ + # Get complete provider records + provider_user_records = config.data_client.get_provider_user_records( + compact=compact, + provider_id=provider_id, + consistent_read=True, + ) + + # Generate API response object with all nested records + api_response = provider_user_records.generate_api_response_object() + + # Sanitize using ProviderGeneralResponseSchema + schema = ProviderGeneralResponseSchema() + sanitized_document = schema.load(api_response) + + # Serialize using ResponseEncoder to convert sets to lists and datetime objects to strings + return json.loads(json.dumps(sanitized_document, cls=ResponseEncoder)) diff --git a/backend/compact-connect/lambdas/python/staff-user-pre-token/requirements-dev.txt b/backend/compact-connect/lambdas/python/staff-user-pre-token/requirements-dev.txt index fa4f50392..b4c5e0e2c 100644 --- a/backend/compact-connect/lambdas/python/staff-user-pre-token/requirements-dev.txt +++ b/backend/compact-connect/lambdas/python/staff-user-pre-token/requirements-dev.txt @@ -4,9 +4,9 @@ # # pip-compile --no-emit-index-url --no-strip-extras lambdas/python/staff-user-pre-token/requirements-dev.in # -boto3==1.41.0 +boto3==1.42.11 # via moto -botocore==1.41.0 +botocore==1.42.11 # via # boto3 # moto @@ -33,7 +33,7 @@ markupsafe==3.0.3 # via # jinja2 # werkzeug -moto[dynamodb,s3]==5.1.17 +moto[dynamodb,s3]==5.1.18 # via -r lambdas/python/staff-user-pre-token/requirements-dev.in py-partiql-parser==0.6.3 # via moto @@ -54,17 +54,17 @@ requests==2.32.5 # responses responses==0.25.8 # via moto -s3transfer==0.14.0 +s3transfer==0.16.0 # via boto3 six==1.17.0 # via python-dateutil -urllib3==2.5.0 +urllib3==2.6.2 # via # botocore # docker # requests # responses -werkzeug==3.1.3 +werkzeug==3.1.4 # via moto xmltodict==1.0.2 # via moto diff --git a/backend/compact-connect/lambdas/python/staff-users/requirements-dev.txt b/backend/compact-connect/lambdas/python/staff-users/requirements-dev.txt index 54e10b15f..5d98ff0c5 100644 --- a/backend/compact-connect/lambdas/python/staff-users/requirements-dev.txt +++ b/backend/compact-connect/lambdas/python/staff-users/requirements-dev.txt @@ -4,9 +4,9 @@ # # pip-compile --no-emit-index-url --no-strip-extras lambdas/python/staff-users/requirements-dev.in # -boto3==1.41.0 +boto3==1.42.11 # via moto -botocore==1.41.0 +botocore==1.42.11 # via # boto3 # moto @@ -33,13 +33,13 @@ jmespath==1.0.1 # via # boto3 # botocore -joserfc==1.4.3 +joserfc==1.6.0 # via moto markupsafe==3.0.3 # via # jinja2 # werkzeug -moto[cognitoidp,dynamodb,s3]==5.1.17 +moto[cognitoidp,dynamodb,s3]==5.1.18 # via -r lambdas/python/staff-users/requirements-dev.in py-partiql-parser==0.6.3 # via moto @@ -60,19 +60,19 @@ requests==2.32.5 # responses responses==0.25.8 # via moto -s3transfer==0.14.0 +s3transfer==0.16.0 # via boto3 six==1.17.0 # via python-dateutil -tzdata==2025.2 +tzdata==2025.3 # via faker -urllib3==2.5.0 +urllib3==2.6.2 # via # botocore # docker # requests # responses -werkzeug==3.1.3 +werkzeug==3.1.4 # via moto xmltodict==1.0.2 # via moto diff --git a/backend/compact-connect/pipeline/backend_stage.py b/backend/compact-connect/pipeline/backend_stage.py index 6a83de8a9..cf897ea11 100644 --- a/backend/compact-connect/pipeline/backend_stage.py +++ 
b/backend/compact-connect/pipeline/backend_stage.py @@ -15,9 +15,12 @@ from stacks.persistent_stack import PersistentStack from stacks.provider_users import ProviderUsersStack from stacks.reporting_stack import ReportingStack +from stacks.search_api_stack import SearchApiStack +from stacks.search_persistent_stack import SearchPersistentStack from stacks.state_api_stack import StateApiStack from stacks.state_auth import StateAuthStack from stacks.transaction_monitoring_stack import TransactionMonitoringStack +from stacks.vpc_stack import VpcStack class BackendStage(Stage): @@ -38,6 +41,16 @@ def __init__( environment = Environment(account=environment_context['account_id'], region=environment_context['region']) + # VPC Stack - provides networking infrastructure for OpenSearch and Lambda functions + self.vpc_stack = VpcStack( + self, + 'VpcStack', + env=environment, + environment_context=environment_context, + standard_tags=standard_tags, + environment_name=environment_name, + ) + self.persistent_stack = PersistentStack( self, 'PersistentStack', @@ -220,3 +233,26 @@ def __init__( # Explicitly declare the dependency to ensure proper deployment order self.data_migration_stack.add_dependency(self.api_stack) self.data_migration_stack.add_dependency(self.event_listener_stack) + + # Search Persistent Stack - OpenSearch Domain for advanced provider search + self.search_persistent_stack = SearchPersistentStack( + self, + 'SearchPersistentStack', + env=environment, + environment_context=environment_context, + standard_tags=standard_tags, + environment_name=environment_name, + vpc_stack=self.vpc_stack, + persistent_stack=self.persistent_stack, + ) + + self.search_api_stack = SearchApiStack( + self, + 'SearchAPIStack', + env=environment, + environment_context=environment_context, + standard_tags=standard_tags, + environment_name=environment_name, + persistent_stack=self.persistent_stack, + search_persistent_stack=self.search_persistent_stack, + ) diff --git a/backend/compact-connect/requirements-dev.txt b/backend/compact-connect/requirements-dev.txt index 6ea674de6..c3230bdc7 100644 --- a/backend/compact-connect/requirements-dev.txt +++ b/backend/compact-connect/requirements-dev.txt @@ -18,17 +18,17 @@ charset-normalizer==3.4.4 # via requests click==8.3.1 # via pip-tools -coverage[toml]==7.12.0 +coverage[toml]==7.13.0 # via # -r requirements-dev.in # pytest-cov -cyclonedx-python-lib==9.1.0 +cyclonedx-python-lib==11.6.0 # via pip-audit defusedxml==0.7.1 # via py-serializable faker==37.12.0 # via -r requirements-dev.in -filelock==3.20.0 +filelock==3.20.1 # via cachecontrol idna==3.11 # via requests @@ -42,7 +42,7 @@ mdurl==0.1.2 # via markdown-it-py msgpack==1.1.2 # via cachecontrol -packageurl-python==0.17.5 +packageurl-python==0.17.6 # via cyclonedx-python-lib packaging==25.0 # via @@ -52,13 +52,13 @@ packaging==25.0 # pytest pip-api==0.0.34 # via pip-audit -pip-audit==2.9.0 +pip-audit==2.10.0 # via -r requirements-dev.in pip-requirements-parser==32.0.1 # via pip-audit pip-tools==7.5.2 # via -r requirements-dev.in -platformdirs==4.5.0 +platformdirs==4.5.1 # via pip-audit pluggy==1.6.0 # via @@ -76,7 +76,7 @@ pyproject-hooks==1.2.0 # via # build # pip-tools -pytest==9.0.1 +pytest==9.0.2 # via # -r requirements-dev.in # pytest-cov @@ -88,15 +88,17 @@ requests==2.32.5 # pip-audit rich==14.2.0 # via pip-audit -ruff==0.14.5 +ruff==0.14.9 # via -r requirements-dev.in sortedcontainers==2.4.0 # via cyclonedx-python-lib -toml==0.10.2 +tomli==2.3.0 # via pip-audit -tzdata==2025.2 +tomli-w==1.2.0 + # via pip-audit 
+tzdata==2025.3 # via faker -urllib3==2.5.0 +urllib3==2.6.2 # via requests wheel==0.45.1 # via pip-tools diff --git a/backend/compact-connect/requirements.txt b/backend/compact-connect/requirements.txt index 7ead839a2..070ada8b7 100644 --- a/backend/compact-connect/requirements.txt +++ b/backend/compact-connect/requirements.txt @@ -12,11 +12,11 @@ aws-cdk-asset-awscli-v1==2.2.242 # via aws-cdk-lib aws-cdk-asset-node-proxy-agent-v6==2.1.0 # via aws-cdk-lib -aws-cdk-aws-lambda-python-alpha==2.225.0a0 +aws-cdk-aws-lambda-python-alpha==2.232.2a0 # via -r requirements.in aws-cdk-cloud-assembly-schema==48.20.0 # via aws-cdk-lib -aws-cdk-lib==2.225.0 +aws-cdk-lib==2.232.2 # via # -r requirements.in # aws-cdk-aws-lambda-python-alpha @@ -25,7 +25,7 @@ cattrs==25.3.0 # via jsii cdk-nag==2.37.55 # via -r requirements.in -constructs==10.4.3 +constructs==10.4.4 # via # -r requirements.in # aws-cdk-aws-lambda-python-alpha @@ -33,7 +33,7 @@ constructs==10.4.3 # cdk-nag importlib-resources==6.5.2 # via jsii -jsii==1.119.0 +jsii==1.121.0 # via # aws-cdk-asset-awscli-v1 # aws-cdk-asset-node-proxy-agent-v6 @@ -58,7 +58,7 @@ pyyaml==6.0.3 # via -r requirements.in six==1.17.0 # via python-dateutil -typeguard==4.2.1 +typeguard==2.13.3 # via # aws-cdk-asset-awscli-v1 # aws-cdk-asset-node-proxy-agent-v6 diff --git a/backend/compact-connect/resources/bootstrap-stack-beta.yaml b/backend/compact-connect/resources/bootstrap-stack-beta.yaml index 827283393..323675af7 100644 --- a/backend/compact-connect/resources/bootstrap-stack-beta.yaml +++ b/backend/compact-connect/resources/bootstrap-stack-beta.yaml @@ -107,6 +107,15 @@ Conditions: - Ref: PublicAccessBlockConfiguration Resources: + # Service-linked role for Amazon OpenSearch Service to access VPC resources + # This role allows OpenSearch to create and manage ENIs in VPCs + # NOTE: If this role already exists in the account, remove this resource from the template + OpenSearchServiceLinkedRole: + Type: AWS::IAM::ServiceLinkedRole + Properties: + AWSServiceName: opensearchservice.amazonaws.com + Description: Service-linked role for Amazon OpenSearch Service VPC access + FileAssetsBucketEncryptionKey: Type: AWS::KMS::Key Properties: @@ -614,6 +623,10 @@ Resources: - kms:* # AWS Lambda - lambda:* + # Amazon OpenSearch Service + - es:* + # Amazon EventBridge Pipes + - pipes:* # Amazon Route 53 - route53:* # Amazon S3 @@ -637,6 +650,80 @@ Resources: - sts:GetCallerIdentity - sts:TagSession Resource: "*" + # VPC Resources - Restricted EC2 permissions for VPC networking only + - Sid: AllowVpcNetworkingResources + Effect: Allow + Action: + # VPC management + - ec2:CreateVpc + - ec2:DeleteVpc + - ec2:DescribeVpcs + - ec2:ModifyVpcAttribute + - ec2:DescribeVpcAttribute + # Subnet management + - ec2:CreateSubnet + - ec2:DeleteSubnet + - ec2:DescribeSubnets + - ec2:ModifySubnetAttribute + # Route table management + - ec2:CreateRouteTable + - ec2:DeleteRouteTable + - ec2:DescribeRouteTables + - ec2:AssociateRouteTable + - ec2:DisassociateRouteTable + - ec2:CreateRoute + - ec2:DeleteRoute + - ec2:ReplaceRoute + # Security group management + - ec2:CreateSecurityGroup + - ec2:DeleteSecurityGroup + - ec2:DescribeSecurityGroups + - ec2:DescribeSecurityGroupRules + - ec2:AuthorizeSecurityGroupIngress + - ec2:AuthorizeSecurityGroupEgress + - ec2:RevokeSecurityGroupIngress + - ec2:RevokeSecurityGroupEgress + - ec2:UpdateSecurityGroupRuleDescriptionsIngress + - ec2:UpdateSecurityGroupRuleDescriptionsEgress + # VPC Endpoint management + - ec2:CreateVpcEndpoint + - ec2:DeleteVpcEndpoints + 
- ec2:DescribeVpcEndpoints + - ec2:ModifyVpcEndpoint + - ec2:DescribeVpcEndpointServices + - ec2:DescribePrefixLists + # VPC Flow Logs + - ec2:CreateFlowLogs + - ec2:DeleteFlowLogs + - ec2:DescribeFlowLogs + # Tagging + - ec2:CreateTags + - ec2:DeleteTags + # General describe operations needed by CDK + - ec2:DescribeAvailabilityZones + - ec2:DescribeNetworkInterfaces + Resource: "*" + # Explicitly deny EC2 instance operations + - Sid: DenyEc2InstanceOperations + Effect: Deny + Action: + - ec2:RunInstances + - ec2:StartInstances + - ec2:StopInstances + - ec2:TerminateInstances + - ec2:RebootInstances + - ec2:CreateImage + - ec2:RegisterImage + - ec2:ImportInstance + - ec2:ImportImage + - ec2:RequestSpotInstances + - ec2:RequestSpotFleet + - ec2:ModifyInstanceAttribute + - ec2:ModifySpotFleetRequest + - ec2:CreateLaunchTemplate + - ec2:CreateLaunchTemplateVersion + - ec2:ModifyLaunchTemplate + Resource: "*" - Sid: DenyDangerousActions Effect: Deny Action: diff --git a/backend/compact-connect/resources/bootstrap-stack-prod.yaml b/backend/compact-connect/resources/bootstrap-stack-prod.yaml index a18b8d50d..abb7a4331 100644 --- a/backend/compact-connect/resources/bootstrap-stack-prod.yaml +++ b/backend/compact-connect/resources/bootstrap-stack-prod.yaml @@ -107,6 +107,15 @@ Conditions: - Ref: PublicAccessBlockConfiguration Resources: + # Service-linked role for Amazon OpenSearch Service to access VPC resources + # This role allows OpenSearch to create and manage ENIs in VPCs + # NOTE: If this role already exists in the account, remove this resource from the template + OpenSearchServiceLinkedRole: + Type: AWS::IAM::ServiceLinkedRole + Properties: + AWSServiceName: opensearchservice.amazonaws.com + Description: Service-linked role for Amazon OpenSearch Service VPC access + FileAssetsBucketEncryptionKey: Type: AWS::KMS::Key Properties: @@ -614,6 +623,10 @@ Resources: - kms:* # AWS Lambda - lambda:* + # Amazon OpenSearch Service + - es:* + # Amazon EventBridge Pipes + - pipes:* # Amazon Route 53 - route53:* # Amazon S3 @@ -637,6 +650,80 @@ Resources: - sts:GetCallerIdentity - sts:TagSession Resource: "*" + # VPC Resources - Restricted EC2 permissions for VPC networking only + - Sid: AllowVpcNetworkingResources + Effect: Allow + Action: + # VPC management + - ec2:CreateVpc + - ec2:DeleteVpc + - ec2:DescribeVpcs + - ec2:ModifyVpcAttribute + - ec2:DescribeVpcAttribute + # Subnet management + - ec2:CreateSubnet + - ec2:DeleteSubnet + - ec2:DescribeSubnets + - ec2:ModifySubnetAttribute + # Route table management + - ec2:CreateRouteTable + - ec2:DeleteRouteTable + - ec2:DescribeRouteTables + - ec2:AssociateRouteTable + - ec2:DisassociateRouteTable + - ec2:CreateRoute + - ec2:DeleteRoute + - ec2:ReplaceRoute + # Security group management + - ec2:CreateSecurityGroup + - ec2:DeleteSecurityGroup + - ec2:DescribeSecurityGroups + - ec2:DescribeSecurityGroupRules + - ec2:AuthorizeSecurityGroupIngress + - ec2:AuthorizeSecurityGroupEgress + - ec2:RevokeSecurityGroupIngress + - ec2:RevokeSecurityGroupEgress + - ec2:UpdateSecurityGroupRuleDescriptionsIngress + - ec2:UpdateSecurityGroupRuleDescriptionsEgress + # VPC Endpoint management + - ec2:CreateVpcEndpoint + - ec2:DeleteVpcEndpoints + - ec2:DescribeVpcEndpoints + - ec2:ModifyVpcEndpoint + - ec2:DescribeVpcEndpointServices + - ec2:DescribePrefixLists + # VPC Flow Logs + - ec2:CreateFlowLogs + - ec2:DeleteFlowLogs + - ec2:DescribeFlowLogs + # Tagging + - ec2:CreateTags + - ec2:DeleteTags + # General describe operations needed by CDK + - 
ec2:DescribeAvailabilityZones + - ec2:DescribeNetworkInterfaces + Resource: "*" + # Explicitly deny EC2 instance operations + - Sid: DenyEc2InstanceOperations + Effect: Deny + Action: + - ec2:RunInstances + - ec2:StartInstances + - ec2:StopInstances + - ec2:TerminateInstances + - ec2:RebootInstances + - ec2:CreateImage + - ec2:RegisterImage + - ec2:ImportInstance + - ec2:ImportImage + - ec2:RequestSpotInstances + - ec2:RequestSpotFleet + - ec2:ModifyInstanceAttribute + - ec2:ModifySpotFleetRequest + - ec2:CreateLaunchTemplate + - ec2:CreateLaunchTemplateVersion + - ec2:ModifyLaunchTemplate + Resource: "*" - Sid: DenyDangerousActions Effect: Deny Action: diff --git a/backend/compact-connect/resources/bootstrap-stack-test.yaml b/backend/compact-connect/resources/bootstrap-stack-test.yaml index f7aa1b5f8..8f23c9dce 100644 --- a/backend/compact-connect/resources/bootstrap-stack-test.yaml +++ b/backend/compact-connect/resources/bootstrap-stack-test.yaml @@ -107,6 +107,15 @@ Conditions: - Ref: PublicAccessBlockConfiguration Resources: + # Service-linked role for Amazon OpenSearch Service to access VPC resources + # This role allows OpenSearch to create and manage ENIs in VPCs + # NOTE: If this role already exists in the account, remove this resource from the template + OpenSearchServiceLinkedRole: + Type: AWS::IAM::ServiceLinkedRole + Properties: + AWSServiceName: opensearchservice.amazonaws.com + Description: Service-linked role for Amazon OpenSearch Service VPC access + FileAssetsBucketEncryptionKey: Type: AWS::KMS::Key Properties: @@ -614,6 +623,10 @@ Resources: - kms:* # AWS Lambda - lambda:* + # Amazon OpenSearch Service + - es:* + # Amazon EventBridge Pipes + - pipes:* # Amazon Route 53 - route53:* # Amazon S3 @@ -637,6 +650,80 @@ Resources: - sts:GetCallerIdentity - sts:TagSession Resource: "*" + # VPC Resources - Restricted EC2 permissions for VPC networking only + - Sid: AllowVpcNetworkingResources + Effect: Allow + Action: + # VPC management + - ec2:CreateVpc + - ec2:DeleteVpc + - ec2:DescribeVpcs + - ec2:ModifyVpcAttribute + - ec2:DescribeVpcAttribute + # Subnet management + - ec2:CreateSubnet + - ec2:DeleteSubnet + - ec2:DescribeSubnets + - ec2:ModifySubnetAttribute + # Route table management + - ec2:CreateRouteTable + - ec2:DeleteRouteTable + - ec2:DescribeRouteTables + - ec2:AssociateRouteTable + - ec2:DisassociateRouteTable + - ec2:CreateRoute + - ec2:DeleteRoute + - ec2:ReplaceRoute + # Security group management + - ec2:CreateSecurityGroup + - ec2:DeleteSecurityGroup + - ec2:DescribeSecurityGroups + - ec2:DescribeSecurityGroupRules + - ec2:AuthorizeSecurityGroupIngress + - ec2:AuthorizeSecurityGroupEgress + - ec2:RevokeSecurityGroupIngress + - ec2:RevokeSecurityGroupEgress + - ec2:UpdateSecurityGroupRuleDescriptionsIngress + - ec2:UpdateSecurityGroupRuleDescriptionsEgress + # VPC Endpoint management + - ec2:CreateVpcEndpoint + - ec2:DeleteVpcEndpoints + - ec2:DescribeVpcEndpoints + - ec2:ModifyVpcEndpoint + - ec2:DescribeVpcEndpointServices + - ec2:DescribePrefixLists + # VPC Flow Logs + - ec2:CreateFlowLogs + - ec2:DeleteFlowLogs + - ec2:DescribeFlowLogs + # Tagging + - ec2:CreateTags + - ec2:DeleteTags + # General describe operations needed by CDK + - ec2:DescribeAvailabilityZones + - ec2:DescribeNetworkInterfaces + Resource: "*" + # Explicitly deny EC2 instance operations + - Sid: DenyEc2InstanceOperations + Effect: Deny + Action: + - ec2:RunInstances + - ec2:StartInstances + - ec2:StopInstances + - ec2:TerminateInstances + - ec2:RebootInstances + - ec2:CreateImage + - 
ec2:RegisterImage + - ec2:ImportInstance + - ec2:ImportImage + - ec2:RequestSpotInstances + - ec2:RequestSpotFleet + - ec2:ModifyInstanceAttribute + - ec2:ModifySpotFleetRequest + - ec2:CreateLaunchTemplate + - ec2:CreateLaunchTemplateVersion + - ec2:ModifyLaunchTemplate + Resource: "*" - Sid: DenyDangerousActions Effect: Deny Action: diff --git a/backend/compact-connect/stacks/persistent_stack/provider_table.py b/backend/compact-connect/stacks/persistent_stack/provider_table.py index 9ca0f49e4..2b59c458d 100644 --- a/backend/compact-connect/stacks/persistent_stack/provider_table.py +++ b/backend/compact-connect/stacks/persistent_stack/provider_table.py @@ -6,6 +6,7 @@ BillingMode, PointInTimeRecoverySpecification, ProjectionType, + StreamViewType, Table, TableEncryption, ) @@ -42,6 +43,7 @@ def __init__( deletion_protection=True if removal_policy == RemovalPolicy.RETAIN else False, partition_key=Attribute(name='pk', type=AttributeType.STRING), sort_key=Attribute(name='sk', type=AttributeType.STRING), + stream=StreamViewType.NEW_AND_OLD_IMAGES, **kwargs, ) self.provider_fam_giv_mid_index_name = 'providerFamGivMid' diff --git a/backend/compact-connect/stacks/search_api_stack/__init__.py b/backend/compact-connect/stacks/search_api_stack/__init__.py new file mode 100644 index 000000000..b3f84e9b5 --- /dev/null +++ b/backend/compact-connect/stacks/search_api_stack/__init__.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from common_constructs.security_profile import SecurityProfile +from common_constructs.stack import AppStack +from constructs import Construct + +from stacks import persistent_stack, search_persistent_stack + +from .api import SearchApi + + +class SearchApiStack(AppStack): + def __init__( + self, + scope: Construct, + construct_id: str, + *, + environment_name: str, + environment_context: dict, + persistent_stack: persistent_stack.PersistentStack, + search_persistent_stack: search_persistent_stack.SearchPersistentStack, + **kwargs, + ): + super().__init__( + scope, construct_id, environment_context=environment_context, environment_name=environment_name, **kwargs + ) + + security_profile = SecurityProfile[environment_context.get('security_profile', 'RECOMMENDED')] + + self.api = SearchApi( + self, + 'SearchApi', + environment_name=environment_name, + security_profile=security_profile, + persistent_stack=persistent_stack, + search_persistent_stack=search_persistent_stack, + domain_name=self.search_api_domain_name, + ) diff --git a/backend/compact-connect/stacks/search_api_stack/api.py b/backend/compact-connect/stacks/search_api_stack/api.py new file mode 100644 index 000000000..bb67e6def --- /dev/null +++ b/backend/compact-connect/stacks/search_api_stack/api.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from functools import cached_property + +from constructs import Construct + +from common_constructs.cc_api import CCApi +from stacks import persistent_stack, search_persistent_stack + + +class SearchApi(CCApi): + def __init__( + self, + scope: Construct, + construct_id: str, + *, + persistent_stack: persistent_stack.PersistentStack, + search_persistent_stack: search_persistent_stack.SearchPersistentStack, + **kwargs, + ): + super().__init__( + scope, + construct_id, + persistent_stack=persistent_stack, + **kwargs, + ) + from stacks.search_api_stack.v1_api import V1Api + + self.v1_api = V1Api( + self.root, persistent_stack=persistent_stack, search_persistent_stack=search_persistent_stack + ) + + @cached_property + def staff_users_authorizer(self): + from 
aws_cdk.aws_apigateway import CognitoUserPoolsAuthorizer + + return CognitoUserPoolsAuthorizer( + self, 'StaffUsersPoolAuthorizer', cognito_user_pools=[self._persistent_stack.staff_users] + ) diff --git a/backend/compact-connect/stacks/search_api_stack/v1_api/__init__.py b/backend/compact-connect/stacks/search_api_stack/v1_api/__init__.py new file mode 100644 index 000000000..e14e23d9e --- /dev/null +++ b/backend/compact-connect/stacks/search_api_stack/v1_api/__init__.py @@ -0,0 +1,4 @@ +# ruff: noqa: F401 +# We place this import here so it can be referenced by other +# CDK resources +from .api import V1Api diff --git a/backend/compact-connect/stacks/search_api_stack/v1_api/api.py b/backend/compact-connect/stacks/search_api_stack/v1_api/api.py new file mode 100644 index 000000000..e4be20e89 --- /dev/null +++ b/backend/compact-connect/stacks/search_api_stack/v1_api/api.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +from aws_cdk.aws_apigateway import AuthorizationType, IResource, MethodOptions + +from stacks import persistent_stack, search_persistent_stack +from stacks.search_api_stack.v1_api.privilege_search import PrivilegeSearch +from stacks.search_api_stack.v1_api.provider_search import ProviderSearch + +from .api_model import ApiModel + + +class V1Api: + """v1 of the Search API""" + + def __init__( + self, + root: IResource, + persistent_stack: persistent_stack.PersistentStack, + search_persistent_stack: search_persistent_stack.SearchPersistentStack, + ): + super().__init__() + from stacks.search_api_stack.api import SearchApi + + self.root = root + self.resource = root.add_resource('v1') + self.api: SearchApi = root.api + self.api_model = ApiModel(api=self.api) + _active_compacts = persistent_stack.get_list_of_compact_abbreviations() + + read_scopes = [] + # set the compact level scopes + for compact in _active_compacts: + # We only set the readGeneral permission scope at the compact level, since users with any permissions + # within a compact are implicitly granted this scope + read_scopes.append(f'{compact}/readGeneral') + + read_auth_method_options = MethodOptions( + authorization_type=AuthorizationType.COGNITO, + authorizer=self.api.staff_users_authorizer, + authorization_scopes=read_scopes, + ) + + # /v1/compacts + self.compacts_resource = self.resource.add_resource('compacts') + # /v1/compacts/{compact} + self.compact_resource = self.compacts_resource.add_resource('{compact}') + + # POST /v1/compacts/{compact}/providers + providers_resource = self.compact_resource.add_resource('providers') + self.provider_search = ProviderSearch( + resource=providers_resource, + method_options=read_auth_method_options, + search_persistent_stack=search_persistent_stack, + api_model=self.api_model, + ) + + # POST /v1/compacts/{compact}/privileges + privileges_resource = self.compact_resource.add_resource('privileges') + self.privilege_search = PrivilegeSearch( + resource=privileges_resource, + method_options=read_auth_method_options, + search_persistent_stack=search_persistent_stack, + api_model=self.api_model, + ) diff --git a/backend/compact-connect/stacks/search_api_stack/v1_api/api_model.py b/backend/compact-connect/stacks/search_api_stack/v1_api/api_model.py new file mode 100644 index 000000000..4f764c94d --- /dev/null +++ b/backend/compact-connect/stacks/search_api_stack/v1_api/api_model.py @@ -0,0 +1,531 @@ +# ruff: noqa: SLF001 +# This class initializes the api models for the root api, which we then want to set as protected +# so other classes won't modify it. 
This is a valid use case for protected access to work with cdk. +from __future__ import annotations + +from aws_cdk.aws_apigateway import JsonSchema, JsonSchemaType, Model +from common_constructs.stack import AppStack + +# Importing module level to allow lazy loading for typing +from common_constructs import cc_api + + +class ApiModel: + """This class is responsible for defining the model definitions used in the Search API endpoints.""" + + def __init__(self, api: cc_api.CCApi): + self.stack: AppStack = AppStack.of(api) + self.api = api + + @property + def _common_search_request_schema(self) -> JsonSchema: + """ + Return the common search request schema used by both provider and privilege search endpoints. + + This schema closely mirrors OpenSearch DSL for pagination using search_after. + See: https://docs.opensearch.org/latest/search-plugins/searching-data/paginate/ + """ + return JsonSchema( + type=JsonSchemaType.OBJECT, + additional_properties=False, + required=['query'], + properties={ + 'query': JsonSchema( + type=JsonSchemaType.OBJECT, + description='The OpenSearch query body', + ), + 'from': JsonSchema( + type=JsonSchemaType.INTEGER, + minimum=0, + description='Starting document offset for pagination', + ), + 'size': JsonSchema( + type=JsonSchemaType.INTEGER, + minimum=1, + # setting low limit for now, as this search endpoint is only used by the UI client, + # and we don't anticipate needing to support more than 100 records per request + maximum=100, + description='Number of results to return', + ), + 'sort': JsonSchema( + type=JsonSchemaType.ARRAY, + description='Sort order for results (required for search_after pagination)', + items=JsonSchema(type=JsonSchemaType.OBJECT), + ), + 'search_after': JsonSchema( + type=JsonSchemaType.ARRAY, + description='Sort values from the last hit of the previous page for cursor-based pagination', + ), + }, + ) + + @property + def search_providers_request_model(self) -> Model: + """ + Return the search providers request model, which should only be created once per API. + """ + if hasattr(self.api, '_v1_search_providers_request_model'): + return self.api._v1_search_providers_request_model + self.api._v1_search_providers_request_model = self.api.add_model( + 'V1SearchProvidersRequestModel', + description='Search providers request model following OpenSearch DSL', + schema=self._common_search_request_schema, + ) + return self.api._v1_search_providers_request_model + + @property + def _export_privileges_request_schema(self) -> JsonSchema: + """ + Return the export privileges request schema. + + This schema is similar to the search request schema but without pagination parameters. + The export endpoint does not support pagination - it returns all results as a CSV file. + """ + return JsonSchema( + type=JsonSchemaType.OBJECT, + additional_properties=False, + required=['query'], + properties={ + 'query': JsonSchema( + type=JsonSchemaType.OBJECT, + description='The OpenSearch query body', + ), + }, + ) + + @property + def search_privileges_request_model(self) -> Model: + """ + Return the export privileges request model, which should only be created once per API. + + This model is used for the privilege export endpoint and does not include + pagination parameters (size, from, search_after). 
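For reference, an illustrative client call against the search endpoint these models describe (the host, resource path, and token are assumptions, not part of this change; the body fields `query`, `size`, `sort`, `search_after` and the response fields `providers`, `total`, `lastSort` follow the request and response models defined in this module):

```python
# Illustrative only: the host, path, and token are assumptions.
# Body and response field names mirror the API models in api_model.py.
import requests

base_url = 'https://search.example.org/v1/compacts/aslp'  # hypothetical deployment host
headers = {'Authorization': 'Bearer <staff-user-access-token>'}  # must carry the aslp/readGeneral scope

body = {
    'query': {'match': {'familyName': 'smith'}},  # OpenSearch query DSL, passed through as-is
    'size': 50,
    'sort': [{'dateOfUpdate': 'desc'}],  # a sort is needed for search_after pagination
}

page = requests.post(f'{base_url}/providers/search', json=body, headers=headers, timeout=30).json()
print(page['total'], len(page['providers']))

# To fetch the next page, feed lastSort back in as search_after
if page.get('lastSort'):
    body['search_after'] = page['lastSort']
    next_page = requests.post(f'{base_url}/providers/search', json=body, headers=headers, timeout=30).json()
```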
+ """ + if hasattr(self.api, '_v1_search_privileges_request_model'): + return self.api._v1_search_privileges_request_model + self.api._v1_search_privileges_request_model = self.api.add_model( + 'V1ExportPrivilegesRequestModel', + description='Export privileges request model - query only, no pagination', + schema=self._export_privileges_request_schema, + ) + return self.api._v1_search_privileges_request_model + + @property + def _search_response_total_schema(self) -> JsonSchema: + """Return the common total hits schema used by search response models""" + return JsonSchema( + type=JsonSchemaType.OBJECT, + description='Total hits information from OpenSearch', + properties={ + 'value': JsonSchema(type=JsonSchemaType.INTEGER), + 'relation': JsonSchema(type=JsonSchemaType.STRING, enum=['eq', 'gte']), + }, + ) + + @property + def search_providers_response_model(self) -> Model: + """Return the search providers response model, which should only be created once per API""" + if hasattr(self.api, '_v1_search_providers_response_model'): + return self.api._v1_search_providers_response_model + self.api._v1_search_providers_response_model = self.api.add_model( + 'V1SearchProvidersResponseModel', + description='Search providers response model', + schema=JsonSchema( + type=JsonSchemaType.OBJECT, + required=['providers', 'total'], + properties={ + 'providers': JsonSchema( + type=JsonSchemaType.ARRAY, + items=self._providers_response_schema, + ), + 'total': self._search_response_total_schema, + 'lastSort': JsonSchema( + type=JsonSchemaType.ARRAY, + description='Sort values from the last hit to use with search_after for the next page', + ), + }, + ), + ) + return self.api._v1_search_providers_response_model + + @property + def search_privileges_response_model(self) -> Model: + """Return the export privileges response model, which should only be created once per API""" + if hasattr(self.api, '_v1_search_privileges_response_model'): + return self.api._v1_search_privileges_response_model + self.api._v1_search_privileges_response_model = self.api.add_model( + 'V1ExportPrivilegesResponseModel', + description='Export privileges response model with presigned URL to CSV file', + schema=JsonSchema( + type=JsonSchemaType.OBJECT, + required=['fileUrl'], + properties={ + 'fileUrl': JsonSchema( + type=JsonSchemaType.STRING, + description='Presigned URL to download the CSV file containing the export results', + ), + }, + ), + ) + return self.api._v1_search_privileges_response_model + + @property + def _providers_response_schema(self): + stack: AppStack = AppStack.of(self.api) + + return JsonSchema( + type=JsonSchemaType.OBJECT, + required=[ + 'type', + 'providerId', + 'givenName', + 'familyName', + 'licenseStatus', + 'compactEligibility', + 'jurisdictionUploadedLicenseStatus', + 'jurisdictionUploadedCompactEligibility', + 'compact', + 'licenseJurisdiction', + 'privilegeJurisdictions', + 'dateOfUpdate', + 'dateOfExpiration', + 'birthMonthDay', + ], + properties={ + 'type': JsonSchema(type=JsonSchemaType.STRING, enum=['provider']), + 'providerId': JsonSchema( + type=JsonSchemaType.STRING, + pattern=cc_api.UUID4_FORMAT, + ), + 'givenName': JsonSchema( + type=JsonSchemaType.STRING, + max_length=100, + ), + 'middleName': JsonSchema( + type=JsonSchemaType.STRING, + max_length=100, + ), + 'familyName': JsonSchema( + type=JsonSchemaType.STRING, + max_length=100, + ), + 'suffix': JsonSchema( + type=JsonSchemaType.STRING, + max_length=100, + ), + 'npi': JsonSchema( + type=JsonSchemaType.STRING, + pattern='^[0-9]{10}$', + ), + 
'licenseStatus': JsonSchema( + type=JsonSchemaType.STRING, + enum=['active', 'inactive'], + ), + 'compactEligibility': JsonSchema( + type=JsonSchemaType.STRING, + enum=['eligible', 'ineligible'], + ), + 'jurisdictionUploadedLicenseStatus': JsonSchema( + type=JsonSchemaType.STRING, + enum=['active', 'inactive'], + ), + 'jurisdictionUploadedCompactEligibility': JsonSchema( + type=JsonSchemaType.STRING, + enum=['eligible', 'ineligible'], + ), + 'compact': JsonSchema( + type=JsonSchemaType.STRING, + enum=stack.node.get_context('compacts'), + ), + 'licenseJurisdiction': JsonSchema( + type=JsonSchemaType.STRING, + enum=stack.node.get_context('jurisdictions'), + ), + 'currentHomeJurisdiction': JsonSchema( + type=JsonSchemaType.STRING, + enum=stack.node.get_context('jurisdictions'), + ), + 'privilegeJurisdictions': JsonSchema( + type=JsonSchemaType.ARRAY, + items=JsonSchema( + type=JsonSchemaType.STRING, + enum=stack.node.get_context('jurisdictions'), + ), + ), + 'dateOfUpdate': JsonSchema( + type=JsonSchemaType.STRING, + format='date-time', + ), + 'dateOfExpiration': JsonSchema( + type=JsonSchemaType.STRING, + format='date', + ), + 'birthMonthDay': JsonSchema( + type=JsonSchemaType.STRING, + pattern='^[0-1]{1}[0-9]{1}-[0-3]{1}[0-9]{1}', + ), + 'compactConnectRegisteredEmailAddress': JsonSchema( + type=JsonSchemaType.STRING, + format='email', + ), + 'licenses': JsonSchema( + type=JsonSchemaType.ARRAY, + items=self._license_general_response_schema, + ), + 'privileges': JsonSchema( + type=JsonSchemaType.ARRAY, + items=self._privilege_general_response_schema, + ), + 'militaryAffiliations': JsonSchema( + type=JsonSchemaType.ARRAY, + items=self._military_affiliation_general_response_schema, + ), + }, + ) + + @property + def _license_general_response_schema(self): + """ + Schema for LicenseGeneralResponseSchema - license fields visible to staff users + with 'readGeneral' permission. 
+ """ + stack: AppStack = AppStack.of(self.api) + + return JsonSchema( + type=JsonSchemaType.OBJECT, + required=[ + 'providerId', + 'type', + 'dateOfUpdate', + 'compact', + 'jurisdiction', + 'licenseType', + 'licenseStatus', + 'jurisdictionUploadedLicenseStatus', + 'compactEligibility', + 'jurisdictionUploadedCompactEligibility', + 'givenName', + 'familyName', + 'dateOfIssuance', + 'dateOfExpiration', + 'homeAddressStreet1', + 'homeAddressCity', + 'homeAddressState', + 'homeAddressPostalCode', + ], + properties={ + 'providerId': JsonSchema(type=JsonSchemaType.STRING, pattern=cc_api.UUID4_FORMAT), + 'type': JsonSchema(type=JsonSchemaType.STRING, enum=['license-home']), + 'dateOfUpdate': JsonSchema(type=JsonSchemaType.STRING, format='date-time'), + 'compact': JsonSchema(type=JsonSchemaType.STRING, enum=stack.node.get_context('compacts')), + 'jurisdiction': JsonSchema(type=JsonSchemaType.STRING, enum=stack.node.get_context('jurisdictions')), + 'licenseType': JsonSchema(type=JsonSchemaType.STRING), + 'licenseStatusName': JsonSchema(type=JsonSchemaType.STRING, max_length=100), + 'licenseStatus': JsonSchema(type=JsonSchemaType.STRING, enum=['active', 'inactive']), + 'jurisdictionUploadedLicenseStatus': JsonSchema( + type=JsonSchemaType.STRING, enum=['active', 'inactive'] + ), + 'compactEligibility': JsonSchema(type=JsonSchemaType.STRING, enum=['eligible', 'ineligible']), + 'jurisdictionUploadedCompactEligibility': JsonSchema( + type=JsonSchemaType.STRING, enum=['eligible', 'ineligible'] + ), + 'npi': JsonSchema(type=JsonSchemaType.STRING, pattern='^[0-9]{10}$'), + 'licenseNumber': JsonSchema(type=JsonSchemaType.STRING, max_length=100), + 'givenName': JsonSchema(type=JsonSchemaType.STRING, max_length=100), + 'middleName': JsonSchema(type=JsonSchemaType.STRING, max_length=100), + 'familyName': JsonSchema(type=JsonSchemaType.STRING, max_length=100), + 'suffix': JsonSchema(type=JsonSchemaType.STRING, max_length=100), + 'dateOfIssuance': JsonSchema(type=JsonSchemaType.STRING, format='date', pattern=cc_api.YMD_FORMAT), + 'dateOfRenewal': JsonSchema(type=JsonSchemaType.STRING, format='date', pattern=cc_api.YMD_FORMAT), + 'dateOfExpiration': JsonSchema(type=JsonSchemaType.STRING, format='date', pattern=cc_api.YMD_FORMAT), + 'homeAddressStreet1': JsonSchema(type=JsonSchemaType.STRING, min_length=2, max_length=100), + 'homeAddressStreet2': JsonSchema(type=JsonSchemaType.STRING, min_length=1, max_length=100), + 'homeAddressCity': JsonSchema(type=JsonSchemaType.STRING, min_length=2, max_length=100), + 'homeAddressState': JsonSchema(type=JsonSchemaType.STRING, min_length=2, max_length=100), + 'homeAddressPostalCode': JsonSchema(type=JsonSchemaType.STRING, min_length=5, max_length=7), + 'emailAddress': JsonSchema(type=JsonSchemaType.STRING, format='email'), + 'phoneNumber': JsonSchema(type=JsonSchemaType.STRING, pattern=cc_api.PHONE_NUMBER_FORMAT), + 'adverseActions': JsonSchema(type=JsonSchemaType.ARRAY, items=self._adverse_action_general_schema), + 'investigations': JsonSchema(type=JsonSchemaType.ARRAY, items=self._investigation_general_schema), + 'investigationStatus': JsonSchema(type=JsonSchemaType.STRING, enum=['underInvestigation']), + }, + ) + + @property + def _privilege_general_response_schema(self): + """ + Schema for PrivilegeGeneralResponseSchema - privilege fields visible to staff users + with 'readGeneral' permission. 
+ """ + stack: AppStack = AppStack.of(self.api) + + return JsonSchema( + type=JsonSchemaType.OBJECT, + required=[ + 'type', + 'providerId', + 'compact', + 'jurisdiction', + 'licenseJurisdiction', + 'licenseType', + 'dateOfIssuance', + 'dateOfRenewal', + 'dateOfExpiration', + 'dateOfUpdate', + 'administratorSetStatus', + 'privilegeId', + 'status', + ], + properties={ + 'type': JsonSchema(type=JsonSchemaType.STRING, enum=['privilege']), + 'providerId': JsonSchema(type=JsonSchemaType.STRING, pattern=cc_api.UUID4_FORMAT), + 'compact': JsonSchema(type=JsonSchemaType.STRING, enum=stack.node.get_context('compacts')), + 'jurisdiction': JsonSchema(type=JsonSchemaType.STRING, enum=stack.node.get_context('jurisdictions')), + 'licenseJurisdiction': JsonSchema( + type=JsonSchemaType.STRING, enum=stack.node.get_context('jurisdictions') + ), + 'licenseType': JsonSchema(type=JsonSchemaType.STRING), + 'dateOfIssuance': JsonSchema(type=JsonSchemaType.STRING, format='date', pattern=cc_api.YMD_FORMAT), + 'dateOfRenewal': JsonSchema(type=JsonSchemaType.STRING, format='date', pattern=cc_api.YMD_FORMAT), + 'dateOfExpiration': JsonSchema(type=JsonSchemaType.STRING, format='date', pattern=cc_api.YMD_FORMAT), + 'dateOfUpdate': JsonSchema(type=JsonSchemaType.STRING, format='date-time'), + 'adverseActions': JsonSchema(type=JsonSchemaType.ARRAY, items=self._adverse_action_general_schema), + 'investigations': JsonSchema(type=JsonSchemaType.ARRAY, items=self._investigation_general_schema), + 'administratorSetStatus': JsonSchema(type=JsonSchemaType.STRING, enum=['active', 'inactive']), + 'compactTransactionId': JsonSchema(type=JsonSchemaType.STRING), + 'attestations': JsonSchema( + type=JsonSchemaType.ARRAY, + items=JsonSchema( + type=JsonSchemaType.OBJECT, + required=['attestationId', 'version'], + properties={ + 'attestationId': JsonSchema(type=JsonSchemaType.STRING, max_length=100), + 'version': JsonSchema(type=JsonSchemaType.STRING, max_length=100), + }, + ), + ), + 'privilegeId': JsonSchema(type=JsonSchemaType.STRING), + 'status': JsonSchema(type=JsonSchemaType.STRING, enum=['active', 'inactive']), + 'activeSince': JsonSchema(type=JsonSchemaType.STRING, format='date', pattern=cc_api.YMD_FORMAT), + 'investigationStatus': JsonSchema(type=JsonSchemaType.STRING, enum=['underInvestigation']), + }, + ) + + @property + def _military_affiliation_general_response_schema(self): + """ + Schema for MilitaryAffiliationGeneralResponseSchema - military affiliation fields visible + to staff users with 'readGeneral' permission. 
+ """ + stack: AppStack = AppStack.of(self.api) + + return JsonSchema( + type=JsonSchemaType.OBJECT, + required=[ + 'type', + 'dateOfUpdate', + 'providerId', + 'compact', + 'fileNames', + 'affiliationType', + 'dateOfUpload', + 'status', + ], + properties={ + 'type': JsonSchema(type=JsonSchemaType.STRING, enum=['militaryAffiliation']), + 'dateOfUpdate': JsonSchema(type=JsonSchemaType.STRING, format='date-time'), + 'providerId': JsonSchema(type=JsonSchemaType.STRING, pattern=cc_api.UUID4_FORMAT), + 'compact': JsonSchema(type=JsonSchemaType.STRING, enum=stack.node.get_context('compacts')), + 'fileNames': JsonSchema( + type=JsonSchemaType.ARRAY, + items=JsonSchema(type=JsonSchemaType.STRING), + ), + 'affiliationType': JsonSchema( + type=JsonSchemaType.STRING, enum=['militaryMember', 'militaryMemberSpouse'] + ), + 'dateOfUpload': JsonSchema(type=JsonSchemaType.STRING, format='date', pattern=cc_api.YMD_FORMAT), + 'status': JsonSchema(type=JsonSchemaType.STRING, enum=['active', 'inactive']), + }, + ) + + @property + def _adverse_action_general_schema(self): + """ + Schema for AdverseActionGeneralResponseSchema - adverse action fields visible + to staff users with 'readGeneral' permission. + """ + stack: AppStack = AppStack.of(self.api) + + return JsonSchema( + type=JsonSchemaType.OBJECT, + required=[ + 'type', + 'compact', + 'providerId', + 'jurisdiction', + 'licenseTypeAbbreviation', + 'licenseType', + 'actionAgainst', + 'effectiveStartDate', + 'creationDate', + 'adverseActionId', + 'dateOfUpdate', + 'encumbranceType', + 'submittingUser', + ], + properties={ + 'type': JsonSchema(type=JsonSchemaType.STRING, enum=['adverseAction']), + 'compact': JsonSchema(type=JsonSchemaType.STRING, enum=stack.node.get_context('compacts')), + 'providerId': JsonSchema(type=JsonSchemaType.STRING, pattern=cc_api.UUID4_FORMAT), + 'jurisdiction': JsonSchema(type=JsonSchemaType.STRING, enum=stack.node.get_context('jurisdictions')), + 'licenseTypeAbbreviation': JsonSchema(type=JsonSchemaType.STRING), + 'licenseType': JsonSchema(type=JsonSchemaType.STRING), + 'actionAgainst': JsonSchema(type=JsonSchemaType.STRING, enum=['license', 'privilege']), + 'effectiveStartDate': JsonSchema(type=JsonSchemaType.STRING, format='date', pattern=cc_api.YMD_FORMAT), + 'creationDate': JsonSchema(type=JsonSchemaType.STRING, format='date', pattern=cc_api.YMD_FORMAT), + 'adverseActionId': JsonSchema(type=JsonSchemaType.STRING), + 'effectiveLiftDate': JsonSchema(type=JsonSchemaType.STRING, format='date', pattern=cc_api.YMD_FORMAT), + 'dateOfUpdate': JsonSchema(type=JsonSchemaType.STRING, format='date-time'), + 'encumbranceType': JsonSchema(type=JsonSchemaType.STRING), + 'clinicalPrivilegeActionCategories': JsonSchema( + type=JsonSchemaType.ARRAY, + items=JsonSchema(type=JsonSchemaType.STRING), + ), + 'liftingUser': JsonSchema(type=JsonSchemaType.STRING), + 'submittingUser': JsonSchema(type=JsonSchemaType.STRING), + }, + ) + + @property + def _investigation_general_schema(self): + """ + Schema for InvestigationGeneralResponseSchema - investigation fields visible + to staff users with 'readGeneral' permission. 
+ """ + stack: AppStack = AppStack.of(self.api) + + return JsonSchema( + type=JsonSchemaType.OBJECT, + required=[ + 'type', + 'compact', + 'providerId', + 'investigationId', + 'jurisdiction', + 'licenseType', + 'dateOfUpdate', + 'creationDate', + 'submittingUser', + ], + properties={ + 'type': JsonSchema(type=JsonSchemaType.STRING, enum=['investigation']), + 'compact': JsonSchema(type=JsonSchemaType.STRING, enum=stack.node.get_context('compacts')), + 'providerId': JsonSchema(type=JsonSchemaType.STRING, pattern=cc_api.UUID4_FORMAT), + 'investigationId': JsonSchema(type=JsonSchemaType.STRING), + 'jurisdiction': JsonSchema(type=JsonSchemaType.STRING, enum=stack.node.get_context('jurisdictions')), + 'licenseType': JsonSchema(type=JsonSchemaType.STRING), + 'dateOfUpdate': JsonSchema(type=JsonSchemaType.STRING, format='date-time'), + 'creationDate': JsonSchema(type=JsonSchemaType.STRING, format='date-time'), + 'submittingUser': JsonSchema(type=JsonSchemaType.STRING), + }, + ) diff --git a/backend/compact-connect/stacks/search_api_stack/v1_api/privilege_search.py b/backend/compact-connect/stacks/search_api_stack/v1_api/privilege_search.py new file mode 100644 index 000000000..d37dcc9b2 --- /dev/null +++ b/backend/compact-connect/stacks/search_api_stack/v1_api/privilege_search.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +from aws_cdk import Duration +from aws_cdk.aws_apigateway import LambdaIntegration, MethodOptions, MethodResponse, Resource + +from common_constructs.cc_api import CCApi +from stacks import search_persistent_stack + +from .api_model import ApiModel + + +class PrivilegeSearch: + """ + Endpoint related to privilege searching in the OpenSearch domain. + """ + + def __init__( + self, + *, + resource: Resource, + method_options: MethodOptions, + search_persistent_stack: search_persistent_stack.SearchPersistentStack, + api_model: ApiModel, + ): + super().__init__() + + self.resource = resource + self.api: CCApi = resource.api + self.api_model = api_model + + self._add_export_privileges( + method_options=method_options, + search_persistent_stack=search_persistent_stack, + ) + + def _add_export_privileges( + self, + method_options: MethodOptions, + search_persistent_stack: search_persistent_stack.SearchPersistentStack, + ): + export_resource = self.resource.add_resource('export') + + # Get the search handler from the search persistent stack (same handler as provider search) + handler = search_persistent_stack.search_handler.handler + + self.privilege_search_export_endpoint = export_resource.add_method( + 'POST', + request_validator=self.api.parameter_body_validator, + request_models={'application/json': self.api_model.search_privileges_request_model}, + method_responses=[ + MethodResponse( + status_code='200', + response_models={'application/json': self.api_model.search_privileges_response_model}, + ), + ], + integration=LambdaIntegration(handler, timeout=Duration.seconds(29)), + request_parameters={'method.request.header.Authorization': True}, + authorization_type=method_options.authorization_type, + authorizer=method_options.authorizer, + authorization_scopes=method_options.authorization_scopes, + ) diff --git a/backend/compact-connect/stacks/search_api_stack/v1_api/provider_search.py b/backend/compact-connect/stacks/search_api_stack/v1_api/provider_search.py new file mode 100644 index 000000000..1cdf36ce9 --- /dev/null +++ b/backend/compact-connect/stacks/search_api_stack/v1_api/provider_search.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +from aws_cdk import 
Duration +from aws_cdk.aws_apigateway import LambdaIntegration, MethodOptions, MethodResponse, Resource + +from common_constructs.cc_api import CCApi +from stacks import search_persistent_stack + +from .api_model import ApiModel + + +class ProviderSearch: + """ + Endpoint related to provider searching in the OpenSearch domain. + """ + + def __init__( + self, + *, + resource: Resource, + method_options: MethodOptions, + search_persistent_stack: search_persistent_stack.SearchPersistentStack, + api_model: ApiModel, + ): + super().__init__() + + self.resource = resource + self.api: CCApi = resource.api + self.api_model = api_model + + # Create the nested resources used by endpoints + self.provider_resource = self.resource.add_resource('{providerId}') + + self._add_search_providers( + method_options=method_options, + search_persistent_stack=search_persistent_stack, + ) + + def _add_search_providers( + self, + method_options: MethodOptions, + search_persistent_stack: search_persistent_stack.SearchPersistentStack, + ): + search_resource = self.resource.add_resource('search') + + # Get the search providers handler from the search persistent stack + handler = search_persistent_stack.search_handler.handler + + self.provider_search_endpoint = search_resource.add_method( + 'POST', + request_validator=self.api.parameter_body_validator, + request_models={'application/json': self.api_model.search_providers_request_model}, + method_responses=[ + MethodResponse( + status_code='200', + response_models={'application/json': self.api_model.search_providers_response_model}, + ), + ], + integration=LambdaIntegration(handler, timeout=Duration.seconds(29)), + request_parameters={'method.request.header.Authorization': True}, + authorization_type=method_options.authorization_type, + authorizer=method_options.authorizer, + authorization_scopes=method_options.authorization_scopes, + ) diff --git a/backend/compact-connect/stacks/search_persistent_stack/__init__.py b/backend/compact-connect/stacks/search_persistent_stack/__init__.py new file mode 100644 index 000000000..5c5f9a8a3 --- /dev/null +++ b/backend/compact-connect/stacks/search_persistent_stack/__init__.py @@ -0,0 +1,181 @@ +from aws_cdk.aws_iam import Role, ServicePrincipal +from aws_cdk.aws_logs import QueryDefinition, QueryString +from common_constructs.stack import AppStack +from constructs import Construct + +from stacks.persistent_stack import PersistentStack +from stacks.search_persistent_stack.export_results_bucket import ExportResultsBucket +from stacks.search_persistent_stack.index_manager import IndexManagerCustomResource +from stacks.search_persistent_stack.populate_provider_documents_handler import PopulateProviderDocumentsHandler +from stacks.search_persistent_stack.provider_search_domain import ProviderSearchDomain +from stacks.search_persistent_stack.provider_update_ingest_handler import ProviderUpdateIngestHandler +from stacks.search_persistent_stack.provider_update_ingest_pipe import ProviderUpdateIngestPipe +from stacks.search_persistent_stack.search_handler import SearchHandler +from stacks.vpc_stack import VpcStack + + +class SearchPersistentStack(AppStack): + """ + Stack for OpenSearch Domain and related search infrastructure. 
+ + This stack provides the search capabilities for the advanced provider search feature: + - OpenSearch Domain deployed in VPC for network isolation + - KMS encryption for data at rest + - Node-to-node encryption and HTTPS enforcement + - Environment-specific instance sizing and cluster configuration + """ + + def __init__( + self, + scope: Construct, + construct_id: str, + *, + environment_name: str, + environment_context: dict, + vpc_stack: VpcStack, + persistent_stack: PersistentStack, + **kwargs, + ): + super().__init__( + scope, construct_id, environment_context=environment_context, environment_name=environment_name, **kwargs + ) + + # Create IAM roles for Lambda functions that need OpenSearch access + self.opensearch_ingest_lambda_role = Role( + self, + 'OpenSearchIngestLambdaRole', + assumed_by=ServicePrincipal('lambda.amazonaws.com'), + description='IAM role for Ingest Lambda function that needs write access to OpenSearch Domain', + ) + + self.opensearch_index_manager_lambda_role = Role( + self, + 'OpenSearchIndexManagerLambdaRole', + assumed_by=ServicePrincipal('lambda.amazonaws.com'), + description='IAM role for index manager Lambda function that needs read/write access to OpenSearch Domain', + ) + + # Create IAM role for Lambda functions that access OpenSearch through API + # this role only needs read access + self.search_api_lambda_role = Role( + self, + 'SearchApiLambdaRole', + assumed_by=ServicePrincipal('lambda.amazonaws.com'), + description='IAM role for Search API Lambda functions that need read access to OpenSearch Domain', + ) + + # Create the OpenSearch domain and associated resources + self.provider_search_domain = ProviderSearchDomain( + self, + 'ProviderSearchDomain', + environment_name=environment_name, + region=self.region, + vpc_stack=vpc_stack, + compact_abbreviations=persistent_stack.get_list_of_compact_abbreviations(), + alarm_topic=persistent_stack.alarm_topic, + ingest_lambda_role=self.opensearch_ingest_lambda_role, + index_manager_lambda_role=self.opensearch_index_manager_lambda_role, + search_api_lambda_role=self.search_api_lambda_role, + ) + + # Expose domain and encryption key for use by other constructs + self.domain = self.provider_search_domain.domain + self.opensearch_encryption_key = self.provider_search_domain.encryption_key + + # Create the export results bucket for temporary CSV files + self.export_results_bucket = ExportResultsBucket( + self, + 'ExportResultsBucket', + access_logs_bucket=persistent_stack.access_logs_bucket, + encryption_key=persistent_stack.shared_encryption_key, + ) + + # Create the index manager custom resource + self.index_manager_custom_resource = IndexManagerCustomResource( + self, + construct_id='indexManager', + opensearch_domain=self.provider_search_domain.domain, + vpc_stack=vpc_stack, + vpc_subnets=self.provider_search_domain.vpc_subnets, + lambda_role=self.opensearch_index_manager_lambda_role, + environment_name=environment_name, + ) + + # Create the search providers handler for API Gateway integration + self.search_handler = SearchHandler( + self, + construct_id='searchHandler', + opensearch_domain=self.provider_search_domain.domain, + vpc_stack=vpc_stack, + vpc_subnets=self.provider_search_domain.vpc_subnets, + lambda_role=self.search_api_lambda_role, + alarm_topic=persistent_stack.alarm_topic, + export_results_bucket=self.export_results_bucket, + ) + + # Create the populate provider documents handler for manual invocation + # This handler is used to bulk index provider documents from DynamoDB into OpenSearch + 
self.populate_provider_documents_handler = PopulateProviderDocumentsHandler( + self, + construct_id='populateProviderDocumentsHandler', + opensearch_domain=self.domain, + vpc_stack=vpc_stack, + vpc_subnets=self.provider_search_domain.vpc_subnets, + lambda_role=self.opensearch_ingest_lambda_role, + provider_table=persistent_stack.provider_table, + alarm_topic=persistent_stack.alarm_topic, + ) + + # Create the provider update ingest handler for SQS-based stream processing + # This handler processes real-time updates from the provider table stream via EventBridge Pipe -> SQS + self.provider_update_ingest_handler = ProviderUpdateIngestHandler( + self, + construct_id='providerUpdateIngestHandler', + opensearch_domain=self.domain, + vpc_stack=vpc_stack, + vpc_subnets=self.provider_search_domain.vpc_subnets, + lambda_role=self.opensearch_ingest_lambda_role, + provider_table=persistent_stack.provider_table, + encryption_key=self.opensearch_encryption_key, + alarm_topic=persistent_stack.alarm_topic, + ) + # don't deploy ingest resources until index manager has set proper index configuration + self.provider_update_ingest_handler.node.add_dependency(self.index_manager_custom_resource) + + # Create the EventBridge Pipe to connect DynamoDB stream to SQS queue + # This pipe reads from the provider table stream and sends events to the ingest handler's queue + self.provider_update_ingest_pipe = ProviderUpdateIngestPipe( + self, + construct_id='providerUpdateIngestPipe', + provider_table=persistent_stack.provider_table, + target_queue=self.provider_update_ingest_handler.queue, + encryption_key=self.opensearch_encryption_key, + ) + # don't deploy ingest resources until index manager has set proper index configuration + self.provider_update_ingest_pipe.node.add_dependency(self.index_manager_custom_resource) + + # add log insights for provider ingest + QueryDefinition( + self, + 'IngestQuery', + query_definition_name=f'{self.node.id}/ProviderUpdateIngest', + query_string=QueryString( + fields=['@timestamp', '@log', 'level', 'message', 'compact', 'provider_id', '@message'], + filter_statements=['level in ["INFO", "WARNING", "ERROR"]'], + sort='@timestamp asc', + ), + log_groups=[self.provider_update_ingest_handler.handler.log_group], + ) + + # add log insights for search requests + QueryDefinition( + self, + 'SearchLambdaQuery', + query_definition_name=f'{self.node.id}/SearchAPILambda', + query_string=QueryString( + fields=['@timestamp', '@log', 'level', 'message', 'compact', '@message'], + filter_statements=['level in ["INFO", "WARNING", "ERROR"]'], + sort='@timestamp asc', + ), + log_groups=[self.search_handler.handler.log_group], + ) diff --git a/backend/compact-connect/stacks/search_persistent_stack/export_results_bucket.py b/backend/compact-connect/stacks/search_persistent_stack/export_results_bucket.py new file mode 100644 index 000000000..2f143a873 --- /dev/null +++ b/backend/compact-connect/stacks/search_persistent_stack/export_results_bucket.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +from aws_cdk import Duration +from aws_cdk.aws_kms import IKey +from aws_cdk.aws_s3 import BucketEncryption, CorsRule, HttpMethods, LifecycleRule +from cdk_nag import NagSuppressions +from common_constructs.access_logs_bucket import AccessLogsBucket +from common_constructs.bucket import Bucket +from constructs import Construct + + +class ExportResultsBucket(Bucket): + """ + S3 bucket to store temporary CSV export result files. 
+ + Files stored in this bucket are automatically deleted after 1 day + since they are only needed for the duration of the download. + """ + + def __init__( + self, + scope: Construct, + construct_id: str, + *, + access_logs_bucket: AccessLogsBucket, + encryption_key: IKey, + **kwargs, + ): + super().__init__( + scope, + construct_id, + encryption=BucketEncryption.KMS, + encryption_key=encryption_key, + server_access_logs_bucket=access_logs_bucket, + # Versioning is not needed for temporary export files + versioned=False, + cors=[ + CorsRule( + allowed_methods=[HttpMethods.GET], + allowed_origins=['*'], + allowed_headers=['*'], + ), + ], + # Automatically delete objects after 1 day + lifecycle_rules=[ + LifecycleRule( + id='DeleteExportFilesAfterOneDay', + enabled=True, + expiration=Duration.days(1), + ), + ], + **kwargs, + ) + + NagSuppressions.add_resource_suppressions( + self, + suppressions=[ + { + 'id': 'HIPAA.Security-S3BucketReplicationEnabled', + 'reason': 'This bucket houses transitory export data only that is deleted after 1 day. ' + 'Replication to a backup bucket is unhelpful.', + }, + { + 'id': 'HIPAA.Security-S3BucketVersioningEnabled', + 'reason': 'This bucket houses transitory export data only. ' + 'Version history is not needed for temporary files.', + }, + ], + ) diff --git a/backend/compact-connect/stacks/search_persistent_stack/index_manager.py b/backend/compact-connect/stacks/search_persistent_stack/index_manager.py new file mode 100644 index 000000000..e8e8108e5 --- /dev/null +++ b/backend/compact-connect/stacks/search_persistent_stack/index_manager.py @@ -0,0 +1,189 @@ +import os + +from aws_cdk import CustomResource, Duration +from aws_cdk.aws_ec2 import SubnetSelection +from aws_cdk.aws_iam import IRole +from aws_cdk.aws_logs import LogGroup, RetentionDays +from aws_cdk.aws_opensearchservice import Domain +from aws_cdk.custom_resources import Provider +from cdk_nag import NagSuppressions +from common_constructs.stack import Stack +from constructs import Construct + +from common_constructs.constants import PROD_ENV_NAME +from common_constructs.python_function import PythonFunction +from stacks.vpc_stack import VpcStack + +# Index configuration constants +# Non-prod environments use a single data node, so no replicas are needed +NON_PROD_NUMBER_OF_SHARDS = 1 +NON_PROD_NUMBER_OF_REPLICAS = 0 +# Production uses 3 data nodes across 3 AZs, so 1 primary and 2 replica ensures data availability +# if this is updated, the total of primary + replica shards must be a multiple of 3 +PROD_NUMBER_OF_SHARDS = 1 +PROD_NUMBER_OF_REPLICAS = 2 + + +class IndexManagerCustomResource(Construct): + """ + Custom resource for managing OpenSearch indices. + + This construct creates a CloudFormation custom resource that populates the OpenSearch Domain with the needed + provider indices. Indices are created with versioned names (e.g., compact_aslp_providers_v1) and aliases + (e.g., compact_aslp_providers) to enable safe blue-green migrations in the future. + """ + + def __init__( + self, + scope: Construct, + construct_id: str, + opensearch_domain: Domain, + vpc_stack: VpcStack, + vpc_subnets: SubnetSelection, + lambda_role: IRole, + environment_name: str, + ): + """ + Initialize the IndexManagerCustomResource construct. 
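A minimal sketch of what the index management handler might do at deploy time; the actual `handlers/manage_opensearch_indices.py` is not included in this diff, so the client wiring, request signing, and compact list below are assumptions based on the constants and docstring above:

```python
# Sketch only: the real handler is not part of this diff. Request signing is omitted
# and the compact list is hard-coded for illustration.
import os

from opensearchpy import OpenSearch, RequestsHttpConnection


def on_event(event, _context):
    client = OpenSearch(
        hosts=[{'host': os.environ['OPENSEARCH_HOST_ENDPOINT'], 'port': 443}],
        use_ssl=True,
        connection_class=RequestsHttpConnection,
    )
    props = event['ResourceProperties']
    for compact in ('aslp',):  # the real handler would iterate the configured compacts
        versioned_index = f'compact_{compact}_providers_v1'
        alias = f'compact_{compact}_providers'
        if not client.indices.exists(index=versioned_index):
            client.indices.create(
                index=versioned_index,
                body={
                    'settings': {
                        'number_of_shards': int(props['numberOfShards']),
                        'number_of_replicas': int(props['numberOfReplicas']),
                    },
                    # the alias lets a future v2 index be swapped in without breaking readers
                    'aliases': {alias: {}},
                },
            )
    return {'PhysicalResourceId': 'provider-search-index-manager'}
```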
+ + :param scope: The scope of the construct + :param construct_id: The id of the construct + :param opensearch_domain: The reference to the OpenSearch domain resource + :param vpc_stack: The VPC stack + :param vpc_subnets: The VPC subnets + :param lambda_role: The IAM role for the Lambda function + :param environment_name: The deployment environment name (e.g., 'prod', 'test') + """ + super().__init__(scope, construct_id) + stack = Stack.of(scope) + + self._is_prod_environment = environment_name == PROD_ENV_NAME + + # Create Lambda function for managing OpenSearch indices + self.manage_function = PythonFunction( + self, + 'IndexManagerFunction', + index=os.path.join('handlers', 'manage_opensearch_indices.py'), + lambda_dir='search', + handler='on_event', + role=lambda_role, + log_retention=RetentionDays.ONE_MONTH, + environment={ + 'OPENSEARCH_HOST_ENDPOINT': opensearch_domain.domain_endpoint, + **stack.common_env_vars, + }, + timeout=Duration.minutes(10), + memory_size=256, + vpc=vpc_stack.vpc, + vpc_subnets=vpc_subnets, + security_groups=[vpc_stack.lambda_security_group], + ) + # grant resource ability to create and check indices + opensearch_domain.grant_read_write(self.manage_function) + + # Add CDK Nag suppressions for the Lambda function's IAM role + NagSuppressions.add_resource_suppressions_by_path( + stack, + f'{self.manage_function.role.node.path}/DefaultPolicy/Resource', + [ + { + 'id': 'AwsSolutions-IAM5', + 'reason': 'The grant_read_write method requires wildcard permissions on the OpenSearch domain to ' + 'create, read, and manage indices. This is appropriate for an index management function ' + 'that needs to operate on all indices in the domain.', + }, + ], + ) + + provider_log_group = LogGroup( + self, + 'ProviderLogGroup', + retention=RetentionDays.ONE_DAY, + ) + NagSuppressions.add_resource_suppressions( + provider_log_group, + suppressions=[ + { + 'id': 'HIPAA.Security-CloudWatchLogGroupEncrypted', + 'reason': 'We do not log sensitive data to CloudWatch, and operational visibility of system' + ' logs to operators with credentials for the AWS account is desired. Encryption is not' + ' appropriate here.', + }, + ], + ) + + # Create custom resource provider + # Note: Provider framework Lambda does NOT need VPC access - it only needs to: + # 1. Invoke the Lambda (via Lambda service API, no VPC needed) + # 2. Respond to CloudFormation + provider = Provider( + self, + 'Provider', + on_event_handler=self.manage_function, + log_group=provider_log_group, + ) + + # Add CDK Nag suppressions for the provider framework + NagSuppressions.add_resource_suppressions_by_path( + stack, + f'{provider.node.path}/framework-onEvent/Resource', + [ + {'id': 'AwsSolutions-L1', 'reason': 'We do not control this runtime'}, + { + 'id': 'HIPAA.Security-LambdaConcurrency', + 'reason': 'This function is only run at deploy time, by CloudFormation and has no need for ' + 'concurrency limits.', + }, + { + 'id': 'HIPAA.Security-LambdaDLQ', + 'reason': 'This is a synchronous function that runs at deploy time. 
It does not need a DLQ', + }, + { + 'id': 'HIPAA.Security-LambdaInsideVPC', + 'reason': 'Provider framework lambda is managed by AWS and does not function inside a VPC', + }, + ], + ) + + # Add CDK Nag suppressions for the provider framework's IAM role + NagSuppressions.add_resource_suppressions_by_path( + stack, + f'{provider.node.path}/framework-onEvent/ServiceRole/Resource', + [ + { + 'id': 'AwsSolutions-IAM4', + 'reason': 'The Provider framework requires AWS managed policies (AWSLambdaBasicExecutionRole) ' + 'for its service role. We do not control these policies.', + }, + ], + ) + + NagSuppressions.add_resource_suppressions_by_path( + stack, + f'{provider.node.path}/framework-onEvent/ServiceRole/DefaultPolicy/Resource', + [ + { + 'id': 'AwsSolutions-IAM5', + 'reason': 'The Provider framework requires wildcard permissions to invoke the Lambda function. ' + 'This is a standard pattern for custom resource providers and is necessary for the ' + 'framework to manage the custom resource lifecycle.', + }, + ], + ) + + # Create custom resource for managing indices + # This custom resource will create versioned indices (e.g., 'compact_aslp_providers_v1') + # with aliases (e.g., 'compact_aslp_providers') for each compact. + # The alias abstraction enables safe blue-green migrations for future mapping changes. + self.index_manager = CustomResource( + self, + 'IndexManagerCustomResource', + resource_type='Custom::IndexManager', + service_token=provider.service_token, + properties={ + 'numberOfShards': PROD_NUMBER_OF_SHARDS if self._is_prod_environment else NON_PROD_NUMBER_OF_SHARDS, + 'numberOfReplicas': PROD_NUMBER_OF_REPLICAS + if self._is_prod_environment + else NON_PROD_NUMBER_OF_REPLICAS, + }, + ) diff --git a/backend/compact-connect/stacks/search_persistent_stack/populate_provider_documents_handler.py b/backend/compact-connect/stacks/search_persistent_stack/populate_provider_documents_handler.py new file mode 100644 index 000000000..d3c013ba4 --- /dev/null +++ b/backend/compact-connect/stacks/search_persistent_stack/populate_provider_documents_handler.py @@ -0,0 +1,99 @@ +import os + +from aws_cdk import Duration +from aws_cdk.aws_dynamodb import ITable +from aws_cdk.aws_ec2 import SubnetSelection +from aws_cdk.aws_iam import IRole +from aws_cdk.aws_logs import RetentionDays +from aws_cdk.aws_opensearchservice import Domain +from aws_cdk.aws_sns import ITopic +from cdk_nag import NagSuppressions +from common_constructs.stack import Stack +from constructs import Construct + +from common_constructs.python_function import PythonFunction +from stacks.vpc_stack import VpcStack + + +class PopulateProviderDocumentsHandler(Construct): + """ + Construct for the Populate Provider Documents Lambda function. + + This construct creates the Lambda function that populates the OpenSearch + indices with provider documents by scanning the provider table and + bulk indexing the sanitized records. + + This Lambda is intended to be invoked manually through the AWS console + for initial data population or re-indexing operations. + """ + + def __init__( + self, + scope: Construct, + construct_id: str, + opensearch_domain: Domain, + vpc_stack: VpcStack, + vpc_subnets: SubnetSelection, + lambda_role: IRole, + provider_table: ITable, + alarm_topic: ITopic, + ): + """ + Initialize the PopulateProviderDocumentsHandler construct. 
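The bulk population flow described above, in outline; `handlers/populate_provider_documents.py` is not part of this diff, so the record filtering and client wiring shown here are illustrative assumptions (the environment variable names come from the construct below):

```python
# Sketch only: the real handler is not included in this diff. It scans the provider
# table and bulk indexes sanitized provider records into OpenSearch.
import os

import boto3
from opensearchpy import OpenSearch, RequestsHttpConnection, helpers


def populate_provider_documents(event, _context):
    table = boto3.resource('dynamodb').Table(os.environ['PROVIDER_TABLE_NAME'])
    client = OpenSearch(
        hosts=[{'host': os.environ['OPENSEARCH_HOST_ENDPOINT'], 'port': 443}],
        use_ssl=True,
        connection_class=RequestsHttpConnection,  # request signing omitted for brevity
    )

    def provider_actions():
        scan_kwargs = {}
        while True:
            page = table.scan(**scan_kwargs)
            for item in page['Items']:
                if item.get('type') != 'provider':
                    continue  # only top-level provider records are indexed
                # the real handler sanitizes records before indexing; this sketch skips that step
                yield {
                    '_index': f'compact_{item["compact"]}_providers',  # alias created by the index manager
                    '_id': item['providerId'],
                    '_source': item,
                }
            if 'LastEvaluatedKey' not in page:
                break
            scan_kwargs['ExclusiveStartKey'] = page['LastEvaluatedKey']

    helpers.bulk(client, provider_actions())
```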
+ + :param scope: The scope of the construct + :param construct_id: The id of the construct + :param opensearch_domain: The reference to the OpenSearch domain resource + :param vpc_stack: The VPC stack + :param vpc_subnets: The VPC subnets for Lambda deployment + :param lambda_role: The IAM role for the Lambda function (should have OpenSearch write access) + :param provider_table: The DynamoDB provider table + :param alarm_topic: The SNS topic for alarms + """ + super().__init__(scope, construct_id) + stack = Stack.of(scope) + + # Create Lambda function for populating provider documents + self.handler = PythonFunction( + self, + 'PopulateProviderDocumentsFunction', + description='Populates OpenSearch indices with provider documents from DynamoDB', + index=os.path.join('handlers', 'populate_provider_documents.py'), + lambda_dir='search', + handler='populate_provider_documents', + role=lambda_role, + log_retention=RetentionDays.ONE_MONTH, + environment={ + 'OPENSEARCH_HOST_ENDPOINT': opensearch_domain.domain_endpoint, + 'PROVIDER_TABLE_NAME': provider_table.table_name, + 'PROV_DATE_OF_UPDATE_INDEX_NAME': provider_table.provider_date_of_update_index_name, + **stack.common_env_vars, + }, + # Longer timeout for processing large datasets + timeout=Duration.minutes(15), + memory_size=512, + vpc=vpc_stack.vpc, + vpc_subnets=vpc_subnets, + security_groups=[vpc_stack.lambda_security_group], + alarm_topic=alarm_topic, + ) + + # Grant the handler write access to the OpenSearch domain + opensearch_domain.grant_write(self.handler) + + # Grant the handler read access to the provider table + provider_table.grant_read_data(self.handler) + + # Add CDK Nag suppressions for the Lambda function's IAM role + NagSuppressions.add_resource_suppressions_by_path( + stack, + f'{self.handler.role.node.path}/DefaultPolicy/Resource', + [ + { + 'id': 'AwsSolutions-IAM5', + 'reason': 'The grant_write method requires wildcard permissions on the OpenSearch domain to ' + 'write to indices. This is appropriate for a function that needs to bulk index ' + 'provider documents. 
The DynamoDB grant_read_data also requires index permissions.', + }, + ], + ) diff --git a/backend/compact-connect/stacks/search_persistent_stack/provider_search_domain.py b/backend/compact-connect/stacks/search_persistent_stack/provider_search_domain.py new file mode 100644 index 000000000..3a6ee2aaa --- /dev/null +++ b/backend/compact-connect/stacks/search_persistent_stack/provider_search_domain.py @@ -0,0 +1,644 @@ +from aws_cdk import Duration, Fn, RemovalPolicy +from aws_cdk.aws_cloudwatch import Alarm, ComparisonOperator, Metric, TreatMissingData +from aws_cdk.aws_cloudwatch_actions import SnsAction +from aws_cdk.aws_ec2 import EbsDeviceVolumeType, SubnetSelection, SubnetType +from aws_cdk.aws_iam import Effect, IRole, PolicyStatement, ServicePrincipal +from aws_cdk.aws_kms import Key +from aws_cdk.aws_logs import LogGroup, ResourcePolicy, RetentionDays +from aws_cdk.aws_opensearchservice import ( + CapacityConfig, + Domain, + EbsOptions, + EncryptionAtRestOptions, + EngineVersion, + LoggingOptions, + TLSSecurityPolicy, + WindowStartTime, + ZoneAwarenessConfig, +) +from aws_cdk.aws_sns import ITopic +from cdk_nag import NagSuppressions +from common_constructs.stack import Stack +from constructs import Construct + +from common_constructs.constants import PROD_ENV_NAME +from stacks.vpc_stack import PRIVATE_SUBNET_ONE_NAME, VpcStack + +PROD_EBS_VOLUME_SIZE = 25 +NON_PROD_EBS_VOLUME_SIZE = 10 + + +class ProviderSearchDomain(Construct): + """ + Construct for the OpenSearch Domain and related resources. + + This construct encapsulates: + - OpenSearch Domain with VPC deployment and encryption + - KMS encryption key for the domain + - CloudWatch log groups for OpenSearch logging + - Access policies restricting domain access to specific Lambda roles + - CloudWatch alarms for capacity monitoring + + Instance sizing by environment: + - Non-prod (sandbox/test/beta): t3.small.search, 1 node + - Prod: m7g.medium.search, 3 master + 3 data nodes (with standby) + """ + + def __init__( + self, + scope: Construct, + construct_id: str, + *, + environment_name: str, + region: str, + vpc_stack: VpcStack, + compact_abbreviations: list[str], + alarm_topic: ITopic, + ingest_lambda_role: IRole, + index_manager_lambda_role: IRole, + search_api_lambda_role: IRole, + ): + """ + Initialize the ProviderSearchDomain construct. 
+ + :param scope: The scope of the construct + :param construct_id: The id of the construct + :param environment_name: The deployment environment name (e.g., 'prod', 'test') + :param region: The deployment region (e.g., 'us-east-1') + :param vpc_stack: The VPC stack containing network resources + :param compact_abbreviations: List of compact abbreviations for index access policies + :param alarm_topic: The SNS topic for capacity alarms + :param ingest_lambda_role: IAM role for the ingest Lambda function (write access) + :param index_manager_lambda_role: IAM role for the index manager Lambda function (read/write access) + :param search_api_lambda_role: IAM role for the search API Lambda function (read access) + """ + super().__init__(scope, construct_id) + stack = Stack.of(self) + + # Store references to the Lambda roles for access policy configuration + self._ingest_lambda_role = ingest_lambda_role + self._index_manager_lambda_role = index_manager_lambda_role + self._search_api_lambda_role = search_api_lambda_role + + self._is_prod_environment = environment_name == PROD_ENV_NAME + + # Determine removal policy based on environment + removal_policy = RemovalPolicy.RETAIN if self._is_prod_environment else RemovalPolicy.DESTROY + + # Create dedicated KMS key for OpenSearch domain encryption + self.encryption_key = Key( + self, + 'EncryptionKey', + enable_key_rotation=True, + alias=f'{stack.stack_name}-opensearch-encryption-key', + removal_policy=removal_policy, + ) + + # Grant OpenSearch service principal permission to use the key + opensearch_principal = ServicePrincipal('es.amazonaws.com') + self.encryption_key.grant_encrypt_decrypt(opensearch_principal) + + # Grant cloudwatch service principal permission to use the key + log_principal = ServicePrincipal(f'logs.{region}.amazonaws.com') + self.encryption_key.grant_encrypt_decrypt(log_principal) + + # Create CloudWatch log groups for OpenSearch logging + app_log_group = LogGroup( + self, + 'AppLogGroup', + retention=RetentionDays.ONE_MONTH, + removal_policy=removal_policy, + encryption_key=self.encryption_key, + ) + slow_search_log_group = LogGroup( + self, + 'SlowSearchLogGroup', + retention=RetentionDays.ONE_MONTH, + removal_policy=removal_policy, + encryption_key=self.encryption_key, + ) + slow_index_log_group = LogGroup( + self, + 'SlowIndexLogGroup', + retention=RetentionDays.ONE_MONTH, + removal_policy=removal_policy, + encryption_key=self.encryption_key, + ) + + # Create CloudWatch Logs resource policy to allow OpenSearch to write logs + # This is set here to avoid CDK creating an auto-generated Lambda function + # The resource ARNs must include ':*' to grant permissions on log streams within the log groups + ResourcePolicy( + self, + 'LogsResourcePolicy', + policy_statements=[ + PolicyStatement( + effect=Effect.ALLOW, + principals=[ServicePrincipal('es.amazonaws.com')], + actions=[ + 'logs:PutLogEvents', + 'logs:CreateLogStream', + ], + resources=[ + f'{app_log_group.log_group_arn}:*', + f'{slow_search_log_group.log_group_arn}:*', + f'{slow_index_log_group.log_group_arn}:*', + ], + ), + ], + ) + + # Determine instance type and capacity based on environment + capacity_config = self._get_capacity_config() + # Determine AZ awareness based on environment + zone_awareness_config = self._get_zone_awareness_config() + # Determine subnet selection based on environment + self.vpc_subnets = self._get_vpc_subnets(vpc_stack) + + # Create OpenSearch Domain + self.domain = Domain( + self, + 'Domain', + # IMPORTANT NOTE: updating the engine version 
requires a blue/green deployment. + # During development, we found that if a blue/green deployment became stuck, the search endpoints were still + # able to serve data, but the CloudFormation deployment would fail waiting for the domain to become active. + # In such cases you may have to work with AWS support to get it out of that state. + # If you intend to update this field, or any other field that will require a blue/green deployment as + # described here: + # https://docs.aws.amazon.com/opensearch-service/latest/developerguide/managedomains-configuration-changes.html + # consider working with stakeholders to schedule a maintenance window during low-traffic periods where + # advanced search may become inaccessible during the update, to give you time to verify changes. + version=EngineVersion.OPENSEARCH_3_3, + capacity=capacity_config, + enable_auto_software_update=True, + enable_version_upgrade=True, + # We set the off-peak window to 9AM UTC (1AM PST) + # this determines when automatic updates are performed on the domain. + off_peak_window_start=WindowStartTime(hours=9, minutes=0), + # VPC configuration for network isolation + vpc=vpc_stack.vpc, + vpc_subnets=[self.vpc_subnets], + security_groups=[vpc_stack.opensearch_security_group], + # EBS volume configuration + ebs=EbsOptions( + enabled=True, + volume_size=PROD_EBS_VOLUME_SIZE if self._is_prod_environment else NON_PROD_EBS_VOLUME_SIZE, + # this type is required for medium instances + volume_type=EbsDeviceVolumeType.GP3, + ), + # Encryption settings + encryption_at_rest=EncryptionAtRestOptions(enabled=True, kms_key=self.encryption_key), + node_to_node_encryption=True, + enforce_https=True, + tls_security_policy=TLSSecurityPolicy.TLS_1_2, + logging=LoggingOptions( + app_log_enabled=True, + app_log_group=app_log_group, + slow_search_log_enabled=True, + slow_search_log_group=slow_search_log_group, + slow_index_log_enabled=True, + slow_index_log_group=slow_index_log_group, + ), + # Suppress auto-generated Lambda for log resource policy (we created it manually above) + suppress_logs_resource_policy=True, + # Domain removal policy + removal_policy=removal_policy, + zone_awareness=zone_awareness_config, + ) + + # Configure access policies + self._configure_access_policies(compact_abbreviations) + + # Grant lambda roles access to domain + self.domain.grant_read(self._search_api_lambda_role) + self.domain.grant_write(self._ingest_lambda_role) + self.domain.grant_read_write(self._index_manager_lambda_role) + + # Add CDK Nag suppressions + self._add_domain_suppressions() + self._add_access_policy_lambda_suppressions() + self._add_lambda_role_suppressions(self._search_api_lambda_role) + self._add_lambda_role_suppressions(self._ingest_lambda_role) + self._add_lambda_role_suppressions(self._index_manager_lambda_role) + + # Add capacity monitoring alarms + self._add_capacity_alarms(alarm_topic) + + def _configure_access_policies(self, compact_abbreviations: list[str]): + """ + Configure access policies for the OpenSearch domain. 
+ + Creates IAM-based access policies that restrict access to specific Lambda roles: + - Ingest role: POST/PUT access to compact indices + - Index manager role: GET/HEAD/POST/PUT access for index management + - Search API role: POST access restricted to _search endpoint only + + :param compact_abbreviations: List of compact abbreviations for index access policies + """ + ingest_access_policy = PolicyStatement( + effect=Effect.ALLOW, + principals=[self._ingest_lambda_role], + actions=[ + 'es:ESHttpPost', + 'es:ESHttpPut', + ], + resources=[Fn.join('', [self.domain.domain_arn, '/compact*'])], + ) + index_manager_access_policy = PolicyStatement( + effect=Effect.ALLOW, + principals=[self._index_manager_lambda_role], + actions=[ + 'es:ESHttpGet', + 'es:ESHttpHead', # Required for index_exists() checks + 'es:ESHttpPost', + 'es:ESHttpPut', + ], + resources=[Fn.join('', [self.domain.domain_arn, '/compact*'])], + ) + # Search API policy - restricted to _search endpoint only + # POST is required for _search queries even though they are read-only operations + # because OpenSearch's search API uses POST to send the query DSL body. + # By restricting the resource to /_search, we prevent POST from being used + # for document indexing or other write operations. + # See: https://docs.aws.amazon.com/opensearch-service/latest/developerguide/ac.html + search_api_policy = PolicyStatement( + effect=Effect.ALLOW, + principals=[self._search_api_lambda_role], + actions=[ + 'es:ESHttpPost', + ], + # define all compact indices to restrict the policy to the search operation + resources=[ + Fn.join(delimiter='', list_of_values=[self.domain.domain_arn, f'/compact_{compact}_providers/_search']) + for compact in compact_abbreviations + ], + ) + # Add access policy to restrict access to set of roles + self.domain.add_access_policies( + ingest_access_policy, + index_manager_access_policy, + search_api_policy, + ) + + def _get_capacity_config(self) -> CapacityConfig: + """ + Determine OpenSearch cluster capacity configuration based on environment. + + Non-prod (sandbox, test, beta, etc.): Single t3.small.search node + Prod: 3 dedicated master (r8g.medium.search) + 3 data nodes (m7g.medium.search) with standby + + :return: CapacityConfig with appropriate instance types and counts + """ + if self._is_prod_environment: + # Production configuration with high availability + # 3 dedicated master nodes + 3 data nodes across 3 AZs with standby + # Multi-AZ with standby does not support t3 instance types + return CapacityConfig( + # Data nodes - m7g.medium provides 1 vCPU and 4GB RAM + data_node_instance_type='m7g.medium.search', + # we require at least 3 data nodes and master nodes to support multi-az with standby + # for high availability + data_nodes=3, + # Dedicated master nodes for cluster management + # r8g.medium provides 8GB RAM, which the master nodes + # need based on our domain size + master_node_instance_type='r8g.medium.search', + master_nodes=3, + # Multi-AZ with standby for high availability + multi_az_with_standby_enabled=True, + ) + + # Single node configuration for all non-prod environments + # (test, beta, and developer sandboxes) + return CapacityConfig( + data_node_instance_type='t3.small.search', + data_nodes=1, + # No dedicated master nodes for single-node clusters + master_nodes=None, + # No multi-AZ for single node + multi_az_with_standby_enabled=False, + ) + + def _get_zone_awareness_config(self) -> ZoneAwarenessConfig: + """ + Determine OpenSearch cluster availability zone awareness based on environment. 
+ + 3 for production, not enabled for all other non-prod environments + + :return: ZoneAwarenessConfig with appropriate settings + """ + if self._is_prod_environment: + return ZoneAwarenessConfig(enabled=True, availability_zone_count=3) + + # Non-prod environments only use one data node, hence we don't enable zone awareness + return ZoneAwarenessConfig(enabled=False) + + def _get_vpc_subnets(self, vpc_stack: VpcStack) -> SubnetSelection: + """ + Determine VPC subnet selection based on environment. + + Production: All private isolated subnets (3 AZs) for zone awareness and high availability + Non-prod: Single subnet (privateSubnet1 with CIDR 10.0.0.0/20) for single-node deployment + + :param vpc_stack: The VPC stack containing the private subnets + :return: SubnetSelection with appropriate subnet configuration + """ + if self._is_prod_environment: + # Production: Use all private isolated subnets from the VPC. + # VPC is configured with max_azs=3, so this will select exactly 3 subnets + return SubnetSelection(subnet_type=SubnetType.PRIVATE_ISOLATED) + + # Non-prod: Single-node deployment explicitly uses privateSubnet1 (CIDR 10.0.0.0/20) + # OpenSearch requires exactly one subnet for single-node deployments + # We explicitly find the subnet by its construct name to guarantee consistency + private_subnet1 = self._find_subnet_by_name(vpc_stack.vpc, PRIVATE_SUBNET_ONE_NAME) + return SubnetSelection(subnets=[private_subnet1]) + + def _find_subnet_by_name(self, vpc, subnet_name: str): + """ + Find a specific subnet by its logical construct name in the VPC. + + This provides a guaranteed, explicit reference to a specific subnet regardless of + CDK's internal list ordering, which is critical for stateful resources like OpenSearch. + + :param vpc: The VPC construct containing the subnet + :param subnet_name: The logical name of the subnet (e.g., 'privateSubnet1') + :return: The ISubnet instance + :raises ValueError: If the subnet cannot be found + """ + # Navigate the construct tree to find the subnet by name + subnet_construct = vpc.node.try_find_child(subnet_name) + if subnet_construct is None: + raise ValueError( + f'Subnet {subnet_name} not found in VPC construct tree. ' + f'Available children: {[c.node.id for c in vpc.node.children]}' + ) + + return subnet_construct + + def _add_capacity_alarms(self, alarm_topic: ITopic): + """ + Add CloudWatch alarms to monitor OpenSearch capacity and alert before hitting limits. 
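+
+        Note that OpenSearch Service domains publish their CloudWatch metrics under the legacy
+        'AWS/ES' namespace, which is why the alarms below reference 'AWS/ES' rather than an
+        OpenSearch-specific namespace.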
+ + These proactive thresholds give the DevOps team time to plan scaling activities: + - Free Storage Space < 50% of allocated capacity + - JVM Memory Pressure > 85% + - CPU Utilization > 70% + - Cluster Status (red/yellow) for critical and degraded states + - Automated Snapshot Failure for backup issues + + :param alarm_topic: The SNS topic to send alarm notifications to + """ + stack = Stack.of(self) + + # Get the volume size for calculating storage threshold + volume_size_gb = PROD_EBS_VOLUME_SIZE if self._is_prod_environment else NON_PROD_EBS_VOLUME_SIZE + # 50% threshold in MB (FreeStorageSpace metric is reported in megabytes) + # Formula: GB * 1024 MB/GB * 0.5 for 50% threshold + storage_threshold_mb = volume_size_gb * 1024 * 0.5 + + # Alarm: Free Storage Space < 50% + # This gives ample time to plan capacity increases before hitting critical levels + # Note: FreeStorageSpace metric is reported in megabytes (MB) + Alarm( + self, + 'FreeStorageSpaceAlarm', + metric=Metric( + namespace='AWS/ES', + metric_name='FreeStorageSpace', + dimensions_map={'DomainName': self.domain.domain_name, 'ClientId': stack.account}, + # check twice a day + period=Duration.hours(12), + statistic='Minimum', + ), + evaluation_periods=1, # Notify the moment the storage space is less than 50% + threshold=storage_threshold_mb, + comparison_operator=ComparisonOperator.LESS_THAN_THRESHOLD, + treat_missing_data=TreatMissingData.NOT_BREACHING, + alarm_description=( + f'OpenSearch Domain {self.domain.domain_name} free storage space has dropped below 50% ' + f'({storage_threshold_mb}MB of {volume_size_gb * 1024}MB allocated EBS volume). ' + 'Consider planning to increase EBS volume size or scaling the cluster.' + ), + ).add_alarm_action(SnsAction(alarm_topic)) + + # Alarm: JVM Memory Pressure > 85% + # Sustained high memory pressure indicates need for instance scaling + Alarm( + self, + 'JVMMemoryPressureAlarm', + metric=Metric( + namespace='AWS/ES', + metric_name='JVMMemoryPressure', + dimensions_map={'DomainName': self.domain.domain_name, 'ClientId': stack.account}, + period=Duration.minutes(5), + statistic='Maximum', + ), + evaluation_periods=3, # 15 minutes sustained + threshold=85, + comparison_operator=ComparisonOperator.GREATER_THAN_THRESHOLD, + treat_missing_data=TreatMissingData.NOT_BREACHING, + alarm_description=( + f'OpenSearch Domain {self.domain.domain_name} JVM memory pressure is above 85%. ' + 'This indicates the cluster is using a significant portion of its heap memory. ' + 'Consider scaling to larger instance types if pressure continues to increase.' + ), + ).add_alarm_action(SnsAction(alarm_topic)) + + # Alarm: CPU Utilization > 70% + # Sustained high CPU indicates need for more compute capacity + Alarm( + self, + 'CPUUtilizationAlarm', + metric=Metric( + namespace='AWS/ES', + metric_name='CPUUtilization', + dimensions_map={'DomainName': self.domain.domain_name, 'ClientId': stack.account}, + period=Duration.minutes(5), + statistic='Average', + ), + evaluation_periods=3, # 15 minutes sustained + threshold=70, + comparison_operator=ComparisonOperator.GREATER_THAN_THRESHOLD, + treat_missing_data=TreatMissingData.NOT_BREACHING, + alarm_description=( + f'OpenSearch Domain {self.domain.domain_name} CPU utilization has been above 70% for 15 minutes. ' + 'This indicates sustained high load. Review metrics and consider scaling to larger instance types ' + 'or adding more data nodes to distribute the load.' 
+ ), + ).add_alarm_action(SnsAction(alarm_topic)) + + # Alarm: Cluster Status RED - Critical + # Red status indicates critical issues requiring immediate attention + Alarm( + self, + 'ClusterStatusRedAlarm', + metric=Metric( + namespace='AWS/ES', + metric_name='ClusterStatus.red', + dimensions_map={'DomainName': self.domain.domain_name, 'ClientId': stack.account}, + period=Duration.minutes(1), + statistic='Sum', + ), + evaluation_periods=1, # Alert immediately when red + threshold=1, + comparison_operator=ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD, + treat_missing_data=TreatMissingData.NOT_BREACHING, + alarm_description=( + f'OpenSearch Domain {self.domain.domain_name} cluster status is RED. ' + 'This indicates critical issues requiring immediate attention. ' + 'Check cluster health and consider scaling if resource-constrained.' + ), + ).add_alarm_action(SnsAction(alarm_topic)) + + # Alarm: Cluster Status YELLOW - Degraded + # Yellow status indicates degraded state that should be monitored + Alarm( + self, + 'ClusterStatusYellowAlarm', + metric=Metric( + namespace='AWS/ES', + metric_name='ClusterStatus.yellow', + dimensions_map={'DomainName': self.domain.domain_name, 'ClientId': stack.account}, + period=Duration.minutes(5), + statistic='Sum', + ), + evaluation_periods=1, # Alert when yellow status is detected + threshold=1, + comparison_operator=ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD, + treat_missing_data=TreatMissingData.NOT_BREACHING, + alarm_description=( + f'OpenSearch Domain {self.domain.domain_name} cluster status is YELLOW. ' + 'This indicates degraded state. Monitor closely and consider scaling if persistent.' + ), + ).add_alarm_action(SnsAction(alarm_topic)) + + # Alarm: Automated Snapshot Failure + # Snapshot failures may indicate resource constraints or other issues + Alarm( + self, + 'AutomatedSnapshotFailureAlarm', + metric=Metric( + namespace='AWS/ES', + metric_name='AutomatedSnapshotFailure', + dimensions_map={'DomainName': self.domain.domain_name, 'ClientId': stack.account}, + period=Duration.hours(1), + statistic='Sum', + ), + evaluation_periods=1, + threshold=1, + comparison_operator=ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD, + treat_missing_data=TreatMissingData.NOT_BREACHING, + alarm_description=( + f'OpenSearch Domain {self.domain.domain_name} automated snapshot has failed. ' + 'This may indicate resource constraints or other issues requiring investigation.' + ), + ).add_alarm_action(SnsAction(alarm_topic)) + + def _add_domain_suppressions(self): + """ + Add CDK Nag suppressions for OpenSearch Domain configuration. + """ + NagSuppressions.add_resource_suppressions( + self.domain, + suppressions=[ + { + 'id': 'AwsSolutions-OS3', + 'reason': 'Access to this domain is restricted by Access Policies and VPC security groups. ' + 'The data in the domain is only accessible by the ingest lambda which indexes the ' + 'documents and the search API lambda which can only be accessed by authenticated staff ' + 'users in CompactConnect.', + }, + { + 'id': 'AwsSolutions-OS5', + 'reason': 'Access to this domain is restricted by Access Policies and VPC security groups. 
' + 'The data in the domain is only accessible by the ingest lambda which indexes the ' + 'documents and the search API lambda which can only be accessed by authenticated staff ' + 'users in CompactConnect.', + }, + ], + apply_to_children=True, + ) + if not self._is_prod_environment: + NagSuppressions.add_resource_suppressions( + self.domain, + suppressions=[ + { + 'id': 'AwsSolutions-OS4', + 'reason': 'Dedicated master nodes are only used in production environments with multiple data ' + 'nodes. Single-node non-prod environments do not require dedicated master nodes.', + }, + { + 'id': 'AwsSolutions-OS7', + 'reason': 'Zone awareness with standby is only enabled for production environments with ' + 'multiple nodes. Single-node test environments do not require multi-AZ ' + 'configuration.', + }, + ], + apply_to_children=True, + ) + + def _add_access_policy_lambda_suppressions(self): + """ + Add CDK Nag suppressions for the auto-generated Lambda function created by add_access_policies. + """ + stack = Stack.of(self) + + # Suppress for the auto-generated Lambda function + # The construct ID is auto-generated by CDK, so we need to suppress at the stack level + NagSuppressions.add_resource_suppressions_by_path( + stack, + f'{stack.node.path}/AWS679f53fac002430cb0da5b7982bd2287', + suppressions=[ + { + 'id': 'AwsSolutions-L1', + 'reason': 'This is an AWS-managed custom resource Lambda created by CDK to manage ' + 'OpenSearch domain access policies. We cannot specify the runtime version.', + }, + { + 'id': 'AwsSolutions-IAM4', + 'appliesTo': [ + 'Policy::arn::iam::aws:policy/service-role/AWSLambdaBasicExecutionRole' + ], + 'reason': 'This is an AWS-managed custom resource Lambda created by CDK to manage ' + 'OpenSearch domain access policies. It uses the standard execution role.', + }, + { + 'id': 'AwsSolutions-IAM5', + 'appliesTo': ['Action::kms:Describe*', 'Action::kms:List*'], + 'reason': 'This is an AWS-managed custom resource Lambda that requires KMS permissions to ' + 'access the encryption key used by the OpenSearch domain.', + }, + { + 'id': 'HIPAA.Security-LambdaDLQ', + 'reason': 'This is an AWS-managed custom resource Lambda used only during deployment to ' + 'manage OpenSearch access policies. A DLQ is not necessary for deployment-time ' + 'functions.', + }, + { + 'id': 'HIPAA.Security-LambdaInsideVPC', + 'reason': 'This is an AWS-managed custom resource Lambda that needs internet access to ' + 'manage OpenSearch domain access policies via AWS APIs. VPC placement is not ' + 'required.', + }, + ], + apply_to_children=True, + ) + + def _add_lambda_role_suppressions(self, lambda_role: IRole): + """ + Add CDK Nag suppressions for OpenSearch Lambda role configuration. 
+ + :param lambda_role: The Lambda role to add suppressions for + """ + NagSuppressions.add_resource_suppressions( + lambda_role, + suppressions=[ + { + 'id': 'AwsSolutions-IAM5', + 'reason': 'This lambda role access is restricted to the specific ' + 'OpenSearch domain and its indices within the VPC.', + }, + ], + apply_to_children=True, + ) diff --git a/backend/compact-connect/stacks/search_persistent_stack/provider_update_ingest_handler.py b/backend/compact-connect/stacks/search_persistent_stack/provider_update_ingest_handler.py new file mode 100644 index 000000000..b63a881e5 --- /dev/null +++ b/backend/compact-connect/stacks/search_persistent_stack/provider_update_ingest_handler.py @@ -0,0 +1,196 @@ +import os + +from aws_cdk import Duration +from aws_cdk.aws_cloudwatch import Alarm, ComparisonOperator, Stats, TreatMissingData +from aws_cdk.aws_cloudwatch_actions import SnsAction +from aws_cdk.aws_ec2 import SubnetSelection +from aws_cdk.aws_iam import IRole +from aws_cdk.aws_kms import IKey +from aws_cdk.aws_logs import FilterPattern, MetricFilter, RetentionDays +from aws_cdk.aws_opensearchservice import Domain +from aws_cdk.aws_sns import ITopic +from cdk_nag import NagSuppressions +from common_constructs.stack import Stack +from constructs import Construct + +from common_constructs.python_function import PythonFunction +from common_constructs.queued_lambda_processor import QueuedLambdaProcessor +from stacks.persistent_stack import ProviderTable +from stacks.vpc_stack import VpcStack + + +class ProviderUpdateIngestHandler(Construct): + """ + Construct for the Provider Update Ingest Lambda function. + + This construct creates the Lambda function that processes SQS messages containing + DynamoDB stream events from the provider table and indexes the updated provider + documents into OpenSearch. + + The Lambda is triggered by SQS (fed by EventBridge Pipe from DynamoDB streams) + and processes events in batches, deduplicating provider IDs by compact before + bulk indexing into OpenSearch. + """ + + def __init__( + self, + scope: Construct, + construct_id: str, + opensearch_domain: Domain, + vpc_stack: VpcStack, + vpc_subnets: SubnetSelection, + lambda_role: IRole, + provider_table: ProviderTable, + encryption_key: IKey, + alarm_topic: ITopic, + ): + """ + Initialize the ProviderUpdateIngestHandler construct. 
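+
+        Each SQS message delivered by the EventBridge Pipe is expected to carry a raw DynamoDB
+        stream record in its body, roughly of the form (field values illustrative only):
+
+            {"eventName": "MODIFY", "dynamodb": {"Keys": {...}, "NewImage": {...}}}
+
+        The exact envelope depends on the Pipe configuration in ProviderUpdateIngestPipe.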
+ + :param scope: The scope of the construct + :param construct_id: The id of the construct + :param opensearch_domain: The reference to the OpenSearch domain resource + :param vpc_stack: The VPC stack + :param vpc_subnets: The VPC subnets for Lambda deployment + :param lambda_role: The IAM role for the Lambda function (should have OpenSearch write access) + :param provider_table: The DynamoDB provider table (used for fetching full provider records) + :param encryption_key: The KMS encryption key for the SQS queue + :param alarm_topic: The SNS topic for alarms + """ + super().__init__(scope, construct_id) + stack = Stack.of(scope) + + # Create Lambda function for processing provider updates from SQS + self.handler = PythonFunction( + self, + 'ProviderUpdateIngestFunction', + description='Processes SQS messages with DynamoDB stream events and indexes provider documents into ' + 'OpenSearch', + index=os.path.join('handlers', 'provider_update_ingest.py'), + lambda_dir='search', + handler='provider_update_ingest_handler', + role=lambda_role, + log_retention=RetentionDays.ONE_MONTH, + environment={ + 'OPENSEARCH_HOST_ENDPOINT': opensearch_domain.domain_endpoint, + 'PROVIDER_TABLE_NAME': provider_table.table_name, + **stack.common_env_vars, + }, + # Allow enough time for processing large batches + timeout=Duration.minutes(10), + memory_size=1024, + vpc=vpc_stack.vpc, + vpc_subnets=vpc_subnets, + security_groups=[vpc_stack.lambda_security_group], + # We set a limit to the number of concurrent executions that can be started before being throttled. + # This protects us in several ways. First, it prevents ingest from taking concurrent execution count from + # our api lambdas, which if left unchecked could cause them to get throttled if we hit our account limit + # (currently at the default of 1000). It also prevents the OpenSearch Domain from getting slammed during + # high volume. This reserved limit can result in messages waiting a bit longer on the queue during high + # volume while the reserved executions complete their tasks before grabbing the next batch. We have an alert + # in place to fire if this lambda is ever throttled. This limit can be adjusted as needed, but based on + # initial load testing this seems like a reasonable limit. + reserved_concurrent_executions=25, + alarm_topic=alarm_topic, + ) + + # Create the QueuedLambdaProcessor for SQS-based event processing + # The queue receives DynamoDB stream events from EventBridge Pipe + self.queue_processor = QueuedLambdaProcessor( + self, + 'ProviderUpdateIngest', + process_function=self.handler, + # Visibility timeout controls when failed messages (in batchItemFailures) become visible for retry. + # Set to slightly longer than Lambda timeout (10 min) to prevent duplicate processing during + # Lambda execution. Failed messages will retry after this timeout expires (~15 minutes). + visibility_timeout=Duration.minutes(15), + # Retention period for the source queue (these should be processed fairly quickly, but setting this to + # account for retries) + retention_period=Duration.hours(2), + # OpenSearch recommends performing bulk indexing with sizes between 5 - 15 MB per operation. + # see https://www.elastic.co/guide/en/elasticsearch/guide/2.x/indexing-performance.html#_using_and_sizing_bulk_requests + # A basic provider document without any additional records (privileges, adverse actions, etc.) is + # around 2 KB on average. We expect these provider documents to grow over time as providers accumulate + # privileges and other records. 
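+            # Note: Lambda SQS event source mappings only accept batch sizes larger than 10 when a
+            # batching window is configured (up to 10,000 records for standard queues), so the
+            # max_batching_window set below is what permits a batch size of this magnitude.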
Setting a batch size of 3000 places the initial bulk operations around + # 6 MB max size per request (2KB * 3000 = 6 MB). This puts us within that range and provides headroom for + # these documents to grow over time, while still processing license uploads in a timely manner. + batch_size=3000, + # Batching window to allow multiple events for the same provider to be processed together + max_batching_window=Duration.seconds(15), + # Max receive count = total attempts before DLQ (1 initial + 2 retries = 3 total) + # Failed messages retry after visibility_timeout expires (15 min between attempts) + max_receive_count=3, + encryption_key=encryption_key, + alarm_topic=alarm_topic, + # DLQ retention of 14 days for analysis and replay + dlq_retention_period=Duration.days(14), + # Alert immediately if any messages end up in the DLQ + dlq_count_alarm_threshold=0, + ) + + # Expose the queue and DLQ for use by the EventBridge Pipe + self.queue = self.queue_processor.queue + self.dlq = self.queue_processor.dlq + + # Grant the handler write access to the OpenSearch domain + opensearch_domain.grant_write(self.handler) + + # Grant the handler read access to the provider table for fetching full provider records + provider_table.grant_read_data(self.handler) + + # Grant the handler permission to use the encryption key for SQS operations + encryption_key.grant_encrypt_decrypt(self.handler) + + # Add alarm for Lambda errors + Alarm( + self, + 'ProviderUpdateIngestErrorAlarm', + metric=self.handler.metric_errors(statistic=Stats.SUM), + evaluation_periods=1, + threshold=1, + actions_enabled=True, + alarm_description=f'{self.handler.node.path} failed to process an SQS message batch', + comparison_operator=ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD, + treat_missing_data=TreatMissingData.NOT_BREACHING, + ).add_alarm_action(SnsAction(alarm_topic)) + + # Create a metric filter to capture ERROR level logs from the provider update ingest Lambda + error_log_metric = MetricFilter( + self, + 'ProviderUpdateIngestErrorLogMetric', + log_group=self.handler.log_group, + metric_namespace='CompactConnect/Search', + metric_name='ProviderUpdateIngestErrors', + filter_pattern=FilterPattern.string_value(json_field='$.level', comparison='=', value='ERROR'), + metric_value='1', + default_value=0, + ) + + # Create an alarm that triggers when ERROR logs are detected + error_log_alarm = Alarm( + self, + 'ProviderUpdateIngestErrorLogAlarm', + metric=error_log_metric.metric(statistic='Sum'), + evaluation_periods=1, + threshold=1, + actions_enabled=True, + alarm_description=f'The Provider Update Ingest Lambda logged an ERROR level message. Investigate ' + f'the logs for the {self.handler.function_name} lambda to determine the cause.', + comparison_operator=ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD, + treat_missing_data=TreatMissingData.NOT_BREACHING, + ) + error_log_alarm.add_alarm_action(SnsAction(alarm_topic)) + + # Add CDK Nag suppressions for the Lambda function's IAM role + NagSuppressions.add_resource_suppressions_by_path( + stack, + f'{self.handler.role.node.path}/DefaultPolicy/Resource', + [ + { + 'id': 'AwsSolutions-IAM5', + 'reason': 'The grant_write method requires wildcard permissions on the OpenSearch domain to ' + 'write to indices. 
This is appropriate for a function that needs to index ' + 'provider documents.', + }, + ], + ) diff --git a/backend/compact-connect/stacks/search_persistent_stack/provider_update_ingest_pipe.py b/backend/compact-connect/stacks/search_persistent_stack/provider_update_ingest_pipe.py new file mode 100644 index 000000000..548d3f525 --- /dev/null +++ b/backend/compact-connect/stacks/search_persistent_stack/provider_update_ingest_pipe.py @@ -0,0 +1,109 @@ +from aws_cdk.aws_iam import Effect, PolicyStatement, Role, ServicePrincipal +from aws_cdk.aws_kms import IKey +from aws_cdk.aws_pipes import CfnPipe +from aws_cdk.aws_sqs import IQueue +from cdk_nag import NagSuppressions +from common_constructs.stack import Stack +from constructs import Construct + +from stacks.persistent_stack import ProviderTable + + +class ProviderUpdateIngestPipe(Construct): + """ + Construct for the EventBridge Pipe that connects DynamoDB stream to SQS. + + This construct creates an EventBridge Pipe that: + - Reads events from the DynamoDB provider table stream + - Sends events to an SQS queue for processing by the provider update ingest Lambda + + The Pipe enables decoupling the DynamoDB stream from the Lambda function, allowing + for better scalability and resilience through SQS-based message processing. + """ + + def __init__( + self, + scope: Construct, + construct_id: str, + provider_table: ProviderTable, + target_queue: IQueue, + encryption_key: IKey, + ): + """ + Initialize the ProviderUpdateIngestPipe construct. + + :param scope: The scope of the construct + :param construct_id: The id of the construct + :param provider_table: The DynamoDB provider table with stream enabled + :param target_queue: The SQS queue to send events to + :param encryption_key: The KMS encryption key used by the SQS queue + """ + super().__init__(scope, construct_id) + stack = Stack.of(scope) + + # Create IAM role for the EventBridge Pipe + self.pipe_role = Role( + self, + 'PipeRole', + assumed_by=ServicePrincipal('pipes.amazonaws.com'), + description='IAM role for EventBridge Pipe that reads from DynamoDB stream and sends to SQS', + ) + + # Grant permissions to read from DynamoDB stream + # The stream ARN is constructed from the table ARN + self.pipe_role.add_to_policy( + PolicyStatement( + effect=Effect.ALLOW, + actions=[ + 'dynamodb:DescribeStream', + 'dynamodb:GetRecords', + 'dynamodb:GetShardIterator', + 'dynamodb:ListStreams', + ], + resources=[ + f'{provider_table.table_arn}/stream/*', + ], + ) + ) + + # Grant permissions to send messages to SQS + target_queue.grant_send_messages(self.pipe_role) + + # Grant permissions to use the KMS key for encrypting SQS messages + encryption_key.grant_encrypt_decrypt(self.pipe_role) + # Grant permission to decrypt stream records from provider table + provider_table.encryption_key.grant_decrypt(self.pipe_role) + + # Create the EventBridge Pipe + # Using CfnPipe (L1 construct) as there's no stable L2 construct available yet + self.pipe = CfnPipe( + self, + 'Pipe', + role_arn=self.pipe_role.role_arn, + source=provider_table.table_stream_arn, + target=target_queue.queue_arn, + source_parameters=CfnPipe.PipeSourceParametersProperty( + dynamo_db_stream_parameters=CfnPipe.PipeSourceDynamoDBStreamParametersProperty( + # 'LATEST' starts processing from the latest available stream record + # from the moment the pipe is created + starting_position='LATEST', + # send everything to SQS as it arrives + batch_size=1, + ), + ), + description='Pipe to send DynamoDB provider table stream events to SQS for 
OpenSearch indexing', + ) + + # Add CDK Nag suppressions + NagSuppressions.add_resource_suppressions_by_path( + stack, + f'{self.pipe_role.node.path}/DefaultPolicy/Resource', + [ + { + 'id': 'AwsSolutions-IAM5', + 'reason': 'The DynamoDB stream permissions require wildcard access to stream resources ' + 'as the stream ARN includes a timestamp component that changes on table recreation. ' + 'The SQS grant_send_messages also adds appropriate permissions.', + }, + ], + ) diff --git a/backend/compact-connect/stacks/search_persistent_stack/search_handler.py b/backend/compact-connect/stacks/search_persistent_stack/search_handler.py new file mode 100644 index 000000000..798083649 --- /dev/null +++ b/backend/compact-connect/stacks/search_persistent_stack/search_handler.py @@ -0,0 +1,129 @@ +import os + +from aws_cdk import Duration +from aws_cdk.aws_cloudwatch import Alarm, ComparisonOperator, TreatMissingData +from aws_cdk.aws_cloudwatch_actions import SnsAction +from aws_cdk.aws_ec2 import SubnetSelection +from aws_cdk.aws_iam import IRole +from aws_cdk.aws_logs import FilterPattern, MetricFilter, RetentionDays +from aws_cdk.aws_opensearchservice import Domain +from aws_cdk.aws_s3 import IBucket +from aws_cdk.aws_sns import ITopic +from cdk_nag import NagSuppressions +from common_constructs.stack import Stack +from constructs import Construct + +from common_constructs.python_function import PythonFunction +from stacks.vpc_stack import VpcStack + + +class SearchHandler(Construct): + """ + Construct for the Search Lambda function. + + This construct creates the Lambda function that handles search requests + against the OpenSearch domain for both provider and privilege records. + """ + + def __init__( + self, + scope: Construct, + construct_id: str, + opensearch_domain: Domain, + vpc_stack: VpcStack, + vpc_subnets: SubnetSelection, + lambda_role: IRole, + alarm_topic: ITopic, + export_results_bucket: IBucket, + ): + """ + Initialize the SearchHandler construct. 
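+
+        Note: API Gateway's default maximum integration timeout is 29 seconds, which matches the
+        29-second handler timeout configured below.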
+ + :param scope: The scope of the construct + :param construct_id: The id of the construct + :param opensearch_domain: The reference to the OpenSearch domain resource + :param vpc_stack: The VPC stack + :param vpc_subnets: The VPC subnets for Lambda deployment + :param lambda_role: The IAM role for the Lambda function + :param alarm_topic: The SNS topic for alarms + :param export_results_bucket: The S3 bucket for storing export result CSV files + """ + super().__init__(scope, construct_id) + stack = Stack.of(scope) + + # Create Lambda function for searching providers and privileges + self.handler = PythonFunction( + self, + 'SearchProvidersFunction', + description='Search handler for OpenSearch queries', + index=os.path.join('handlers', 'search.py'), + lambda_dir='search', + handler='search_api_handler', + role=lambda_role, + log_retention=RetentionDays.ONE_MONTH, + environment={ + 'OPENSEARCH_HOST_ENDPOINT': opensearch_domain.domain_endpoint, + 'EXPORT_RESULTS_BUCKET_NAME': export_results_bucket.bucket_name, + **stack.common_env_vars, + }, + timeout=Duration.seconds(29), + # memory slightly larger to manage pulling down privilege reports for CSV export + # and to improve performance of search in general + memory_size=2048, + vpc=vpc_stack.vpc, + vpc_subnets=vpc_subnets, + security_groups=[vpc_stack.lambda_security_group], + alarm_topic=alarm_topic, + ) + + # Grant the handler read access to the OpenSearch domain + opensearch_domain.grant_read(self.handler) + + # Grant the handler write access to the export results bucket + export_results_bucket.grant_write(self.handler) + + # Grant the handler permission to generate presigned URLs for the export results bucket + export_results_bucket.grant_read(self.handler) + + # Add CDK Nag suppressions for the Lambda function's IAM role + NagSuppressions.add_resource_suppressions_by_path( + stack, + f'{self.handler.role.node.path}/DefaultPolicy/Resource', + [ + { + 'id': 'AwsSolutions-IAM5', + 'reason': 'The grant_read method requires wildcard permissions on the OpenSearch domain to ' + 'read from indices. This is appropriate for a search function that needs to query ' + 'provider indices in the domain. Additionally, grant_write and grant_read on the S3 bucket ' + 'use wildcard permissions for object-level operations which is required for writing and ' + 'generating presigned URLs for export result CSV files.', + }, + ], + ) + + # Create a metric filter to capture ERROR level logs from the search handler Lambda + error_log_metric = MetricFilter( + self, + 'SearchHandlerErrorLogMetric', + log_group=self.handler.log_group, + metric_namespace='CompactConnect/Search', + metric_name='SearchHandlerErrors', + filter_pattern=FilterPattern.string_value(json_field='$.level', comparison='=', value='ERROR'), + metric_value='1', + default_value=0, + ) + + # Create an alarm that triggers when ERROR logs are detected + error_log_alarm = Alarm( + self, + 'SearchHandlerErrorLogAlarm', + metric=error_log_metric.metric(statistic='Sum'), + evaluation_periods=1, + threshold=1, + actions_enabled=True, + alarm_description=f'The Search Handler Lambda logged an ERROR level message. 
Investigate ' + f'the logs for the {self.handler.function_name} lambda to determine the cause.', + comparison_operator=ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD, + treat_missing_data=TreatMissingData.NOT_BREACHING, + ) + error_log_alarm.add_alarm_action(SnsAction(alarm_topic)) diff --git a/backend/compact-connect/stacks/vpc_stack/__init__.py b/backend/compact-connect/stacks/vpc_stack/__init__.py new file mode 100644 index 000000000..650889e9f --- /dev/null +++ b/backend/compact-connect/stacks/vpc_stack/__init__.py @@ -0,0 +1,228 @@ +from aws_cdk import RemovalPolicy +from aws_cdk.aws_ec2 import ( + FlowLogDestination, + FlowLogTrafficType, + GatewayVpcEndpointAwsService, + InterfaceVpcEndpointAwsService, + IpAddresses, + Port, + SecurityGroup, + SubnetConfiguration, + SubnetType, + Vpc, +) +from aws_cdk.aws_iam import ServicePrincipal +from aws_cdk.aws_kms import Key +from aws_cdk.aws_logs import LogGroup, RetentionDays +from cdk_nag import NagSuppressions +from common_constructs.stack import AppStack +from constructs import Construct + +PRIVATE_SUBNET_ONE_NAME = 'privateSubnet1' +PRIVATE_SUBNET_TWO_NAME = 'privateSubnet2' +PRIVATE_SUBNET_THREE_NAME = 'privateSubnet3' + + +class VpcStack(AppStack): + """ + Stack for VPC resources needed for OpenSearch Domain and Lambda functions. + + This stack provides network infrastructure including: + - VPC with private subnets across multiple availability zones + - VPC endpoints for AWS services (CloudWatch Logs, DynamoDB) + - Security groups for OpenSearch and Lambda functions + - VPC Flow Logs for network monitoring + + IMPORTANT - VPC Subnet CIDR Allocation Strategy: + ================================================= + This VPC uses explicit CIDR block overrides to prevent conflicts when expanding. + Each subnet CIDR is locked in using CloudFormation property overrides, which + allows safe addition of more AZs/subnets in the future without deployment failures. + + Current allocation from 10.0.0.0/16 VPC CIDR: + - Private subnets (3 AZs): 10.0.0.0/20, 10.0.16.0/20, 10.0.32.0/20 (4096 IPs each) + - Reserved for future expansion: 10.0.48.0/20, 10.0.64.0/20, etc. + + To add more subnets in the future: + 1. Increase max_azs (e.g., from 3 to 4) + 2. Add new CIDR blocks to the private_cidrs list (e.g., '10.0.48.0/20') + 3. 
Deploy - existing subnets won't be modified due to explicit CIDR overrides + + Solution reference: https://github.com/aws/aws-cdk/issues/24708#issuecomment-1665795316 + """ + + def __init__( + self, + scope: Construct, + construct_id: str, + *, + environment_name: str, + environment_context: dict, + **kwargs, + ): + super().__init__( + scope, construct_id, environment_context=environment_context, environment_name=environment_name, **kwargs + ) + + # Determine removal policy based on environment + removal_policy = RemovalPolicy.RETAIN if environment_name == 'prod' else RemovalPolicy.DESTROY + + self.vpc_encryption_key = Key( + self, + 'VpcEncryptionKey', + enable_key_rotation=True, + alias=f'{self.stack_name}-vpc-encryption-key', + removal_policy=removal_policy, + ) + + # Create VPC with private subnets across multiple availability zones + # Using explicit CIDR allocation to allow future expansion without conflicts + self.vpc = Vpc( + self, + 'CompactConnectVpc', + # No Internet or NAT Gateway needed - using VPC endpoints for AWS service access + create_internet_gateway=False, + nat_gateways=0, + ip_addresses=IpAddresses.cidr('10.0.0.0/16'), + # Use 3 AZs for high availability + # CDK will automatically select 3 AZs from the region + max_azs=3, + subnet_configuration=[ + SubnetConfiguration( + name='private', + subnet_type=SubnetType.PRIVATE_ISOLATED, + # cidr_mask is set to 20 to provide /20 subnets (4096 IPs each) + # However, we explicitly override the CIDR blocks below to lock them in + cidr_mask=20, + ), + ], + enable_dns_hostnames=True, + enable_dns_support=True, + ) + + # Explicitly set CIDR blocks for each subnet to prevent conflicts when expanding VPC + # This follows the solution from: https://github.com/aws/aws-cdk/issues/24708#issuecomment-1665795316 + # By locking in the CIDR blocks, we can safely add more AZs or public subnets in the future without + # CloudFormation errors. + private_cidrs = ['10.0.0.0/20', '10.0.16.0/20', '10.0.32.0/20'] + self._assign_subnet_cidr(PRIVATE_SUBNET_ONE_NAME, private_cidrs[0]) + self._assign_subnet_cidr(PRIVATE_SUBNET_TWO_NAME, private_cidrs[1]) + self._assign_subnet_cidr(PRIVATE_SUBNET_THREE_NAME, private_cidrs[2]) + + # grant access to Cloudwatch logs for vpc encryption key + logs_principal = ServicePrincipal('logs.amazonaws.com') + self.vpc_encryption_key.grant_encrypt_decrypt(logs_principal) + + # Create VPC Flow Logs for monitoring network traffic + flow_log_group = LogGroup( + self, + 'VpcFlowLogGroup', + retention=RetentionDays.ONE_MONTH, + removal_policy=removal_policy, + encryption_key=self.vpc_encryption_key, + ) + + self.vpc.add_flow_log( + 'VpcFlowLog', + destination=FlowLogDestination.to_cloud_watch_logs(flow_log_group), + traffic_type=FlowLogTrafficType.ALL, + ) + + # VPC Endpoint for CloudWatch Logs + # This allows Lambda functions in the VPC to send logs to CloudWatch without internet access + self.logs_vpc_endpoint = self.vpc.add_interface_endpoint( + 'LogsVpcEndpoint', + service=InterfaceVpcEndpointAwsService.CLOUDWATCH_LOGS, + ) + + # Suppress CdkNag warnings for the auto-generated VPC endpoint security group + # These warnings occur because CDK creates security group rules with intrinsic functions + # that CdkNag cannot fully evaluate at synthesis time + NagSuppressions.add_resource_suppressions_by_path( + self, + path=self.logs_vpc_endpoint.node.path, + suppressions=[ + { + 'id': 'AwsSolutions-EC23', + 'reason': 'VPC endpoint security groups are automatically managed by CDK. 
Inbound rules are ' + 'appropriately restricted to HTTPS (port 443) from VPC CIDR block.', + }, + { + 'id': 'HIPAA.Security-EC2RestrictedCommonPorts', + 'reason': 'VPC endpoint security groups are automatically managed by CDK. Only HTTPS (port 443) ' + 'is allowed for CloudWatch Logs communication.', + }, + { + 'id': 'HIPAA.Security-EC2RestrictedSSH', + 'reason': 'VPC endpoint security groups are automatically managed by CDK. SSH is not enabled on ' + 'this security group.', + }, + ], + apply_to_children=True, + ) + + # VPC Endpoint for DynamoDB + # This allows Lambda functions to access DynamoDB without internet access + self.dynamodb_vpc_endpoint = self.vpc.add_gateway_endpoint( + 'DynamoDbVpcEndpoint', + service=GatewayVpcEndpointAwsService.DYNAMODB, + ) + + # VPC Endpoint for S3 + # This is needed for our custom resource which manages OpenSearch indices to access + # the CloudFormation S3 bucket without internet access + self.s3_vpc_endpoint = self.vpc.add_gateway_endpoint( + 'S3VpcEndpoint', + service=GatewayVpcEndpointAwsService.S3, + ) + + # Security Group for Lambda Functions + # This will control inbound and outbound traffic for Lambda functions that interact with OpenSearch + self.lambda_security_group = SecurityGroup( + self, + 'LambdaSecurityGroup', + vpc=self.vpc, + description='Security group for Lambda functions within VPC', + allow_all_outbound=True, # Allow Lambda to make outbound connections + ) + + # Security Group for OpenSearch Domain + # This will control inbound and outbound traffic for the OpenSearch cluster + self.opensearch_security_group = SecurityGroup( + self, + 'OpenSearchSecurityGroup', + vpc=self.vpc, + description='Security group for OpenSearch Domain', + allow_all_outbound=True, # Allow OpenSearch to make outbound connections + ) + # Allow Lambda functions to communicate with OpenSearch on port 443 (HTTPS) + self.opensearch_security_group.add_ingress_rule( + peer=self.lambda_security_group, + connection=Port.tcp(443), + description='Allow HTTPS traffic from Lambda functions', + ) + + def _assign_subnet_cidr(self, subnet_name: str, cidr: str): + """ + Explicitly assign a CIDR block to a subnet by overriding the CloudFormation property. + + This prevents CIDR conflicts when adding more AZs to the VPC in the future. + Without this override, CloudFormation attempts to reassign CIDR blocks when subnets/AZs are added, + causing deployment failures with "CIDR conflict" errors. 
See https://github.com/aws/aws-cdk/issues/24708 + + param subnet_name: The logical name of the subnet (e.g., 'privateSubnet1') + param cidr: The CIDR block to assign (e.g., '10.0.0.0/20') + """ + + # Navigate the construct tree to find the subnet + subnet_construct = self.vpc.node.try_find_child(subnet_name) + if subnet_construct is None: + raise ValueError(f'Subnet {subnet_name} not found in VPC') + + # Get the underlying CloudFormation subnet resource + cfn_subnet = subnet_construct.node.try_find_child('Subnet') + if cfn_subnet is None: + raise ValueError(f'CloudFormation Subnet resource not found for {subnet_name}') + + # Override the CIDR block property + cfn_subnet.add_property_override('CidrBlock', cidr) diff --git a/backend/compact-connect/tests/app/base.py b/backend/compact-connect/tests/app/base.py index 439537908..12ff282ef 100644 --- a/backend/compact-connect/tests/app/base.py +++ b/backend/compact-connect/tests/app/base.py @@ -270,26 +270,31 @@ def _inspect_ssn_table(self, persistent_stack: PersistentStack, persistent_stack { 'Properties': { 'KeyPolicy': { - 'Statement': Match.array_with([ - { - 'Action': 'kms:*', - 'Effect': 'Allow', - 'Principal': {'AWS': f'arn:aws:iam::{persistent_stack.account}:root'}, - 'Resource': '*', - }, - { - 'Action': ['kms:Decrypt', 'kms:Encrypt', 'kms:GenerateDataKey*', 'kms:ReEncrypt*'], - 'Condition': { - 'StringNotEquals': { - 'aws:PrincipalArn': principal_arn_array, - 'aws:PrincipalServiceName': ['dynamodb.amazonaws.com', 'events.amazonaws.com'], - } + 'Statement': Match.array_with( + [ + { + 'Action': 'kms:*', + 'Effect': 'Allow', + 'Principal': {'AWS': f'arn:aws:iam::{persistent_stack.account}:root'}, + 'Resource': '*', + }, + { + 'Action': ['kms:Decrypt', 'kms:Encrypt', 'kms:GenerateDataKey*', 'kms:ReEncrypt*'], + 'Condition': { + 'StringNotEquals': { + 'aws:PrincipalArn': principal_arn_array, + 'aws:PrincipalServiceName': [ + 'dynamodb.amazonaws.com', + 'events.amazonaws.com', + ], + } + }, + 'Effect': 'Deny', + 'Principal': '*', + 'Resource': '*', }, - 'Effect': 'Deny', - 'Principal': '*', - 'Resource': '*', - }, - ]), + ] + ), 'Version': '2012-10-17', } } @@ -305,59 +310,53 @@ def _inspect_ssn_table(self, persistent_stack: PersistentStack, persistent_stack 'TableName': 'ssn-table-DataEventsLog', 'ResourcePolicy': { 'PolicyDocument': { - 'Statement': Match.array_with([ - { - 'Effect': 'Deny', - 'Principal': '*', - 'Resource': '*', - 'Action': 'dynamodb:CreateBackup', - 'Condition': { - 'StringNotEquals': { - 'aws:PrincipalServiceName': 'dynamodb.amazonaws.com' - } - } - }, - { - 'Effect': 'Deny', - 'Principal': '*', - 'Resource': '*', - 'Action': [ - 'dynamodb:BatchGetItem', - 'dynamodb:BatchWriteItem', - 'dynamodb:PartiQL*', - 'dynamodb:Scan', - ], - 'Condition': { - 'StringNotEquals': { - 'aws:PrincipalServiceName': 'dynamodb.amazonaws.com', - 'aws:PrincipalArn': Match.any_value(), - } + 'Statement': Match.array_with( + [ + { + 'Effect': 'Deny', + 'Principal': '*', + 'Resource': '*', + 'Action': 'dynamodb:CreateBackup', + 'Condition': { + 'StringNotEquals': {'aws:PrincipalServiceName': 'dynamodb.amazonaws.com'} + }, }, - }, - { - "Action": [ - "dynamodb:ConditionCheckItem", - "dynamodb:GetItem", - "dynamodb:Query" - ], - "Effect": "Deny", - "Principal": "*", - "NotResource": Match.string_like_regexp( - f"arn:aws:dynamodb:{persistent_stack.region}:{persistent_stack.account}:table/ssn-table-DataEventsLog/index/ssnIndex" - ) - }, - ]) + { + 'Effect': 'Deny', + 'Principal': '*', + 'Resource': '*', + 'Action': [ + 'dynamodb:BatchGetItem', + 
'dynamodb:BatchWriteItem', + 'dynamodb:PartiQL*', + 'dynamodb:Scan', + ], + 'Condition': { + 'StringNotEquals': { + 'aws:PrincipalServiceName': 'dynamodb.amazonaws.com', + 'aws:PrincipalArn': Match.any_value(), + } + }, + }, + { + 'Action': ['dynamodb:ConditionCheckItem', 'dynamodb:GetItem', 'dynamodb:Query'], + 'Effect': 'Deny', + 'Principal': '*', + 'NotResource': Match.string_like_regexp( + f'arn:aws:dynamodb:{persistent_stack.region}:{persistent_stack.account}:table/ssn-table-DataEventsLog/index/ssnIndex' + ), + }, + ] + ) } }, 'SSESpecification': { - 'KMSMasterKeyId': { - 'Fn::GetAtt': [ssn_key_logical_id, 'Arn'] - }, + 'KMSMasterKeyId': {'Fn::GetAtt': [ssn_key_logical_id, 'Arn']}, 'SSEEnabled': True, 'SSEType': 'KMS', - } + }, } - } + }, ) def _inspect_backup_resources(self, persistent_stack: PersistentStack, persistent_stack_template: Template): diff --git a/backend/compact-connect/tests/app/test_search_persistent_stack.py b/backend/compact-connect/tests/app/test_search_persistent_stack.py new file mode 100644 index 000000000..1b57da7c2 --- /dev/null +++ b/backend/compact-connect/tests/app/test_search_persistent_stack.py @@ -0,0 +1,445 @@ +import json +from unittest import TestCase + +from aws_cdk.assertions import Match, Template + +from tests.app.base import TstAppABC + + +class TestSearchPersistentStack(TstAppABC, TestCase): + """ + Test cases for the SearchPersistentStack to ensure proper OpenSearch Domain configuration + for advanced provider search functionality. + """ + + @classmethod + def get_context(cls): + with open('cdk.json') as f: + context = json.load(f)['context'] + with open('cdk.context.sandbox-example.json') as f: + context.update(json.load(f)) + + # Suppresses lambda bundling for tests + context['aws:cdk:bundling-stacks'] = [] + return context + + def test_opensearch_domain_created(self): + """ + Test that the OpenSearch Domain is created with the correct basic configuration. + """ + search_stack = self.app.sandbox_backend_stage.search_persistent_stack + search_template = Template.from_stack(search_stack) + + # Verify exactly one OpenSearch Domain is created + search_template.resource_count_is('AWS::OpenSearchService::Domain', 1) + + def test_opensearch_version(self): + """ + Test that OpenSearch uses the correct version. + """ + search_stack = self.app.sandbox_backend_stage.search_persistent_stack + search_template = Template.from_stack(search_stack) + + # Verify OpenSearch version + search_template.has_resource_properties( + 'AWS::OpenSearchService::Domain', + { + 'EngineVersion': 'OpenSearch_3.3', + }, + ) + + def test_vpc_configuration(self): + """ + Test that the OpenSearch Domain is deployed within the VPC for network isolation. + """ + search_stack = self.app.sandbox_backend_stage.search_persistent_stack + search_template = Template.from_stack(search_stack) + + # Verify VPC configuration is present + search_template.has_resource_properties( + 'AWS::OpenSearchService::Domain', + { + 'VPCOptions': { + 'SubnetIds': Match.any_value(), + 'SecurityGroupIds': Match.any_value(), + }, + }, + ) + + def test_node_to_node_encryption(self): + """ + Test that node-to-node encryption is enabled. 
+ """ + search_stack = self.app.sandbox_backend_stage.search_persistent_stack + search_template = Template.from_stack(search_stack) + + # Verify node-to-node encryption is enabled + search_template.has_resource_properties( + 'AWS::OpenSearchService::Domain', + { + 'NodeToNodeEncryptionOptions': { + 'Enabled': True, + }, + }, + ) + + def test_https_enforcement(self): + """ + Test that HTTPS is enforced for all traffic to the domain. + """ + search_stack = self.app.sandbox_backend_stage.search_persistent_stack + search_template = Template.from_stack(search_stack) + + # Verify HTTPS is required + search_template.has_resource_properties( + 'AWS::OpenSearchService::Domain', + { + 'DomainEndpointOptions': { + 'EnforceHTTPS': True, + 'TLSSecurityPolicy': 'Policy-Min-TLS-1-2-2019-07', + }, + }, + ) + + def test_ebs_encryption(self): + """ + Test that EBS volumes are encrypted. + """ + search_stack = self.app.sandbox_backend_stage.search_persistent_stack + search_template = Template.from_stack(search_stack) + + encryption_key_logical_id = search_stack.get_logical_id( + search_stack.opensearch_encryption_key.node.default_child + ) + + # Verify EBS volumes are encrypted + search_template.has_resource_properties( + 'AWS::OpenSearchService::Domain', + { + 'EBSOptions': { + 'EBSEnabled': True, + 'VolumeSize': 10, + }, + 'EncryptionAtRestOptions': { + 'Enabled': True, + 'KmsKeyId': { + 'Ref': encryption_key_logical_id, + }, + }, + }, + ) + + def test_sandbox_instance_type(self): + """ + Test that sandbox environment uses t3.small.search instance type for cost optimization. + """ + search_stack = self.app.sandbox_backend_stage.search_persistent_stack + search_template = Template.from_stack(search_stack) + + # Verify sandbox uses t3.small.search with single node + search_template.has_resource_properties( + 'AWS::OpenSearchService::Domain', + { + 'ClusterConfig': { + 'InstanceType': 't3.small.search', + 'InstanceCount': 1, + 'DedicatedMasterEnabled': False, + 'MultiAZWithStandbyEnabled': False, + }, + }, + ) + + def test_logging_configuration(self): + """ + Test that appropriate logging is enabled for monitoring and troubleshooting. + """ + search_stack = self.app.sandbox_backend_stage.search_persistent_stack + search_template = Template.from_stack(search_stack) + + # Verify logging configuration + search_template.has_resource_properties( + 'AWS::OpenSearchService::Domain', + { + 'LogPublishingOptions': { + 'ES_APPLICATION_LOGS': Match.object_like({'Enabled': True}), + }, + }, + ) + + def test_capacity_alarms_configured(self): + """ + Test that capacity monitoring alarms are configured for proactive scaling. + + Verifies six critical alarms: + 1. Free Storage Space < 50% threshold + 2. JVM Memory Pressure > 85% threshold + 3. CPU Utilization > 70% threshold + 4. Cluster Status RED for critical issues + 5. Cluster Status YELLOW for degraded state + 6. Automated Snapshot Failure for backup issues + + These alarms give DevOps team time to plan scaling activities before hitting limits. 
+ """ + search_stack = self.app.sandbox_backend_stage.search_persistent_stack + search_template = Template.from_stack(search_stack) + + # Verify Free Storage Space Alarm + # Note: FreeStorageSpace is reported in megabytes (MB), not bytes + search_template.has_resource_properties( + 'AWS::CloudWatch::Alarm', + { + 'MetricName': 'FreeStorageSpace', + 'Namespace': 'AWS/ES', + 'Threshold': 5120, # 5GB in MB (50% of 10GB = 5GB = 5120MB for sandbox) + 'ComparisonOperator': 'LessThanThreshold', + 'EvaluationPeriods': 1, + }, + ) + + # Verify JVM Memory Pressure Alarm + search_template.has_resource_properties( + 'AWS::CloudWatch::Alarm', + { + 'MetricName': 'JVMMemoryPressure', + 'Namespace': 'AWS/ES', + 'Threshold': 85, + 'ComparisonOperator': 'GreaterThanThreshold', + 'EvaluationPeriods': 3, + }, + ) + + # Verify CPU Utilization Alarm + search_template.has_resource_properties( + 'AWS::CloudWatch::Alarm', + { + 'MetricName': 'CPUUtilization', + 'Namespace': 'AWS/ES', + 'Threshold': 70, + 'ComparisonOperator': 'GreaterThanThreshold', + 'EvaluationPeriods': 3, # 15 minutes sustained + }, + ) + + # Verify Cluster Status RED Alarm + search_template.has_resource_properties( + 'AWS::CloudWatch::Alarm', + { + 'MetricName': 'ClusterStatus.red', + 'Namespace': 'AWS/ES', + 'Threshold': 1, + 'ComparisonOperator': 'GreaterThanOrEqualToThreshold', + 'EvaluationPeriods': 1, + }, + ) + + # Verify Cluster Status YELLOW Alarm + search_template.has_resource_properties( + 'AWS::CloudWatch::Alarm', + { + 'MetricName': 'ClusterStatus.yellow', + 'Namespace': 'AWS/ES', + 'Threshold': 1, + 'ComparisonOperator': 'GreaterThanOrEqualToThreshold', + 'EvaluationPeriods': 1, + }, + ) + + # Verify Automated Snapshot Failure Alarm + search_template.has_resource_properties( + 'AWS::CloudWatch::Alarm', + { + 'MetricName': 'AutomatedSnapshotFailure', + 'Namespace': 'AWS/ES', + 'Threshold': 1, + 'ComparisonOperator': 'GreaterThanOrEqualToThreshold', + 'EvaluationPeriods': 1, + }, + ) + + def test_sandbox_uses_expected_private_subnet(self): + """ + Test that the OpenSearch Domain in sandbox uses expected private Subnet. + + For non-prod single-node deployments, OpenSearch must use exactly one subnet. + We explicitly select privateSubnet1 (CIDR 10.0.0.0/20) to ensure deterministic + placement across deployments, since the related lambda functions will also be + deployed within that same subnet, and we want to ensure that can communicate with + one another. + + This test verifies that OpenSearch references the specific subnet we expect, + not just any arbitrary subnet from the VPC. 
+ """ + search_stack = self.app.sandbox_backend_stage.search_persistent_stack + search_template = Template.from_stack(search_stack) + + # Get the OpenSearch Domain's subnet configuration + opensearch_resources = search_template.find_resources('AWS::OpenSearchService::Domain') + opensearch_properties = list(opensearch_resources.values())[0]['Properties'] + vpc_options = opensearch_properties['VPCOptions'] + subnet_ids = vpc_options['SubnetIds'] + + # For sandbox (non-prod), should use exactly one subnet + self.assertEqual(len(subnet_ids), 1, 'Sandbox OpenSearch should use exactly one subnet') + + # Get the subnet reference from OpenSearch + opensearch_subnet_ref = subnet_ids[0] + # Extract the export name that OpenSearch is importing + import_value = opensearch_subnet_ref['Fn::ImportValue'] + + # Verify OpenSearch is importing the correct subnet (privateSubnet1) + # The import_value should reference the export name of privateSubnet1 + # The export name contains the construct name, which includes 'privateSubnet1' + self.assertIn( + 'privateSubnet1', + str(import_value), + f'OpenSearch should import privateSubnet1, but is importing: {import_value}. ' + 'This is critical for deterministic subnet placement in non-prod environments.', + ) + + +class TestProdSearchPersistentStack(TstAppABC, TestCase): + """ + Test cases for the prod SearchPersistentStack to ensure proper production OpenSearch Domain configuration + for advanced provider search functionality. + """ + + @classmethod + def get_context(cls): + with open('cdk.json') as f: + context = json.load(f)['context'] + with open('cdk.context.prod-example.json') as f: + context.update(json.load(f)) + + # Suppresses lambda bundling for tests + context['aws:cdk:bundling-stacks'] = [] + return context + + def test_prod_instance_type(self): + """ + Test that production environment uses m7g.medium.search instance type for data nodes + and r8g.medium.search for master nodes with high availability configuration. + """ + search_stack = self.app.prod_backend_pipeline_stack.prod_stage.search_persistent_stack + search_template = Template.from_stack(search_stack) + + # Verify production uses m7g.medium.search with 3 data nodes + search_template.has_resource_properties( + 'AWS::OpenSearchService::Domain', + { + 'ClusterConfig': { + 'InstanceType': 'm7g.medium.search', + 'InstanceCount': 3, + 'DedicatedMasterEnabled': True, + 'DedicatedMasterType': 'r8g.medium.search', + 'DedicatedMasterCount': 3, + 'MultiAZWithStandbyEnabled': True, + }, + }, + ) + + def test_prod_ebs_volume_size(self): + """ + Test that production environment uses 25GB EBS volume size. + """ + search_stack = self.app.prod_backend_pipeline_stack.prod_stage.search_persistent_stack + search_template = Template.from_stack(search_stack) + + # Verify production uses 25GB EBS volume + search_template.has_resource_properties( + 'AWS::OpenSearchService::Domain', + { + 'EBSOptions': { + 'EBSEnabled': True, + 'VolumeSize': 25, + }, + }, + ) + + def test_prod_zone_awareness(self): + """ + Test that production environment has zone awareness enabled with 3 availability zones. 
+ """ + search_stack = self.app.prod_backend_pipeline_stack.prod_stage.search_persistent_stack + search_template = Template.from_stack(search_stack) + + # Verify zone awareness is enabled with 3 AZs + search_template.has_resource_properties( + 'AWS::OpenSearchService::Domain', + { + 'ClusterConfig': { + 'ZoneAwarenessEnabled': True, + }, + }, + ) + + def test_prod_uses_all_private_subnets(self): + """ + Test that production OpenSearch Domain uses all private isolated subnets (3 AZs) + for high availability and zone awareness. + + Production requires 3 subnets across 3 availability zones to support + multi-AZ with standby configuration. + """ + search_stack = self.app.prod_backend_pipeline_stack.prod_stage.search_persistent_stack + search_template = Template.from_stack(search_stack) + + # Get the OpenSearch Domain's subnet configuration + opensearch_resources = search_template.find_resources('AWS::OpenSearchService::Domain') + opensearch_properties = list(opensearch_resources.values())[0]['Properties'] + vpc_options = opensearch_properties['VPCOptions'] + subnet_ids = vpc_options['SubnetIds'] + + # For production, should use 3 subnets (one per AZ) + self.assertEqual( + len(subnet_ids), + 3, + 'Production OpenSearch should use exactly 3 subnets (one per availability zone)', + ) + + def test_prod_index_shard_configuration(self): + """ + Test that production index manager custom resource uses production shard configuration: + - 1 primary shard + - 2 replica shards (for 3 data nodes across 3 AZs) + + This ensures data availability if one node fails, with total shards (1 + 2 = 3) + being a multiple of 3 to distribute evenly across the 3 data nodes. + """ + search_stack = self.app.prod_backend_pipeline_stack.prod_stage.search_persistent_stack + search_template = Template.from_stack(search_stack) + + # Verify index manager custom resource has production shard/replica configuration + search_template.has_resource_properties( + 'Custom::IndexManager', + { + 'numberOfShards': 1, + 'numberOfReplicas': 2, + }, + ) + + # Note that the prod alarm tests specifically check for the + # differences we configure for our production environment as opposed + # to the non-prod environments. If all the sandbox alarms are properly + # configured, they are configured for prod as well, so we don't retest that here. + def test_prod_storage_threshold_alarm(self): + """ + Test that production storage alarm threshold is set to 50% of 25GB volume (12800 MB). + + Production uses 25GB EBS volumes, so 50% threshold = 12.5GB = 12800 MB. + This gives ample time to plan capacity increases before hitting critical levels. 
+ """ + search_stack = self.app.prod_backend_pipeline_stack.prod_stage.search_persistent_stack + search_template = Template.from_stack(search_stack) + + # Verify Free Storage Space Alarm threshold for production (50% of 25GB = 12800 MB) + # Note: FreeStorageSpace metric is reported in megabytes (MB) + search_template.has_resource_properties( + 'AWS::CloudWatch::Alarm', + { + 'MetricName': 'FreeStorageSpace', + 'Namespace': 'AWS/ES', + 'Threshold': 12800, # 50% of 25GB = 12.5GB = 12800 MB + 'ComparisonOperator': 'LessThanThreshold', + 'EvaluationPeriods': 1, + }, + ) diff --git a/backend/compact-connect/tests/app/test_vpc.py b/backend/compact-connect/tests/app/test_vpc.py new file mode 100644 index 000000000..4c59c0e90 --- /dev/null +++ b/backend/compact-connect/tests/app/test_vpc.py @@ -0,0 +1,222 @@ +import json +from unittest import TestCase + +from aws_cdk.assertions import Match, Template + +from tests.app.base import TstAppABC + + +class TestVpcStack(TstAppABC, TestCase): + """ + Test cases for the VpcStack to ensure proper VPC configuration + for OpenSearch Domain and Lambda functions. + """ + + @classmethod + def get_context(cls): + with open('cdk.json') as f: + context = json.load(f)['context'] + with open('cdk.context.sandbox-example.json') as f: + context.update(json.load(f)) + + # Suppresses lambda bundling for tests + context['aws:cdk:bundling-stacks'] = [] + return context + + def test_vpc_configuration(self): + """ + Test that the VPC is created with the correct configuration for OpenSearch and Lambda functions. + """ + vpc_stack = self.app.sandbox_backend_stage.vpc_stack + vpc_template = Template.from_stack(vpc_stack) + + # Verify exactly one VPC is created + vpc_template.resource_count_is('AWS::EC2::VPC', 1) + + # Verify VPC has the correct configuration + vpc_template.has_resource_properties( + 'AWS::EC2::VPC', + { + 'CidrBlock': '10.0.0.0/16', + 'EnableDnsHostnames': True, + 'EnableDnsSupport': True, + }, + ) + + def test_no_internet_gateway(self): + """ + Test that no Internet Gateway is created, as we're using VPC endpoints for AWS service access. + """ + vpc_stack = self.app.sandbox_backend_stage.vpc_stack + vpc_template = Template.from_stack(vpc_stack) + + # Verify no Internet Gateway is created + vpc_template.resource_count_is('AWS::EC2::InternetGateway', 0) + + def test_no_nat_gateway(self): + """ + Test that no NAT Gateway is created, as we're using VPC endpoints for AWS service access. + """ + vpc_stack = self.app.sandbox_backend_stage.vpc_stack + vpc_template = Template.from_stack(vpc_stack) + + # Verify no NAT Gateway is created + vpc_template.resource_count_is('AWS::EC2::NatGateway', 0) + + def test_vpc_flow_logs(self): + """ + Test that VPC Flow Logs are configured to monitor network traffic. + """ + vpc_stack = self.app.sandbox_backend_stage.vpc_stack + vpc_template = Template.from_stack(vpc_stack) + + # Verify Flow Log is created + vpc_template.resource_count_is('AWS::EC2::FlowLog', 1) + + # Verify Flow Log is configured correctly + vpc_template.has_resource_properties( + 'AWS::EC2::FlowLog', + { + 'ResourceType': 'VPC', + 'TrafficType': 'ALL', + }, + ) + + # Verify CloudWatch Log Group for Flow Logs exists + vpc_template.resource_count_is('AWS::Logs::LogGroup', 1) + + def test_cloudwatch_logs_vpc_endpoint(self): + """ + Test that CloudWatch Logs VPC endpoint is created to allow Lambda functions to send logs. 
+ """ + vpc_stack = self.app.sandbox_backend_stage.vpc_stack + vpc_template = Template.from_stack(vpc_stack) + + # Verify VPC endpoint for CloudWatch Logs is created + vpc_template.has_resource_properties( + 'AWS::EC2::VPCEndpoint', + { + 'ServiceName': Match.string_like_regexp('.*logs.*'), + 'VpcEndpointType': 'Interface', + }, + ) + + def test_dynamodb_vpc_endpoint(self): + """ + Test that DynamoDB VPC endpoint is created for Lambda functions to access DynamoDB. + """ + vpc_stack = self.app.sandbox_backend_stage.vpc_stack + vpc_template = Template.from_stack(vpc_stack) + + # Verify VPC gateway endpoint for DynamoDB is created + vpc_template.has_resource_properties( + 'AWS::EC2::VPCEndpoint', + { + 'VpcEndpointType': 'Gateway', + }, + ) + + def test_security_groups_created(self): + """ + Test that security groups are created for OpenSearch and Lambda functions. + """ + vpc_stack = self.app.sandbox_backend_stage.vpc_stack + vpc_template = Template.from_stack(vpc_stack) + + # Verify security groups are created (2 for our services + default VPC security group) + security_groups = vpc_template.find_resources('AWS::EC2::SecurityGroup') + + # Verify OpenSearch security group exists with correct description + opensearch_sg_logical_id = vpc_stack.get_logical_id(vpc_stack.opensearch_security_group.node.default_child) + opensearch_sg = TestVpcStack.get_resource_properties_by_logical_id(opensearch_sg_logical_id, security_groups) + self.assertEqual( + { + 'GroupDescription': 'Security group for OpenSearch Domain', + 'SecurityGroupEgress': [ + {'CidrIp': '0.0.0.0/0', 'Description': 'Allow all outbound traffic by default', 'IpProtocol': '-1'} + ], + 'VpcId': {'Ref': 'CompactConnectVpcF5956695'}, + }, + opensearch_sg, + ) + + # Verify Lambda security group exists with correct description + lambda_sg_logical_id = vpc_stack.get_logical_id(vpc_stack.lambda_security_group.node.default_child) + lambda_sg = TestVpcStack.get_resource_properties_by_logical_id(lambda_sg_logical_id, security_groups) + self.assertEqual( + { + 'GroupDescription': 'Security group for Lambda functions within VPC', + 'SecurityGroupEgress': [ + {'CidrIp': '0.0.0.0/0', 'Description': 'Allow all outbound traffic by default', 'IpProtocol': '-1'} + ], + 'VpcId': {'Ref': 'CompactConnectVpcF5956695'}, + }, + lambda_sg, + ) + + def test_opensearch_ingress_rule(self): + """ + Test that the OpenSearch security group allows ingress from Lambda security group on port 443. + """ + vpc_stack = self.app.sandbox_backend_stage.vpc_stack + vpc_template = Template.from_stack(vpc_stack) + + # Get the logical IDs for both security groups + lambda_sg_logical_id = vpc_stack.get_logical_id(vpc_stack.lambda_security_group.node.default_child) + + # Verify ingress rule exists allowing Lambda to access OpenSearch on port 443 + vpc_template.has_resource_properties( + 'AWS::EC2::SecurityGroupIngress', + { + 'IpProtocol': 'tcp', + 'FromPort': 443, + 'ToPort': 443, + 'SourceSecurityGroupId': {'Fn::GetAtt': [lambda_sg_logical_id, 'GroupId']}, + }, + ) + + def test_explicit_subnet_cidr_blocks(self): + """ + Test that subnet CIDR blocks are explicitly set to allow future VPC expansion. + + This verifies that each subnet has its CIDR block locked in via CloudFormation + property overrides. This prevents CIDR conflicts when adding more AZs in the future. 
+ + CIDR allocation from 10.0.0.0/16 VPC: + - Subnet 1 (AZ 1): 10.0.0.0/20 (10.0.0.0 - 10.0.15.255, 4096 IPs) + - Subnet 2 (AZ 2): 10.0.16.0/20 (10.0.16.0 - 10.0.31.255, 4096 IPs) + - Subnet 3 (AZ 3): 10.0.32.0/20 (10.0.32.0 - 10.0.47.255, 4096 IPs) + - Reserved for future: 10.0.48.0/20 and beyond + + Reference: https://github.com/aws/aws-cdk/issues/24708#issuecomment-1665795316 + """ + vpc_stack = self.app.sandbox_backend_stage.vpc_stack + vpc_template = Template.from_stack(vpc_stack) + + # Get all subnet resources + subnet_resources = vpc_template.find_resources('AWS::EC2::Subnet') + + # Filter to only private subnets (those without MapPublicIpOnLaunch) + private_subnets = [] + for logical_id, subnet in subnet_resources.items(): + properties = subnet.get('Properties', {}) + # Private subnets don't have MapPublicIpOnLaunch or it's set to false + if not properties.get('MapPublicIpOnLaunch', False): + private_subnets.append((logical_id, properties)) + + # Verify we have exactly 3 private subnets + self.assertEqual(3, len(private_subnets), f'Expected exactly 3 private subnets, found {len(private_subnets)}') + + # Expected CIDR blocks for the 3 private subnets + expected_cidr_blocks = ['10.0.0.0/20', '10.0.16.0/20', '10.0.32.0/20'] + + # Extract and sort the CIDR blocks from the subnets + actual_cidr_blocks = sorted([subnet[1]['CidrBlock'] for subnet in private_subnets]) + + # Verify the CIDR blocks match our expected explicit allocation + self.assertEqual( + expected_cidr_blocks, + actual_cidr_blocks, + 'Subnet CIDR blocks do not match expected explicit allocation. ' + 'This is critical for preventing conflicts when expanding the VPC.', + ) diff --git a/backend/multi-account/README.md b/backend/multi-account/README.md index f1f308503..1be0424b6 100644 --- a/backend/multi-account/README.md +++ b/backend/multi-account/README.md @@ -216,6 +216,8 @@ For enhanced security, use the secure bootstrap templates that trust only specif --cloudformation-execution-policies 'arn:aws:iam::aws:policy/AdministratorAccess' ``` +**Note on OpenSearch Service-Linked Role**: The bootstrap templates include creation of a service-linked role for Amazon OpenSearch Service VPC access. This role can only exist once per AWS account. If the role already exists in the account (e.g., from previous OpenSearch usage), the bootstrap deployment will fail. In that case, simply remove the `OpenSearchServiceLinkedRole` resource from the template before running the bootstrap command. + ### Bootstrap the secondary accounts See ./backups/README for instructions on setting up the secondary accounts and backup resources. 
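A quick pre-flight check related to the service-linked role note added to backend/multi-account/README.md above can save a failed bootstrap deployment. This is only a sketch, assuming boto3 credentials for the target account; the role name is the standard service-linked role Amazon OpenSearch Service uses for VPC access, and the script itself is not part of this change:

```python
import boto3
from botocore.exceptions import ClientError

# Illustrative pre-flight check for the bootstrap templates: the service-linked
# role below can exist only once per AWS account.
iam = boto3.client('iam')
try:
    iam.get_role(RoleName='AWSServiceRoleForAmazonOpenSearchService')
    print('Role already exists - remove the OpenSearchServiceLinkedRole resource from the template.')
except ClientError as error:
    if error.response['Error']['Code'] == 'NoSuchEntity':
        print('Role not found - the bootstrap template can create it.')
    else:
        raise
```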
diff --git a/backend/multi-account/backups/requirements-dev.txt b/backend/multi-account/backups/requirements-dev.txt index 88d4637ce..5680962c6 100644 --- a/backend/multi-account/backups/requirements-dev.txt +++ b/backend/multi-account/backups/requirements-dev.txt @@ -1,27 +1,27 @@ # -# This file is autogenerated by pip-compile with Python 3.12 +# This file is autogenerated by pip-compile with Python 3.14 # by the following command: # -# pip-compile --no-emit-index-url backups/requirements-dev.in +# pip-compile --no-emit-index-url --no-strip-extras backups/requirements-dev.in # -boto3==1.40.33 +boto3==1.42.11 # via moto -botocore==1.40.33 +botocore==1.42.11 # via # boto3 # moto # s3transfer -certifi==2025.8.3 +certifi==2025.11.12 # via requests cffi==2.0.0 # via cryptography -charset-normalizer==3.4.3 +charset-normalizer==3.4.4 # via requests -cryptography==46.0.1 +cryptography==46.0.3 # via moto -idna==3.10 +idna==3.11 # via requests -iniconfig==2.1.0 +iniconfig==2.3.0 # via pytest jinja2==3.1.6 # via moto @@ -29,11 +29,11 @@ jmespath==1.0.1 # via # boto3 # botocore -markupsafe==3.0.2 +markupsafe==3.0.3 # via # jinja2 # werkzeug -moto==5.1.12 +moto==5.1.18 # via -r backups/requirements-dev.in packaging==25.0 # via pytest @@ -43,13 +43,13 @@ pycparser==2.23 # via cffi pygments==2.19.2 # via pytest -pytest==8.4.2 +pytest==9.0.2 # via -r backups/requirements-dev.in python-dateutil==2.9.0.post0 # via # botocore # moto -pyyaml==6.0.2 +pyyaml==6.0.3 # via responses requests==2.32.5 # via @@ -57,16 +57,16 @@ requests==2.32.5 # responses responses==0.25.8 # via moto -s3transfer==0.14.0 +s3transfer==0.16.0 # via boto3 six==1.17.0 # via python-dateutil -urllib3==2.5.0 +urllib3==2.6.2 # via # botocore # requests # responses -werkzeug==3.1.3 +werkzeug==3.1.4 # via moto xmltodict==1.0.2 # via moto diff --git a/backend/multi-account/backups/requirements.txt b/backend/multi-account/backups/requirements.txt index 536510e88..b681e8aac 100644 --- a/backend/multi-account/backups/requirements.txt +++ b/backend/multi-account/backups/requirements.txt @@ -1,10 +1,10 @@ # -# This file is autogenerated by pip-compile with Python 3.12 +# This file is autogenerated by pip-compile with Python 3.14 # by the following command: # -# pip-compile --no-emit-index-url backups/requirements.in +# pip-compile --no-emit-index-url --no-strip-extras backups/requirements.in # -attrs==25.3.0 +attrs==25.4.0 # via # cattrs # jsii @@ -12,19 +12,19 @@ aws-cdk-asset-awscli-v1==2.2.242 # via aws-cdk-lib aws-cdk-asset-node-proxy-agent-v6==2.1.0 # via aws-cdk-lib -aws-cdk-cloud-assembly-schema==48.10.0 +aws-cdk-cloud-assembly-schema==48.20.0 # via aws-cdk-lib -aws-cdk-lib==2.215.0 +aws-cdk-lib==2.232.2 # via -r backups/requirements.in -cattrs==25.2.0 +cattrs==25.3.0 # via jsii -constructs==10.4.2 +constructs==10.4.4 # via # -r backups/requirements.in # aws-cdk-lib importlib-resources==6.5.2 # via jsii -jsii==1.114.1 +jsii==1.121.0 # via # aws-cdk-asset-awscli-v1 # aws-cdk-asset-node-proxy-agent-v6 diff --git a/backend/multi-account/control-tower/requirements-dev.txt b/backend/multi-account/control-tower/requirements-dev.txt index 21864a695..62d01ed4f 100644 --- a/backend/multi-account/control-tower/requirements-dev.txt +++ b/backend/multi-account/control-tower/requirements-dev.txt @@ -1,36 +1,36 @@ # -# This file is autogenerated by pip-compile with Python 3.12 +# This file is autogenerated by pip-compile with Python 3.14 # by the following command: # -# pip-compile --no-emit-index-url control-tower/requirements-dev.in +# pip-compile 
--no-emit-index-url --no-strip-extras control-tower/requirements-dev.in # boolean-py==5.0 # via license-expression build==1.3.0 # via pip-tools -cachecontrol[filecache]==0.14.3 +cachecontrol[filecache]==0.14.4 # via # cachecontrol # pip-audit -certifi==2025.8.3 +certifi==2025.11.12 # via requests -charset-normalizer==3.4.3 +charset-normalizer==3.4.4 # via requests -click==8.2.1 +click==8.3.1 # via pip-tools -coverage[toml]==7.10.6 +coverage[toml]==7.13.0 # via # -r control-tower/requirements-dev.in # pytest-cov -cyclonedx-python-lib==9.1.0 +cyclonedx-python-lib==11.6.0 # via pip-audit defusedxml==0.7.1 # via py-serializable -filelock==3.19.1 +filelock==3.20.1 # via cachecontrol -idna==3.10 +idna==3.11 # via requests -iniconfig==2.1.0 +iniconfig==2.3.0 # via pytest license-expression==30.4.4 # via cyclonedx-python-lib @@ -38,9 +38,9 @@ markdown-it-py==4.0.0 # via rich mdurl==0.1.2 # via markdown-it-py -msgpack==1.1.1 +msgpack==1.1.2 # via cachecontrol -packageurl-python==0.17.5 +packageurl-python==0.17.6 # via cyclonedx-python-lib packaging==25.0 # via @@ -50,13 +50,13 @@ packaging==25.0 # pytest pip-api==0.0.34 # via pip-audit -pip-audit==2.9.0 +pip-audit==2.10.0 # via -r control-tower/requirements-dev.in pip-requirements-parser==32.0.1 # via pip-audit -pip-tools==7.5.0 +pip-tools==7.5.2 # via -r control-tower/requirements-dev.in -platformdirs==4.4.0 +platformdirs==4.5.1 # via pip-audit pluggy==1.6.0 # via @@ -68,13 +68,13 @@ pygments==2.19.2 # via # pytest # rich -pyparsing==3.2.4 +pyparsing==3.2.5 # via pip-requirements-parser pyproject-hooks==1.2.0 # via # build # pip-tools -pytest==8.4.2 +pytest==9.0.2 # via # -r control-tower/requirements-dev.in # pytest-cov @@ -84,15 +84,17 @@ requests==2.32.5 # via # cachecontrol # pip-audit -rich==14.1.0 +rich==14.2.0 # via pip-audit -ruff==0.13.0 +ruff==0.14.9 # via -r control-tower/requirements-dev.in sortedcontainers==2.4.0 # via cyclonedx-python-lib -toml==0.10.2 +tomli==2.3.0 # via pip-audit -urllib3==2.5.0 +tomli-w==1.2.0 + # via pip-audit +urllib3==2.6.2 # via requests wheel==0.45.1 # via pip-tools diff --git a/backend/multi-account/control-tower/requirements.txt b/backend/multi-account/control-tower/requirements.txt index 2f24b1a7a..b85f9da0d 100644 --- a/backend/multi-account/control-tower/requirements.txt +++ b/backend/multi-account/control-tower/requirements.txt @@ -1,10 +1,10 @@ # -# This file is autogenerated by pip-compile with Python 3.12 +# This file is autogenerated by pip-compile with Python 3.14 # by the following command: # -# pip-compile --no-emit-index-url control-tower/requirements.in +# pip-compile --no-emit-index-url --no-strip-extras control-tower/requirements.in # -attrs==25.3.0 +attrs==25.4.0 # via # cattrs # jsii @@ -12,24 +12,24 @@ aws-cdk-asset-awscli-v1==2.2.242 # via aws-cdk-lib aws-cdk-asset-node-proxy-agent-v6==2.1.0 # via aws-cdk-lib -aws-cdk-cloud-assembly-schema==48.10.0 +aws-cdk-cloud-assembly-schema==48.20.0 # via aws-cdk-lib -aws-cdk-lib==2.215.0 +aws-cdk-lib==2.232.2 # via # -r control-tower/requirements.in # cdk-nag -cattrs==25.2.0 +cattrs==25.3.0 # via jsii -cdk-nag==2.37.29 +cdk-nag==2.37.55 # via -r control-tower/requirements.in -constructs==10.4.2 +constructs==10.4.4 # via # -r control-tower/requirements.in # aws-cdk-lib # cdk-nag importlib-resources==6.5.2 # via jsii -jsii==1.114.1 +jsii==1.121.0 # via # aws-cdk-asset-awscli-v1 # aws-cdk-asset-node-proxy-agent-v6 diff --git a/backend/multi-account/log-aggregation/requirements-dev.txt b/backend/multi-account/log-aggregation/requirements-dev.txt index 
e002df771..255223201 100644 --- a/backend/multi-account/log-aggregation/requirements-dev.txt +++ b/backend/multi-account/log-aggregation/requirements-dev.txt @@ -1,10 +1,10 @@ # -# This file is autogenerated by pip-compile with Python 3.12 +# This file is autogenerated by pip-compile with Python 3.14 # by the following command: # -# pip-compile --no-emit-index-url log-aggregation/requirements-dev.in +# pip-compile --no-emit-index-url --no-strip-extras log-aggregation/requirements-dev.in # -iniconfig==2.1.0 +iniconfig==2.3.0 # via pytest packaging==25.0 # via pytest @@ -12,5 +12,5 @@ pluggy==1.6.0 # via pytest pygments==2.19.2 # via pytest -pytest==8.4.2 +pytest==9.0.2 # via -r log-aggregation/requirements-dev.in diff --git a/backend/multi-account/log-aggregation/requirements.txt b/backend/multi-account/log-aggregation/requirements.txt index 338d35ef1..c722156d4 100644 --- a/backend/multi-account/log-aggregation/requirements.txt +++ b/backend/multi-account/log-aggregation/requirements.txt @@ -1,10 +1,10 @@ # -# This file is autogenerated by pip-compile with Python 3.12 +# This file is autogenerated by pip-compile with Python 3.14 # by the following command: # -# pip-compile --no-emit-index-url log-aggregation/requirements.in +# pip-compile --no-emit-index-url --no-strip-extras log-aggregation/requirements.in # -attrs==25.3.0 +attrs==25.4.0 # via # cattrs # jsii @@ -12,19 +12,19 @@ aws-cdk-asset-awscli-v1==2.2.242 # via aws-cdk-lib aws-cdk-asset-node-proxy-agent-v6==2.1.0 # via aws-cdk-lib -aws-cdk-cloud-assembly-schema==48.10.0 +aws-cdk-cloud-assembly-schema==48.20.0 # via aws-cdk-lib -aws-cdk-lib==2.215.0 +aws-cdk-lib==2.232.2 # via -r log-aggregation/requirements.in -cattrs==25.2.0 +cattrs==25.3.0 # via jsii -constructs==10.4.2 +constructs==10.4.4 # via # -r log-aggregation/requirements.in # aws-cdk-lib importlib-resources==6.5.2 # via jsii -jsii==1.114.1 +jsii==1.121.0 # via # aws-cdk-asset-awscli-v1 # aws-cdk-asset-node-proxy-agent-v6
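As a closing note on the alarm thresholds asserted in the SearchPersistentStack tests earlier in this diff: both FreeStorageSpace thresholds (5120 MB for the sandbox 10 GB volume, 12800 MB for the production 25 GB volume) follow the same 50 percent rule, with CloudWatch reporting that metric in megabytes. A minimal sketch of the arithmetic, using a helper name that is illustrative and not part of the codebase:

```python
def free_storage_alarm_threshold_mb(ebs_volume_gb: int, free_fraction: float = 0.5) -> int:
    """Return the FreeStorageSpace alarm threshold in MB; CloudWatch reports the metric in MB."""
    return int(ebs_volume_gb * 1024 * free_fraction)

# Values asserted by the tests above
assert free_storage_alarm_threshold_mb(10) == 5120   # sandbox: 50% of a 10 GB volume
assert free_storage_alarm_threshold_mb(25) == 12800  # production: 50% of a 25 GB volume
```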