74 changes: 74 additions & 0 deletions .github/workflows/terraform-mn-vectorization.yml
@@ -0,0 +1,74 @@
name: "Terraform — MN Vectorization"

on:
  pull_request:
    paths:
      - "mn-vectorization/infra/**"
  push:
    branches:
      - main
    paths:
      - "mn-vectorization/infra/**"

permissions:
  contents: read

Comment on lines +13 to +15

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# First, let's locate and read the workflow file
find . -name "terraform-mn-vectorization.yml" -type f

Repository: speedandfunction/automatization



🏁 Script executed:

# Let's also search for this file with a broader pattern
git ls-files | grep -i "terraform.*vectorization"



🏁 Script executed:

cat -n .github/workflows/terraform-mn-vectorization.yml



🌐 Web query:

aws-actions/configure-aws-credentials v4 OIDC role-to-assume documentation

💡 Result:

To use aws-actions/configure-aws-credentials@v4 with GitHub OIDC and role-to-assume:

1) GitHub Actions workflow (OIDC)

permissions:
  id-token: write
  contents: read

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Configure AWS credentials (OIDC)
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::<AWS_ACCOUNT_ID>:role/<ROLE_NAME>
          aws-region: us-east-1

      - run: aws sts get-caller-identity

[1][2]

2) IAM role trust policy (key OIDC bits)

Your role’s trust policy must allow sts:AssumeRoleWithWebIdentity from GitHub’s OIDC provider, typically scoping by aud and sub, e.g.:

  • token.actions.githubusercontent.com:aud = sts.amazonaws.com
  • token.actions.githubusercontent.com:sub = repo:<ORG>/<REPO>:ref:refs/heads/<BRANCH>
    [1][2]

3) Primary docs

[1] https://github.com/aws-actions/configure-aws-credentials (README: “Quick Start (OIDC, recommended)”, options incl. role-to-assume/audience)
[2] https://docs.github.com/en/actions/deployment/security-hardening-your-deployments/configuring-openid-connect-in-amazon-web-services (end-to-end GitHub OIDC + role-to-assume example)

Switch to GitHub OIDC authentication to eliminate static AWS credentials.

The workflow currently uses long-lived AWS access keys stored in secrets (lines 41–42), which increases credential exposure risk. Replace with OIDC role assumption as recommended by AWS and GitHub.

Required changes:

  • Add id-token: write permission (line 13)
  • Replace static key references with role-to-assume: ${{ secrets.MN_VECTORIZATION_AWS_ROLE_ARN }}
🔧 Suggested OIDC-based auth
 permissions:
   contents: read
   pull-requests: write
+  id-token: write
@@
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@v4
         with:
-          aws-access-key-id: ${{ secrets.MN_VECTORIZATION_AWS_ACCESS_KEY_ID }}
-          aws-secret-access-key: ${{ secrets.MN_VECTORIZATION_AWS_SECRET_ACCESS_KEY }}
+          role-to-assume: ${{ secrets.MN_VECTORIZATION_AWS_ROLE_ARN }}
           aws-region: ${{ env.AWS_REGION }}

Also applies to: lines 38–43

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In @.github/workflows/terraform-mn-vectorization.yml around lines 13-16,
update the workflow to use GitHub OIDC by adding the id-token: write permission
under the permissions block and replacing static AWS key usage with role
assumption: remove references to secrets.MN_VECTORIZATION_AWS_ACCESS_KEY_ID and
secrets.MN_VECTORIZATION_AWS_SECRET_ACCESS_KEY and update the AWS auth step (the
step that currently configures AWS credentials) to use
aws-actions/configure-aws-credentials with role-to-assume: ${{
secrets.MN_VECTORIZATION_AWS_ROLE_ARN }} (and preserve aws-region) so the job
uses OIDC role assumption instead of long-lived secrets.
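
For when that migration happens, the deferred IAM side (which the workflow's own NOTE says is pending) could be provisioned in Terraform roughly as follows. This is a sketch only — the provider thumbprint, role name, and repo scoping pattern below are assumptions, not part of this PR:

```hcl
# Sketch: GitHub OIDC identity provider + role for the workflow to assume.
# Role name and sub-claim scoping are hypothetical; tighten the sub pattern
# (e.g. to a specific branch) per your security requirements.
resource "aws_iam_openid_connect_provider" "github" {
  url             = "https://token.actions.githubusercontent.com"
  client_id_list  = ["sts.amazonaws.com"]
  thumbprint_list = ["6938fd4d98bab03faadb97b34396831e3780aea1"]
}

data "aws_iam_policy_document" "github_trust" {
  statement {
    actions = ["sts:AssumeRoleWithWebIdentity"]

    principals {
      type        = "Federated"
      identifiers = [aws_iam_openid_connect_provider.github.arn]
    }

    condition {
      test     = "StringEquals"
      variable = "token.actions.githubusercontent.com:aud"
      values   = ["sts.amazonaws.com"]
    }

    condition {
      test     = "StringLike"
      variable = "token.actions.githubusercontent.com:sub"
      values   = ["repo:speedandfunction/automatization:*"]
    }
  }
}

resource "aws_iam_role" "github_actions" {
  name               = "mn-vectorization-github-actions"
  assume_role_policy = data.aws_iam_policy_document.github_trust.json
}
```

The ARN of aws_iam_role.github_actions is what would then be stored as the MN_VECTORIZATION_AWS_ROLE_ARN secret.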

env:
  TF_WORKING_DIR: mn-vectorization/infra
  TF_ENV: dev
  AWS_REGION: us-east-1

jobs:
  terraform:
    name: Terraform
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: ${{ env.TF_WORKING_DIR }}

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v3
        with:
          terraform_version: "~1.5"

      # NOTE: Using static keys for POC. OIDC migration deferred until
      # Nomad provisions IAM OIDC identity provider + role in AWS.
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.MN_VECTORIZATION_AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.MN_VECTORIZATION_AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}

      - name: Terraform Format Check
        run: terraform fmt -check -recursive

      - name: Terraform Init
        run: |
          terraform init \
            -backend-config="bucket=sf-terraform-state" \
            -backend-config="key=mn-vectorization/${{ env.TF_ENV }}/terraform.tfstate" \
            -backend-config="region=${{ env.AWS_REGION }}"

      - name: Terraform Validate
        run: terraform validate

      - name: Terraform Plan
        if: github.event_name == 'pull_request'
        run: |
          set +e
          terraform plan -var-file=environments/${{ env.TF_ENV }}.tfvars -no-color -input=false -detailed-exitcode
          ec=$?
          if [ "$ec" -eq 1 ]; then
            echo "::error::Terraform plan failed"
            exit 1
          fi
          exit 0

Comment on lines +60 to +71

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

cat -n .github/workflows/terraform-mn-vectorization.yml | head -80



🌐 Web query:

Terraform plan exit codes -detailed-exitcode behavior meaning

💡 Result:

terraform plan -detailed-exitcode makes terraform plan return three possible exit codes so automation can tell the difference between “no changes” and “changes needed”:

  • 0: Plan succeeded and the diff is empty (no changes to apply).
  • 1: Plan failed (error running the plan).
  • 2: Plan succeeded and the diff is non-empty (there are changes to apply). [1]

Without -detailed-exitcode, terraform plan generally returns 0 for success (even if changes are shown) and 1 for errors, so you can’t reliably detect “changes present” via the exit code alone. [1]


Do not swallow Terraform plan failures in PRs.

continue-on-error: true silently masks plan errors (exit code 1), such as invalid configurations or provider failures. The -detailed-exitcode flag distinguishes between actual failures, no changes (exit 0), and detected drift/changes (exit 2). Handle these cases explicitly:

Suggested fix
       - name: Terraform Plan
         if: github.event_name == 'pull_request'
-        run: terraform plan -var-file=environments/dev.tfvars -no-color -input=false
-        continue-on-error: true
+        run: |
+          set +e
+          terraform plan -var-file=environments/dev.tfvars -no-color -input=false -detailed-exitcode
+          ec=$?
+          if [ "$ec" -eq 1 ]; then
+            exit 1
+          fi
+          exit 0
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In @.github/workflows/terraform-mn-vectorization.yml around lines 58-62, in
the "Terraform Plan" step, remove the unconditional continue-on-error: true and
run terraform plan with the -detailed-exitcode flag (terraform plan
-var-file=environments/dev.tfvars -no-color -input=false -detailed-exitcode),
then explicitly inspect the plan exit code: if exit code == 1 fail the job
(error/exit non-zero) to surface real errors, if exit code == 0 treat as no
changes and succeed, and if exit code == 2 treat as detected changes/drift
(succeed but set a clear step output or annotation indicating changes were
found); implement this explicit exit-code handling in the step that invokes the
terraform plan command.

      - name: Terraform Apply
        if: github.ref == 'refs/heads/main' && github.event_name == 'push'
        run: terraform apply -var-file=environments/${{ env.TF_ENV }}.tfvars -auto-approve -input=false
4 changes: 4 additions & 0 deletions mn-vectorization/infra/.gitignore
@@ -0,0 +1,4 @@
.terraform/
*.tfstate
*.tfstate.backup
*.tfplan
3 changes: 3 additions & 0 deletions mn-vectorization/infra/backend.tf
@@ -0,0 +1,3 @@
terraform {
backend "s3" {}
}
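
The empty backend "s3" {} block is Terraform's partial backend configuration: bucket, key, and region are injected at init time via the workflow's -backend-config flags. Fully inlined for dev, it would be equivalent to something like (values taken from the workflow's Terraform Init step):

```hcl
# Equivalent fully-specified backend for the dev environment — shown for
# reference only; the repo intentionally keeps the block empty so CI can
# target per-environment state keys with -backend-config flags.
terraform {
  backend "s3" {
    bucket = "sf-terraform-state"
    key    = "mn-vectorization/dev/terraform.tfstate"
    region = "us-east-1"
  }
}
```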
75 changes: 75 additions & 0 deletions mn-vectorization/infra/cloudwatch.tf
@@ -0,0 +1,75 @@
# -----------------------------------------------------
# CloudWatch log groups
# -----------------------------------------------------

resource "aws_cloudwatch_log_group" "main" {
  for_each          = local.log_groups
  name              = each.value
  retention_in_days = var.log_retention_days
  tags              = { Name = each.value }
}

# -----------------------------------------------------
# CloudWatch alarms
# -----------------------------------------------------

# Alarm 1: Indexing failures (custom metric from Temporal worker)
resource "aws_cloudwatch_metric_alarm" "indexing_failures" {
  count               = var.is_alarm_enabled ? 1 : 0
  alarm_name          = "${local.name_prefix}_indexing_failures_alarm"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = 1
  metric_name         = "IndexingFailures"
  namespace           = "MNVectorization/${var.environment}"
  period              = 300
  statistic           = "Sum"
  threshold           = 0
  alarm_description   = "Indexing pipeline failure detected"
  treat_missing_data  = "notBreaching"

  alarm_actions = var.alarm_sns_topic_arn != "" ? [var.alarm_sns_topic_arn] : []

  tags = { Name = "${local.name_prefix}_indexing_failures_alarm" }
}

# Alarm 2: Query latency p99 (custom metric from MCP server)
resource "aws_cloudwatch_metric_alarm" "query_latency_p99" {
  count               = var.is_alarm_enabled ? 1 : 0
  alarm_name          = "${local.name_prefix}_query_latency_p99_alarm"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = 2
  metric_name         = "QueryLatencyP99"
  namespace           = "MNVectorization/${var.environment}"
  period              = 300
  statistic           = "Maximum"
  threshold           = 30000
  alarm_description   = "Query p99 latency exceeds 30s"
  treat_missing_data  = "notBreaching"

  alarm_actions = var.alarm_sns_topic_arn != "" ? [var.alarm_sns_topic_arn] : []

  tags = { Name = "${local.name_prefix}_query_latency_p99_alarm" }
}

# Alarm 3: DynamoDB throttling (per table)
resource "aws_cloudwatch_metric_alarm" "dynamodb_throttling" {
  for_each            = var.is_alarm_enabled ? local.dynamodb_tables : {}
  alarm_name          = "${local.name_prefix}_${each.key}_throttling_alarm"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = 1
  metric_name         = "ThrottledRequests"
  namespace           = "AWS/DynamoDB"
  period              = 60
  statistic           = "Sum"
  threshold           = 0
  alarm_description   = "DynamoDB throttling on ${each.key} table"
  treat_missing_data  = "notBreaching"

  dimensions = {
    TableName = aws_dynamodb_table.main[each.key].name
  }

  alarm_actions = var.alarm_sns_topic_arn != "" ? [var.alarm_sns_topic_arn] : []

  tags = { Name = "${local.name_prefix}_${each.key}_throttling_alarm" }
}
11 changes: 11 additions & 0 deletions mn-vectorization/infra/data.tf
@@ -0,0 +1,11 @@
data "aws_vpc" "existing" {
  id = var.vpc_id
}

data "aws_instance" "existing" {
  instance_id = var.ec2_instance_id
}

data "aws_caller_identity" "current" {}

data "aws_region" "current" {}
30 changes: 30 additions & 0 deletions mn-vectorization/infra/dynamodb.tf
@@ -0,0 +1,30 @@
# -----------------------------------------------------
# DynamoDB tables for task state and user ACL
# Created via for_each over local.dynamodb_tables
# -----------------------------------------------------

resource "aws_dynamodb_table" "main" {
  for_each     = local.dynamodb_tables
  name         = "${local.name_prefix}_${each.key}_ddb"
  billing_mode = "PAY_PER_REQUEST"
  hash_key     = each.value.hash_key

  attribute {
    name = each.value.hash_key
    type = "S"
  }

  dynamic "ttl" {
    for_each = each.value.ttl_attr != null ? [each.value.ttl_attr] : []
    content {
      attribute_name = ttl.value
      enabled        = true
    }
  }

  point_in_time_recovery {
    enabled = var.environment == "prod"
  }

  tags = { Name = "${local.name_prefix}_${each.key}_ddb" }
}
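
Both this file and cloudwatch.tf iterate over local.dynamodb_tables (and cloudwatch.tf over local.log_groups), which are defined outside this diff. A hypothetical shape consistent with the attributes referenced here (hash_key, ttl_attr) might look like:

```hcl
# Hypothetical locals — the actual table names, keys, TTL attributes, and
# log group paths live in the module's locals file, not shown in this PR.
locals {
  name_prefix = "mn_vectorization_${var.environment}"

  dynamodb_tables = {
    task_state = { hash_key = "task_id", ttl_attr = "expires_at" }
    user_acl   = { hash_key = "user_id", ttl_attr = null }
  }

  log_groups = {
    worker = "/mn-vectorization/${var.environment}/worker"
    mcp    = "/mn-vectorization/${var.environment}/mcp-server"
  }
}
```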
21 changes: 21 additions & 0 deletions mn-vectorization/infra/environments/dev.tfvars
@@ -0,0 +1,21 @@
environment = "dev"
aws_region = "us-east-1"
billing_tag = "mn-vectorization"

# Existing infrastructure — replace with actual IDs
vpc_id = "vpc-385f9a56"
ec2_instance_id = "i-XXXXXXXXXXXXXXXXX"

# CloudWatch
log_retention_days = 14
alarm_sns_topic_arn = ""
is_alarm_enabled = false

# MCP Server
mcp_server_port = 3000

# Encryption
is_kms_enabled = false

# S3 lifecycle
embeddings_expiry_days = 0
22 changes: 22 additions & 0 deletions mn-vectorization/infra/environments/prod.tfvars
@@ -0,0 +1,22 @@
environment = "prod"
aws_region = "us-east-1"
billing_tag = "mn-vectorization"

# Existing infrastructure — replace with actual IDs
vpc_id = "vpc-XXXXXXXXXXXXXXXXX"
ec2_instance_id = "i-XXXXXXXXXXXXXXXXX"

# CloudWatch
log_retention_days = 90
alarm_sns_topic_arn = "arn:aws:sns:us-east-1:891612588877:mn-vectorization-alerts"
is_alarm_enabled = true

# MCP Server
mcp_server_port = 3000

# Encryption
is_kms_enabled = false
# kms_key_arn = "arn:aws:kms:us-east-1:891612588877:key/XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX"

# S3 lifecycle
embeddings_expiry_days = 0
21 changes: 21 additions & 0 deletions mn-vectorization/infra/environments/staging.tfvars
@@ -0,0 +1,21 @@
environment = "staging"
aws_region = "us-east-1"
billing_tag = "mn-vectorization"

# Existing infrastructure — replace with actual IDs
vpc_id = "vpc-XXXXXXXXXXXXXXXXX"
ec2_instance_id = "i-XXXXXXXXXXXXXXXXX"

# CloudWatch
log_retention_days = 30
alarm_sns_topic_arn = ""
is_alarm_enabled = true
Comment on lines +10 to +12

⚠️ Potential issue | 🟡 Minor

Enable alarms only when a notification target exists.

This staging config enables alarms but leaves alarm_sns_topic_arn empty, so alarms will not notify anyone. Consider setting is_alarm_enabled = false until an SNS topic is wired.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@mn-vectorization/infra/environments/staging.tfvars` around lines 10-12, the
staging tfvars enables alarms with is_alarm_enabled = true while
alarm_sns_topic_arn is empty; change the configuration so alarms are only
enabled when a notification target exists by either supplying a valid SNS topic
ARN to alarm_sns_topic_arn or setting is_alarm_enabled = false until an SNS
topic is provisioned (update the values for alarm_sns_topic_arn and
is_alarm_enabled accordingly, leaving log_retention_days unchanged).
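
Beyond fixing the tfvars, the module could enforce this invariant so a mismatch fails at plan time — for example, a lifecycle precondition on one of the alarm resources (a sketch; precondition blocks are available since Terraform 1.2):

```hcl
# Sketch: fail the plan when alarms are enabled without a notification target.
resource "aws_cloudwatch_metric_alarm" "indexing_failures" {
  count = var.is_alarm_enabled ? 1 : 0
  # ... existing arguments unchanged ...

  lifecycle {
    precondition {
      condition     = !var.is_alarm_enabled || var.alarm_sns_topic_arn != ""
      error_message = "is_alarm_enabled = true requires a non-empty alarm_sns_topic_arn."
    }
  }
}
```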


# MCP Server
mcp_server_port = 3000

# Encryption
is_kms_enabled = false

# S3 lifecycle
embeddings_expiry_days = 0