Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
232 changes: 82 additions & 150 deletions import-automation/terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# - Necessary GCP APIs
# - Secret Manager for the import-config secret
# - GCS Buckets for imports, mounting, and Dataflow templates
# - Spanner Instance and Database with schema
# - Spanner Instance and Database
# - Artifact Registry for hosting Docker images (Flex Template & Executor)
# - Pub/Sub Topic and Subscription for triggering imports
# - Cloud Functions, Workflows, and Ingestion Pipeline
Expand Down Expand Up @@ -73,6 +73,12 @@ variable "spanner_database_id" {
default = "dc-import-db"
}

variable "spanner_graph_database_id" {
description = "Spanner Graph Database ID"
type = string
default = "dc-import-db"
}

variable "bq_dataset_id" {
description = "BigQuery Dataset ID for aggregation"
type = string
Expand Down Expand Up @@ -172,141 +178,80 @@ resource "google_storage_bucket" "mount_bucket" {

# --- Cloud Functions Source Packaging ---

data "archive_file" "ingestion_helper_source" {
type = "zip"
source_dir = "${path.module}/../workflow/ingestion-helper"
output_path = "${path.module}/ingestion_helper.zip"
}

data "archive_file" "aggregation_helper_source" {
type = "zip"
source_dir = "${path.module}/../workflow/aggregation-helper"
output_path = "${path.module}/aggregation_helper.zip"
}

data "archive_file" "import_helper_source" {
type = "zip"
source_dir = "${path.module}/../workflow/import-helper"
output_path = "${path.module}/import_helper.zip"
}

resource "google_storage_bucket_object" "ingestion_helper_zip" {
name = "function-source/ingestion_helper.${data.archive_file.ingestion_helper_source.output_md5}.zip"
bucket = google_storage_bucket.import_bucket.name
source = data.archive_file.ingestion_helper_source.output_path
}

resource "google_storage_bucket_object" "aggregation_helper_zip" {
name = "function-source/aggregation_helper.${data.archive_file.aggregation_helper_source.output_md5}.zip"
bucket = google_storage_bucket.import_bucket.name
source = data.archive_file.aggregation_helper_source.output_path
}

resource "google_storage_bucket_object" "import_helper_zip" {
name = "function-source/import_helper.${data.archive_file.import_helper_source.output_md5}.zip"
bucket = google_storage_bucket.import_bucket.name
source = data.archive_file.import_helper_source.output_path
}

# --- Cloud Functions ---

resource "google_cloudfunctions2_function" "ingestion_helper" {
name = "spanner-ingestion-helper"
location = var.region
project = var.project_id
description = "Helper for Spanner ingestion"

build_config {
runtime = "python312"
entry_point = "ingestion_helper"
source {
storage_source {
bucket = google_storage_bucket.import_bucket.name
object = google_storage_bucket_object.ingestion_helper_zip.name
}
}
}

service_config {
max_instance_count = 10
available_memory = "256M"
timeout_seconds = 60
service_account_email = google_service_account.automation_sa.email
environment_variables = {
PROJECT_ID = var.project_id
SPANNER_PROJECT_ID = var.project_id
SPANNER_INSTANCE_ID = var.spanner_instance_id
SPANNER_DATABASE_ID = var.spanner_database_id
GCS_BUCKET_ID = google_storage_bucket.import_bucket.name
LOCATION = var.region
}
}

depends_on = [google_project_service.services]
}
resource "google_cloud_run_v2_service" "ingestion_helper" {
name = "spanner-ingestion-helper"
location = var.region
project = var.project_id

resource "google_cloudfunctions2_function" "aggregation_helper" {
name = "import-aggregation-helper"
location = var.region
project = var.project_id
description = "Helper for import aggregation"

build_config {
runtime = "python312"
entry_point = "aggregation_helper"
source {
storage_source {
bucket = google_storage_bucket.import_bucket.name
object = google_storage_bucket_object.aggregation_helper_zip.name
template {
service_account = google_service_account.automation_sa.email
containers {
image = "${var.region}-docker.pkg.dev/${var.project_id}/${google_artifact_registry_repository.automation_repo.repository_id}/spanner-ingestion-helper:latest"
env {
name = "PROJECT_ID"
value = var.project_id
}
env {
name = "SPANNER_PROJECT_ID"
value = var.project_id
}
env {
name = "SPANNER_INSTANCE_ID"
value = var.spanner_instance_id
}
env {
name = "SPANNER_DATABASE_ID"
value = var.spanner_database_id
}
env {
name = "SPANNER_GRAPH_DATABASE_ID"
value = var.spanner_graph_database_id
}
env {
name = "GCS_BUCKET_ID"
value = google_storage_bucket.import_bucket.name
}
env {
name = "LOCATION"
value = var.region
}
env {
name = "BQ_DATASET_ID"
value = var.bq_dataset_id
}
Comment thread
vish-cs marked this conversation as resolved.
}
}

service_config {
max_instance_count = 10
available_memory = "256M"
timeout_seconds = 60
service_account_email = google_service_account.automation_sa.email
environment_variables = {
PROJECT_ID = var.project_id
SPANNER_PROJECT_ID = var.project_id
SPANNER_INSTANCE_ID = var.spanner_instance_id
SPANNER_DATABASE_ID = var.spanner_database_id
GCS_BUCKET_ID = google_storage_bucket.import_bucket.name
LOCATION = var.region
BQ_DATASET_ID = var.bq_dataset_id
}
}

depends_on = [google_project_service.services]
}

resource "google_cloudfunctions2_function" "import_helper" {
name = "import-automation-helper"
location = var.region
project = var.project_id
description = "Helper for import automation"

build_config {
runtime = "python312"
entry_point = "handle_feed_event"
source {
storage_source {
bucket = google_storage_bucket.import_bucket.name
object = google_storage_bucket_object.import_helper_zip.name
}
}
}
resource "google_cloud_run_v2_service" "import_helper" {
name = "import-automation-helper"
location = var.region
project = var.project_id

service_config {
max_instance_count = 10
available_memory = "256M"
timeout_seconds = 60
service_account_email = google_service_account.automation_sa.email
environment_variables = {
PROJECT_ID = var.project_id
LOCATION = var.region
GCS_BUCKET_ID = google_storage_bucket.import_bucket.name
template {
service_account = google_service_account.automation_sa.email
containers {
image = "${var.region}-docker.pkg.dev/${var.project_id}/${google_artifact_registry_repository.automation_repo.repository_id}/import-automation-helper:latest"
env {
name = "PROJECT_ID"
value = var.project_id
}
env {
name = "LOCATION"
value = var.region
}
env {
name = "GCS_BUCKET_ID"
value = google_storage_bucket.import_bucket.name
}
env {
name = "INGESTION_HELPER_URL"
value = google_cloud_run_v2_service.ingestion_helper.uri
}
}
}

Expand All @@ -324,9 +269,10 @@ resource "google_workflows_workflow" "import_automation_workflow" {
source_contents = file("${path.module}/../workflow/import-automation-workflow.yaml")

user_env_vars = {
LOCATION = var.region
GCS_BUCKET_ID = google_storage_bucket.import_bucket.name
GCS_MOUNT_BUCKET = google_storage_bucket.mount_bucket.name
LOCATION = var.region
GCS_BUCKET_ID = google_storage_bucket.import_bucket.name
GCS_MOUNT_BUCKET = google_storage_bucket.mount_bucket.name
INGESTION_HELPER_URL = google_cloud_run_v2_service.ingestion_helper.uri
}

depends_on = [google_project_service.services]
Expand All @@ -341,11 +287,12 @@ resource "google_workflows_workflow" "spanner_ingestion_workflow" {
source_contents = file("${path.module}/../workflow/spanner-ingestion-workflow.yaml")

user_env_vars = {
LOCATION = var.region
PROJECT_ID = var.project_id
SPANNER_PROJECT_ID = var.project_id
SPANNER_INSTANCE_ID = var.spanner_instance_id
SPANNER_DATABASE_ID = var.spanner_database_id
LOCATION = var.region
PROJECT_ID = var.project_id
SPANNER_PROJECT_ID = var.project_id
SPANNER_INSTANCE_ID = var.spanner_instance_id
SPANNER_DATABASE_ID = var.spanner_database_id
INGESTION_HELPER_URL = google_cloud_run_v2_service.ingestion_helper.uri
}

depends_on = [google_project_service.services]
Expand All @@ -367,25 +314,9 @@ resource "google_spanner_database" "import_db" {
instance = google_spanner_instance.import_instance.name
name = var.spanner_database_id
project = var.project_id
ddl = [for s in split(";", file("${path.module}/../workflow/spanner_schema.sql")) : trimspace(s) if trimspace(s) != ""]
Comment thread
vish-cs marked this conversation as resolved.

deletion_protection = false
}

# Initialize IngestionLock (DML)
resource "null_resource" "init_spanner_lock" {
provisioner "local-exec" {
command = <<EOT
gcloud spanner databases execute-sql ${google_spanner_database.import_db.name} \
--instance=${google_spanner_instance.import_instance.name} \
--project=${var.project_id} \
--sql="INSERT INTO IngestionLock (LockID) VALUES ('global_ingestion_lock')" || echo 'Lock already exists'
EOT
}

depends_on = [google_spanner_database.import_db]
}

# --- IAM ---

resource "google_service_account" "automation_sa" {
Expand Down Expand Up @@ -444,7 +375,7 @@ resource "google_pubsub_subscription" "import_automation_sub" {
filter = "attributes.transfer_status=\"TRANSFER_COMPLETED\""

push_config {
push_endpoint = google_cloudfunctions2_function.import_helper.service_config[0].uri
push_endpoint = google_cloud_run_v2_service.import_helper.uri
oidc_token {
service_account_email = google_service_account.automation_sa.email
}
Expand All @@ -457,3 +388,4 @@ output "automation_service_account_email" {
description = "The email of the service account used for import automation."
}


74 changes: 0 additions & 74 deletions import-automation/workflow/aggregation-helper/main.py

This file was deleted.

This file was deleted.

8 changes: 2 additions & 6 deletions import-automation/workflow/cloudbuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,11 @@ steps:

- id: 'spanner-ingestion-helper'
name: 'gcr.io/cloud-builders/gcloud'
args: ['functions', 'deploy', 'spanner-ingestion-helper', '--gen2', '--project', '${_PROJECT_ID}', '--region', '${_LOCATION}', '--runtime', 'python312', '--source', 'ingestion-helper', '--no-allow-unauthenticated', '--trigger-http', '--entry-point', 'ingestion_helper', '--set-env-vars', 'PROJECT_ID=${_PROJECT_ID},SPANNER_PROJECT_ID=${_SPANNER_PROJECT_ID},SPANNER_INSTANCE_ID=${_SPANNER_INSTANCE_ID},SPANNER_DATABASE_ID=${_SPANNER_DATABASE_ID},SPANNER_GRAPH_DATABASE_ID=${_SPANNER_GRAPH_DATABASE_ID},GCS_BUCKET_ID=${_GCS_BUCKET_ID},LOCATION=${_LOCATION}']

- id: 'import-aggregation-helper'
name: 'gcr.io/cloud-builders/gcloud'
args: ['functions', 'deploy', 'import-aggregation-helper', '--runtime', 'python312', '--source', 'aggregation-helper', '--no-allow-unauthenticated', '--trigger-http', '--entry-point', 'aggregation_helper', '--project', '${_PROJECT_ID}', '--set-env-vars', 'PROJECT_ID=${_PROJECT_ID},SPANNER_PROJECT_ID=${_SPANNER_PROJECT_ID},SPANNER_INSTANCE_ID=${_SPANNER_INSTANCE_ID},SPANNER_DATABASE_ID=${_SPANNER_GRAPH_DATABASE_ID},GCS_BUCKET_ID=${_GCS_BUCKET_ID},LOCATION=${_LOCATION},BQ_DATASET_ID=${_BQ_DATASET_ID}']
args: ['builds', 'submit', 'import-automation/workflow/ingestion-helper', '--config', 'import-automation/workflow/ingestion-helper/cloudbuild.yaml', '--substitutions', '_PROJECT_ID=${_PROJECT_ID},_LOCATION=${_LOCATION}']

- id: 'import-automation-helper'
name: 'gcr.io/cloud-builders/gcloud'
args: ['functions', 'deploy', 'import-automation-helper', '--gen2', '--project', '${_PROJECT_ID}', '--region', '${_LOCATION}', '--runtime', 'python312', '--source', 'import-helper', '--no-allow-unauthenticated', '--trigger-http', '--entry-point', 'handle_feed_event', '--set-env-vars', 'PROJECT_ID=${_PROJECT_ID},LOCATION=${_LOCATION},GCS_BUCKET_ID=${_GCS_BUCKET_ID}']
args: ['builds', 'submit', 'import-automation/workflow/import-helper', '--config', 'import-automation/workflow/import-helper/cloudbuild.yaml', '--substitutions', '_PROJECT_ID=${_PROJECT_ID},_LOCATION=${_LOCATION}']

options:
logging: CLOUD_LOGGING_ONLY
Loading
Loading