From 3faf7b960914ab14a07617c828eaa1cf3e3a9a88 Mon Sep 17 00:00:00 2001 From: Gabriel Mechali Date: Tue, 10 Mar 2026 17:29:55 -0400 Subject: [PATCH 1/5] Reset to only the Terraform diffs. --- .gitignore | 26 +- infra/dcp/README.md | 64 +++++ infra/dcp/main.tf | 111 ++++++++ infra/dcp/modules/cdc/locals.tf | 90 ++++++ infra/dcp/modules/cdc/main.tf | 339 +++++++++++++++++++++++ infra/dcp/modules/cdc/outputs.tf | 45 +++ infra/dcp/modules/cdc/service_account.tf | 24 ++ infra/dcp/modules/cdc/variables.tf | 248 +++++++++++++++++ infra/dcp/modules/dcp/cloudrun.tf | 51 ++++ infra/dcp/modules/dcp/iam.tf | 22 ++ infra/dcp/modules/dcp/outputs.tf | 15 + infra/dcp/modules/dcp/spanner.tf | 18 ++ infra/dcp/modules/dcp/variables.tf | 79 ++++++ infra/dcp/outputs.tf | 17 ++ infra/dcp/setup.sh | 72 +++++ infra/dcp/terraform.tfvars.example | 20 ++ infra/dcp/variables.tf | 338 ++++++++++++++++++++++ 17 files changed, 1578 insertions(+), 1 deletion(-) create mode 100644 infra/dcp/README.md create mode 100644 infra/dcp/main.tf create mode 100644 infra/dcp/modules/cdc/locals.tf create mode 100644 infra/dcp/modules/cdc/main.tf create mode 100644 infra/dcp/modules/cdc/outputs.tf create mode 100644 infra/dcp/modules/cdc/service_account.tf create mode 100644 infra/dcp/modules/cdc/variables.tf create mode 100644 infra/dcp/modules/dcp/cloudrun.tf create mode 100644 infra/dcp/modules/dcp/iam.tf create mode 100644 infra/dcp/modules/dcp/outputs.tf create mode 100644 infra/dcp/modules/dcp/spanner.tf create mode 100644 infra/dcp/modules/dcp/variables.tf create mode 100644 infra/dcp/outputs.tf create mode 100755 infra/dcp/setup.sh create mode 100644 infra/dcp/terraform.tfvars.example create mode 100644 infra/dcp/variables.tf diff --git a/.gitignore b/.gitignore index 6b8cf2b..bfdfb52 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,28 @@ venv node_modules .ruff_cache .coverage -htmlcov/ \ No newline at end of file +htmlcov/ + +# Terraform +.terraform/ +*.tfstate +*.tfstate.* +crash.log +crash.log.* 
+*.tfvars +*.tfvars.json +override.tf +override.tf.json +_override.tf +_override.tf.json +.terraformrc +terraform.rc +tfplan +infra/dcp/backend.tf + +# Configuration +infra/dcp/.env + +*.pyc +.venv/ +uv.lock diff --git a/infra/dcp/README.md b/infra/dcp/README.md new file mode 100644 index 0000000..e9fa14e --- /dev/null +++ b/infra/dcp/README.md @@ -0,0 +1,64 @@ +# Data Commons Platform (DCP) Infrastructure + +This directory contains the Terraform configuration to deploy the Data Commons Platform on Google Cloud Platform (GCP). + +## Prerequisites +* **GCP Project**: A GCP project with billing enabled. +* **Terraform**: Terraform installed locally (>= 1.5.0, required for the `check` block in `main.tf`). +* **gcloud CLI**: GCP CLI installed and authenticated. + +## Setup + +1. **Configure Local Variables**: + Copy the example variable file and fill in your project details. + ```bash + cp terraform.tfvars.example terraform.tfvars + ``` + Edit `terraform.tfvars` with your `project_id` and other preferred settings. + +2. **Run Setup Script**: + The `setup.sh` script creates a GCS bucket for your Terraform state and initializes the backend. + ```bash + ./setup.sh + ``` + +## Deployment + +1. **Initialize**: + Initialize Terraform (if not already done by setup.sh). + ```bash + terraform init + ``` + +2. **Plan**: + Review the changes Terraform will make. + ```bash + terraform plan + ``` + +3. **Apply**: + Provision the infrastructure. + ```bash + terraform apply + ``` + +4. **Teardown**: + Destroy all resources. + ```bash + terraform destroy + ``` + +## Architecture + +This setup uses an **Orchestrator Pattern**: +- `infra/dcp/main.tf`: The root entrypoint that calls modules. +- `infra/dcp/modules/dcp/`: The new Data Commons Platform stack (Cloud Run + Spanner). +- `infra/dcp/modules/cdc/`: The legacy Custom Data Commons stack (Cloud Run + MySQL + Redis). + +Each module is independent and can be toggled via the root variables in `terraform.tfvars`. 
+ +## Troubleshooting +* **Deletion Errors**: If you get a "cannot destroy... deletion_protection" error, ensure `deletion_protection = false` in your `terraform.tfvars`, run `terraform apply`, and then try `terraform destroy` again. Alternatively, use the helper command: + ```bash + make force-destroy + ``` diff --git a/infra/dcp/main.tf b/infra/dcp/main.tf new file mode 100644 index 0000000..4200bf4 --- /dev/null +++ b/infra/dcp/main.tf @@ -0,0 +1,111 @@ +terraform { + required_providers { + google = { + source = "hashicorp/google" + version = ">= 5.0" + } + null = { + source = "hashicorp/null" + version = ">= 3.0" + } + } +} + +provider "google" { + project = var.project_id + region = var.region +} + +# Enable required APIs for both stacks +resource "google_project_service" "apis" { + for_each = toset([ + "run.googleapis.com", + "spanner.googleapis.com", + "iam.googleapis.com", + "sqladmin.googleapis.com", + "redis.googleapis.com", + "secretmanager.googleapis.com", + "vpcaccess.googleapis.com", + "artifactregistry.googleapis.com", + "compute.googleapis.com" + ]) + + service = each.key + disable_on_destroy = false +} + +# --- Data Commons Platform (DCP) Stack --- +module "dcp" { + source = "./modules/dcp" + count = var.enable_dcp ? 
1 : 0 + + project_id = var.project_id + region = var.region + image_url = var.dcp_image_url + service_name = var.dcp_service_name + service_account_name = var.dcp_service_account_name + create_spanner = var.dcp_create_spanner + spanner_instance_id = var.dcp_spanner_instance_id + spanner_database_id = var.dcp_spanner_database_id + spanner_processing_units = var.dcp_spanner_processing_units + cpu = var.dcp_cpu + memory = var.dcp_memory + min_instances = var.dcp_min_instances + max_instances = var.dcp_max_instances + concurrency = var.dcp_concurrency + timeout_seconds = var.dcp_timeout_seconds + deletion_protection = var.deletion_protection + + depends_on = [google_project_service.apis] +} + +# --- Custom Data Commons (CDC) Legacy Stack --- +module "cdc" { + source = "./modules/cdc" + count = var.enable_cdc ? 1 : 0 + + project_id = var.project_id + namespace = var.cdc_namespace + dc_api_key = var.cdc_dc_api_key + maps_api_key = var.cdc_maps_api_key + disable_google_maps = var.cdc_disable_google_maps + region = var.region + google_analytics_tag_id = var.cdc_google_analytics_tag_id + gcs_data_bucket_name = var.cdc_gcs_data_bucket_name + gcs_data_bucket_input_folder = var.cdc_gcs_data_bucket_input_folder + gcs_data_bucket_output_folder = var.cdc_gcs_data_bucket_output_folder + gcs_data_bucket_location = var.cdc_gcs_data_bucket_location + mysql_instance_name = var.cdc_mysql_instance_name + mysql_database_name = var.cdc_mysql_database_name + mysql_database_version = var.cdc_mysql_database_version + mysql_cpu_count = var.cdc_mysql_cpu_count + mysql_memory_size_mb = var.cdc_mysql_memory_size_mb + mysql_storage_size_gb = var.cdc_mysql_storage_size_gb + mysql_user = var.cdc_mysql_user + mysql_deletion_protection = var.deletion_protection + dc_web_service_image = var.cdc_web_service_image + dc_web_service_min_instance_count = var.cdc_web_service_min_instance_count + dc_web_service_max_instance_count = var.cdc_web_service_max_instance_count + dc_web_service_cpu = 
var.cdc_web_service_cpu + dc_web_service_memory = var.cdc_web_service_memory + make_dc_web_service_public = var.cdc_make_dc_web_service_public + dc_data_job_image = var.cdc_data_job_image + dc_data_job_cpu = var.cdc_data_job_cpu + dc_data_job_memory = var.cdc_data_job_memory + dc_data_job_timeout = var.cdc_data_job_timeout + dc_search_scope = var.cdc_search_scope + enable_mcp = var.cdc_enable_mcp + vpc_network_name = var.cdc_vpc_network_name + vpc_network_subnet_name = var.cdc_vpc_network_subnet_name + enable_redis = var.cdc_enable_redis + redis_instance_name = var.cdc_redis_instance_name + redis_memory_size_gb = var.cdc_redis_memory_size_gb + redis_tier = var.cdc_redis_tier + redis_location_id = var.cdc_redis_location_id + redis_alternative_location_id = var.cdc_redis_alternative_location_id + redis_replica_count = var.cdc_redis_replica_count + vpc_connector_cidr = var.cdc_vpc_connector_cidr + deletion_protection = var.deletion_protection + + depends_on = [google_project_service.apis] +} diff --git a/infra/dcp/modules/cdc/locals.tf b/infra/dcp/modules/cdc/locals.tf new file mode 100644 index 0000000..80a00b7 --- /dev/null +++ b/infra/dcp/modules/cdc/locals.tf @@ -0,0 +1,90 @@ +# Local variable definitions + +locals { + # Data Commons Data Bucket + gcs_data_bucket_name = var.gcs_data_bucket_name != "" ? var.gcs_data_bucket_name : "${var.namespace}-datacommons-data-${var.project_id}" + + # Use var.maps_api_key if set, otherwise use generated Maps API key + maps_api_key = var.maps_api_key != null ? var.maps_api_key : google_apikeys_key.maps_api_key[0].key_string + + # Data Commons API hostname + dc_api_hostname = "api.datacommons.org" + + # Data Commons API protocol + dc_api_protocol = "https" + + # Data Commons API root URL + dc_api_root = "${local.dc_api_protocol}://${local.dc_api_hostname}" + + # Optionally-configured Redis instance + redis_instance = var.enable_redis ? 
google_redis_instance.redis_instance[0] : null + + + # Shared environment variables used by the Data Commons web service and the Data + # Commons data loading job + cloud_run_shared_env_variables = [ + { + name = "USE_CLOUDSQL" + value = "true" + }, + { + name = "CLOUDSQL_INSTANCE" + value = google_sql_database_instance.mysql_instance.connection_name + }, + { + name = "DB_NAME" + value = var.mysql_database_name + }, + { + name = "DB_USER" + value = var.mysql_user + }, + { + name = "DB_HOST" + value = "" + }, + { + name = "DB_PORT" + value = "3306" + }, + { + name = "OUTPUT_DIR" + value = "gs://${local.gcs_data_bucket_name}/${var.gcs_data_bucket_output_folder}" + }, + { + name = "FORCE_RESTART" + value = "${timestamp()}" + }, + { + name = "REDIS_HOST" + value = try(local.redis_instance.host, "") + }, + { + name = "REDIS_PORT" + value = try(local.redis_instance.port, "") + } + ] + + # Shared environment variables containing secret refs used by the Data Commons + # web service and the Data Commons data loading job + cloud_run_shared_env_variable_secrets = [ + { + name = "DC_API_KEY" + value_source = { + secret_key_ref = { + secret = google_secret_manager_secret.dc_api_key.secret_id + version = "latest" + } + } + }, + { + name = "DB_PASS" + value_source = { + secret_key_ref = { + secret = google_secret_manager_secret.mysql_password_secret.secret_id + version = "latest" + } + } + } + ] +} diff --git a/infra/dcp/modules/cdc/main.tf b/infra/dcp/modules/cdc/main.tf new file mode 100644 index 0000000..308a064 --- /dev/null +++ b/infra/dcp/modules/cdc/main.tf @@ -0,0 +1,339 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Custom Data Commons terraform resources + +# Reference the default VPC network +data "google_compute_network" "default" { + name = var.vpc_network_name +} + +# Reference the default VPC network subnet +data "google_compute_subnetwork" "default" { + name = var.vpc_network_subnet_name + region = var.region +} + +# Enable required Google Cloud APIs +resource "google_project_service" "required_apis" { + for_each = toset([ + "run.googleapis.com", + "sqladmin.googleapis.com", + "compute.googleapis.com", + "redis.googleapis.com", + "secretmanager.googleapis.com", + "vpcaccess.googleapis.com", + "artifactregistry.googleapis.com", + "iam.googleapis.com" + ]) + project = var.project_id + service = each.value + disable_on_destroy = false +} + +# Cloud SQL instance for Data Commons +resource "google_sql_database_instance" "mysql_instance" { + name = "${var.namespace}-${var.mysql_instance_name}" + database_version = var.mysql_database_version + region = var.region + + settings { + tier = "db-custom-${var.mysql_cpu_count}-${var.mysql_memory_size_mb}" + ip_configuration { + ipv4_enabled = true + } + backup_configuration { + enabled = true + } + } + + deletion_protection = var.deletion_protection + depends_on = [google_project_service.required_apis] +} + +# MySQL Database +resource "google_sql_database" "mysql_db" { + name = var.mysql_database_name + instance = google_sql_database_instance.mysql_instance.name +} + +# Generate a random password for the MySQL user +resource "random_password" "mysql_password" { + length = 16 + special = true +} + +# Store MySQL password 
in Secret Manager +resource "google_secret_manager_secret" "mysql_password_secret" { + secret_id = "${var.namespace}-mysql-password" + + replication { + auto {} + } + depends_on = [google_project_service.required_apis] +} + +resource "google_secret_manager_secret_version" "mysql_password_secret_version" { + secret = google_secret_manager_secret.mysql_password_secret.id + secret_data = random_password.mysql_password.result +} + +# MySQL User +resource "google_sql_user" "mysql_user" { + name = var.mysql_user + instance = google_sql_database_instance.mysql_instance.name + password = random_password.mysql_password.result +} + +# Optional Redis instance +resource "google_redis_instance" "redis_instance" { + count = var.enable_redis ? 1 : 0 + name = "${var.namespace}-${var.redis_instance_name}" + memory_size_gb = var.redis_memory_size_gb + tier = var.redis_tier + region = var.region + location_id = var.redis_location_id + alternative_location_id = var.redis_alternative_location_id + redis_version = "REDIS_6_X" + display_name = "Data Commons Redis Instance" + reserved_ip_range = null + replica_count = var.redis_replica_count + authorized_network = data.google_compute_network.default.id + connect_mode = "DIRECT_PEERING" + depends_on = [google_project_service.required_apis] +} + +# VPC Access Connector for private connections +resource "google_vpc_access_connector" "connector" { + name = "${var.namespace}-vpc-conn-v2" + region = var.region + network = data.google_compute_network.default.name + ip_cidr_range = var.vpc_connector_cidr + min_instances = 2 + max_instances = 10 + depends_on = [google_project_service.required_apis] +} + +# GCS Bucket for data storage +resource "google_storage_bucket" "data_bucket" { + name = local.gcs_data_bucket_name + location = var.gcs_data_bucket_location + force_destroy = true + + uniform_bucket_level_access = true + depends_on = [google_project_service.required_apis] +} + +# Maps API Key +resource "google_apikeys_key" "maps_api_key" { + 
count = var.maps_api_key == null && !var.disable_google_maps ? 1 : 0 + name = "${var.namespace}-maps-key" + display_name = "Maps API Key for ${var.namespace}" + project = var.project_id + + restrictions { + api_targets { + service = "maps-backend.googleapis.com" + } + } + depends_on = [google_project_service.required_apis] +} + +# Cloud Run job for data management +resource "google_cloud_run_v2_job" "dc_data_job" { + name = "${var.namespace}-datacommons-data-job" + location = var.region + deletion_protection = var.deletion_protection + + template { + template { + containers { + image = var.dc_data_job_image + resources { + limits = { + cpu = var.dc_data_job_cpu + memory = var.dc_data_job_memory + } + } + dynamic "env" { + for_each = local.cloud_run_shared_env_variables + content { + name = env.value.name + value = env.value.value + } + } + + dynamic "env" { + for_each = local.cloud_run_shared_env_variable_secrets + content { + name = env.value.name + value_source { + secret_key_ref { + secret = env.value.value_source.secret_key_ref.secret + version = env.value.value_source.secret_key_ref.version + } + } + } + } + + env { + name = "GCS_BUCKET" + value = google_storage_bucket.data_bucket.name + } + env { + name = "GCS_INPUT_FOLDER" + value = var.gcs_data_bucket_input_folder + } + env { + name = "GCS_OUTPUT_FOLDER" + value = var.gcs_data_bucket_output_folder + } + env { + name = "INPUT_DIR" + value = "gs://${google_storage_bucket.data_bucket.name}/${var.gcs_data_bucket_input_folder}" + } + } + vpc_access { + connector = google_vpc_access_connector.connector.id + egress = "PRIVATE_RANGES_ONLY" + } + max_retries = 0 + timeout = var.dc_data_job_timeout + service_account = google_service_account.datacommons_service_account.email + } + } + depends_on = [google_project_service.required_apis] +} + +# Run the db init job on terraform apply to create tables +resource "null_resource" "run_db_init" { + depends_on = [ + google_cloud_run_v2_job.dc_data_job, + 
google_sql_database_instance.mysql_instance + ] + + triggers = { + # Run once per deployment or when the job image changes + job_image = var.dc_data_job_image + } + + provisioner "local-exec" { + command = </dev/null; then + echo "Creating bucket gs://${BUCKET_NAME} in ${PROJECT_ID}..." + gcloud storage buckets create "gs://${BUCKET_NAME}" --project="${PROJECT_ID}" --location=us --uniform-bucket-level-access + + echo "Enabling versioning on gs://${BUCKET_NAME}..." + gcloud storage buckets update "gs://${BUCKET_NAME}" --versioning +else + echo "Bucket gs://${BUCKET_NAME} already exists." +fi + +# Generate the backend.tf file dynamically +echo "Generating backend.tf..." +cat < backend.tf +terraform { + backend "gcs" { + bucket = "${BUCKET_NAME}" + prefix = "terraform/state" + } +} +EOF + +echo "✅ backend.tf created successfully!" +echo "Now running terraform init..." +terraform init + +echo "" +echo "Setup complete! You can now run 'terraform apply' to deploy your infrastructure." diff --git a/infra/dcp/terraform.tfvars.example b/infra/dcp/terraform.tfvars.example new file mode 100644 index 0000000..f91ba57 --- /dev/null +++ b/infra/dcp/terraform.tfvars.example @@ -0,0 +1,20 @@ +# --- Shared Global Variables --- +project_id = "your-project-id" +region = "us-central1" +deletion_protection = false + +# --- Stack Toggles --- +enable_dcp = true +enable_cdc = false + +# --- DCP Stack Variables --- +dcp_image_url = "gcr.io/your-project/datacommons-platform:latest" +dcp_service_name = "datacommons-platform" +dcp_service_account_name = "dcp-runner-sa" +dcp_create_spanner = true + +# --- CDC Stack Variables (Legacy) --- +# cdc_namespace = "cdc" +# cdc_dc_api_key = "your-dc-api-key" +# cdc_maps_api_key = "your-maps-api-key" +# cdc_enable_redis = false diff --git a/infra/dcp/variables.tf b/infra/dcp/variables.tf new file mode 100644 index 0000000..f49479a --- /dev/null +++ b/infra/dcp/variables.tf @@ -0,0 +1,338 @@ +# --- Shared Global Variables --- +variable "project_id" { + 
description = "GCP Project ID" + type = string +} + +variable "region" { + description = "GCP Region" + type = string + default = "us-central1" +} + +variable "deletion_protection" { + description = "Enable deletion protection for resources (set to true for production)" + type = bool + default = false +} + +# --- Stack Toggles --- +variable "enable_dcp" { + description = "Enable the new Data Commons Platform stack" + type = bool + default = false +} + +variable "enable_cdc" { + description = "Enable the legacy Custom Data Commons stack" + type = bool + default = true +} + +# --- DCP Stack Variables --- +variable "dcp_image_url" { + description = "Docker image URL for DCP" + type = string + default = "gcr.io/datcom-ci/datacommons-platform:latest" +} + +variable "dcp_service_name" { + description = "Cloud Run service name for DCP" + type = string + default = "datacommons-platform" +} + +variable "dcp_service_account_name" { + description = "Service account for DCP" + type = string + default = "dcp-runner-sa" +} + +variable "dcp_create_spanner" { + description = "Create Spanner for DCP" + type = bool + default = false +} + +variable "dcp_spanner_instance_id" { + description = "Spanner instance for DCP" + type = string + default = "dcp-spanner-instance" +} + +variable "dcp_spanner_database_id" { + description = "Spanner database for DCP" + type = string + default = "dcp-spanner-db" +} + +variable "dcp_spanner_processing_units" { + description = "Spanner units for DCP" + type = number + default = 100 +} + +variable "dcp_cpu" { + description = "DCP CPU" + type = string + default = "1000m" +} + +variable "dcp_memory" { + description = "DCP Memory" + type = string + default = "512Mi" +} + +variable "dcp_min_instances" { + description = "DCP min instances" + type = number + default = 0 +} + +variable "dcp_max_instances" { + description = "DCP max instances" + type = number + default = 10 +} + +variable "dcp_concurrency" { + description = "DCP concurrency" + type = number + 
default = 80 +} + +variable "dcp_timeout_seconds" { + description = "DCP timeout" + type = number + default = 300 +} + +# --- CDC Stack Variables (Legacy) --- +variable "cdc_namespace" { + description = "Prefix for CDC resources" + type = string + default = "cdc" +} + +variable "cdc_dc_api_key" { + description = "DC API Key for CDC" + type = string + default = "" +} + +variable "cdc_maps_api_key" { + description = "Maps API Key for CDC" + type = string + default = null +} + +variable "cdc_disable_google_maps" { + description = "Disable maps in CDC" + type = bool + default = false +} + +variable "cdc_google_analytics_tag_id" { + description = "GA tag for CDC" + type = string + default = null +} + +variable "cdc_gcs_data_bucket_name" { + description = "CDC data bucket" + type = string + default = "" +} + +variable "cdc_gcs_data_bucket_input_folder" { + description = "CDC input folder" + type = string + default = "input" +} + +variable "cdc_gcs_data_bucket_output_folder" { + description = "CDC output folder" + type = string + default = "output" +} + +variable "cdc_gcs_data_bucket_location" { + description = "CDC bucket location" + type = string + default = "US" +} + +variable "cdc_mysql_instance_name" { + description = "CDC MySQL name" + type = string + default = "datacommons-mysql-instance" +} + +variable "cdc_mysql_database_name" { + description = "CDC MySQL DB name" + type = string + default = "datacommons" +} + +variable "cdc_mysql_database_version" { + description = "CDC MySQL version" + type = string + default = "MYSQL_8_0" +} + +variable "cdc_mysql_cpu_count" { + description = "CDC MySQL CPU" + type = number + default = 2 +} + +variable "cdc_mysql_memory_size_mb" { + description = "CDC MySQL RAM" + type = number + default = 7680 +} + +variable "cdc_mysql_storage_size_gb" { + description = "CDC MySQL Disk" + type = number + default = 20 +} + +variable "cdc_mysql_user" { + description = "CDC MySQL user" + type = string + default = "datacommons" +} + +variable 
"cdc_vpc_connector_cidr" { + description = "CIDR range for the CDC VPC Access Connector" + type = string + default = "10.13.0.0/28" +} + +variable "cdc_web_service_image" { + description = "CDC web image" + type = string + default = "gcr.io/datcom-ci/datacommons-services:stable" +} + +variable "cdc_web_service_min_instance_count" { + description = "CDC min instances" + type = number + default = 1 +} + +variable "cdc_web_service_max_instance_count" { + description = "CDC max instances" + type = number + default = 1 +} + +variable "cdc_web_service_cpu" { + description = "CDC web CPU" + type = string + default = "4" +} + +variable "cdc_web_service_memory" { + description = "CDC web RAM" + type = string + default = "16G" +} + +variable "cdc_make_dc_web_service_public" { + description = "CDC public access" + type = bool + default = true +} + +variable "cdc_data_job_image" { + description = "CDC data job image" + type = string + default = "gcr.io/datcom-ci/datacommons-data:stable" +} + +variable "cdc_data_job_cpu" { + description = "CDC data job CPU" + type = string + default = "2" +} + +variable "cdc_data_job_memory" { + description = "CDC data job RAM" + type = string + default = "8G" +} + +variable "cdc_data_job_timeout" { + description = "CDC data job timeout" + type = string + default = "600s" +} + +variable "cdc_search_scope" { + description = "CDC search scope" + type = string + default = "base_and_custom" +} + +variable "cdc_enable_mcp" { + description = "CDC enable MCP" + type = bool + default = true +} + +variable "cdc_vpc_network_name" { + description = "CDC VPC network" + type = string + default = "default" +} + +variable "cdc_vpc_network_subnet_name" { + description = "CDC VPC subnet" + type = string + default = "default" +} + +variable "cdc_enable_redis" { + description = "CDC enable redis" + type = bool + default = false +} + +variable "cdc_redis_instance_name" { + description = "CDC redis name" + type = string + default = "datacommons-redis-instance" +} + 
+variable "cdc_redis_memory_size_gb" { + description = "CDC redis size" + type = number + default = 2 +} + +variable "cdc_redis_tier" { + description = "CDC redis tier" + type = string + default = "STANDARD_HA" +} + +variable "cdc_redis_location_id" { + description = "CDC redis zone" + type = string + default = "us-central1-a" +} + +variable "cdc_redis_alternative_location_id" { + description = "CDC redis alt zone" + type = string + default = "us-central1-b" +} + +variable "cdc_redis_replica_count" { + description = "CDC redis replicas" + type = number + default = 1 +} From 7276987ab7169bf574d8d2ede0bbd84ccc6c1987 Mon Sep 17 00:00:00 2001 From: Gabriel Mechali Date: Tue, 10 Mar 2026 19:08:31 -0400 Subject: [PATCH 2/5] Incorporate comments from Dan's review. --- .gitignore | 1 + infra/dcp/main.tf | 29 ++++++++++---- infra/dcp/modules/cdc/locals.tf | 24 +++++++---- infra/dcp/modules/cdc/main.tf | 43 +++++++++++--------- infra/dcp/modules/cdc/outputs.tf | 4 +- infra/dcp/modules/cdc/service_account.tf | 8 ++-- infra/dcp/modules/cdc/variables.tf | 18 +++++++++ infra/dcp/modules/dcp/cloudrun.tf | 18 ++++----- infra/dcp/modules/dcp/iam.tf | 2 +- infra/dcp/modules/dcp/locals.tf | 3 ++ infra/dcp/modules/dcp/outputs.tf | 4 +- infra/dcp/modules/dcp/spanner.tf | 12 +++--- infra/dcp/modules/dcp/variables.tf | 38 +++++++++++------- infra/dcp/terraform.tfvars.example | 17 +++----- infra/dcp/variables.tf | 51 +++++++++++++----------- 15 files changed, 167 insertions(+), 105 deletions(-) create mode 100644 infra/dcp/modules/dcp/locals.tf diff --git a/.gitignore b/.gitignore index bfdfb52..a384d8b 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,7 @@ _override.tf.json terraform.rc tfplan infra/dcp/backend.tf +.terraform.lock.hcl # Configuration infra/dcp/.env diff --git a/infra/dcp/main.tf b/infra/dcp/main.tf index 4200bf4..9afe8e2 100644 --- a/infra/dcp/main.tf +++ b/infra/dcp/main.tf @@ -40,20 +40,22 @@ module "dcp" { count = var.enable_dcp ? 
1 : 0 project_id = var.project_id + namespace = var.namespace region = var.region image_url = var.dcp_image_url service_name = var.dcp_service_name service_account_name = var.dcp_service_account_name - create_spanner = var.dcp_create_spanner + create_spanner_instance = var.dcp_create_spanner_instance + create_spanner_db = var.dcp_create_spanner_db spanner_instance_id = var.dcp_spanner_instance_id spanner_database_id = var.dcp_spanner_database_id spanner_processing_units = var.dcp_spanner_processing_units - cpu = var.dcp_cpu - memory = var.dcp_memory - min_instances = var.dcp_min_instances - max_instances = var.dcp_max_instances - concurrency = var.dcp_concurrency - timeout_seconds = var.dcp_timeout_seconds + service_cpu = var.dcp_service_cpu + service_memory = var.dcp_service_memory + service_min_instances = var.dcp_service_min_instances + service_max_instances = var.dcp_service_max_instances + service_concurrency = var.dcp_service_concurrency + service_timeout_seconds = var.dcp_service_timeout_seconds deletion_protection = var.deletion_protection depends_on = [google_project_service.apis] @@ -65,7 +67,7 @@ module "cdc" { count = var.enable_cdc ? 1 : 0 project_id = var.project_id - namespace = var.cdc_namespace + namespace = var.namespace dc_api_key = var.cdc_dc_api_key maps_api_key = var.cdc_maps_api_key disable_google_maps = var.cdc_disable_google_maps @@ -105,7 +107,18 @@ module "cdc" { redis_alternative_location_id = var.cdc_redis_alternative_location_id redis_replica_count = var.cdc_redis_replica_count vpc_connector_cidr = var.cdc_vpc_connector_cidr + use_spanner = var.enable_dcp + spanner_instance_id = var.enable_dcp ? module.dcp[0].spanner_instance_id : "" + spanner_database_id = var.enable_dcp ? 
module.dcp[0].spanner_database_id : "" deletion_protection = var.deletion_protection depends_on = [google_project_service.apis] } + +# Ensure Spanner instance ID is provided when not creating a new one +check "spanner_instance_id_provided" { + assert { + condition = var.dcp_create_spanner_instance || var.dcp_spanner_instance_id != "" + error_message = "dcp_spanner_instance_id must be provided when dcp_create_spanner_instance is false." + } +} diff --git a/infra/dcp/modules/cdc/locals.tf b/infra/dcp/modules/cdc/locals.tf index 80a00b7..8de14d2 100644 --- a/infra/dcp/modules/cdc/locals.tf +++ b/infra/dcp/modules/cdc/locals.tf @@ -2,7 +2,8 @@ locals { # Data Commons Data Bucket - gcs_data_bucket_name = var.gcs_data_bucket_name != "" ? var.gcs_data_bucket_name : "${var.namespace}-datacommons-data-${var.project_id}" + name_prefix = var.namespace != "" ? "${var.namespace}-" : "" + gcs_data_bucket_name = var.gcs_data_bucket_name != "" ? var.gcs_data_bucket_name : "${local.name_prefix}datacommons-data-${var.project_id}" # Use var.maps_api_key if set, otherwise use generated Maps API key maps_api_key = var.maps_api_key != null ? var.maps_api_key : google_apikeys_key.maps_api_key[0].key_string @@ -25,11 +26,11 @@ locals { cloud_run_shared_env_variables = [ { name = "USE_CLOUDSQL" - value = "true" + value = var.use_spanner ? "false" : "true" }, { name = "CLOUDSQL_INSTANCE" - value = google_sql_database_instance.mysql_instance.connection_name + value = var.use_spanner ? 
"" : google_sql_database_instance.mysql_instance[0].connection_name }, { name = "DB_NAME" @@ -62,12 +63,20 @@ locals { { name = "REDIS_PORT" value = try(local.redis_instance.port, "") + }, + { + name = "GCP_SPANNER_INSTANCE_ID" + value = var.spanner_instance_id + }, + { + name = "GCP_SPANNER_DATABASE_NAME" + value = var.spanner_database_id } ] # Shared environment variables containing secret refs used by the Data Commons # web service and the Data Commons data loading job - cloud_run_shared_env_variable_secrets = [ + cloud_run_shared_env_variable_secrets = concat([ { name = "DC_API_KEY" value_source = { @@ -76,15 +85,16 @@ locals { version = "latest" } } - }, + } + ], var.use_spanner ? [] : [ { name = "DB_PASS" value_source = { secret_key_ref = { - secret = google_secret_manager_secret.mysql_password_secret.secret_id + secret = google_secret_manager_secret.mysql_password_secret[0].id version = "latest" } } } - ] + ]) } diff --git a/infra/dcp/modules/cdc/main.tf b/infra/dcp/modules/cdc/main.tf index 308a064..aff1fb4 100644 --- a/infra/dcp/modules/cdc/main.tf +++ b/infra/dcp/modules/cdc/main.tf @@ -44,7 +44,8 @@ resource "google_project_service" "required_apis" { # Cloud SQL instance for Data Commons resource "google_sql_database_instance" "mysql_instance" { - name = "${var.namespace}-${var.mysql_instance_name}" + count = var.use_spanner ? 0 : 1 + name = "${local.name_prefix}${var.mysql_instance_name}" database_version = var.mysql_database_version region = var.region @@ -64,8 +65,9 @@ resource "google_sql_database_instance" "mysql_instance" { # MySQL Database resource "google_sql_database" "mysql_db" { + count = var.use_spanner ? 
0 : 1 name = var.mysql_database_name - instance = google_sql_database_instance.mysql_instance.name + instance = google_sql_database_instance.mysql_instance[0].name } # Generate a random password for the MySQL user @@ -76,7 +78,8 @@ resource "random_password" "mysql_password" { # Store MySQL password in Secret Manager resource "google_secret_manager_secret" "mysql_password_secret" { - secret_id = "${var.namespace}-mysql-password" + count = var.use_spanner ? 0 : 1 + secret_id = "${local.name_prefix}mysql-password" replication { auto {} @@ -85,21 +88,23 @@ resource "google_secret_manager_secret" "mysql_password_secret" { } resource "google_secret_manager_secret_version" "mysql_password_secret_version" { - secret = google_secret_manager_secret.mysql_password_secret.id + count = var.use_spanner ? 0 : 1 + secret = google_secret_manager_secret.mysql_password_secret[0].id secret_data = random_password.mysql_password.result } # MySQL User resource "google_sql_user" "mysql_user" { + count = var.use_spanner ? 0 : 1 name = var.mysql_user - instance = google_sql_database_instance.mysql_instance.name + instance = google_sql_database_instance.mysql_instance[0].name password = random_password.mysql_password.result } # Optional Redis instance resource "google_redis_instance" "redis_instance" { count = var.enable_redis ? 
1 : 0 - name = "${var.namespace}-${var.redis_instance_name}" + name = "${local.name_prefix}${var.redis_instance_name}" memory_size_gb = var.redis_memory_size_gb tier = var.redis_tier region = var.region @@ -116,7 +121,7 @@ resource "google_redis_instance" "redis_instance" { # VPC Access Connector for private connections resource "google_vpc_access_connector" "connector" { - name = "${var.namespace}-vpc-conn-v2" + name = "${local.name_prefix}vpc-conn" region = var.region network = data.google_compute_network.default.name ip_cidr_range = var.vpc_connector_cidr @@ -138,8 +143,8 @@ resource "google_storage_bucket" "data_bucket" { # Maps API Key resource "google_apikeys_key" "maps_api_key" { count = var.maps_api_key == null && !var.disable_google_maps ? 1 : 0 - name = "${var.namespace}-maps-key" - display_name = "Maps API Key for ${var.namespace}" + name = "${local.name_prefix}maps-key" + display_name = "Maps API Key for ${var.namespace != "" ? var.namespace : "Data Commons"}" project = var.project_id restrictions { @@ -152,7 +157,7 @@ resource "google_apikeys_key" "maps_api_key" { # Cloud Run job for data management resource "google_cloud_run_v2_job" "dc_data_job" { - name = "${var.namespace}-datacommons-data-job" + name = "${local.name_prefix}datacommons-data-job" location = var.region deletion_protection = var.deletion_protection @@ -219,8 +224,7 @@ resource "google_cloud_run_v2_job" "dc_data_job" { # Run the db init job on terraform apply to create tables resource "null_resource" "run_db_init" { depends_on = [ - google_cloud_run_v2_job.dc_data_job, - google_sql_database_instance.mysql_instance + google_cloud_run_v2_job.dc_data_job ] triggers = { @@ -230,7 +234,7 @@ resource "null_resource" "run_db_init" { provisioner "local-exec" { command = < Date: Wed, 11 Mar 2026 08:16:02 -0400 Subject: [PATCH 3/5] Fix the evaluation of whether to create DCP instance --- infra/dcp/modules/dcp/spanner.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/infra/dcp/modules/dcp/spanner.tf b/infra/dcp/modules/dcp/spanner.tf index 5b1d142..1e01083 100644 --- a/infra/dcp/modules/dcp/spanner.tf +++ b/infra/dcp/modules/dcp/spanner.tf @@ -1,6 +1,6 @@ resource "google_spanner_instance" "main" { count = var.create_spanner_instance ? 1 : 0 - name = var.create_spanner_instance ? "${local.name_prefix}${var.spanner_instance_id}" : "unused" + name = (var.create_spanner_instance && var.spanner_instance_id != "") ? "${local.name_prefix}${var.spanner_instance_id}" : "unused-instance" config = "regional-${var.region}" display_name = "${local.name_prefix}${var.spanner_instance_id}" processing_units = var.spanner_processing_units @@ -10,7 +10,7 @@ resource "google_spanner_instance" "main" { resource "google_spanner_database" "database" { count = var.create_spanner_db ? 1 : 0 - instance = var.create_spanner_instance ? google_spanner_instance.main[0].name : (var.spanner_instance_id != "" ? "${local.name_prefix}${var.spanner_instance_id}" : "unused") + instance = var.create_spanner_instance ? google_spanner_instance.main[0].name : (var.spanner_instance_id != "" ? 
"${local.name_prefix}${var.spanner_instance_id}" : "unused-instance") name = "${local.name_prefix}${var.spanner_database_id}" # Prevent deletion of data From 90ba4f00a7f1a0dbad715d4d0b54f54519731852 Mon Sep 17 00:00:00 2001 From: Gabriel Mechali Date: Wed, 11 Mar 2026 08:52:07 -0400 Subject: [PATCH 4/5] Fix the toggles on Spanner instance creation vs reuse --- infra/dcp/main.tf | 9 ++++----- infra/dcp/modules/cdc/service_account.tf | 18 ++++++++++++++---- infra/dcp/modules/dcp/cloudrun.tf | 4 ++-- infra/dcp/modules/dcp/outputs.tf | 4 ++-- infra/dcp/modules/dcp/spanner.tf | 8 ++++---- 5 files changed, 26 insertions(+), 17 deletions(-) diff --git a/infra/dcp/main.tf b/infra/dcp/main.tf index 9afe8e2..01ed710 100644 --- a/infra/dcp/main.tf +++ b/infra/dcp/main.tf @@ -18,9 +18,8 @@ provider "google" { # Enable required APIs for both stacks resource "google_project_service" "apis" { - for_each = toset([ + for_each = toset(concat([ "run.googleapis.com", - "spanner.googleapis.com", "iam.googleapis.com", "sqladmin.googleapis.com", "redis.googleapis.com", @@ -28,7 +27,7 @@ resource "google_project_service" "apis" { "vpcaccess.googleapis.com", "artifactregistry.googleapis.com", "compute.googleapis.com" - ]) + ], var.enable_dcp ? ["spanner.googleapis.com"] : [])) service = each.key disable_on_destroy = false @@ -118,7 +117,7 @@ module "cdc" { # Ensure Spanner instance ID is provided when not creating a new one check "spanner_instance_id_provided" { assert { - condition = var.dcp_create_spanner_instance || var.dcp_spanner_instance_id != "" - error_message = "dcp_spanner_instance_id must be provided when dcp_create_spanner_instance is false." + condition = !var.enable_dcp || var.dcp_create_spanner_instance || var.dcp_spanner_instance_id != "" + error_message = "dcp_spanner_instance_id must be provided when reusing an existing instance (dcp_create_spanner_instance = false)." 
} } diff --git a/infra/dcp/modules/cdc/service_account.tf b/infra/dcp/modules/cdc/service_account.tf index 0892a2b..7ee8520 100644 --- a/infra/dcp/modules/cdc/service_account.tf +++ b/infra/dcp/modules/cdc/service_account.tf @@ -5,10 +5,20 @@ resource "google_service_account" "datacommons_service_account" { } resource "google_project_iam_member" "datacommons_service_account_roles" { - for_each = toset(["roles/compute.networkViewer", "roles/redis.editor", "roles/cloudsql.admin", "roles/storage.objectAdmin", "roles/run.admin", "roles/vpcaccess.user", "roles/iam.serviceAccountUser", "roles/secretmanager.secretAccessor", "roles/spanner.databaseUser"]) - project = var.project_id - member = "serviceAccount:${google_service_account.datacommons_service_account.email}" - role = each.value + for_each = setsubtract(toset([ + "roles/compute.networkViewer", + "roles/redis.editor", + "roles/cloudsql.admin", + "roles/storage.objectAdmin", + "roles/run.admin", + "roles/vpcaccess.user", + "roles/iam.serviceAccountUser", + "roles/secretmanager.secretAccessor", + "roles/spanner.databaseUser" + ]), var.use_spanner ? [] : ["roles/spanner.databaseUser"]) + project = var.project_id + member = "serviceAccount:${google_service_account.datacommons_service_account.email}" + role = each.value } resource "google_secret_manager_secret" "dc_api_key" { diff --git a/infra/dcp/modules/dcp/cloudrun.tf b/infra/dcp/modules/dcp/cloudrun.tf index cd6999f..2359b22 100644 --- a/infra/dcp/modules/dcp/cloudrun.tf +++ b/infra/dcp/modules/dcp/cloudrun.tf @@ -34,11 +34,11 @@ resource "google_cloud_run_v2_service" "dcp_service" { } env { name = "GCP_SPANNER_INSTANCE_ID" - value = var.create_spanner_instance ? google_spanner_instance.main[0].name : "${local.name_prefix}${var.spanner_instance_id}" + value = var.create_spanner_instance ? (var.spanner_instance_id != "" ? 
"${local.name_prefix}${var.spanner_instance_id}" : "${local.name_prefix}dcp-instance") : var.spanner_instance_id } env { name = "GCP_SPANNER_DATABASE_NAME" - value = var.create_spanner_db ? google_spanner_database.database[0].name : "${local.name_prefix}${var.spanner_database_id}" + value = var.create_spanner_db ? (var.spanner_database_id != "" ? "${local.name_prefix}${var.spanner_database_id}" : "${local.name_prefix}dcp-db") : var.spanner_database_id } } } diff --git a/infra/dcp/modules/dcp/outputs.tf b/infra/dcp/modules/dcp/outputs.tf index 928b2ae..107e49e 100644 --- a/infra/dcp/modules/dcp/outputs.tf +++ b/infra/dcp/modules/dcp/outputs.tf @@ -7,9 +7,9 @@ output "service_account_email" { } output "spanner_instance_id" { - value = var.create_spanner_instance ? google_spanner_instance.main[0].name : "${local.name_prefix}${var.spanner_instance_id}" + value = var.create_spanner_instance ? (var.spanner_instance_id != "" ? "${local.name_prefix}${var.spanner_instance_id}" : "${local.name_prefix}dcp-instance") : var.spanner_instance_id } output "spanner_database_id" { - value = var.create_spanner_db ? google_spanner_database.database[0].name : "${local.name_prefix}${var.spanner_database_id}" + value = var.create_spanner_db ? (var.spanner_database_id != "" ? "${local.name_prefix}${var.spanner_database_id}" : "${local.name_prefix}dcp-db") : var.spanner_database_id } diff --git a/infra/dcp/modules/dcp/spanner.tf b/infra/dcp/modules/dcp/spanner.tf index 1e01083..127795b 100644 --- a/infra/dcp/modules/dcp/spanner.tf +++ b/infra/dcp/modules/dcp/spanner.tf @@ -1,8 +1,8 @@ resource "google_spanner_instance" "main" { count = var.create_spanner_instance ? 1 : 0 - name = (var.create_spanner_instance && var.spanner_instance_id != "") ? "${local.name_prefix}${var.spanner_instance_id}" : "unused-instance" + name = var.create_spanner_instance ? (var.spanner_instance_id != "" ? 
"${local.name_prefix}${var.spanner_instance_id}" : "${local.name_prefix}dcp-instance") : var.spanner_instance_id config = "regional-${var.region}" - display_name = "${local.name_prefix}${var.spanner_instance_id}" + display_name = var.create_spanner_instance ? (var.spanner_instance_id != "" ? "${local.name_prefix}${var.spanner_instance_id}" : "${local.name_prefix}dcp-instance") : var.spanner_instance_id processing_units = var.spanner_processing_units force_destroy = !var.deletion_protection @@ -10,8 +10,8 @@ resource "google_spanner_instance" "main" { resource "google_spanner_database" "database" { count = var.create_spanner_db ? 1 : 0 - instance = var.create_spanner_instance ? google_spanner_instance.main[0].name : (var.spanner_instance_id != "" ? "${local.name_prefix}${var.spanner_instance_id}" : "unused-instance") - name = "${local.name_prefix}${var.spanner_database_id}" + instance = var.create_spanner_instance ? google_spanner_instance.main[0].name : (var.spanner_instance_id != "" ? var.spanner_instance_id : "unused-instance") + name = var.create_spanner_db ? (var.spanner_database_id != "" ? "${local.name_prefix}${var.spanner_database_id}" : "${local.name_prefix}dcp-db") : var.spanner_database_id # Prevent deletion of data deletion_protection = var.deletion_protection From ac391a66392aaac549fdc0dcfbeaf5d3898c5f39 Mon Sep 17 00:00:00 2001 From: Gabriel Mechali Date: Wed, 11 Mar 2026 09:17:37 -0400 Subject: [PATCH 5/5] Move the MAPS Api Key into a secret. 
---
 infra/dcp/modules/cdc/locals.tf          | 12 ++++++++++++
 infra/dcp/modules/cdc/main.tf            |  4 ----
 infra/dcp/modules/cdc/service_account.tf | 14 ++++++++++++++
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/infra/dcp/modules/cdc/locals.tf b/infra/dcp/modules/cdc/locals.tf
index 8de14d2..35079e2 100644
--- a/infra/dcp/modules/cdc/locals.tf
+++ b/infra/dcp/modules/cdc/locals.tf
@@ -85,6 +85,18 @@ locals {
           version = "latest"
         }
       }
     }
+  ], var.disable_google_maps ? [] : [
+    # Only emitted when Google Maps is enabled: the backing secret is created
+    # with count = 0 otherwise, so there would be no secret name to reference.
+    {
+      name = "MAPS_API_KEY"
+      value_source = {
+        secret_key_ref = {
+          secret  = google_secret_manager_secret.maps_api_key[0].secret_id
+          version = "latest"
+        }
+      }
+    }
   ], var.use_spanner ? [] : [
     {
diff --git a/infra/dcp/modules/cdc/main.tf b/infra/dcp/modules/cdc/main.tf
index aff1fb4..05b72e9 100644
--- a/infra/dcp/modules/cdc/main.tf
+++ b/infra/dcp/modules/cdc/main.tf
@@ -293,10 +293,6 @@ resource "google_cloud_run_v2_service" "dc_web_service" {
         }
       }
 
-      env {
-        name  = "MAPS_API_KEY"
-        value = local.maps_api_key
-      }
       env {
         name  = "GOOGLE_ANALYTICS_TAG_ID"
         value = var.google_analytics_tag_id != null ? var.google_analytics_tag_id : ""
diff --git a/infra/dcp/modules/cdc/service_account.tf b/infra/dcp/modules/cdc/service_account.tf
index 7ee8520..8b87316 100644
--- a/infra/dcp/modules/cdc/service_account.tf
+++ b/infra/dcp/modules/cdc/service_account.tf
@@ -32,3 +32,17 @@ resource "google_secret_manager_secret_version" "dc_api_key_version" {
   secret      = google_secret_manager_secret.dc_api_key.id
   secret_data = var.dc_api_key
 }
+
+resource "google_secret_manager_secret" "maps_api_key" {
+  count     = var.disable_google_maps ? 0 : 1
+  secret_id = "${local.name_prefix}maps-api-key"
+  replication {
+    auto {}
+  }
+}
+
+resource "google_secret_manager_secret_version" "maps_api_key_version" {
+  count       = var.disable_google_maps ? 0 : 1
+  secret      = google_secret_manager_secret.maps_api_key[0].id
+  secret_data = local.maps_api_key
+}