diff --git a/.gitignore b/.gitignore index 2d405b66..43df87d6 100644 --- a/.gitignore +++ b/.gitignore @@ -227,6 +227,7 @@ crash.*.log # to change depending on the environment. *.tfvars *.tfvars.json +**/*.tfvars # Ignore override files as they are usually used to override resources locally and so # are not checked in diff --git a/airflow/dags/cwl_dag.py b/airflow/dags/cwl_dag.py index f159464d..06ae84ea 100644 --- a/airflow/dags/cwl_dag.py +++ b/airflow/dags/cwl_dag.py @@ -69,8 +69,8 @@ is_paused_upon_creation=False, catchup=False, schedule=None, - max_active_runs=100, - max_active_tasks=300, + max_active_runs=1000, + max_active_tasks=3000, default_args=dag_default_args, params={ "cwl_workflow": Param( diff --git a/airflow/helm/values.tmpl.yaml b/airflow/helm/values.tmpl.yaml index d87fa1f9..c1335d67 100644 --- a/airflow/helm/values.tmpl.yaml +++ b/airflow/helm/values.tmpl.yaml @@ -93,10 +93,11 @@ scheduler: values: ["on-demand"] - key: "karpenter.k8s.aws/instance-family" operator: "In" - values: ["c6i", "c5"] # Choosing compute-optimized instances + # values: ["c6i", "c5"] # Choosing compute-optimized instances + values: ["r5"] # Choosing memory-optimized instance - key: "karpenter.k8s.aws/instance-cpu" operator: "In" - values: ["2", "4"] # Scheduler might benefit from higher CPU + values: ["8"] topologySpreadConstraints: - maxSkew: 1 topologyKey: "topology.kubernetes.io/zone" @@ -117,6 +118,23 @@ triggerer: keda: enabled: true minReplicaCount: 1 + nodeSelector: + "karpenter.sh/nodepool": "airflow-core-components" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "karpenter.sh/capacity-type" + operator: "In" + values: [ "on-demand" ] + - key: "karpenter.k8s.aws/instance-family" + operator: "In" + # values: ["c6i", "c5"] # Choosing compute-optimized instances + values: [ "r5" ] # Choosing memory-optimized instance + - key: "karpenter.k8s.aws/instance-cpu" + operator: "In" + values: [ 
"8" ] # Scheduler might benefit from higher CPU postgresql: enabled: false @@ -124,6 +142,23 @@ postgresql: pgbouncer: enabled: true replicas: 3 + nodeSelector: + "karpenter.sh/nodepool": "airflow-core-components" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "karpenter.sh/capacity-type" + operator: "In" + values: [ "on-demand" ] + - key: "karpenter.k8s.aws/instance-family" + operator: "In" + # values: ["c6i", "c5"] # Choosing compute-optimized instances + values: [ "r5" ] # Choosing memory-optimized instance + - key: "karpenter.k8s.aws/instance-cpu" + operator: "In" + values: [ "8" ] # Scheduler might benefit from higher CPU webserverSecretKeySecretName: ${webserver_secret_name} @@ -147,10 +182,11 @@ webserver: values: ["on-demand"] - key: "karpenter.k8s.aws/instance-family" operator: "In" - values: ["c6i", "c5"] # Choosing compute-optimized instances + # values: ["c6i", "c5"] # Choosing compute-optimized instances + values: ["r5"] # Choosing memory-optimized instance - key: "karpenter.k8s.aws/instance-cpu" operator: "In" - values: ["2", "4"] # Balancing between CPU and memory + values: ["8"] # Balancing between CPU and memory topologySpreadConstraints: - maxSkew: 1 topologyKey: "topology.kubernetes.io/zone" @@ -184,10 +220,11 @@ workers: - matchExpressions: - key: "karpenter.k8s.aws/instance-family" operator: "In" - values: ["t3"] + # values: ["c6i", "c5"] # Choosing compute-optimized instances + values: ["r5"] # Choosing memory-optimized instance - key: "karpenter.k8s.aws/instance-cpu" operator: "In" - values: ["2", "4"] + values: ["8"] topologySpreadConstraints: - maxSkew: 1 topologyKey: "topology.kubernetes.io/zone" @@ -263,6 +300,23 @@ dags: dagProcessor: enabled: true replicas: 3 + nodeSelector: + "karpenter.sh/nodepool": "airflow-core-components" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: 
"karpenter.sh/capacity-type" + operator: "In" + values: [ "on-demand" ] + - key: "karpenter.k8s.aws/instance-family" + operator: "In" + # values: ["c6i", "c5"] # Choosing compute-optimized instances + values: [ "r5" ] # Choosing memory-optimized instance + - key: "karpenter.k8s.aws/instance-cpu" + operator: "In" + values: [ "8" ] # Scheduler might benefit from higher CPU env: - name: "AIRFLOW_VAR_KUBERNETES_PIPELINE_NAMESPACE" diff --git a/airflow/plugins/unity_sps_utils.py b/airflow/plugins/unity_sps_utils.py index 0f731da1..efb2fbc4 100644 --- a/airflow/plugins/unity_sps_utils.py +++ b/airflow/plugins/unity_sps_utils.py @@ -16,7 +16,7 @@ # Note: each Pod is assigned the same label to assure that (via the anti-affinity requirements) # two Pods with the same label cannot run on the same Node -SPS_DOCKER_CWL_IMAGE = "ghcr.io/unity-sds/unity-sps/sps-docker-cwl:2.5.5" +SPS_DOCKER_CWL_IMAGE = "ghcr.io/unity-sds/unity-sps/sps-docker-cwl:2.5.6" NODE_POOL_DEFAULT = "airflow-kubernetes-pod-operator" NODE_POOL_HIGH_WORKLOAD = "airflow-kubernetes-pod-operator-high-workload" @@ -27,11 +27,16 @@ LOG_LEVEL_TYPE = {10: "DEBUG", 20: "INFO", 30: "WARNING", 40: "ERROR", 50: "CRITICAL"} EC2_TYPES = { - "t3.micro": { - "desc": "General Purpose", - "cpu": 1, - "memory": 1, - }, + # "t3.nano": { + # "desc": "General Purpose", + # "cpu": 1, + # "memory": 0.5, + # }, + # "t3.micro": { + # "desc": "General Purpose", + # "cpu": 2, + # "memory": 1, + # }, "t3.small": { "desc": "General Purpose", "cpu": 2, @@ -97,23 +102,28 @@ "cpu": 32, "memory": 64, }, - "m5ad.large": { - "desc": "General Purpose with SSD storage", - "cpu": 2, - "memory": 8, + "c6i.12xlarge": { + "desc": "Compute Optimized", + "cpu": 48, + "memory": 96, + }, + "c6i.16xlarge": { + "desc": "Compute Optimized", + "cpu": 64, + "memory": 128, }, "m5ad.xlarge": { - "desc": "General Purpose with SSD storage", + "desc": "General Purpose with SSD local storage", "cpu": 4, "memory": 16, }, "m5ad.2xlarge": { - "desc": "General 
Purpose with SSD storage", + "desc": "General Purpose with SSD local storage", "cpu": 8, "memory": 32, }, "m5ad.4xlarge": { - "desc": "General Purpose with SSD storage", + "desc": "General Purpose with SSD local storage", "cpu": 16, "memory": 64, }, diff --git a/terraform-unity/.terraform.lock.hcl b/terraform-unity/.terraform.lock.hcl index 3cd21ef2..685d2384 100644 --- a/terraform-unity/.terraform.lock.hcl +++ b/terraform-unity/.terraform.lock.hcl @@ -171,22 +171,3 @@ provider "registry.terraform.io/hashicorp/time" { "zh:e6ac6bba391afe728a099df344dbd6481425b06d61697522017b8f7a59957d44", ] } - -provider "registry.terraform.io/hashicorp/tls" { - version = "4.0.6" - hashes = [ - "h1:n3M50qfWfRSpQV9Pwcvuse03pEizqrmYEryxKky4so4=", - "zh:10de0d8af02f2e578101688fd334da3849f56ea91b0d9bd5b1f7a243417fdda8", - "zh:37fc01f8b2bc9d5b055dc3e78bfd1beb7c42cfb776a4c81106e19c8911366297", - "zh:4578ca03d1dd0b7f572d96bd03f744be24c726bfd282173d54b100fd221608bb", - "zh:6c475491d1250050765a91a493ef330adc24689e8837a0f07da5a0e1269e11c1", - "zh:81bde94d53cdababa5b376bbc6947668be4c45ab655de7aa2e8e4736dfd52509", - "zh:abdce260840b7b050c4e401d4f75c7a199fafe58a8b213947a258f75ac18b3e8", - "zh:b754cebfc5184873840f16a642a7c9ef78c34dc246a8ae29e056c79939963c7a", - "zh:c928b66086078f9917aef0eec15982f2e337914c5c4dbc31dd4741403db7eb18", - "zh:cded27bee5f24de6f2ee0cfd1df46a7f88e84aaffc2ecbf3ff7094160f193d50", - "zh:d65eb3867e8f69aaf1b8bb53bd637c99c6b649ba3db16ded50fa9a01076d1a27", - "zh:ecb0c8b528c7a619fa71852bb3fb5c151d47576c5aab2bf3af4db52588722eeb", - "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", - ] -} diff --git a/terraform-unity/README.md b/terraform-unity/README.md index 14611932..9877e637 100644 --- a/terraform-unity/README.md +++ b/terraform-unity/README.md @@ -188,15 +188,15 @@ terraform apply -no-color 2>&1 | tee apply_output.txt | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| 
[airflow\_docker\_images](#input\_airflow\_docker\_images) | Docker images for the associated Airflow services. |
object({
airflow = object({
name = string
tag = string
})
})
|
{
"airflow": {
"name": "ghcr.io/unity-sds/unity-sps/sps-airflow",
"tag": "2.4.0"
}
}
| no | +| [airflow\_docker\_images](#input\_airflow\_docker\_images) | Docker images for the associated Airflow services. |
object({
airflow = object({
name = string
tag = string
})
})
|
{
"airflow": {
"name": "ghcr.io/unity-sds/unity-sps/sps-airflow",
"tag": "2.5.6"
}
}
| no | | [airflow\_webserver\_password](#input\_airflow\_webserver\_password) | The password for the Airflow webserver and UI. | `string` | n/a | yes | | [airflow\_webserver\_username](#input\_airflow\_webserver\_username) | The username for the Airflow webserver and UI. | `string` | `"admin"` | no | -| [dag\_catalog\_repo](#input\_dag\_catalog\_repo) | Git repository that stores the catalog of Airflow DAGs. |
object({
url = string
ref = string
dags_directory_path = string
})
|
{
"dags_directory_path": "airflow/dags",
"ref": "2.4.0",
"url": "https://github.com/unity-sds/unity-sps.git"
}
| no | +| [dag\_catalog\_repo](#input\_dag\_catalog\_repo) | Git repository that stores the catalog of Airflow DAGs. |
object({
url = string
ref = string
dags_directory_path = string
})
|
{
"dags_directory_path": "airflow/dags",
"ref": "2.5.6",
"url": "https://github.com/unity-sds/unity-sps.git"
}
| no | | [deployment\_name](#input\_deployment\_name) | The name of the deployment. | `string` | `""` | no | | [helm\_charts](#input\_helm\_charts) | Helm charts for the associated services. |
map(object({
repository = string
chart = string
version = string
}))
|
{
"airflow": {
"chart": "airflow",
"repository": "https://airflow.apache.org",
"version": "1.15.0"
},
"keda": {
"chart": "keda",
"repository": "https://kedacore.github.io/charts",
"version": "v2.15.1"
}
}
| no | | [installprefix](#input\_installprefix) | The install prefix for the service area (unused) | `string` | `""` | no | | [karpenter\_node\_classes](#input\_karpenter\_node\_classes) | Configuration for karpenter\_node\_classes |
map(object({
volume_size = string
}))
|
{
"airflow-kubernetes-pod-operator-high-workload": {
"volume_size": "300Gi"
},
"default": {
"volume_size": "30Gi"
}
}
| no | -| [karpenter\_node\_pools](#input\_karpenter\_node\_pools) | Configuration for Karpenter node pools |
map(object({
requirements : list(object({
key : string
operator : string
values : list(string)
}))
nodeClassRef : string
limits : object({
cpu : string
memory : string
})
disruption : object({
consolidationPolicy : string
consolidateAfter : string
})
}))
|
{
"airflow-celery-workers": {
"disruption": {
"consolidateAfter": "1m",
"consolidationPolicy": "WhenEmpty"
},
"limits": {
"cpu": "80",
"memory": "320Gi"
},
"nodeClassRef": "default",
"requirements": [
{
"key": "karpenter.k8s.aws/instance-family",
"operator": "In",
"values": [
"m7i",
"m6i",
"m5",
"t3",
"c7i",
"c6i",
"c5",
"r7i",
"r6i",
"r5"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Gt",
"values": [
"1"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Lt",
"values": [
"9"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Gt",
"values": [
"8191"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Lt",
"values": [
"32769"
]
},
{
"key": "karpenter.k8s.aws/instance-hypervisor",
"operator": "In",
"values": [
"nitro"
]
}
]
},
"airflow-core-components": {
"disruption": {
"consolidateAfter": "1m",
"consolidationPolicy": "WhenEmpty"
},
"limits": {
"cpu": "40",
"memory": "160Gi"
},
"nodeClassRef": "default",
"requirements": [
{
"key": "karpenter.k8s.aws/instance-family",
"operator": "In",
"values": [
"m7i",
"m6i",
"m5",
"t3",
"c7i",
"c6i",
"c5",
"r7i",
"r6i",
"r5"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Gt",
"values": [
"1"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Lt",
"values": [
"17"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Gt",
"values": [
"8191"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Lt",
"values": [
"32769"
]
},
{
"key": "karpenter.k8s.aws/instance-hypervisor",
"operator": "In",
"values": [
"nitro"
]
}
]
},
"airflow-kubernetes-pod-operator": {
"disruption": {
"consolidateAfter": "1m",
"consolidationPolicy": "WhenEmpty"
},
"limits": {
"cpu": "100",
"memory": "400Gi"
},
"nodeClassRef": "default",
"requirements": [
{
"key": "karpenter.k8s.aws/instance-family",
"operator": "In",
"values": [
"m7i",
"m6i",
"m5",
"t3",
"c7i",
"c6i",
"c5",
"r7i",
"r6i",
"r5"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Gt",
"values": [
"1"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Lt",
"values": [
"17"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Gt",
"values": [
"4095"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Lt",
"values": [
"32769"
]
},
{
"key": "karpenter.k8s.aws/instance-hypervisor",
"operator": "In",
"values": [
"nitro"
]
}
]
},
"airflow-kubernetes-pod-operator-high-workload": {
"disruption": {
"consolidateAfter": "1m",
"consolidationPolicy": "WhenEmpty"
},
"limits": {
"cpu": "528",
"memory": "1056Gi"
},
"nodeClassRef": "airflow-kubernetes-pod-operator-high-workload",
"requirements": [
{
"key": "karpenter.k8s.aws/instance-family",
"operator": "In",
"values": [
"m7i",
"m6i",
"m5",
"t3",
"c7i",
"c6i",
"c5",
"r7i",
"r6i",
"r5"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Gt",
"values": [
"1"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Lt",
"values": [
"65"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Gt",
"values": [
"4095"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Lt",
"values": [
"131073"
]
},
{
"key": "karpenter.k8s.aws/instance-hypervisor",
"operator": "In",
"values": [
"nitro"
]
}
]
}
}
| no | +| [karpenter\_node\_pools](#input\_karpenter\_node\_pools) | Configuration for Karpenter node pools |
map(object({
requirements : list(object({
key : string
operator : string
values : list(string)
}))
nodeClassRef : string
limits : object({
cpu : string
memory : string
})
disruption : object({
consolidationPolicy : string
consolidateAfter : string
})
}))
|
{
"airflow-celery-workers": {
"disruption": {
"consolidateAfter": "1m",
"consolidationPolicy": "WhenEmpty"
},
"limits": {
"cpu": "80",
"memory": "320Gi"
},
"nodeClassRef": "default",
"requirements": [
{
"key": "karpenter.k8s.aws/instance-family",
"operator": "In",
"values": [
"m7i",
"m6i",
"m5",
"t3",
"c7i",
"c6i",
"c5",
"r7i",
"r6i",
"r5",
"m5ad"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Gt",
"values": [
"1"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Lt",
"values": [
"17"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Gt",
"values": [
"4095"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Lt",
"values": [
"65537"
]
},
{
"key": "karpenter.k8s.aws/instance-hypervisor",
"operator": "In",
"values": [
"nitro"
]
}
]
},
"airflow-core-components": {
"disruption": {
"consolidateAfter": "1m",
"consolidationPolicy": "WhenEmpty"
},
"limits": {
"cpu": "40",
"memory": "160Gi"
},
"nodeClassRef": "default",
"requirements": [
{
"key": "karpenter.k8s.aws/instance-family",
"operator": "In",
"values": [
"m7i",
"m6i",
"m5",
"t3",
"c7i",
"c6i",
"c5",
"r7i",
"r6i",
"r5",
"m5ad"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Gt",
"values": [
"1"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Lt",
"values": [
"17"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Gt",
"values": [
"4095"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Lt",
"values": [
"65537"
]
},
{
"key": "karpenter.k8s.aws/instance-hypervisor",
"operator": "In",
"values": [
"nitro"
]
}
]
},
"airflow-kubernetes-pod-operator": {
"disruption": {
"consolidateAfter": "1m",
"consolidationPolicy": "WhenEmpty"
},
"limits": {
"cpu": "6400",
"memory": "12800Gi"
},
"nodeClassRef": "default",
"requirements": [
{
"key": "karpenter.k8s.aws/instance-family",
"operator": "In",
"values": [
"m7i",
"m6i",
"m5",
"m5ad",
"t3",
"c7i",
"c6i",
"c5",
"r7i",
"r6i",
"r5"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Gt",
"values": [
"0"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Lt",
"values": [
"17"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Gt",
"values": [
"511"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Lt",
"values": [
"65537"
]
},
{
"key": "karpenter.k8s.aws/instance-hypervisor",
"operator": "In",
"values": [
"nitro"
]
}
]
},
"airflow-kubernetes-pod-operator-high-workload": {
"disruption": {
"consolidateAfter": "1m",
"consolidationPolicy": "WhenEmpty"
},
"limits": {
"cpu": "6400",
"memory": "12800Gi"
},
"nodeClassRef": "airflow-kubernetes-pod-operator-high-workload",
"requirements": [
{
"key": "karpenter.k8s.aws/instance-family",
"operator": "In",
"values": [
"m7i",
"m6i",
"m5",
"m5ad",
"t3",
"c7i",
"c6i",
"c5",
"r7i",
"r6i",
"r5"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Gt",
"values": [
"0"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Lt",
"values": [
"65"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Gt",
"values": [
"511"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Lt",
"values": [
"262145"
]
},
{
"key": "karpenter.k8s.aws/instance-hypervisor",
"operator": "In",
"values": [
"nitro"
]
}
]
}
}
| no | | [kubeconfig\_filepath](#input\_kubeconfig\_filepath) | The path to the kubeconfig file for the Kubernetes cluster. | `string` | n/a | yes | | [mcp\_ami\_owner\_id](#input\_mcp\_ami\_owner\_id) | The owner ID of the MCP AMIs | `string` | `"794625662971"` | no | | [ogc\_processes\_docker\_images](#input\_ogc\_processes\_docker\_images) | Docker images for the associated OGC Processes API services. |
object({
ogc_processes_api = object({
name = string
tag = string
})
git_sync = object({
name = string
tag = string
})
redis = object({
name = string
tag = string
})
})
|
{
"git_sync": {
"name": "registry.k8s.io/git-sync/git-sync",
"tag": "v4.2.4"
},
"ogc_processes_api": {
"name": "ghcr.io/unity-sds/unity-sps-ogc-processes-api/unity-sps-ogc-processes-api",
"tag": "2.0.0"
},
"redis": {
"name": "redis",
"tag": "7.4.0"
}
}
| no | diff --git a/terraform-unity/modules/terraform-unity-sps-eks/README.md b/terraform-unity/modules/terraform-unity-sps-eks/README.md index a27256a6..ae2ee64b 100644 --- a/terraform-unity/modules/terraform-unity-sps-eks/README.md +++ b/terraform-unity/modules/terraform-unity-sps-eks/README.md @@ -7,25 +7,29 @@ |------|---------| | [terraform](#requirement\_terraform) | ~> 1.8.2 | | [aws](#requirement\_aws) | 5.67.0 | +| [null](#requirement\_null) | 3.2.3 | ## Providers | Name | Version | |------|---------| | [aws](#provider\_aws) | 5.67.0 | +| [null](#provider\_null) | 3.2.3 | ## Modules | Name | Source | Version | |------|--------|---------| -| [unity-eks](#module\_unity-eks) | git::https://github.com/unity-sds/unity-cs-infra.git//terraform-unity-eks_module | unity-sps-2.4.0 | +| [unity-eks](#module\_unity-eks) | git::https://github.com/unity-sds/unity-cs-infra.git//terraform-unity-eks_module | unity-sps-2.4.1-hotfix1 | ## Resources | Name | Type | |------|------| | [aws_iam_role_policy.sps_airflow_eks_inline_policy](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/resources/iam_role_policy) | resource | +| [null_resource.eks_post_deployment_actions](https://registry.terraform.io/providers/hashicorp/null/3.2.3/docs/resources/resource) | resource | | [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/caller_identity) | data source | +| [aws_region.current](https://registry.terraform.io/providers/hashicorp/aws/5.67.0/docs/data-sources/region) | data source | ## Inputs @@ -33,7 +37,7 @@ |------|-------------|------|---------|:--------:| | [deployment\_name](#input\_deployment\_name) | The name of the deployment. | `string` | `""` | no | | [installprefix](#input\_installprefix) | The install prefix for the service area (unused) | `string` | `""` | no | -| [nodegroups](#input\_nodegroups) | A map of node group configurations |
map(object({
create_iam_role = optional(bool)
iam_role_arn = optional(string)
ami_id = optional(string)
min_size = optional(number)
max_size = optional(number)
desired_size = optional(number)
instance_types = optional(list(string))
capacity_type = optional(string)
enable_bootstrap_user_data = optional(bool)
metadata_options = optional(map(any))
block_device_mappings = optional(map(object({
device_name = string
ebs = object({
volume_size = number
volume_type = string
encrypted = bool
delete_on_termination = bool
})
})))
}))
|
{
"defaultGroup": {
"block_device_mappings": {
"xvda": {
"device_name": "/dev/xvda",
"ebs": {
"delete_on_termination": true,
"encrypted": true,
"volume_size": 100,
"volume_type": "gp2"
}
}
},
"desired_size": 1,
"instance_types": [
"t3.xlarge"
],
"max_size": 1,
"metadata_options": {
"http_endpoint": "enabled",
"http_put_response_hop_limit": 3
},
"min_size": 1
}
}
| no | +| [nodegroups](#input\_nodegroups) | A map of node group configurations |
map(object({
create_iam_role = optional(bool)
iam_role_arn = optional(string)
ami_id = optional(string)
min_size = optional(number)
max_size = optional(number)
desired_size = optional(number)
instance_types = optional(list(string))
capacity_type = optional(string)
enable_bootstrap_user_data = optional(bool)
metadata_options = optional(map(any))
block_device_mappings = optional(map(object({
device_name = string
ebs = object({
volume_size = number
volume_type = string
encrypted = bool
delete_on_termination = bool
})
})))
}))
|
{
"defaultGroup": {
"block_device_mappings": {
"xvda": {
"device_name": "/dev/xvda",
"ebs": {
"delete_on_termination": true,
"encrypted": true,
"volume_size": 100,
"volume_type": "gp2"
}
}
},
"desired_size": 1,
"instance_types": [
"t3.2xlarge"
],
"max_size": 1,
"metadata_options": {
"http_endpoint": "enabled",
"http_put_response_hop_limit": 3
},
"min_size": 1
}
}
| no | | [project](#input\_project) | The project or mission deploying Unity SPS | `string` | `"unity"` | no | | [release](#input\_release) | The software release version. | `string` | `"24.4"` | no | | [service\_area](#input\_service\_area) | The service area owner of the resources being deployed | `string` | `"sps"` | no | diff --git a/terraform-unity/modules/terraform-unity-sps-eks/variables.tf b/terraform-unity/modules/terraform-unity-sps-eks/variables.tf index c6b006f7..2fec4021 100644 --- a/terraform-unity/modules/terraform-unity-sps-eks/variables.tf +++ b/terraform-unity/modules/terraform-unity-sps-eks/variables.tf @@ -67,7 +67,7 @@ variable "nodegroups" { })) default = { defaultGroup = { - instance_types = ["t3.xlarge"] + instance_types = ["t3.2xlarge"] min_size = 1 max_size = 1 desired_size = 1 diff --git a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf index 120073fe..22531f2b 100644 --- a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf @@ -88,7 +88,7 @@ resource "kubernetes_deployment" "ogc_processes_api" { match_expressions { key = "karpenter.k8s.aws/instance-cpu" operator = "In" - values = ["2", "4"] + values = ["4"] } } } diff --git a/terraform-unity/variables.tf b/terraform-unity/variables.tf index a3674c4c..edddb6b0 100644 --- a/terraform-unity/variables.tf +++ b/terraform-unity/variables.tf @@ -73,7 +73,7 @@ variable "airflow_docker_images" { default = { airflow = { name = "ghcr.io/unity-sds/unity-sps/sps-airflow" - tag = "2.4.0" + tag = "2.5.6" } } } @@ -161,7 +161,7 @@ variable "karpenter_node_pools" { { key = "karpenter.k8s.aws/instance-cpu" operator = "Gt" - values = ["1"] // From 2 inclusive + values = ["0"] // From 1 vCPU inclusive }, { key = "karpenter.k8s.aws/instance-cpu" @@ -171,12 +171,13 @@ variable "karpenter_node_pools" { { key =
"karpenter.k8s.aws/instance-memory" operator = "Gt" - values = ["4095"] // 4 GiB = 4096 MiB + values = ["511"] // 0.5 GiB = 512 MiB }, { key = "karpenter.k8s.aws/instance-memory" operator = "Lt" - values = ["131073"] // 128 GiB = 131072 MiB + # values = ["131073"] // 128 GiB = 131072 MiB + values = ["262145"] // 256 GiB = 262144 MiB }, { key = "karpenter.k8s.aws/instance-hypervisor", @@ -185,8 +186,8 @@ variable "karpenter_node_pools" { } ] limits = { - cpu = "528" // 11 x 48 - memory = "1056Gi" // 11 x 96 + cpu = "6400" # 64 CPU X 100 + memory = "12800Gi" # 128 Gi X 100 } disruption = { consolidationPolicy = "WhenEmpty" @@ -204,7 +205,7 @@ variable "karpenter_node_pools" { { key = "karpenter.k8s.aws/instance-cpu" operator = "Gt" - values = ["1"] // From 2 inclusive + values = ["0"] // From 0.5 inclusive }, { key = "karpenter.k8s.aws/instance-cpu" @@ -214,12 +215,13 @@ variable "karpenter_node_pools" { { key = "karpenter.k8s.aws/instance-memory" operator = "Gt" - values = ["4095"] // 4 GiB = 4096 MiB + # values = ["4095"] // 4 GiB = 4096 MiB + values = ["511"] // 0.5 GiB = 512 MiB }, { key = "karpenter.k8s.aws/instance-memory" operator = "Lt" - values = ["32769"] // 32 GiB = 32768 MiB + values = ["65537"] // 64 GiB = 65536 MiB }, { key = "karpenter.k8s.aws/instance-hypervisor", @@ -228,8 +230,8 @@ variable "karpenter_node_pools" { } ] limits = { - cpu = "100" - memory = "400Gi" + cpu = "6400" # 64 CPU X 100 + memory = "12800Gi" # 128 Gi X 100 } disruption = { consolidationPolicy = "WhenEmpty" @@ -242,7 +244,7 @@ variable "karpenter_node_pools" { { key = "karpenter.k8s.aws/instance-family" operator = "In" - values = ["m7i", "m6i", "m5", "t3", "c7i", "c6i", "c5", "r7i", "r6i", "r5"] + values = ["m7i", "m6i", "m5", "t3", "c7i", "c6i", "c5", "r7i", "r6i", "r5", "m5ad"] }, { key = "karpenter.k8s.aws/instance-cpu" @@ -252,17 +254,17 @@ variable "karpenter_node_pools" { { key = "karpenter.k8s.aws/instance-cpu" operator = "Lt" - values = ["9"] // To 8 inclusive + values = 
["17"] // To 16 inclusive }, { key = "karpenter.k8s.aws/instance-memory" operator = "Gt" - values = ["8191"] // From 8 GB inclusive + values = ["4095"] // From 4 GB inclusive }, { key = "karpenter.k8s.aws/instance-memory" operator = "Lt" - values = ["32769"] // To 32 GB inclusive + values = ["65537"] // To 64 GB inclusive }, { key = "karpenter.k8s.aws/instance-hypervisor", @@ -285,7 +287,7 @@ variable "karpenter_node_pools" { { key = "karpenter.k8s.aws/instance-family" operator = "In" - values = ["m7i", "m6i", "m5", "t3", "c7i", "c6i", "c5", "r7i", "r6i", "r5"] + values = ["m7i", "m6i", "m5", "t3", "c7i", "c6i", "c5", "r7i", "r6i", "r5", "m5ad"] }, { key = "karpenter.k8s.aws/instance-cpu" @@ -300,12 +302,12 @@ variable "karpenter_node_pools" { { key = "karpenter.k8s.aws/instance-memory" operator = "Gt" - values = ["8191"] // From 8 GB inclusive + values = ["4095"] // From 4 GB inclusive }, { key = "karpenter.k8s.aws/instance-memory" operator = "Lt" - values = ["32769"] // To 32 GB inclusive + values = ["65537"] // To 64 GB inclusive }, { key = "karpenter.k8s.aws/instance-hypervisor", @@ -334,7 +336,7 @@ variable "dag_catalog_repo" { }) default = { url = "https://github.com/unity-sds/unity-sps.git" - ref = "2.4.0" + ref = "2.5.6" dags_directory_path = "airflow/dags" } }