Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 93 additions & 0 deletions .github/workflows/ci-external-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# CI workflow: installs the ./helm/supersonic chart into a throwaway Kind
# cluster with Envoy configured from a pre-created external ConfigMap
# (tests/envoy-config-test.yaml + tests/values-external-envoy-config.yaml),
# waits for the workloads to become Ready and runs the perf-analyzer job.
name: ci [external-config]

on:
  push:
    branches:
      - "**"
  pull_request:
    branches:
      - "main"

jobs:
  deploy-at-github:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # NOTE(review): the exact pinned version of this action was unreadable
      # in the reviewed copy; `v1` is used here - confirm the intended tag.
      - name: Set up Kubernetes cluster with Kind
        uses: helm/kind-action@v1
        with:
          cluster_name: gh-k8s-cluster

      - name: Set up Helm
        uses: azure/setup-helm@v3
        with:
          version: v3.12.0

      - name: Create CMS namespace
        run: |
          kubectl create namespace cms

      # Only the operator part of kube-prometheus-stack is installed; rules,
      # Alertmanager, Prometheus and Grafana are switched off via --set.
      - name: Install Prometheus Operator CRDs
        run: |
          helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
          helm repo update
          kubectl create namespace monitoring
          helm install prometheus-operator prometheus-community/kube-prometheus-stack \
            --namespace monitoring \
            --set prometheusOperator.createCustomResource=false \
            --set defaultRules.create=false \
            --set alertmanager.enabled=false \
            --set prometheus.enabled=false \
            --set grafana.enabled=false

      - name: Install KEDA Autoscaler
        run: |
          helm repo add kedacore https://kedacore.github.io/charts
          helm repo update
          kubectl create namespace keda
          helm install keda kedacore/keda --namespace keda

      - name: Mount CVMFS
        run: |
          kubectl create namespace cvmfs-csi
          helm install -n cvmfs-csi cvmfs-csi oci://registry.cern.ch/kubernetes/charts/cvmfs-csi --values cvmfs/values-cvmfs-csi.yaml
          kubectl apply -f cvmfs/cvmfs-storageclass.yaml -n cvmfs-csi

      # The ConfigMap must exist before the chart is installed: the chart is
      # deployed with envoy.external_config.load_from_configmap=true.
      - name: Create external Envoy ConfigMap
        run: |
          kubectl apply -f tests/envoy-config-test.yaml -n cms

      - name: Deploy Helm chart with external Envoy config
        run: |
          helm repo add grafana https://grafana.github.io/helm-charts
          helm repo add opentelemetry https://open-telemetry.github.io/opentelemetry-helm-charts
          helm dependency build ./helm/supersonic
          helm upgrade --install supersonic ./helm/supersonic \
            --values tests/values-external-envoy-config.yaml -n cms

      - name: CVMFS Mount ready
        run: |
          kubectl wait --for condition=Ready pod --all -n cvmfs-csi --timeout 120s

      - name: Envoy proxy ready
        run: |
          kubectl wait --for condition=Ready pod -l app.kubernetes.io/component=envoy --timeout 120s -n cms

      - name: Triton server ready
        run: |
          # Describe first so pod events are in the log even if the wait fails.
          kubectl describe pod -l app.kubernetes.io/component=triton -n cms
          kubectl wait --for condition=Ready pod -l app.kubernetes.io/component=triton --timeout 500s -n cms

      - name: Validate Deployment
        run: |
          kubectl get all -n cms

      - name: Run Perf Analyzer Job
        run: |
          kubectl apply -f tests/perf-analyzer-job-ci.yaml

          # Remember the outcome instead of exiting right away, so the job
          # logs are printed even when the wait fails or times out.
          wait_status=0
          kubectl wait --for=condition=complete job/perf-analyzer-job -n cms --timeout=300s || wait_status=$?

          echo "========== Perf Analyzer Logs =========="
          # --tail=-1: with a label selector kubectl only prints the last
          # 10 lines by default.
          kubectl logs -n cms -l job-name=perf-analyzer-job --tail=-1 || true
          echo "========================================"

          if [ "$wait_status" -ne 0 ]; then
            echo "Perf-analyzer job did not complete in time or failed."
            exit 1
          fi

      # always(): tear the cluster down even when an earlier step failed.
      - name: Cleanup
        if: always()
        run: kind delete cluster --name gh-k8s-cluster
4 changes: 4 additions & 0 deletions docs/.values-table.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
| envoy.replicas | int | `1` | Number of Envoy Proxy pods in Deployment |
| envoy.image | string | `"envoyproxy/envoy:v1.30.9"` | Envoy Proxy Docker image |
| envoy.args | list | `["--config-path","/etc/envoy/envoy.yaml","--log-level","info","--log-path","/dev/stdout"]` | Arguments for Envoy |
| envoy.external_config | object | `{"configmap_key":"","configmap_name":"","load_from_configmap":false}` | External Envoy configuration settings |
| envoy.external_config.load_from_configmap | bool | `false` | If true, load Envoy configuration from an external ConfigMap instead of generating it dynamically |
| envoy.external_config.configmap_name | string | `""` | Name of the external ConfigMap containing the Envoy configuration |
| envoy.external_config.configmap_key | string | `""` | Key name in the external ConfigMap (defaults to "envoy.yaml") |
| envoy.resources | object | `{"limits":{"cpu":8,"memory":"4G"},"requests":{"cpu":1,"memory":"2G"}}` | Resource requests and limits for Envoy Proxy. Note: an Envoy Proxy with too many connections might run out of CPU |
| envoy.annotations | object | `{}` | Annotations for Envoy pods |
| envoy.nodeSelector | object | `{}` | Node selector for Envoy pods |
Expand Down
38 changes: 36 additions & 2 deletions helm/supersonic/templates/envoy/configmaps.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{{- /* Define the Envoy configuration in YAML at the top level */}}
{{- /* (it will be used only if envoy.external_config.load_from_configmap is false) */}}
{{- define "envoy.configuration.yaml" }}
{{- with . }}
admin:
Expand Down Expand Up @@ -182,6 +183,11 @@ static_resources:
{{- /* Begin iterating over servers */}}
{{- if .Values.envoy.enabled }}

{{- /* Initialize envoyContext variable */}}
{{- $envoyContext := dict }}

{{- if not .Values.envoy.external_config.load_from_configmap }}
{{- /* Only prepare dynamic configuration context when not using external config */}}
{{- /* Define variables for ports */}}
{{- $tritonGrpcPort := "" -}}
{{- $envoyGrpcPort := "" -}}
Expand All @@ -202,7 +208,7 @@ static_resources:
{{- $tritonName := include "supersonic.tritonName" . }}

{{- /* Create a context to pass to the template */}}
{{- $envoyContext := dict
{{- $envoyContext = dict
"envoyAdminPort" $envoyAdminPort
"envoyGrpcPort" $envoyGrpcPort
"tritonGrpcPort" $tritonGrpcPort
Expand All @@ -212,6 +218,7 @@ static_resources:
"tritonName" $tritonName
"root" .
}}
{{- end }}

apiVersion: v1
kind: ConfigMap
Expand All @@ -223,10 +230,37 @@ metadata:
app.kubernetes.io/component: envoy
data:
envoy.yaml: |-
{{- /*
Body of the "envoy.yaml" key. Two mutually exclusive sources:
  1. envoy.external_config.load_from_configmap=true: the value stored under
     configmap_key (default "envoy.yaml") of the ConfigMap named configmap_name
     in the release namespace is read with lookup and copied in at render time.
     Rendering fails with an explicit message when the name is empty, the
     ConfigMap is missing, it has no data, or the key is absent.
  2. otherwise: the "envoy.configuration.yaml" template is rendered with
     $envoyContext.
NOTE(review): per the Helm docs, lookup returns an empty result when Helm is
not contacting a cluster (helm template, client-side dry run), so branch 1
would end in the "not found" failure there - confirm this is acceptable.
*/}}
{{- if .Values.envoy.external_config.load_from_configmap }}
{{- if .Values.envoy.external_config.configmap_name }}
{{- /* Load configuration from external ConfigMap */}}
{{- $configmapName := .Values.envoy.external_config.configmap_name }}
{{- /* An empty configmap_key falls back to "envoy.yaml" */}}
{{- $dataKey := .Values.envoy.external_config.configmap_key | default "envoy.yaml" }}
{{- /* The ConfigMap is looked up in the release namespace only */}}
{{- $externalConfig := (lookup "v1" "ConfigMap" .Release.Namespace $configmapName) }}
{{- if $externalConfig }}
{{- if $externalConfig.data }}
{{- if hasKey $externalConfig.data $dataKey }}
{{- /* Use the data from the external ConfigMap; nindent 4 places it inside the block scalar */}}
{{- (get $externalConfig.data $dataKey) | nindent 4 }}
{{- else }}
{{- fail (printf "Expected key '%s' not found in ConfigMap '%s/%s'" $dataKey .Release.Namespace $configmapName) }}
{{- end }}
{{- else }}
{{- fail (printf "No data found in ConfigMap '%s/%s'" .Release.Namespace $configmapName) }}
{{- end }}
{{- else }}
{{- fail (printf "External ConfigMap '%s/%s' not found. Please ensure the ConfigMap exists." .Release.Namespace $configmapName) }}
{{- end }}
{{- else }}
{{- /* Error if load_from_configmap is true but configmap_name is empty */}}
{{- fail "envoy.external_config.configmap_name must be specified when envoy.external_config.load_from_configmap is true" }}
{{- end }}
{{- else }}
{{- /* Use dynamically generated configuration */}}
{{ include "envoy.configuration.yaml" $envoyContext | indent 4 }}
{{- end }}
---

{{- if .Values.envoy.rate_limiter.prometheus_based.enabled }}
{{- if and .Values.envoy.rate_limiter.prometheus_based.enabled (not .Values.envoy.external_config.load_from_configmap) }}
{{- /* Create a ConfigMap for the Lua filter */}}
apiVersion: v1
kind: ConfigMap
Expand Down
20 changes: 20 additions & 0 deletions helm/supersonic/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,25 @@
"type": "string"
}
},
"external_config": {
"type": "object",
"properties": {
"load_from_configmap": {
"type": "boolean"
},
"configmap_name": {
"type": "string"
},
"configmap_key": {
"type": "string"
}
},
"required": [
"configmap_key",
"configmap_name",
"load_from_configmap"
]
},
"resources": {
"type": "object",
"properties": {
Expand Down Expand Up @@ -450,6 +469,7 @@
"args",
"auth",
"enabled",
"external_config",
"grpc_route_timeout",
"image",
"ingress",
Expand Down
9 changes: 9 additions & 0 deletions helm/supersonic/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,15 @@ envoy:
# -- Arguments for Envoy
args: ["--config-path", "/etc/envoy/envoy.yaml", "--log-level", "info", "--log-path", "/dev/stdout"]

# -- External Envoy configuration settings
external_config:
# -- If true, load Envoy configuration from an external ConfigMap instead of generating it dynamically
load_from_configmap: false
# -- Name of the external ConfigMap containing the Envoy configuration
configmap_name: ""
# -- Key name in the external ConfigMap (defaults to "envoy.yaml")
configmap_key: ""

# -- Resource requests and limits for Envoy Proxy.
# Note: an Envoy Proxy with too many connections might run out of CPU
resources:
Expand Down
64 changes: 64 additions & 0 deletions tests/envoy-config-test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Externally managed Envoy configuration used by the external-config CI run.
# CI applies this manifest to the "cms" namespace before installing the chart;
# tests/values-external-envoy-config.yaml points the chart at it through
# envoy.external_config (configmap_name: external-envoy-config,
# configmap_key: envoy.yaml).
#
# The embedded config exposes the Envoy admin interface on port 9901 and one
# listener on port 8001 that routes every request ("/" prefix, any domain) to
# the "triton_grpc_service" cluster, which resolves supersonic-triton:8001.
apiVersion: v1
kind: ConfigMap
metadata:
  name: external-envoy-config
  namespace: cms
data:
  # Key must match envoy.external_config.configmap_key in the test values.
  envoy.yaml: |
    # External Envoy configuration for CI testing
    admin:
      access_log_path: /tmp/admin_access.log
      address:
        socket_address:
          address: 0.0.0.0
          port_value: 9901

    static_resources:
      listeners:
        - name: listener_grpc
          address:
            socket_address:
              address: 0.0.0.0
              port_value: 8001

          filter_chains:
            - filters:
                - name: envoy.filters.network.http_connection_manager
                  typed_config:
                    "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                    generate_request_id: true
                    stat_prefix: ingress_grpc
                    codec_type: AUTO
                    route_config:
                      name: local_route_grpc
                      virtual_hosts:
                        - name: backend_grpc
                          domains: ["*"]
                          routes:
                            - match:
                                prefix: "/"
                              route:
                                cluster: triton_grpc_service
                                timeout: 0s

                    http_filters:
                      - name: envoy.filters.http.router
                        typed_config:
                          "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
                    http2_protocol_options: {}

      clusters:
        - name: triton_grpc_service
          connect_timeout: 0.25s
          type: STRICT_DNS
          lb_policy: LEAST_REQUEST
          http2_protocol_options: {}
          load_assignment:
            cluster_name: triton_grpc_service
            endpoints:
              - lb_endpoints:
                  - endpoint:
                      address:
                        socket_address:
                          address: supersonic-triton
                          port_value: 8001
47 changes: 47 additions & 0 deletions tests/values-external-envoy-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Helm values for the CI run that exercises the external Envoy ConfigMap path:
# a single Triton replica backed by CVMFS, Envoy reading its configuration
# from the "external-envoy-config" ConfigMap, and every optional component
# switched off.

triton:
  replicas: 1
  # works for CMSSW run3
  image: fastml/triton-torchgeo:22.07-py3-geometric
  command:
    - "/bin/sh"
    - "-c"
  args:
    - |
      /opt/tritonserver/bin/tritonserver \
      --model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoBTag/Combined/data/models/ \
      --log-verbose=0 \
      --disable-auto-complete-config \
      --exit-timeout-secs=60
  resources:
    limits:
      cpu: 1
      memory: 3Gi
    requests:
      cpu: 1
      memory: 1Gi
  modelRepository:
    enabled: true
    storageType: cvmfs-pvc
    mountPath: /cvmfs
  readinessProbe:
    reset: true

envoy:
  enabled: true
  # Take envoy.yaml from a pre-existing ConfigMap instead of generating it.
  external_config:
    load_from_configmap: true
    configmap_name: external-envoy-config
    configmap_key: envoy.yaml

# Components not needed for this test.
prometheus:
  enabled: false

grafana:
  enabled: false

keda:
  enabled: false

tempo:
  enabled: false

opentelemetry-collector:
  enabled: false

metricsCollector:
  enabled: false
Loading