Skip to content

feat: Add Kubernetes manifests and Helm Charts (Phase 3 Week 4-5) #47

feat: Add Kubernetes manifests and Helm Charts (Phase 3 Week 4-5)

feat: Add Kubernetes manifests and Helm Charts (Phase 3 Week 4-5) #47

Workflow file for this run

# Workflow identity, triggers, and shared environment.
name: CI/CD Pipeline

# Triggers: pushes to main/develop and to v*.*.* tags, pull requests that
# target main/develop, and newly created releases.
on:
  push:
    branches:
      - main
      - develop
    tags:
      - 'v*.*.*'
  pull_request:
    branches:
      - main
      - develop
  release:
    types:
      - created

# Variables visible to every job in this workflow.
env:
  REGISTRY: ghcr.io
  IMAGE_PREFIX: ${{ github.repository }}

jobs:
# ========================================
# Job 1: Code quality checks and tests
# ========================================
  quality-check:
    name: Code Quality & Tests
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
          cache: 'pip'

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install -r requirements.txt
          pip install black isort mypy pylint pytest pytest-cov pytest-asyncio bandit safety

      # Formatting and import ordering are hard gates: a non-zero exit fails
      # the job.
      - name: Black format check
        run: black --check --line-length 100 services/ tests/

      - name: isort import check
        run: isort --check-only services/ tests/

      # The trailing `|| true` keeps type checking and linting advisory: their
      # output is logged but never fails the job (so --fail-under does not gate).
      - name: MyPy type check
        run: mypy services/ --ignore-missing-imports --explicit-package-bases || true

      - name: Pylint linting
        run: pylint services/ --fail-under=8.0 || true

      # Unit tests with coverage; the job fails if total coverage is below 40%.
      - name: Run unit tests
        run: |
          PYTHONPATH=${GITHUB_WORKSPACE}/services:$PYTHONPATH \
          pytest tests/unit/ -v \
            --cov=services \
            --cov-report=xml \
            --cov-report=html \
            --cov-report=term-missing \
            --cov-fail-under=40

      # Coverage upload is best-effort (fail_ci_if_error: false).
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        with:
          files: ./coverage.xml
          flags: unittests
          name: codecov-umbrella
          fail_ci_if_error: false

      # Security scans are advisory (`|| true`); their JSON reports are
      # published as a build artifact below.
      - name: Security scan (Bandit)
        run: |
          bandit -r services/ -f json -o bandit-report.json || true

      # In Safety 2.x and later `--output` selects the output format and
      # `--save-json` names the report file. Passing a file name to `--output`
      # is rejected, and with `|| true` that error was silently swallowed, so
      # no report was ever written.
      - name: Security scan (Safety)
        run: |
          safety check --output json --save-json safety-report.json || true

      - name: Upload security reports
        uses: actions/upload-artifact@v4
        with:
          name: security-reports
          path: |
            bandit-report.json
            safety-report.json
# ========================================
# Job 2: Build and push Docker images
# ========================================
  build-images:
    name: Build & Push Images
    runs-on: ubuntu-latest
    needs: quality-check
    if: github.event_name == 'push' || github.event_name == 'release'
    # Explicit GITHUB_TOKEN scopes: read the repo, push packages to GHCR, and
    # upload SARIF results to code scanning. Without them the job depends on
    # the repository's default token permissions.
    permissions:
      contents: read
      packages: write
      security-events: write
    strategy:
      matrix:
        # One build per service directory under ./services.
        service:
          - ai_triage_agent
          - alert_ingestor
          - alert_normalizer
          - automation_orchestrator
          - configuration_service
          - data_analytics
          - monitoring_metrics
          - notification_service
          - reporting_service
          - similarity_search
          - web_dashboard
          - workflow_engine
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Tag rules:
      #   - branch / tag / PR ref names (the tag rule keeps the leading "v",
      #     matching github.ref_name)
      #   - semver tags without the "v" prefix
      #   - "<branch>-<short sha>", only on branch refs: {{branch}} is empty on
      #     tag and release refs, which would render an invalid "-<sha>" tag
      #   - the full commit SHA with no prefix, so a deployment can pin
      #     github.sha directly
      #   - "latest" on the default branch
      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ghcr.io/${{ github.repository }}/${{ matrix.service }}
          tags: |
            type=ref,event=branch
            type=ref,event=tag
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=sha,prefix={{branch}}-,enable=${{ github.ref_type == 'branch' }}
            type=sha,format=long,prefix=
            type=raw,value=latest,enable={{is_default_branch}}

      # Layer caching is temporarily disabled to force a rebuild, so
      # cache-from / cache-to are intentionally left unset rather than
      # declared as empty keys.
      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: ./services/${{ matrix.service }}
          file: ./services/${{ matrix.service }}/Dockerfile
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          build-args: |
            BUILD_DATE=${{ github.event.repository.updated_at }}
            VCS_REF=${{ github.sha }}
            VERSION=${{ steps.meta.outputs.version }}

      # NOTE(review): the action is referenced by a moving branch (@master);
      # consider pinning it to a release tag or commit SHA.
      - name: Image scan with Trivy
        uses: aquasecurity/trivy-action@master
        with:
          image-ref: ghcr.io/${{ github.repository }}/${{ matrix.service }}:${{ steps.meta.outputs.version }}
          format: 'sarif'
          output: 'trivy-results.sarif'
          severity: 'CRITICAL,HIGH'

      # A distinct category per matrix entry keeps each service's results
      # separate in code scanning instead of sharing one analysis slot.
      - name: Upload Trivy results to GitHub Security
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: 'trivy-results.sarif'
          category: trivy-${{ matrix.service }}
# ========================================
# Job 3: Deploy to the staging environment
# ========================================
  deploy-staging:
    name: Deploy to Staging
    runs-on: ubuntu-latest
    needs: build-images
    # Only pushes to the develop branch are deployed to staging.
    if: github.ref == 'refs/heads/develop' && github.event_name == 'push'
    environment:
      name: staging
      url: https://staging.security-triage.example.com
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Configure kubectl
        uses: azure/k8s-set-context@v3
        with:
          method: kubeconfig
          kubeconfig: ${{ secrets.KUBE_CONFIG_STAGING }}

      # Deploy the "<branch>-<short sha>" image tag, which the image build
      # publishes for branch pushes (sha rule with prefix "{{branch}}-").
      # The bare 40-character github.sha did not match any tag produced by
      # that rule. The 7-character length follows docker/metadata-action's
      # default short-SHA format.
      # --atomic rolls the release back automatically if the upgrade fails.
      - name: Deploy with Helm
        run: |
          helm upgrade --install security-triage-staging deployment/helm/security-triage \
            --namespace security-triage-staging \
            --create-namespace \
            --values deployment/helm/security-triage/values-staging.yaml \
            --set image.tag="${GITHUB_REF_NAME}-${GITHUB_SHA::7}" \
            --wait \
            --timeout 10m \
            --atomic

      - name: Verify deployment
        run: |
          kubectl rollout status deployment -n security-triage-staging
          kubectl get pods -n security-triage-staging

      - name: Run smoke tests
        run: |
          chmod +x deployment/scripts/smoke-tests.sh
          deployment/scripts/smoke-tests.sh https://staging.security-triage.example.com

      - name: Notify on success
        if: success()
        run: |
          echo "✅ Staging deployment successful!"
          # Slack, Discord, or email notifications can be added here.
# ========================================
# Job 4: Deploy to the production environment
# ========================================
  deploy-production:
    name: Deploy to Production
    runs-on: ubuntu-latest
    needs: build-images
    # Production is deployed only from release events.
    if: github.event_name == 'release'
    # The release-asset upload step needs write access to repository contents.
    permissions:
      contents: write
    environment:
      name: production
      url: https://security-triage.example.com
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Configure kubectl
        uses: azure/k8s-set-context@v3
        with:
          method: kubeconfig
          kubeconfig: ${{ secrets.KUBE_CONFIG_PROD }}

      # Dump the database before touching the release. `test -s` fails the
      # step if the dump is empty, so the deployment never proceeds on a
      # useless backup.
      # NOTE(review): the dump is written to the runner workspace only; no
      # step in this job copies it to durable storage - confirm that is
      # intended.
      - name: Pre-deployment backup
        run: |
          BACKUP_FILE="backup-$(date +%Y%m%d-%H%M%S).sql"
          kubectl exec -n security-triage-prod postgres-0 -- \
            pg_dump -U triage_user security_triage > "$BACKUP_FILE"
          test -s "$BACKUP_FILE"
          echo "Backup completed: $BACKUP_FILE"

      # The semver image tags are published without the leading "v"
      # (pattern {{version}}), so strip it from the release tag name.
      # --atomic rolls the release back automatically if the upgrade fails.
      - name: Deploy with Helm (Blue-Green)
        id: helm_deploy
        run: |
          helm upgrade --install security-triage-prod deployment/helm/security-triage \
            --namespace security-triage-prod \
            --create-namespace \
            --values deployment/helm/security-triage/values-prod.yaml \
            --set image.tag="${GITHUB_REF_NAME#v}" \
            --wait \
            --timeout 15m \
            --atomic

      - name: Verify deployment
        run: |
          kubectl rollout status deployment -n security-triage-prod
          kubectl get pods -n security-triage-prod

      - name: Run E2E tests
        run: |
          chmod +x deployment/scripts/e2e-tests.sh
          deployment/scripts/e2e-tests.sh https://security-triage.example.com

      # Attach the chart metadata and default values to the release.
      - name: Create GitHub Release
        uses: softprops/action-gh-release@v1
        if: startsWith(github.ref, 'refs/tags/')
        with:
          files: |
            deployment/helm/security-triage/Chart.yaml
            deployment/helm/security-triage/values.yaml
          draft: false
          prerelease: false
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Notify on success
        if: success()
        run: |
          echo "✅ Production deployment successful!"
          # Slack, Discord, or email notifications can be added here.

      # Roll back only when the Helm upgrade itself succeeded and a later step
      # (verification, E2E tests, release upload) failed. If an earlier step
      # failed, nothing new was deployed; if the upgrade failed, --atomic has
      # already restored the release. A manual rollback in either case would
      # move production to an unintended revision.
      - name: Rollback on failure
        if: failure() && steps.helm_deploy.outcome == 'success'
        run: |
          echo "❌ Deployment failed! Rolling back..."
          helm rollback security-triage-prod -n security-triage-prod
# ========================================
# Job 5: Performance tests (staging)
# ========================================
  performance-test:
    name: Performance Tests
    runs-on: ubuntu-latest
    needs: deploy-staging
    # Runs only after a staging deployment, i.e. on pushes to develop.
    if: github.event_name == 'push' && github.ref == 'refs/heads/develop'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # -f makes curl fail on HTTP errors instead of piping an error page
      # into tar.
      - name: Set up k6
        run: |
          curl -fsSL https://github.com/grafana/k6/releases/download/v0.47.0/k6-v0.47.0-linux-amd64.tar.gz | tar xvz
          sudo mv k6-*/k6 /usr/local/bin/

      # --out json writes a line-delimited stream of raw metric samples.
      # The aggregated end-of-test metrics used for reporting come from
      # --summary-export. p(99) is not among k6's default trend stats, so it
      # is requested explicitly.
      - name: Run load tests
        run: |
          k6 run \
            --out json=performance-results.json \
            --summary-export=performance-summary.json \
            --summary-trend-stats="avg,min,med,max,p(90),p(95),p(99)" \
            deployment/tests/load/test-alert-ingestion.js

      # Keep the results even when the load test step fails.
      - name: Upload performance results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: performance-results
          path: |
            performance-results.json
            performance-summary.json

      # This job only runs for push events, so there is no pull request to
      # comment on (the former step was gated on `pull_request` and could
      # never run). The report is written to the job summary instead.
      - name: Publish results to job summary
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const summary = JSON.parse(fs.readFileSync('performance-summary.json', 'utf8'));
            const metrics = summary.metrics;
            const httpReqDuration = metrics['http_req_duration'];
            // http_req_failed is a rate metric: `value` is the failed fraction (0..1).
            const failedPercent = (metrics['http_req_failed'].value * 100).toFixed(2);
            // http_reqs is a counter: `rate` is requests per second.
            const requestsPerSecond = metrics['http_reqs'].rate.toFixed(2);
            const report = [
              '## 📊 性能测试结果',
              '',
              `- **P95 延迟**: ${httpReqDuration['p(95)']}ms`,
              `- **P99 延迟**: ${httpReqDuration['p(99)']}ms`,
              `- **请求成功率**: ${failedPercent}%失败`,
              `- **吞吐量**: ${requestsPerSecond} 请求/秒`,
            ].join('\n');
            await core.summary.addRaw(report, true).write();