Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Helm charts health check, ingress, and values #257

Merged
merged 2 commits into from
Mar 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 8 additions & 20 deletions charts/kokoro-fastapi/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,24 +1,12 @@
apiVersion: v2
name: kokoro-fastapi
description: A Helm chart for kokoro-fastapi

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
description: A Helm chart for deploying the Kokoro FastAPI TTS service to Kubernetes
type: application
version: 0.2.0
appVersion: "0.2.0"

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"
keywords:
- tts
- fastapi
- gpu
- kokoro
54 changes: 54 additions & 0 deletions charts/kokoro-fastapi/examples/aks-tls-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Example values: kokoro-fastapi on Azure AKS behind an "nginx" class ingress with TLS.
#
# Tested on
# - Azure AKS with GPU node pool with Nvidia GPU operator
# - This setup uses 1 ingress and load balances across the kokoroTTS replicas
#   (see replicaCount below), enabling simultaneous requests
#
# Azure CLI command to create a GPU node pool:
# az aks nodepool add \
# --resource-group $AZ_RESOURCE_GROUP \
# --cluster-name $CLUSTER_NAME \
# --name t4gpus \
# --node-vm-size Standard_NC4as_T4_v3 \
# --node-count 2 \
# --enable-cluster-autoscaler \
# --min-count 1 \
# --max-count 2 \
# --priority Spot \
# --eviction-policy Delete \
# --spot-max-price -1 \
# --node-taints "sku=gpu:NoSchedule,kubernetes.azure.com/scalesetpriority=spot:NoSchedule" \
# --skip-gpu-driver-install
#
# The two taints set by --node-taints above are the ones the tolerations below match.

kokoroTTS:
replicaCount: 8
port: 8880
tag: v0.2.0
pullPolicy: IfNotPresent

# Azure specific settings for spot T4 GPU nodes with Nvidia GPU operator.
# One toleration per taint: the spot-priority taint and the sku=gpu taint.
tolerations:
- key: "kubernetes.azure.com/scalesetpriority"
operator: Equal
value: "spot"
effect: NoSchedule
- key: "sku"
operator: Equal
value: "gpu"
effect: NoSchedule

# Replace every occurrence of your-external-dns-enabled-hostname below
# (external-dns annotation, rule host, and TLS host) with the real DNS name.
ingress:
enabled: true
className: "nginx"
annotations:
# Requires cert-manager and external-dns to be in the cluster for TLS and DNS
cert-manager.io/cluster-issuer: letsencrypt-prod
external-dns.alpha.kubernetes.io/hostname: your-external-dns-enabled-hostname
external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
hosts:
- host: your-external-dns-enabled-hostname
paths:
- path: /
pathType: Prefix
tls:
- secretName: kokoro-fastapi-tls
hosts:
- your-external-dns-enabled-hostname
56 changes: 56 additions & 0 deletions charts/kokoro-fastapi/examples/gpu-operator-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Example values for the NVIDIA GPU Operator chart (not for kokoro-fastapi itself).
#
# Follow the official NVIDIA GPU Operator documentation
# to install the GPU operator with these settings:
# https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/getting-started.html
#
# This example is for an Nvidia T4 16 GB GPU node pool with only 1 GPU on each node on Azure AKS.
# It uses time-slicing to share the GPU and advertise to the cluster that 1 GPU is 4 GPUs
# (see `replicas: 4` in the device plugin config below).
# So each pod sees what looks like a smaller GPU with roughly 4 GB of memory.
# NOTE(review): time-slicing shares the whole physical GPU between pods; confirm whether
# the 4 GB figure is an enforced limit or only a per-pod budget to stay within.
#
devicePlugin: # Remove this if you don't want to use time-slicing
config:
create: true
name: "time-slicing-config"
default: "any"
data:
any: |-
version: v1
flags:
migStrategy: none
sharing:
timeSlicing:
resources:
- name: nvidia.com/gpu
replicas: 4

# Tolerate the sku=gpu and spot-priority taints
# (presumably the taints applied to the GPU node pool; verify against your cluster).
daemonsets:
tolerations:
- key: "sku"
operator: Equal
value: "gpu"
effect: NoSchedule
- key: "kubernetes.azure.com/scalesetpriority"
operator: Equal
value: "spot"
effect: NoSchedule

# The same two tolerations are repeated for the node-feature-discovery master and worker.
node-feature-discovery:
master:
tolerations:
- key: "sku"
operator: Equal
value: "gpu"
effect: NoSchedule
- key: "kubernetes.azure.com/scalesetpriority"
operator: Equal
value: "spot"
effect: NoSchedule
worker:
tolerations:
- key: "sku"
operator: Equal
value: "gpu"
effect: NoSchedule
- key: "kubernetes.azure.com/scalesetpriority"
operator: Equal
value: "spot"
effect: NoSchedule
6 changes: 3 additions & 3 deletions charts/kokoro-fastapi/templates/NOTES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
You can watch the status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "kokoro-fastapi.fullname" . }}'
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "kokoro-fastapi.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
echo http://$SERVICE_IP:{{ .Values.service.port }}
echo http://$SERVICE_IP:{{ .Values.kokoroTTS.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "kokoro-fastapi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
echo "Visit http://127.0.0.1:8080 to use your application"
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
echo "Visit http://127.0.0.1:8880 to use your application"
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8880:$CONTAINER_PORT
{{- end }}
89 changes: 25 additions & 64 deletions charts/kokoro-fastapi/templates/ingress.yaml
Original file line number Diff line number Diff line change
@@ -1,82 +1,43 @@
{{- if .Values.ingress.enabled -}}
{{- $fullName := include "kokoro-fastapi.fullname" . -}}
{{- $svcPort := .Values.service.port -}}
{{- $rewriteTargets := (list) -}}
{{- with .Values.ingress.host }}
{{- range .endpoints }}
{{- $serviceName := default $fullName .serviceName -}}
{{- $rewrite := .rewrite | default "none" -}}
{{- if not (has $rewrite $rewriteTargets ) -}}
{{- $rewriteTargets = append $rewriteTargets $rewrite -}}
{{- end -}}
{{- end}}
{{- end }}
{{- range $key := $rewriteTargets }}
{{- $expandedRewrite := regexReplaceAll "/(.*)$" $key "slash${1}" -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
{{- if eq $key "none" }}
name: {{ $fullName }}
{{- else }}
name: {{ $fullName }}-{{ $expandedRewrite }}
{{- end }}
name: {{ include "kokoro-fastapi.fullname" . }}
labels:
{{- include "kokoro-fastapi.labels" $ | nindent 4 }}
{{- if ne $key "none" }}
{{- include "kokoro-fastapi.labels" . | nindent 4 }}
{{- with .Values.ingress.annotations }}
annotations:
nginx.ingress.kubernetes.io/rewrite-target: {{ regexReplaceAll "/$" $key "" }}/$2
{{- end }}
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- if $.Values.ingress.tls }}
{{- with .Values.ingress.className }}
ingressClassName: {{ . }}
{{- end }}
{{- if .Values.ingress.tls }}
tls:
{{- range $.Values.ingress.tls }}
{{- range .Values.ingress.tls }}
- hosts:
{{- range .hosts }}
{{- range .hosts }}
- {{ . | quote }}
{{- end }}
{{- end }}
secretName: {{ .secretName }}
{{- end }}
{{- end }}
{{- end }}
rules:
{{- with $.Values.ingress.host }}
- host: {{ .name | quote }}
{{- range .Values.ingress.hosts }}
- host: {{ .host | quote }}
http:
paths:
{{- range .endpoints }}
{{- $serviceName := default $fullName .serviceName -}}
{{- $servicePort := default (print "http") .servicePort -}}
{{- if eq ( .rewrite | default "none" ) $key }}
{{- range .paths }}
{{- if not (contains "@" .) }}
{{- if eq $key "none" }}
- path: {{ . }}
{{- else }}
- path: {{ regexReplaceAll "(.*)/$" . "${1}" }}(/|$)(.*)
{{- end }}
pathType: Prefix
backend:
service:
name: "{{ $fullName }}-{{ $serviceName }}"
port:
number: {{ $servicePort }}
{{- else }}
{{- $path := . -}}
{{- $replicaCount := include "getServiceNameReplicaCount" (dict "global" $.Values "serviceName" $serviceName ) -}}
{{- range $count, $e := until ($replicaCount|int) }}
- path: {{ $path | replace "@" ( . | toString ) }}(/|$)(.*)
pathType: Prefix
backend:
service:
name: "{{ $fullName }}-{{ $serviceName }}-{{ . }}"
port:
number: {{ $servicePort }}
{{- end }}
{{- end }}
{{- range .paths }}
- path: {{ .path }}
{{- with .pathType }}
pathType: {{ . }}
{{- end }}
backend:
service:
name: {{ include "kokoro-fastapi.fullname" $ }}-kokoro-tts-service
port:
number: {{ $.Values.kokoroTTS.port }}
{{- end }}
{{- end }}
{{- end }}
---
{{- end }}
{{- end }}
{{- end }}
8 changes: 7 additions & 1 deletion charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ spec:
labels:
{{- include "kokoro-fastapi.selectorLabels" . | nindent 8 }}
spec:
{{- with .Values.images.imagePullSecrets }}
{{- with .Values.kokoroTTS.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
Expand Down Expand Up @@ -49,10 +49,16 @@ spec:
httpGet:
path: /health
port: kokoro-tts-http
initialDelaySeconds: 30
periodSeconds: 30
timeoutSeconds: 5
readinessProbe:
httpGet:
path: /health
port: kokoro-tts-http
initialDelaySeconds: 30
periodSeconds: 30
timeoutSeconds: 5
resources:
{{- toYaml .Values.kokoroTTS.resources | nindent 12 }}
volumeMounts: []
Expand Down
2 changes: 1 addition & 1 deletion charts/kokoro-fastapi/templates/tests/test-connection.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ spec:
- name: wget
image: busybox
command: ['wget']
args: ['{{ include "kokoro-fastapi.fullname" . }}:{{ .Values.service.port }}']
args: ['{{ include "kokoro-fastapi.fullname" . }}:{{ .Values.kokoroTTS.port }}']
restartPolicy: Never
67 changes: 24 additions & 43 deletions charts/kokoro-fastapi/values.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
# Default values for kokoro-fastapi.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

replicaCount: 1

images:
pullPolicy: "Always"
imagePullSecrets: [ ]
kokoroTTS:
replicaCount: 1
# The container image repository to pull the kokoro-fastapi image from
repository: "ghcr.io/remsky/kokoro-fastapi-gpu"
imagePullSecrets: [] # Set if using a private image or getting rate limited
tag: "latest"
pullPolicy: Always
port: 8880
resources:
limits:
nvidia.com/gpu: 1
requests:
nvidia.com/gpu: 1

nameOverride: ""
fullnameOverride: ""
Expand Down Expand Up @@ -38,47 +45,21 @@ service:

ingress:
enabled: false
className: ""
className: "nginx"
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
host:
name: kokoro.example.com
endpoints:
- paths:
- "/"
serviceName: "fastapi"
servicePort: 8880
# cert-manager.io/cluster-issuer: letsencrypt-prod
# external-dns.alpha.kubernetes.io/hostname: kokoro.example.com
# external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
hosts:
- host: kokoro.example.com
paths:
- path: /
pathType: Prefix

tls: []
# - secretName: chart-example-tls
# - secretName: kokoro-fastapi-tls
# hosts:
# - chart-example.local

kokoroTTS:
repository: "ghcr.io/remsky/kokoro-fastapi-gpu"
tag: "latest"
pullPolicy: Always
serviceName: "fastapi"
port: 8880
replicaCount: 1
resources:
limits:
nvidia.com/gpu: 1
requests:
nvidia.com/gpu: 1


# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
# - kokoro.example.com

autoscaling:
enabled: false
Expand Down
Loading