diff --git a/helm/.helmignore b/helm/.helmignore new file mode 100644 index 0000000..2b2f68b --- /dev/null +++ b/helm/.helmignore @@ -0,0 +1,28 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ +# ReadMEs +*.md +# Rendered templates +values-*.yaml +output-*.yaml \ No newline at end of file diff --git a/helm/Chart.yaml b/helm/Chart.yaml new file mode 100644 index 0000000..300e738 --- /dev/null +++ b/helm/Chart.yaml @@ -0,0 +1,24 @@ +apiVersion: v2 +name: llm-d-modelservice +description: A Helm chart for ModelService + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.0.1 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "0.0.1" diff --git a/helm/templates/_helpers.tpl b/helm/templates/_helpers.tpl new file mode 100644 index 0000000..92a620c --- /dev/null +++ b/helm/templates/_helpers.tpl @@ -0,0 +1,203 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "llm-d-modelservice.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "llm-d-modelservice.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "llm-d-modelservice.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "llm-d-modelservice.labels" -}} +helm.sh/chart: {{ include "llm-d-modelservice.chart" . 
}} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* Sanitized model name (DNS compliant) */}} +{{- define "llm-d-modelservice.sanitizedModelName" -}} + {{- $name := .Release.Name | lower | trim -}} + {{- $name = regexReplaceAll "[^a-z0-9_.-]" $name "-" -}} + {{- $name = regexReplaceAll "^[\\-._]+" $name "" -}} + {{- $name = regexReplaceAll "[\\-._]+$" $name "" -}} + {{- $name = regexReplaceAll "\\." $name "-" -}} + + {{- if gt (len $name) 63 -}} + {{- $name = substr 0 63 $name -}} + {{- end -}} + +{{- $name -}} +{{- end }} + +{{/* Common P/D labels */}} +{{- define "llm-d-modelservice.pdlabels" -}} +llm-d.ai/inferenceServing: "true" +llm-d.ai/model: {{ (include "llm-d-modelservice.sanitizedModelName" .) -}} +{{- end }} + +{{/* prefill labels */}} +{{- define "llm-d-modelservice.prefilllabels" -}} +{{ include "llm-d-modelservice.pdlabels" . }} +llm-d.ai/role: prefill +{{- end }} + +{{/* decode labels */}} +{{- define "llm-d-modelservice.decodelabels" -}} +{{ include "llm-d-modelservice.pdlabels" . }} +llm-d.ai/role: decode +{{- end }} + +{{/* affinity from acceleratorTypes */}} +{{- define "llm-d-modelservice.acceleratorTypes" -}} +affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: {{ .labelKey }} + operator: In + {{- with .labelValues }} + values: + {{- toYaml . | nindent 14 }} + {{- end }} +{{- end }} + +{{/* Routing proxy -- sidecar for decode pods */}} +{{- define "llm-d-modelservice.routingProxy" -}} +initContainers: + - name: routing-proxy + args: + - --port={{ default 8080 .servicePort }} + - --vllm-port={{ default 8200 .proxy.targetPort }} + - --connector=nixlv2 + - -v={{ default 5 .proxy.debugLevel }} + image: {{ .image }} + imagePullPolicy: Always + ports: + - containerPort: {{ default 8080 .servicePort }} + protocol: TCP + resources: {} + restartPolicy: Always + securityContext: + allowPrivilegeEscalation: false + runAsNonRoot: true +{{- end }} + +{{- define "llm-d-modelservice.parallelism" -}} +{{- $parallelism := dict "tensor" 1 "data" 1 -}} +{{- if and . .tensor }} +{{- $parallelism = mergeOverwrite $parallelism (dict "tensor" .tensor) -}} +{{- end }} +{{- if and . .data }} +{{- $parallelism = mergeOverwrite $parallelism (dict "data" .data) -}} +{{- end }} +{{- $parallelism | toYaml | nindent 0 }} +{{- end }} + +{{- define "llm-d-modelservice.resources" -}} +resources: + limits: + {{- $limits := dict -}} + {{- if and .resources .resources.limits -}} + {{- $limits = omit .resources.limits "nvidia.com/gpu" }} + {{- if gt (len $limits) 0 }} + {{- toYaml $limits | nindent 4 }} + {{- end }} + {{- end }} + nvidia.com/gpu: {{ .parallelism.tensor }} + requests: + {{- $requests := dict -}} + {{- if and .resources .resources.requests -}} + {{- $requests = omit .resources.requests "nvidia.com/gpu" }} + {{- end }} + {{- if gt (len $requests) 0 }} + {{- toYaml $requests | nindent 4 }} + {{- end }} + nvidia.com/gpu: {{ .parallelism.tensor }} +{{- end }} + +{{/* P/D service account name */}} +{{- define "llm-d-modelservice.pdServiceAccountName" -}} +{{ include "llm-d-modelservice.sanitizedModelName" . }}-sa +{{- end }} + +{{/* EPP service account name */}} +{{- define "llm-d-modelservice.eppServiceAccountName" -}} +{{ include "llm-d-modelservice.sanitizedModelName" . 
}}-epp-sa +{{- end }} + +{{/* +EPP selector labels +*/}} +{{- define "llm-d-modelservice.eppSelectorLabels" -}} +app.kubernetes.io/name: {{ include "llm-d-modelservice.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +llm-d.ai/epp: {{ include "llm-d-modelservice.fullname" . }}-epp +{{- end }} + +{{/* +Volumes for PD containers based on model artifact prefix +*/}} +{{- define "llm-d-modelservice.mountModelVolumeVolumes" -}} +{{- if eq .Values.modelArtifacts.prefix "hf" -}} +- name: model-storage + emptyDir: + sizeLimit: {{ default "0" .Values.modelArtifacts.size }} +{{- else if eq .Values.modelArtifacts.prefix "pvc" }} +- name: model-storage + persistentVolumeClaim: + claimName: {{ .Values.modelArtifacts.artifact }} + readOnly: true +{{- else if eq .Values.modelArtifacts.prefix "oci" }} +- name: model-storage + image: + reference: {{ .Values.modelArtifacts.artifact }} + pullPolicy: {{ default "Always" .Values.modelArtifacts.imagePullPolicy }} +{{- end }} +{{- end }} + +{{/* +VolumeMount for a PD container +Supplies model-storage mount if mountModelVolume: true for the container +*/}} +{{- define "llm-d-modelservice.mountModelVolumeVolumeMounts" -}} +{{- if or .volumeMounts .mountModelVolume }} +volumeMounts: +{{- end }} +{{- /* user supplied volume mount in values */}} +{{- with .volumeMounts }} + {{- toYaml . | nindent 8 }} +{{- end }} +{{- /* what we add if mounModelVolume is true */}} +{{- if .mountModelVolume }} + - name: model-storage + mountPath: /model-cache +{{- end }} +{{- end }} diff --git a/helm/templates/decode-deployment.yaml b/helm/templates/decode-deployment.yaml new file mode 100644 index 0000000..e1be745 --- /dev/null +++ b/helm/templates/decode-deployment.yaml @@ -0,0 +1,106 @@ +{{- $parallelism := (include "llm-d-modelservice.parallelism" .Values.decode.parallelism) | fromYaml -}} +{{- if .Values.decode }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "llm-d-modelservice.fullname" . }}-decode + labels: + {{- include "llm-d-modelservice.labels" . | nindent 4 }} +spec: + replicas: {{ default 1 .Values.decode.replicas }} + selector: + matchLabels: + {{- include "llm-d-modelservice.decodelabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "llm-d-modelservice.decodelabels" . | nindent 8 }} + spec: + {{- with .Values.decode.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "llm-d-modelservice.pdServiceAccountName" . }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.decode.acceleratorTypes }} + {{- include "llm-d-modelservice.acceleratorTypes" . | nindent 6 }} + {{- end }} + {{- /* initContainers */}} + {{- with .Values.decode.initContainers }} + initContainers: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- /* range $.Values.decode.containers */}} + {{- with .Values.decode.containers }} + containers: + {{- range . }} + - name: {{ default "vllm" .name }} + image: {{ required "image of container is required" .image }} + {{- with .securityContext }} + securityContext: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .imagePullPolicy }} + imagePullPolicy: {{ . }} + {{- end }} + {{- with .command }} + command: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .args }} + args: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- /* insert user's env for this container */}} + {{- if or .env .mountModelVolume }} + env: + {{- end }} + {{- with .env }} + {{- toYaml . 
| nindent 10 }} + {{- end }} + {{- /* insert envs based on what modelArtifact prefix */}} + {{- if .mountModelVolume }} + - name: HF_HOME + value: /model-cache + {{- with $.Values.modelArtifacts.authSecretName }} + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: {{ . }} + key: HF_TOKEN + {{- end }} + {{- end }} + {{- with .livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .resources }} + resources: + limits: + {{- if .limits -}} + {{- omit .limits "nvidia.com/gpu" | toYaml | nindent 12 }} + {{- end }} + {{- /* nvidia.com/gpu: "{{ $parallelism.tensor }}" */}} + requests: + {{- if .limits -}} + {{- omit .requests "nvidia.com/gpu" | toYaml | nindent 12 }} + {{- end }} + {{- /* nvidia.com/gpu: "{{ $parallelism.tensor }}" */}} + {{- end }} + {{- /* volumeMount */}} + {{- include "llm-d-modelservice.mountModelVolumeVolumeMounts" . | nindent 8 }} + {{- end }} + {{- end }} + volumes: + {{- with .Values.decode.volumes }} + {{- toYaml . | nindent 8 }} + {{- end -}} + {{- include "llm-d-modelservice.mountModelVolumeVolumes" . | nindent 8 }} +{{- end }} \ No newline at end of file diff --git a/helm/templates/decode-lws.yaml b/helm/templates/decode-lws.yaml new file mode 100644 index 0000000..9300c09 --- /dev/null +++ b/helm/templates/decode-lws.yaml @@ -0,0 +1,151 @@ +{{- $parallelism := (include "llm-d-modelservice.parallelism" .Values.decode.parallelism) | fromYaml -}} +{{- if and $parallelism.data (gt (int $parallelism.data) 1) }} +apiVersion: leaderworkerset.x-k8s.io/v1 +kind: LeaderWorkerSet +metadata: + name: {{ include "llm-d-modelservice.fullname" . }}-decode + labels: + {{- include "llm-d-modelservice.labels" . | nindent 4 }} + {{- include "llm-d-modelservice.decodelabels" . | nindent 4 }} +spec: + {{- if not .Values.decode.autoscaling.enabled }} + replicas: {{ default 1 .Values.decode.replicas }} + {{- end }} + leaderWorkerTemplate: + size: {{ .Values.decode.parallelism.data }} + leaderTemplate: + metadata: + labels: + {{- include "llm-d-modelservice.decodelabels" . | nindent 10 }} + spec: + {{- with .Values.routing }} + {{ (include "llm-d-modelservice.routingProxy" .) | nindent 8 }} + {{- end }} + + {{- with .Values.decode.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "llm-d-modelservice.pdServiceAccountName" . }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.decode.acceleratorTypes }} + {{- include "llm-d-modelservice.acceleratorTypes" . | nindent 8 }} + {{- end }} + {{- with .Values.decode.containers }} + containers: + {{- range . }} + - name: {{ default "vllm" .name }} + image: {{ required "image of container is required" .image }} + {{- with .securityContext }} + securityContext: + {{- toYaml . | nindent 14 }} + {{- end }} + {{- with .imagePullPolicy }} + imagePullPolicy: {{ . }} + {{- end }} + {{- with .command }} + command: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .args }} + args: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- /* insert user's env for this container */}} + {{- if or .env .mountModelVolume}} + env: + {{- end }} + {{- with .env }} + {{- toYaml . 
| nindent 12 }} + {{- end }} + {{- /* insert envs based on what modelArtifact prefix */}} + {{- if .mountModelVolume }} + - name: HF_HOME + value: /model-cache + {{- with $.Values.modelArtifacts.authSecretName }} + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: {{ . }} + key: HF_TOKEN + {{- end }} + {{- end }} + {{- with .livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + resources: + limits: + {{- if and .resources .resources.limits }} + {{- omit .resources.limits "nvidia.com/gpu" | toYaml | nindent 16 }} + {{- end }} + nvidia.com/gpu: {{ $parallelism.tensor }} + requests: + {{- if and .resources .resources.limits }} + {{- omit .resources.requests "nvidia.com/gpu" | toYaml | nindent 16 }} + {{- end }} + nvidia.com/gpu: {{ $parallelism.tensor }} + {{- /* volumeMount */}} + {{- if or .volumeMounts .mountModelVolume }} + volumeMounts: + {{- end -}} + {{- /* user supplied volume mount in values */}} + {{- with .volumeMounts }} + {{- toYaml . | nindent 14 }} + {{- end }} + {{- /* what we add if mounModelVolume is true */}} + {{- if .mountModelVolume }} + - name: model-storage + mountPath: /model-cache + {{- end }} + + {{- with .workingDir }} + workingDir: {{ . }} + {{- end }} + {{- with .stdin }} + stdin: {{ . }} + {{- end }} + {{- with .tty }} + tty: {{ . }} + {{- end }} + {{- end }} {{/* range . */}} + {{- end }} {{/* with .Values.decode.containers */}} + volumes: + {{- with .Values.decode.volumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if eq .Values.modelArtifacts.prefix "hf" }} + - name: model-storage + emptyDir: + sizeLimit: {{ default "0" .Values.modelArtifacts.size }} + {{- else if eq .Values.modelArtifacts.prefix "pvc" }} + - name: model-storage + persistentVolumeClaim: + claimName: {{ .Values.modelArtifacts.artifact }} + readOnly: true + {{- else if eq .Values.modelArtifacts.prefix "oci" }} + - name: model-storage + image: + reference: {{ .Values.modelArtifacts.artifact }} + {{- with .Values.modelArtifacts.imagePullPolicy }} + pullPolicy: {{ . }} + {{- end }} + {{- end }} + + workerTemplate: + metadata: + labels: + {{- include "llm-d-modelservice.decodelabels" . | nindent 10 }} + spec: + {{- with .Values.decode.acceleratorTypes }} + {{- (include "llm-d-modelservice.acceleratorTypes" .) | nindent 6 }} + {{- end }} + serviceAccountName: {{ (include "llm-d-modelservice.pdServiceAccountName" .)}} +{{- end }} {{/* if and $parallelism.data (gt $parallelism.data 1) */}} \ No newline at end of file diff --git a/helm/templates/epp-deployment-mk.yaml b/helm/templates/epp-deployment-mk.yaml new file mode 100644 index 0000000..fc3e1d2 --- /dev/null +++ b/helm/templates/epp-deployment-mk.yaml @@ -0,0 +1,91 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "llm-d-modelservice.fullname" . }}-epp + labels: + {{- include "llm-d-modelservice.labels" . | nindent 4 }} +spec: + {{- if not .Values.endpointPicker.autoscaling.enabled }} + replicas: {{ default 1 .Values.decode.replicas }} + {{- end }} + selector: + matchLabels: + {{- include "llm-d-modelservice.eppSelectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "llm-d-modelservice.labels" . | nindent 8 }} + spec: + {{- with .Values.endpointPicker.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "llm-d-modelservice.eppServiceAccountName" . 
}} + {{- with .Values.endpointPicker.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- range $.Values.endpointPicker.containers }} + containers: + - name: {{ .name }} + {{- with $.Values.endpointPicker.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + image: "{{ .image }}" + {{- with .imagePullPolicy }} + imagePullPolicy: {{ . }} + {{- end }} + {{- with .command }} + command: + {{- toYaml . | nindent 12 }} + {{- end }} + args: + - poolName + - POOLNAME + - poolNamespace + - {{ $.Release.Namespace }} + {{- with .args }} + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .env }} + env: + {{- toYaml . | nindent 12 }} + {{- end }} + ports: + - name: http2 + containerPort: {{ $.Values.endpointPicker.service.port }} + protocol: TCP + {{- with .livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .volumeMounts }} + volumeMounts: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- end }} {{/* range $.Values.endpointPicker.containers */}} + {{- with .Values.endpointPicker.volumes }} + volumes: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.endpointPicker.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.endpointPicker.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.endpointPicker.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/helm/templates/epp-deployment.yaml b/helm/templates/epp-deployment.yaml new file mode 100644 index 0000000..59c38f8 --- /dev/null +++ b/helm/templates/epp-deployment.yaml @@ -0,0 +1,103 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "llm-d-modelservice.fullname" . }}-epp + labels: + llm-d.ai/epp: {{ include "llm-d-modelservice.fullname" . }}-epp + namespace: {{ .Release.Namespace }} +spec: + replicas: 1 + selector: + matchLabels: + llm-d.ai/epp: {{ include "llm-d-modelservice.fullname" . }}-epp + template: + metadata: + labels: + llm-d.ai/epp: {{ include "llm-d-modelservice.fullname" . }}-epp + spec: + containers: + - name: epp + imagePullPolicy: Always + image: {{ default "ghcr.io/llm-d/llm-d-inference-scheduler:0.0.3" .Values.endpointPicker.image }} + args: + - --poolName + - {{ include "llm-d-modelservice.fullname" . 
}}-inference-pool + - --poolNamespace + - {{ .Release.Namespace }} + - -v + - "4" + - --zap-encoder + - json + - --grpcPort + - "9002" + - --grpcHealthPort + - "9003" + env: + - name: ENABLE_KVCACHE_AWARE_SCORER + value: "false" + - name: ENABLE_LOAD_AWARE_SCORER + value: "true" + - name: ENABLE_PREFIX_AWARE_SCORER + value: "true" + - name: ENABLE_SESSION_AWARE_SCORER + value: "false" + - name: KVCACHE_AWARE_SCORER_WEIGHT + value: "1" + - name: KVCACHE_INDEXER_REDIS_ADDR + - name: LOAD_AWARE_SCORER_WEIGHT + value: "1" + - name: PD_ENABLED + value: "false" + - name: PD_PROMPT_LEN_THRESHOLD + value: "10" + - name: PREFILL_ENABLE_KVCACHE_AWARE_SCORER + value: "false" + - name: PREFILL_ENABLE_LOAD_AWARE_SCORER + value: "false" + - name: PREFILL_ENABLE_PREFIX_AWARE_SCORER + value: "false" + - name: PREFILL_ENABLE_SESSION_AWARE_SCORER + value: "false" + - name: PREFILL_KVCACHE_AWARE_SCORER_WEIGHT + value: "1" + - name: PREFILL_KVCACHE_INDEXER_REDIS_ADDR + - name: PREFILL_LOAD_AWARE_SCORER_WEIGHT + value: "1" + - name: PREFILL_PREFIX_AWARE_SCORER_WEIGHT + value: "1" + - name: PREFILL_SESSION_AWARE_SCORER_WEIGHT + value: "1" + - name: PREFIX_AWARE_SCORER_WEIGHT + value: "2" + - name: SESSION_AWARE_SCORER_WEIGHT + value: "1" + ports: + - containerPort: 9002 + name: grpc + protocol: TCP + - containerPort: 9003 + name: grpc-health + protocol: TCP + - containerPort: 9090 + name: metrics + protocol: TCP + serviceAccount: {{ include "llm-d-modelservice.eppServiceAccountName" . }} + serviceAccountName: {{ include "llm-d-modelservice.eppServiceAccountName" . }} + readinessProbe: + grpc: + port: 9003 + service: envoy.service.ext_proc.v3.ExternalProcessor + initialDelaySeconds: 5 + timeoutSeconds: 1 + periodSeconds: 10 + successThreshold: 1 + failureThreshold: 3 + livenessProbe: + grpc: + port: 9003 + service: envoy.service.ext_proc.v3.ExternalProcessor + initialDelaySeconds: 5 + timeoutSeconds: 1 + periodSeconds: 10 + successThreshold: 1 + failureThreshold: 3 \ No newline at end of file diff --git a/helm/templates/epp-sa.yaml b/helm/templates/epp-sa.yaml new file mode 100644 index 0000000..05feab6 --- /dev/null +++ b/helm/templates/epp-sa.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "llm-d-modelservice.eppServiceAccountName" . }} + labels: + {{- include "llm-d-modelservice.labels" . | nindent 4 }} + {{- with .Values.eppServiceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ default true .Values.eppServiceAccount.automount }} +{{- end }} diff --git a/helm/templates/epp-service.yaml b/helm/templates/epp-service.yaml new file mode 100644 index 0000000..d13254e --- /dev/null +++ b/helm/templates/epp-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "llm-d-modelservice.fullname" . }}-epp + labels: + {{- include "llm-d-modelservice.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.endpointPicker.service.port }} + targetPort: {{ .Values.endpointPicker.service.targetPort }} + protocol: TCP + appProtocol: {{ .Values.endpointPicker.service.appProtocol }} + selector: + {{- include "llm-d-modelservice.eppSelectorLabels" . 
| nindent 4 }} diff --git a/helm/templates/examples/README.md b/helm/templates/examples/README.md new file mode 100644 index 0000000..1a53b66 --- /dev/null +++ b/helm/templates/examples/README.md @@ -0,0 +1,15 @@ +# Examples + +Contains example values file and their rendered templates. + +``` +cd helm +helm template [RELEASE-NAME] . -f [VALUES-FILEPATH] +``` + +1. `facebook/opt-125m`: downloads from Hugging Face + + ``` + cd helm + helm template facebook . -f templates/examples/values-facebook.yaml > templates/examples/output-facebook.yaml + ``` \ No newline at end of file diff --git a/helm/templates/examples/output-facebook.yaml b/helm/templates/examples/output-facebook.yaml new file mode 100644 index 0000000..18050d4 --- /dev/null +++ b/helm/templates/examples/output-facebook.yaml @@ -0,0 +1,382 @@ +--- +# Source: llm-d-modelservice/templates/epp-sa.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: facebook-epp-sa + labels: + helm.sh/chart: llm-d-modelservice-0.0.1 + app.kubernetes.io/version: "0.0.1" + app.kubernetes.io/managed-by: Helm +automountServiceAccountToken: true +--- +# Source: llm-d-modelservice/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: facebook-sa + labels: + helm.sh/chart: llm-d-modelservice-0.0.1 + app.kubernetes.io/version: "0.0.1" + app.kubernetes.io/managed-by: Helm +automountServiceAccountToken: true +--- +# Source: llm-d-modelservice/templates/epp-service.yaml +apiVersion: v1 +kind: Service +metadata: + name: facebook-llm-d-modelservice-epp + labels: + helm.sh/chart: llm-d-modelservice-0.0.1 + app.kubernetes.io/version: "0.0.1" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 9002 + targetPort: 9002 + protocol: TCP + appProtocol: http2 + selector: + app.kubernetes.io/name: llm-d-modelservice + app.kubernetes.io/instance: facebook + llm-d.ai/epp: facebook-llm-d-modelservice-epp +--- +# Source: llm-d-modelservice/templates/decode-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: facebook-llm-d-modelservice-decode + labels: + helm.sh/chart: llm-d-modelservice-0.0.1 + app.kubernetes.io/version: "0.0.1" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + llm-d.ai/inferenceServing: "true" + llm-d.ai/model: facebook + llm-d.ai/role: decode + template: + metadata: + labels: + llm-d.ai/inferenceServing: "true" + llm-d.ai/model: facebook + llm-d.ai/role: decode + spec: + serviceAccountName: facebook-sa + initContainers: + - args: + - --port=8000 + - --vllm-port=8200 + - --connector=nixlv2 + - -v=6 + image: ghcr.io/llm-d/llm-d-routing-sidecar:0.0.6 + imagePullPolicy: Always + name: routing-proxy + ports: + - containerPort: 8000 + protocol: TCP + restartPolicy: Always + securityContext: + allowPrivilegeEscalation: false + runAsNonRoot: true + containers: + - name: vllm + image: ghcr.io/llm-d/llm-d:0.0.8 + command: + - vllm + - serve + args: + - --port + - "8200" + - --enforce-eager + - --kv-transfer-config + - '{"kv_connector":"NixlConnector", "kv_role":"kv_both"}' + env: + - name: CUDA_VISIBLE_DEVICES + value: "0" + - name: UCX_TLS + value: cuda_ipc,cuda_copy,tcp + - name: HF_HOME + value: /model-cache + - name: VLLM_NIXL_SIDE_CHANNEL_HOST + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: VLLM_NIXL_SIDE_CHANNEL_PORT + value: "5557" + - name: VLLM_LOGGING_LEVEL + value: DEBUG + - name: HF_HOME + value: /model-cache + resources: + limits: + {} + requests: + cpu: "16" + memory: 16Gi + + volumeMounts: + - name: model-storage + 
mountPath: /model-cache + volumes: + - name: model-storage + emptyDir: + sizeLimit: 5Mi +--- +# Source: llm-d-modelservice/templates/epp-deployment-mk.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: facebook-llm-d-modelservice-epp + labels: + helm.sh/chart: llm-d-modelservice-0.0.1 + app.kubernetes.io/version: "0.0.1" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: llm-d-modelservice + app.kubernetes.io/instance: facebook + llm-d.ai/epp: facebook-llm-d-modelservice-epp + template: + metadata: + labels: + helm.sh/chart: llm-d-modelservice-0.0.1 + app.kubernetes.io/version: "0.0.1" + app.kubernetes.io/managed-by: Helm + spec: + serviceAccountName: facebook-epp-sa +--- +# Source: llm-d-modelservice/templates/epp-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: facebook-llm-d-modelservice-epp + labels: + llm-d.ai/epp: facebook-llm-d-modelservice-epp + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + llm-d.ai/epp: facebook-llm-d-modelservice-epp + template: + metadata: + labels: + llm-d.ai/epp: facebook-llm-d-modelservice-epp + spec: + containers: + - name: epp + imagePullPolicy: Always + image: ghcr.io/llm-d/llm-d-inference-scheduler:0.0.3 + args: + - --poolName + - facebook-llm-d-modelservice-inference-pool + - --poolNamespace + - default + - -v + - "4" + - --zap-encoder + - json + - --grpcPort + - "9002" + - --grpcHealthPort + - "9003" + env: + - name: ENABLE_KVCACHE_AWARE_SCORER + value: "false" + - name: ENABLE_LOAD_AWARE_SCORER + value: "true" + - name: ENABLE_PREFIX_AWARE_SCORER + value: "true" + - name: ENABLE_SESSION_AWARE_SCORER + value: "false" + - name: KVCACHE_AWARE_SCORER_WEIGHT + value: "1" + - name: KVCACHE_INDEXER_REDIS_ADDR + - name: LOAD_AWARE_SCORER_WEIGHT + value: "1" + - name: PD_ENABLED + value: "false" + - name: PD_PROMPT_LEN_THRESHOLD + value: "10" + - name: PREFILL_ENABLE_KVCACHE_AWARE_SCORER + value: "false" + - name: PREFILL_ENABLE_LOAD_AWARE_SCORER + value: "false" + - name: PREFILL_ENABLE_PREFIX_AWARE_SCORER + value: "false" + - name: PREFILL_ENABLE_SESSION_AWARE_SCORER + value: "false" + - name: PREFILL_KVCACHE_AWARE_SCORER_WEIGHT + value: "1" + - name: PREFILL_KVCACHE_INDEXER_REDIS_ADDR + - name: PREFILL_LOAD_AWARE_SCORER_WEIGHT + value: "1" + - name: PREFILL_PREFIX_AWARE_SCORER_WEIGHT + value: "1" + - name: PREFILL_SESSION_AWARE_SCORER_WEIGHT + value: "1" + - name: PREFIX_AWARE_SCORER_WEIGHT + value: "2" + - name: SESSION_AWARE_SCORER_WEIGHT + value: "1" + ports: + - containerPort: 9002 + name: grpc + protocol: TCP + - containerPort: 9003 + name: grpc-health + protocol: TCP + - containerPort: 9090 + name: metrics + protocol: TCP + serviceAccount: facebook-epp-sa + serviceAccountName: facebook-epp-sa + readinessProbe: + grpc: + port: 9003 + service: envoy.service.ext_proc.v3.ExternalProcessor + initialDelaySeconds: 5 + timeoutSeconds: 1 + periodSeconds: 10 + successThreshold: 1 + failureThreshold: 3 + livenessProbe: + grpc: + port: 9003 + service: envoy.service.ext_proc.v3.ExternalProcessor + initialDelaySeconds: 5 + timeoutSeconds: 1 + periodSeconds: 10 + successThreshold: 1 + failureThreshold: 3 +--- +# Source: llm-d-modelservice/templates/prefill-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: facebook-llm-d-modelservice-prefill + labels: + helm.sh/chart: llm-d-modelservice-0.0.1 + app.kubernetes.io/version: "0.0.1" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + 
llm-d.ai/inferenceServing: "true" + llm-d.ai/model: facebook + llm-d.ai/role: prefill + template: + metadata: + labels: + llm-d.ai/inferenceServing: "true" + llm-d.ai/model: facebook + llm-d.ai/role: prefill + spec: + serviceAccountName: facebook-sa + containers: + - name: vllm + image: ghcr.io/llm-d/llm-d:0.0.8 + command: + - vllm + - serve + args: + - --port + - "8000" + - --enforce-eager + - --kv-transfer-config + - '{"kv_connector":"NixlConnector", "kv_role":"kv_both"}' + env: + - name: CUDA_VISIBLE_DEVICES + value: "0" + - name: UCX_TLS + value: cuda_ipc,cuda_copy,tcp + - name: VLLM_NIXL_SIDE_CHANNEL_PORT + value: "5557" + - name: VLLM_NIXL_SIDE_CHANNEL_HOST + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: VLLM_LOGGING_LEVEL + value: DEBUG + resources: + limits: + {} + requests: + cpu: "16" + memory: 16Gi + + volumes: + - name: model-storage + emptyDir: + sizeLimit: 5Mi +--- +# Source: llm-d-modelservice/templates/routing.yaml +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: facebook-llm-d-modelservice-http-route + namespace: default + labels: + helm.sh/chart: llm-d-modelservice-0.0.1 + app.kubernetes.io/version: "0.0.1" + app.kubernetes.io/managed-by: Helm +spec: + parentRefs: + - group: gateway.networking.k8s.io + kind: Istio + name: inference-gateway + rules: + - backendRefs: + - group: inference.networking.x-k8s.io + kind: InferencePool + name: facebook-llm-d-modelservice-inference-pool + port: 8000 + weight: 1 + matches: + - headers: + - name: x-model-name + type: Exact + value: facebook/opt-125m + path: + type: PathPrefix + value: / +--- +# Source: llm-d-modelservice/templates/routing.yaml +apiVersion: inference.networking.x-k8s.io/v1alpha2 +kind: InferenceModel +metadata: + name: facebook-llm-d-modelservice-inference-model + namespace: default + labels: + llm-d.ai/inferenceServing: "true" + llm-d.ai/model: facebook +spec: + modelName: facebook/opt-125m + poolRef: + group: inference.networking.x-k8s.io + kind: InferencePool + name: facebook-llm-d-modelservice-inference-pool +--- +# Source: llm-d-modelservice/templates/routing.yaml +apiVersion: inference.networking.x-k8s.io/v1alpha2 +kind: InferencePool +metadata: + name: facebook-llm-d-modelservice-inference-pool + namespace: default +spec: + extensionRef: + failureMode: FailClose + group: "" + kind: Service + name: facebook-llm-d-modelservice-epp-service + selector: + llm-d.ai/inferenceServing: "true" + llm-d.ai/model: facebook + targetPortNumber: 8000 diff --git a/helm/templates/examples/values-facebook.yaml b/helm/templates/examples/values-facebook.yaml new file mode 100644 index 0000000..ed6f437 --- /dev/null +++ b/helm/templates/examples/values-facebook.yaml @@ -0,0 +1,152 @@ +# This values.yaml file creates the resources for facebook/opt-125m + +lws: false # If true, creates LWS instead of deployments +inferencePool: true +inferenceModel: true +httpRoute: true + +routing: + # This is the model name for the OpenAI request + modelName: facebook/opt-125m + ports: + servicePort: 8000 # Sidecar listens on this port for requests. 
If there's no sidecar, the request goes here + internalPort: 8200 # Sidecar forwards request to vllm container on this port + proxy: + targetPort: 8000 + parentRefs: + - group: gateway.networking.k8s.io + kind: Istio + name: inference-gateway + +modelArtifacts: + prefix: "hf" + artifact: facebook/opt-125m + size: 5Mi + +# describe decode pods +decode: + enableService: false + replicas: 1 + # parallelism: + # tensor: 3 + # data: 2 + # dataLocal: 1 + initContainers: + - name: routing-proxy + image: ghcr.io/llm-d/llm-d-routing-sidecar:0.0.6 + imagePullPolicy: Always + securityContext: + allowPrivilegeEscalation: false + runAsNonRoot: true + args: + - "--port=8000" # servicePort + - "--vllm-port=8200" # internalPort + - "--connector=nixlv2" + - "-v=6" + ports: + - containerPort: 8000 # servicePort + protocol: TCP + restartPolicy: Always + containers: + - name: "vllm" + image: "ghcr.io/llm-d/llm-d:0.0.8" + command: + - vllm + - serve + args: + - "--port" + - "8200" # internalPort + - "--enforce-eager" + - "--kv-transfer-config" + - '{"kv_connector":"NixlConnector", "kv_role":"kv_both"}' + env: + - name: CUDA_VISIBLE_DEVICES + value: "0" + - name: UCX_TLS + value: "cuda_ipc,cuda_copy,tcp" + - name: HF_HOME + value: /model-cache + - name: VLLM_NIXL_SIDE_CHANNEL_HOST + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: VLLM_NIXL_SIDE_CHANNEL_PORT + value: "5557" + - name: VLLM_LOGGING_LEVEL + value: DEBUG + ports: + - containerPort: 5557 + protocol: TCP + resources: + limits: + nvidia.com/gpu: "1" + requests: + cpu: "16" + memory: 16Gi + nvidia.com/gpu: "1" + mountModelVolume: true + +# describe the prefill pods (looks the same as above) +prefill: + replicas: 1 + containers: + - name: "vllm" + image: "ghcr.io/llm-d/llm-d:0.0.8" + command: + - vllm + - serve + args: + - "--port" + - "8000" # servicePort + - "--enforce-eager" + - "--kv-transfer-config" + - '{"kv_connector":"NixlConnector", "kv_role":"kv_both"}' + env: + - name: CUDA_VISIBLE_DEVICES + value: "0" + - name: UCX_TLS + value: "cuda_ipc,cuda_copy,tcp" + - name: VLLM_NIXL_SIDE_CHANNEL_PORT + value: "5557" + - name: VLLM_NIXL_SIDE_CHANNEL_HOST + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: VLLM_LOGGING_LEVEL + value: DEBUG + ports: + - containerPort: 8000 + protocol: TCP + - containerPort: 5557 + protocol: TCP + resources: + limits: + nvidia.com/gpu: 1 + requests: + cpu: "16" + memory: 16Gi + nvidia.com/gpu: 1 + +endpointPicker: + # This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/ + service: + # This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types + type: ClusterIP + # This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports + port: 9002 + targetPort: 9002 + appProtocol: http2 + + autoscaling: + enabled: false + replicas: 1 + +# This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/ +serviceAccount: + # Specifies whether a service account should be created + create: true + +# This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/ +eppServiceAccount: + # Specifies whether a service account should be created + create: true diff --git a/helm/templates/prefill-deployment.yaml 
b/helm/templates/prefill-deployment.yaml new file mode 100644 index 0000000..45f38fe --- /dev/null +++ b/helm/templates/prefill-deployment.yaml @@ -0,0 +1,106 @@ +{{- $parallelism := (include "llm-d-modelservice.parallelism" .Values.prefill.parallelism) | fromYaml -}} +{{- if .Values.prefill }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "llm-d-modelservice.fullname" . }}-prefill + labels: + {{- include "llm-d-modelservice.labels" . | nindent 4 }} +spec: + replicas: {{ default 1 .Values.prefill.replicas }} + selector: + matchLabels: + {{- include "llm-d-modelservice.prefilllabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "llm-d-modelservice.prefilllabels" . | nindent 8 }} + spec: + {{- with .Values.prefill.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "llm-d-modelservice.pdServiceAccountName" . }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prefill.acceleratorTypes }} + {{- include "llm-d-modelservice.acceleratorTypes" . | nindent 6 }} + {{- end }} + {{- /* initContainers */}} + {{- with .Values.prefill.initContainers }} + initContainers: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- /* range $.Values.prefill.containers */}} + {{- with .Values.prefill.containers }} + containers: + {{- range . }} + - name: {{ default "vllm" .name }} + image: {{ required "image of container is required" .image }} + {{- with .securityContext }} + securityContext: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .imagePullPolicy }} + imagePullPolicy: {{ . }} + {{- end }} + {{- with .command }} + command: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .args }} + args: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- /* insert user's env for this container */}} + {{- if or .env .mountModelVolume }} + env: + {{- end }} + {{- with .env }} + {{- toYaml . | nindent 10 }} + {{- end }} + {{- /* insert envs based on what modelArtifact prefix */}} + {{- if .mountModelVolume }} + - name: HF_HOME + value: /model-cache + {{- with $.Values.modelArtifacts.authSecretName }} + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: {{ . }} + key: HF_TOKEN + {{- end }} + {{- end }} + {{- with .livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .resources }} + resources: + limits: + {{- if .limits -}} + {{- omit .limits "nvidia.com/gpu" | toYaml | nindent 12 }} + {{- end }} + {{- /* nvidia.com/gpu: "{{ $parallelism.tensor }}" */}} + requests: + {{- if .limits -}} + {{- omit .requests "nvidia.com/gpu" | toYaml | nindent 12 }} + {{- end }} + {{- /* nvidia.com/gpu: "{{ $parallelism.tensor }}" */}} + {{- end }} + {{- /* volumeMount */}} + {{- include "llm-d-modelservice.mountModelVolumeVolumeMounts" . | nindent 8 }} + {{- end }} + {{- end }} + volumes: + {{- with .Values.prefill.volumes }} + {{- toYaml . | nindent 8 }} + {{- end -}} + {{- include "llm-d-modelservice.mountModelVolumeVolumes" . 
| nindent 8 }} +{{- end }} \ No newline at end of file diff --git a/helm/templates/routing.yaml b/helm/templates/routing.yaml new file mode 100644 index 0000000..6459d1d --- /dev/null +++ b/helm/templates/routing.yaml @@ -0,0 +1,63 @@ +{{- /* Routing templates: InferencePool, InferenceModel, and HttpRoute */}} +{{- if .Values.inferencePool }} +apiVersion: inference.networking.x-k8s.io/v1alpha2 +kind: InferencePool +metadata: + name: {{ include "llm-d-modelservice.fullname" . }}-inference-pool + namespace: {{ .Release.Namespace }} +spec: + extensionRef: + failureMode: FailClose + group: "" + kind: Service + name: {{ include "llm-d-modelservice.fullname" . }}-epp-service + selector: + {{- include "llm-d-modelservice.pdlabels" . | nindent 4 }} + targetPortNumber: 8000 +{{- end }} +--- +{{- if .Values.inferenceModel }} +apiVersion: inference.networking.x-k8s.io/v1alpha2 +kind: InferenceModel +metadata: + name: {{ include "llm-d-modelservice.fullname" . }}-inference-model + namespace: {{ .Release.Namespace }} + labels: + {{- include "llm-d-modelservice.pdlabels" . | nindent 4 }} +spec: + modelName: {{ .Values.routing.modelName }} + poolRef: + group: inference.networking.x-k8s.io + kind: InferencePool + name: {{ include "llm-d-modelservice.fullname" . }}-inference-pool +{{- end }} +--- +{{- if .Values.httpRoute }} +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: {{ include "llm-d-modelservice.fullname" . }}-http-route + namespace: {{ .Release.Namespace }} + labels: + {{- include "llm-d-modelservice.labels" . | nindent 4 }} +spec: + {{- with .Values.routing.parentRefs }} + parentRefs: + {{- . | toYaml | nindent 2}} + {{- end }} + rules: + - backendRefs: + - group: inference.networking.x-k8s.io + kind: InferencePool + name: {{ include "llm-d-modelservice.fullname" . }}-inference-pool + port: {{ .Values.routing.ports.servicePort }} + weight: 1 + matches: + - headers: + - name: x-model-name + type: Exact + value: {{ .Values.routing.modelName }} + path: + type: PathPrefix + value: / +{{- end }} \ No newline at end of file diff --git a/helm/templates/serviceaccount.yaml b/helm/templates/serviceaccount.yaml new file mode 100644 index 0000000..32174c4 --- /dev/null +++ b/helm/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "llm-d-modelservice.pdServiceAccountName" . }} + labels: + {{- include "llm-d-modelservice.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ default true .Values.serviceAccount.automount }} +{{- end }} diff --git a/helm/values-msvc-mk.yaml b/helm/values-msvc-mk.yaml new file mode 100644 index 0000000..7859b66 --- /dev/null +++ b/helm/values-msvc-mk.yaml @@ -0,0 +1,291 @@ +# TODO +# decoupleScaling: false + +lws: false # If true, creates LWS instead of deployments +inferencePool: true +inferenceModel: true +httpRoute: true + +routing: + # This is the model name for the OpenAI request + modelName: deepsk-ai-deepsk-coder-v1-lite-instruct + servicePort: 8080 # Sidecar listens on this port for requests. 
If there's no sidecar, the request goes here + proxy: + image: ghcr.io/llm-d/llm-d-routing-sidecar:0.0.6 + targetPort: 8200 + debugLevel: 5 + +modelArtifacts: + # When specfying the URI with `hf` prefix, the / string + # is extracted and exposed as a template variable that can be used as {{ .HFModelName }} + + # uri: hf://facebook/opt-125m + prefix: "hf" + artficat: deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct + authSecretName: "hf-secret" + size: 5Mi + +# describe decode pods +decode: + autoscaling: + enabled: false + replicas: 1 + + # for LWS + parallelism: + tensor: 3 + data: 2 + dataLocal: 1 + + acceleratorTypes: + labelKey: nvidia.com/gpu.product + labelValues: + # According to the blog, Scout requires H100s + - NVIDIA-H100 + - NVIDIA-H200 + # initContainers: + containers: + - name: vllm-worker + image: "quay.io/tms/vllm-dev-base:0.0.15" + imagePullPolicy: Always + workingDir: /app + stdin: true + tty: true + command: ["/bin/sh","-c"] + args: + - | + # Squash a warning. + rm /etc/libibverbs.d/vmw_pvrdma.driver + ################# + # Install vLLM + ################# + /init-scripts/init-vllm.sh + ################# + # RUN vLLM + ################# + START_RANK=$(( ${LWS_WORKER_INDEX:-0} * DP_SIZE_LOCAL )) + if [ "${LWS_WORKER_INDEX:-0}" -eq 0 ]; then + ################# + # Leader-only launch + ################# + exec /app/venv/bin/vllm serve \ + {{ .HFModelName }} \ + --port {{ "internal_port" | getPort }} \ + --disable-log-requests \ + --enable-expert-parallel \ + --tensor-parallel-size $TP_SIZE \ + --data-parallel-size $DP_SIZE \ + --data-parallel-size-local $DP_SIZE_LOCAL \ + --data-parallel-address $(LWS_LEADER_ADDRESS) \ + --data-parallel-rpc-port 5555 \ + --data-parallel-start-rank $START_RANK \ + --trust-remote-code \ + --kv-transfer-config \ + '{"kv_connector":"NixlConnector","kv_role":"kv_both"}' \ + --enforce-eager + else + ################# + # Worker-only launch + ################# + exec /app/venv/bin/vllm serve \ + {{ .HFModelName }} \ + --port {{ "internal_port" | getPort }} \ + --disable-log-requests \ + --enable-expert-parallel \ + --tensor-parallel-size $TP_SIZE \ + --data-parallel-size $DP_SIZE \ + --data-parallel-size-local $DP_SIZE_LOCAL \ + --data-parallel-address $(LWS_LEADER_ADDRESS) \ + --data-parallel-rpc-port 5555 \ + --data-parallel-start-rank $START_RANK \ + --trust-remote-code \ + --kv-transfer-config \ + '{"kv_connector":"NixlConnector","kv_role":"kv_both"}' \ + --enforce-eager \ + --headless + fi + env: + - name: DP_SIZE + value: "{{ .DecodeDataParallelism }}" + - name: TP_SIZE + value: "{{ .DecodeTensorParallelism }}" + - name: DP_SIZE_LOCAL + value: "1" + - name: VLLM_REPO_URL + value: "https://github.com/vllm-project/vllm.git" + - name: VLLM_BRANCH + value: "main" + - name: VLLM_ALL2ALL_BACKEND +# value: "naive" + value: "pplx" +# value: "deepep_high_throughput" +# value: "deepep_low_latency" +# + # Needed for GDRCOPY to be used. 
+ # See: https://github.com/NVIDIA/nvidia-container-toolkit/releases/tag/v1.15.0 + - name: NVIDIA_GDRCOPY + value: "enabled" +# - name: NVIDIA_NVSWITCH +# value: "enabled" +# - name: NVIDIA_GDS +# value: "enabled" + + # NVIDIA_MOFED is likely needed for using IBGDA but causes crashes +# - name: NVIDIA_MOFED +# value: "enabled" +# + - name: NCCL_DEBUG + value: "INFO" + - name: NVSHMEM_DEBUG + value: "TRACE" + - name: NVSHMEM_DEBUG_SUBSYS + value: "TRANSPORT,INIT,MEM,COLL,BOOTSTRAP" + - name: NVSHMEM_REMOTE_TRANSPORT + value: "ibrc" + - name: NVSHMEM_IB_ENABLE_IBGDA + value: "true" + - name: NVSHMEM_ENABLE_NIC_PE_MAPPING + value: "true" + - name: NVSHMEM_HCA_LIST + value: "ibp0:1,ibp1:1,ibp2:1,ibp3:1,ibp4:1,ibp5:1,ibp6:1,ibp7:1" + - name: NVSHMEM_BOOTSTRAP_UID_SOCK_IFNAME + value: "eth0" + - name: GLOO_SOCKET_IFNAME + value: "eth0" + - name: NCCL_SOCKET_IFNAME + value: "eth0" + - name: NCCL_IB_HCA + value: "ibp" + - name: VLLM_LOGGING_LEVEL + value: "DEBUG" + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-secret + key: HF_TOKEN + optional: true + - name: GH_TOKEN_FROM_SECRET + valueFrom: + secretKeyRef: + name: gh-token-secret + key: GH_TOKEN + optional: true + - name: VLLM_NIXL_SIDE_CHANNEL_PORT + value: "6555" + - name: VLLM_NIXL_SIDE_CHANNEL_HOST + valueFrom: + fieldRef: + fieldPath: status.podIP + + securityContext: + capabilities: + add: [ "IPC_LOCK" ] + resources: + limits: + nvidia.com/gpu: "{{ .DecodeTensorParallelism }}" + memory: 64Gi + ephemeral-storage: 256Gi + rdma/ib: 1 + requests: + cpu: 8 + memory: 64Gi + ephemeral-storage: 256Gi + nvidia.com/gpu: "{{ .DecodeTensorParallelism }}" + rdma/ib: 1 + volumeMounts: + - mountPath: /dev/shm + name: dshm + - name: init-scripts-volume + mountPath: /init-scripts + mountModelVolume: true + +# describe the prefill pods (looks the same as above) +prefill: + replicas: 1 + containers: + - name: "vllm" + args: + - "HFModelName" + +endpointPicker: + # This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/ + service: + # This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types + type: ClusterIP + # This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports + port: 9002 + targetPort: 9002 + appProtocol: http2 + + # enableService: true + + autoscaling: + enabled: false + replicas: 1 + + containers: + - name: "epp" + image: "ghcr.io/llm-d/llm-d-inference-scheduler:0.0.3" + # command: + args: + # - -poolName + # - InferencePoolName + # - -poolNamespace + # - llmd-kalantar + - -v + - "5" + - --zap-encoder + - json + - -grpcPort + - "9002" + - -grpcHealthPort + - "9003" + env: + - name: PD_ENABLED + value: "true" + - name: PD_PROMPT_LEN_THRESHOLD + value: "10" + ports: + - containerPort: 9002 + protocol: TCP + - containerPort: 9003 + protocol: TCP + - containerPort: 9090 + name: metrics + protocol: TCP + livenessProbe: + failureThreshold: 3 + grpc: + port: 9003 + service: envoy.service.ext_proc.v3.ExternalProcessor + initialDelaySeconds: 5 + periodSeconds: 10 + readinessProbe: + failureThreshold: 3 + grpc: + port: 9003 + service: envoy.service.ext_proc.v3.ExternalProcessor + initialDelaySeconds: 5 + periodSeconds: 10 + + + + +# This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/ 
+serviceAccount: + # Specifies whether a service account should be created + create: true + # Automatically mount a ServiceAccount's API credentials? + automount: true + # Annotations to add to the service account + annotations: {} + +# This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/ +eppServiceAccount: + # Specifies whether a service account should be created + create: true + # Automatically mount a ServiceAccount's API credentials? + automount: true + # Annotations to add to the service account + annotations: {} + diff --git a/helm/values-msvc.yaml b/helm/values-msvc.yaml new file mode 100644 index 0000000..642d8a9 --- /dev/null +++ b/helm/values-msvc.yaml @@ -0,0 +1,172 @@ +# TODO +# decoupleScaling: false + +lws: false # If true, creates LWS instead of deployments +inferencePool: true +inferenceModel: true +httpRoute: true + +routing: + # This is the model name for the OpenAI request + modelName: facebook/opt-125m + ports: + servicePort: 8000 # Sidecar listens on this port for requests. If there's no sidecar, the request goes here + internalPort: 8200 # Sidecar forwards request to vllm container on this port + proxy: + targetPort: 8000 + parentRefs: + - group: gateway.networking.k8s.io + kind: Gateway + name: inference-gateway-kgateway + +modelArtifacts: + # When specfying the URI with `hf` prefix, the / string + # is extracted and exposed as a template variable that can be used as {{ .HFModelName }} + prefix: "oci" + artifact: facebook/opt-125m + authSecretName: "hf-secret" + size: 5Mi + imagePullPolicy: IfNotPresent + +# describe decode pods +# decode: +# enableService: false +# replicas: 1 + +# # for LWS +# parallelism: +# tensor: 8 +# data: 1 +# dataLocal: 1 + +# acceleratorTypes: +# labelKey: nvidia.com/gpu.product +# labelValues: +# # According to the blog, Scout requires H100s +# - NVIDIA-H100 +# # initContainers: +# containers: +# - name: "vllm" +# image: "vllm-ai/vllm:latest" +# args: +# - "HFModelName" +# env: +# - name: "VLLM_LOG_LEVEL" +# value: "DEBUG" # Set to DEBUG for more detailed logs, or INFO for less verbose logs +# envFrom: +# - configMapRef: +# name: vllm-config +# resources: +# requests: +# cpu: "1" # Request 1 CPU core +# memory: "4Gi" # Request 4 GiB of memory +# limits: +# cpu: "2" # Limit to 2 CPU cores +# memory: "8Gi" # Limit to 8 GiB of memory +# mountModelVolume: true + +# describe the prefill pods (looks the same as above) +prefill: + replicas: 1 + containers: + - name: "vllm" + image: "vllm-ai/vllm:latest" + args: + - "HFModelName" + env: + - name: ok + value: ok + mountModelVolume: true + - name: "v2" + image: "vllm-ai/vllm:latest" + volumeMounts: + - name: whatever + mountPath: something + volumes: + - name: ok + emptyDir: + sizeLimit: 5Gi + - name: ok2 + emptyDir: + sizeLimit: 5Gi + +endpointPicker: + # This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/ + service: + # This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types + type: ClusterIP + # This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports + port: 9002 + targetPort: 9002 + appProtocol: http2 + + # enableService: true + + autoscaling: + enabled: false + replicas: 1 + + containers: + - name: "epp" + image: 
"ghcr.io/llm-d/llm-d-inference-scheduler:0.0.3" + # command: + args: + # - -poolName + # - InferencePoolName + # - -poolNamespace + # - llmd-kalantar + - -v + - "5" + - --zap-encoder + - json + - -grpcPort + - "9002" + - -grpcHealthPort + - "9003" + env: + - name: PD_ENABLED + value: "true" + - name: PD_PROMPT_LEN_THRESHOLD + value: "10" + ports: + - containerPort: 9002 + protocol: TCP + - containerPort: 9003 + protocol: TCP + - containerPort: 9090 + name: metrics + protocol: TCP + livenessProbe: + failureThreshold: 3 + grpc: + port: 9003 + service: envoy.service.ext_proc.v3.ExternalProcessor + initialDelaySeconds: 5 + periodSeconds: 10 + readinessProbe: + failureThreshold: 3 + grpc: + port: 9003 + service: envoy.service.ext_proc.v3.ExternalProcessor + initialDelaySeconds: 5 + periodSeconds: 10 + + +# This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/ +serviceAccount: + # Specifies whether a service account should be created + create: true + # Automatically mount a ServiceAccount's API credentials? + automount: true + # Annotations to add to the service account + annotations: {} + +# This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/ +eppServiceAccount: + # Specifies whether a service account should be created + create: true + # Automatically mount a ServiceAccount's API credentials? + automount: true + # Annotations to add to the service account + annotations: {} + diff --git a/helm/values.yaml b/helm/values.yaml new file mode 100644 index 0000000..3e5575d --- /dev/null +++ b/helm/values.yaml @@ -0,0 +1,123 @@ +# Default values for llm-d-modelservice. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/ +replicaCount: 1 + +# This sets the container image more information can be found here: https://kubernetes.io/docs/concepts/containers/images/ +image: + repository: nginx + # This sets the pull policy for images. + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "" + +# This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ +imagePullSecrets: [] +# This is to override the chart name. +nameOverride: "" +fullnameOverride: "" + +# This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/ +serviceAccount: + # Specifies whether a service account should be created + create: true + # Automatically mount a ServiceAccount's API credentials? + automount: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "" + +# This is for setting Kubernetes Annotations to a Pod. +# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ +podAnnotations: {} +# This is for setting Kubernetes Labels to a Pod. 
+# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ +podLabels: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + +# This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/ +service: + # This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types + type: ClusterIP + # This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports + port: 80 + +# This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/ +ingress: + enabled: false + className: "" + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + - host: chart-example.local + paths: + - path: / + pathType: ImplementationSpecific + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/ +livenessProbe: + httpGet: + path: / + port: http +readinessProbe: + httpGet: + path: / + port: http + +# This section is for setting up autoscaling more information can be found here: https://kubernetes.io/docs/concepts/workloads/autoscaling/ +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 100 + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +# Additional volumes on the output Deployment definition. +volumes: [] +# - name: foo +# secret: +# secretName: mysecret +# optional: false + +# Additional volumeMounts on the output Deployment definition. +volumeMounts: [] +# - name: foo +# mountPath: "/etc/foo" +# readOnly: true + +nodeSelector: {} + +tolerations: [] + +affinity: {}
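
A few usage sketches follow; they are illustrative only, with field names taken from the templates and values files above. The `llm-d-modelservice.mountModelVolumeVolumes` helper picks the `model-storage` volume source from `modelArtifacts.prefix`, and containers that set `mountModelVolume: true` are given `HF_HOME=/model-cache` (plus `HF_TOKEN` when `authSecretName` is set). A minimal values fragment for each supported prefix (the PVC claim name and OCI reference below are placeholders) might look like:

```
# prefix "hf": emptyDir cache sized by modelArtifacts.size
modelArtifacts:
  prefix: "hf"
  artifact: facebook/opt-125m
  size: 5Mi
  authSecretName: hf-secret     # optional; source of HF_TOKEN

# prefix "pvc": mounts an existing PersistentVolumeClaim read-only
# modelArtifacts:
#   prefix: "pvc"
#   artifact: my-model-pvc      # placeholder claim name

# prefix "oci": mounts an OCI image volume
# modelArtifacts:
#   prefix: "oci"
#   artifact: registry.example.com/models/opt-125m:latest   # placeholder reference
#   imagePullPolicy: IfNotPresent                            # defaults to Always
```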
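
When `modelArtifacts.authSecretName` is set, the prefill and decode templates read `HF_TOKEN` from a Secret of that name with key `HF_TOKEN`. A plain sketch of the Secret the chart expects (name and token value are placeholders):

```
apiVersion: v1
kind: Secret
metadata:
  name: hf-secret
type: Opaque
stringData:
  HF_TOKEN: <huggingface-access-token>
```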
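
`decode-lws.yaml` renders a LeaderWorkerSet only when `decode.parallelism.data` is greater than 1, with `leaderWorkerTemplate.size` taken from that value; the template also reads `decode.autoscaling.enabled` and `routing` (note that the `routingProxy` helper reads `routing.image`, while `values-msvc-mk.yaml` sets `routing.proxy.image`, so the field placement may need checking). A rough, unverified values fragment for that path, trimmed from `values-msvc-mk.yaml`:

```
routing:
  servicePort: 8080
  image: ghcr.io/llm-d/llm-d-routing-sidecar:0.0.6   # where the routingProxy helper currently looks
  proxy:
    targetPort: 8200
    debugLevel: 5

decode:
  autoscaling:
    enabled: false
  replicas: 1
  parallelism:
    tensor: 3
    data: 2        # > 1 selects the LeaderWorkerSet path
    dataLocal: 1
  containers:
    - name: vllm-worker
      image: quay.io/tms/vllm-dev-base:0.0.15
      mountModelVolume: true
```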