Implement pod-level persistence for LocalAI models (#28) (#25)

mattfsourcecode · web-flow · commit 9c560142d974 · 2025-02-11T05:19:43.000-05:00
* Update model persistence for LocalAI (#28)
* Enhance pod listing to display all namespaces
* Fix language detection for Helm charts
* Add debug configuration for CodeLLDB
* Improve Helm chart
* Update README
diff --git a/.gitattributes b/.gitattributes
@@ -1 +1,11 @@
-helm-chart/** linguist-language=Rust
+# Mark actual Rust files
+*.rs linguist-language=Rust
+
+# Mark Helm/Kubernetes YAML files appropriately
+helm-chart/**/*.yaml linguist-language=YAML
+helm-chart/**/*.yml linguist-language=YAML
+helm-chart/**/*.tpl linguist-language=Smarty
+
+# Mark Helm README and NOTES files as documentation (excluded from language stats)
+helm-chart/**/README.md linguist-documentation
+helm-chart/**/NOTES.txt linguist-documentation
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -0,0 +1,36 @@
+{
+  // Use IntelliSense to learn about possible attributes.
+  // Hover to view descriptions of existing attributes.
+  // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+  "version": "0.2.0",
+  "configurations": [
+    {
+      "type": "lldb",
+      "request": "launch",
+      "name": "Debug executable 'kube_app'",
+      "cargo": {
+        "args": ["build", "--bin=kube_app", "--package=kube_app"],
+        "filter": {
+          "name": "kube_app",
+          "kind": "bin"
+        }
+      },
+      "args": [],
+      "cwd": "${workspaceFolder}"
+    },
+    {
+      "type": "lldb",
+      "request": "launch",
+      "name": "Debug unit tests in executable 'kube_app'",
+      "cargo": {
+        "args": ["test", "--no-run", "--bin=kube_app", "--package=kube_app"],
+        "filter": {
+          "name": "kube_app",
+          "kind": "bin"
+        }
+      },
+      "args": [],
+      "cwd": "${workspaceFolder}"
+    }
+  ]
+}
diff --git a/README.md b/README.md
@@ -1,24 +1,34 @@
 # Kubernetes with Rust, MicroK8s, cert-manager, Gateway API, and LocalAI
 
-This repository demonstrates a [**Kubernetes**](https://kubernetes.io/) cluster managed with [**MicroK8s**](https://microk8s.io/), integrating a [**Rust**](https://www.rust-lang.org/) application that interacts with the Kubernetes API using [**Tokio**](https://tokio.rs/) and the [**kube**](https://kube.rs/) crate. It features modern networking with the [Gateway API](https://gateway-api.sigs.k8s.io/), TLS certificate management via [**cert-manager**](https://cert-manager.io/), and AI capabilities using [**LocalAI**](https://localai.io/). This project is suitable for development and testing purposes, leveraging MicroK8s for a lightweight and efficient Kubernetes environment.
-
----
+This repository demonstrates a [**Kubernetes**](https://kubernetes.io/) cluster managed with [**MicroK8s**](https://microk8s.io/), integrating a [**Rust**](https://www.rust-lang.org/) application that interacts with the Kubernetes API using [**Tokio**](https://tokio.rs/) and the [**kube**](https://kube.rs/) crate. It features modern service networking with the [Gateway API](https://gateway-api.sigs.k8s.io/), TLS certificate management via [**cert-manager**](https://cert-manager.io/), and AI capabilities using [**LocalAI**](https://localai.io/).
 
 ## ⚠️ Development-Only Notice
 
-**This application uses a `selfsigned-issuer` for TLS certificates, which is suitable for development but NOT recommended for production.** The self-signed certificates provided by cert-manager are ideal for testing environments; however, they lack the security guarantees required for production use. This repository is NOT intended for production deployments and should not be used as such.
+These local Kubernetes deployments use TLS certificates created by a `selfsigned-issuer`, which are suitable for testing only and NOT intended for production use cases. A transition to a production-grade setup would require using [Let's Encrypt](https://letsencrypt.org/) or another trusted certificate authority.
 
 ---
 
 ## Features
 
-- **Gateway API**: Next-generation service networking with HTTPRoute for granular traffic control and TLS integration.
-- **Cert-Manager**: Automates TLS certificate issuance using a self-signed issuer for development.
-- **Rust Application**: Interacts with the Kubernetes API to list Pods and can be extended for advanced cluster operations.
-- **LocalAI**: Self-hosted AI capabilities with robust potential for privacy-focused, scalable, and cost-efficient AI model deployment.
-
----
-
-## Getting Started
-
-This repository provides a lightweight platform for exploring Kubernetes features like Gateway API, cert-manager, and LocalAI in a local development environment. Extend the Rust application or experiment with Kubernetes networking and AI capabilities to suit your needs.
+- **Gateway API**: Next-generation service networking with HTTPRoute for granular traffic control and TLS.
+- **Cert-Manager**: Automates TLS certificate issuance using a `selfsigned-issuer` for development environments.
+- **Rust Application**: A minimal implementation demonstrating [Kubernetes API](https://kubernetes.io/docs/reference/kubernetes-api/) interaction by listing Pods across all namespaces with their statuses, serving as a foundation for building advanced cluster operations such as pod management, resource monitoring, CRD handling, event watching, and multi-cluster management.
+- **LocalAI**: Self-hosted AI capabilities with persistent storage for models, offering potential for privacy-focused, scalable, and cost-efficient AI model deployment.
+
+## Direction
+
+This project is an educational and experimental setup that offers a starting point for more advanced use cases. Possible directions include:
+
+- **Production-Grade TLS Certificates**: Transition to production-ready deployments by integrating trusted certificate authorities like Let's Encrypt or custom enterprise CAs for secure and scalable HTTPS traffic management.
+- **Dynamic Cluster Management**: Automate scaling, monitoring, and resource optimization across multiple clusters.
+- **Custom Resource Definitions (CRDs)**: Implement and manage custom Kubernetes resources tailored to specific application requirements.
+- **Event-Driven Automation**: Extend the Rust app to respond to Kubernetes events or webhooks for real-time cluster adjustments.
+- **AI Workload Orchestration**: Use LocalAI to manage and deploy advanced AI models for edge computing, predictive analytics, or machine learning tasks.
+- **Security Enhancements**: Integrate advanced authentication mechanisms and Role-Based Access Control (RBAC) policies for secure multi-user environments.
+- **Multi-Tenancy Support**: Enable resource isolation and quota management for multi-tenant Kubernetes clusters.
+- **Advanced Networking**: Leverage Gateway API features for traffic splitting, failover mechanisms, and routing policies based on performance metrics.
+- **Hybrid Cloud Deployments**: Adapt the setup for hybrid or multi-cloud Kubernetes deployments.
+- **Natural Language Processing (NLP)**: Implement AI-powered features such as text summarization, sentiment analysis, or chatbot functionality for applications requiring language understanding.
+- **Image and Video Processing**: Use AI models to enable facial recognition, object detection, image classification, or video analytics for multimedia applications.
+- **Predictive Analytics**: Leverage AI for forecasting trends, optimizing business operations, or detecting anomalies in datasets for finance, healthcare, or logistics.
+- **Custom AI Model Training**: Train and fine-tune models for domain-specific use cases, such as personalized recommendations, scientific research, or custom automation workflows.
diff --git a/helm-chart/files/models.yaml b/helm-chart/files/models.yaml
diff --git a/helm-chart/localai-values.yaml b/helm-chart/localai-values.yaml
diff --git a/helm-chart/templates/localai-deployment.yaml b/helm-chart/templates/localai-deployment.yaml
@@ -50,6 +50,10 @@ spec:
           resources:
             {{- toYaml .Values.localai.resources | nindent 16 }}
           volumeMounts:
+            {{- if .Values.localai.persistence.models.enabled }}
+            - name: models
+              mountPath: {{ .Values.localai.persistence.models.mountPath }}
+            {{- end }}
             {{- with .Values.localai.volumeMounts }}
             {{- toYaml . | nindent 12 }}
             {{- end }}
diff --git a/helm-chart/templates/localai-pvc.yaml b/helm-chart/templates/localai-pvc.yaml
diff --git a/helm-chart/templates/models-configmap.yaml b/helm-chart/templates/models-configmap.yaml
@@ -9,7 +9,7 @@ data:
   models.yaml: |
     models:
       - name: mistral
-        path: /models/mistral-7b-instruct-v0.1.Q4_K_M.gguf
+        path: build/models/mistral-7b-instruct-v0.1.Q4_K_M.gguf
         parameters:
           model: mistral-7b-instruct-v0.1.Q4_K_M
           temperature: 0.7
diff --git a/helm-chart/templates/pvc.yaml b/helm-chart/templates/pvc.yaml
@@ -0,0 +1,41 @@
+{{- if .Values.localai.persistence.models.enabled }}
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ include "localai.fullname" . }}-models
+  labels:
+    {{- include "localai.labels" . | nindent 4 }}
+    app.kubernetes.io/component: models-storage
+  annotations:
+    "helm.sh/resource-policy": keep
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: {{ .Values.localai.persistence.models.size | quote }}
+  {{- if .Values.localai.persistence.models.storageClass }}
+  storageClassName: {{ .Values.localai.persistence.models.storageClass }}
+  {{- end }}
+---
+{{- end }}
+{{- if .Values.localai.persistence.output.enabled }}
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ include "localai.fullname" . }}-output
+  labels:
+    {{- include "localai.labels" . | nindent 4 }}
+    app.kubernetes.io/component: output-storage
+  annotations:
+    "helm.sh/resource-policy": keep
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: {{ .Values.localai.persistence.output.size | quote }}
+  {{- if .Values.localai.persistence.output.storageClass }}
+  storageClassName: {{ .Values.localai.persistence.output.storageClass }}
+  {{- end }}
+{{- end }}
diff --git a/helm-chart/templates/storage-class.yaml b/helm-chart/templates/storage-class.yaml
@@ -0,0 +1,8 @@
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name: localai-storage-wait
+provisioner: microk8s.io/hostpath
+reclaimPolicy: Retain
+volumeBindingMode: WaitForFirstConsumer
+allowVolumeExpansion: true
diff --git a/helm-chart/templates/tests/test-connection.yaml b/helm-chart/templates/tests/test-connection.yaml
@@ -11,5 +11,5 @@ spec:
     - name: wget
       image: busybox
       command: ['wget']
-      args: ['{{ include "helm-chart.fullname" . }}:{{ .Values.localai.service.port }}']
+      args: ['{{ include "helm-chart.fullname" . }}:{{ .Values.service.port }}']
   restartPolicy: Never
diff --git a/helm-chart/values.yaml b/helm-chart/values.yaml
@@ -19,41 +19,46 @@ serviceAccount:
   # AutomountServiceAccountToken indicates whether pods running as this service account should have an API token automatically mounted
   automount: true
 
+# LocalAI configuration
 localai:
   enabled: true
   replicaCount: 1
   image:
     repository: localai/localai
     tag: latest
     pullPolicy: IfNotPresent
-  
+
+  # Environment variables configuration
+  env:
+    - name: LOCALAI_MODELS_PATH
+      value: /build/models
+
   # Model configuration
   models:
-    path: /models
+    path: /build/models
     preload:
       - name: llama-3.2-1b-instruct-q4_k_m.gguf
         url: https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/resolve/main/llama-3.2-1b-instruct-q4_k_m.gguf
-  
+
   # Persistent volume configuration for models
   persistence:
     models:
       enabled: true
-      accessMode: ReadWriteOnce
+      createClaim: true
       size: 10Gi
-      storageClass: microk8s-hostpath
-      mountPath: /models
-  
-  # Volume mounts configuration
-  volumeMounts:
-    - name: models
-      mountPath: /models
-      readOnly: false
+      storageClass: "microk8s-hostpath"
+      mountPath: /build/models
+    output:
+      enabled: false
+      createClaim: false
+      size: 1Gi
+      storageClass: "microk8s-hostpath"
+      mountPath: /build/output
 
   service:
     type: ClusterIP
     port: 8080
 
-
 # Gateway API configuration
 gateway:
   enabled: true
@@ -86,3 +91,6 @@ gateway:
         - path:
             type: PathPrefix
             value: /
+service:
+  type: ClusterIP
+  port: 8080
diff --git a/helm-chart/values/localai-values.yaml b/helm-chart/values/localai-values.yaml
diff --git a/helm-chart/values/temp-values.yaml b/helm-chart/values/temp-values.yaml
diff --git a/src/main.rs b/src/main.rs
@@ -1,19 +1,45 @@
 use kube::{Client, api::Api};
-use k8s_openapi::api::core::v1::Pod;
+use k8s_openapi::api::core::v1::{Pod, Namespace};
 use tokio;
 
 #[tokio::main]
 async fn main() -> Result<(), Box<dyn std::error::Error>> {
     // Initialize Kubernetes client
     let client = Client::try_default().await?;
 
-    // Access the Pod API in the "default" namespace
-    let pods: Api<Pod> = Api::namespaced(client, "default");
+    // Access the Namespace API
+    let namespaces: Api<Namespace> = Api::all(client.clone());
 
-    // List all Pods
+    // List parameters
     let lp = kube::api::ListParams::default();
-    for pod in pods.list(&lp).await? {
-        println!("Found Pod: {}", pod.metadata.name.unwrap_or_default());
+
+    // Print the report header, then iterate over every namespace
+    println!("\nKubernetes Cluster Pod Overview:");
+    println!("===============================");
+    
+    for ns in namespaces.list(&lp).await? {
+        let ns_name = ns.metadata.name.unwrap_or_default();
+        println!("\nNamespace: {}", ns_name);
+        println!("{}","-".repeat(ns_name.len() + 10));
+
+        // Create a Pod API handle scoped to this namespace
+        let pods: Api<Pod> = Api::namespaced(client.clone(), &ns_name);
+        
+        // List all pods in the namespace
+        match pods.list(&lp).await {
+            Ok(pod_list) => {
+                if pod_list.items.is_empty() {
+                    println!("  No pods found");
+                } else {
+                    for pod in pod_list {
+                        let pod_name = pod.metadata.name.unwrap_or_default();
+                        let status = pod.status.and_then(|s| s.phase).unwrap_or_default();
+                        println!("  Pod: {} (Status: {})", pod_name, status);
+                    }
+                }
+            },
+            Err(e) => println!("  Error listing pods: {}", e),
+        }
     }
 
     Ok(())