Skip to content

Commit 5f46b78

Browse files
committed
fix: resolve StatefulSet detection and WebSocket error handling issues (akash-network#295)
This commit addresses two critical issues in the Akash provider: 1. StatefulSet Detection Fix: - Fixed incorrect logic in ServiceStatus that was checking for any mounted storage instead of persistent storage to determine workload type - The bug caused services with RAM volumes to incorrectly attempt StatefulSet operations, resulting in "statefulsets.apps not found" errors - Now correctly checks storage.Attributes.Find(sdl.StorageAttributePersistent) to match the deployment creation logic 2. WebSocket Error Handling in lease-shell: - Fixed improper error handling when ServiceStatus fails during shell operations - Previously used http.Error() on WebSocket connections, causing protocol violations - Now properly uses WebSocket writer with LeaseShellCodeFailure and logs errors - Prevents silent failures and improves debugging for shell access issues Added comprehensive unit tests for StatefulSet detection logic covering: - Services with persistent storage (should use StatefulSet) - Services with non-persistent storage (should use Deployment) - Services with no storage (should use Deployment) These fixes ensure proper workload type detection and improve error visibility for lease-shell operations, resolving issues with shell access to deployments using RAM volumes.
1 parent 17937d6 commit 5f46b78

File tree

3 files changed

+162
-7
lines changed

3 files changed

+162
-7
lines changed

cluster/kube/client.go

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -935,13 +935,15 @@ func (c *client) ServiceStatus(ctx context.Context, lid mtypes.LeaseID, name str
935935
return nil, kubeclienterrors.ErrNoServiceForLease
936936
}
937937

938+
// Check if this service uses persistent storage (should be StatefulSet)
939+
// This logic must match the deployment creation logic in Deploy()
938940
isDeployment := true
939-
if params := svc.Params; params != nil {
940-
for _, param := range params.Storage {
941-
if param.Mount != "" {
942-
isDeployment = false
943-
break
944-
}
941+
persistent := false
942+
for i := range svc.Resources.Storage {
943+
attrVal := svc.Resources.Storage[i].Attributes.Find(sdl.StorageAttributePersistent)
944+
if persistent, _ = attrVal.AsBool(); persistent {
945+
isDeployment = false
946+
break
945947
}
946948
}
947949

cluster/kube/client_test.go

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
manifest "github.com/akash-network/akash-api/go/manifest/v2beta2"
88
mtypes "github.com/akash-network/akash-api/go/node/market/v1beta4"
99
types "github.com/akash-network/akash-api/go/node/types/v1beta3"
10+
"github.com/akash-network/node/sdl"
1011
"github.com/akash-network/node/testutil"
1112
"github.com/stretchr/testify/require"
1213
appsv1 "k8s.io/api/apps/v1"
@@ -687,3 +688,153 @@ func TestServiceStatusWithoutIngress(t *testing.T) {
687688
require.NotNil(t, status)
688689
require.Len(t, status.URIs, 0)
689690
}
691+
692+
func TestServiceStatusStatefulSetDetection(t *testing.T) {
693+
lid := testutil.LeaseID(t)
694+
ns := builder.LidNS(lid)
695+
696+
lns := &corev1.Namespace{
697+
ObjectMeta: metav1.ObjectMeta{
698+
Name: ns,
699+
},
700+
}
701+
702+
testCases := []struct {
703+
name string
704+
serviceName string
705+
services manifest.Services
706+
expectedWorkload string
707+
}{
708+
{
709+
name: "PersistentStorage_UsesStatefulSet",
710+
serviceName: "postgres",
711+
services: manifest.Services{
712+
{
713+
Name: "postgres",
714+
Image: "postgres:latest",
715+
Resources: types.Resources{
716+
Storage: []types.Storage{
717+
{
718+
Name: "data",
719+
Quantity: types.NewResourceValue(10737418240), // 10Gi
720+
Attributes: types.Attributes{
721+
{
722+
Key: sdl.StorageAttributePersistent,
723+
Value: "true",
724+
},
725+
},
726+
},
727+
},
728+
},
729+
Count: 1,
730+
},
731+
},
732+
expectedWorkload: "StatefulSet",
733+
},
734+
{
735+
name: "NonPersistentStorage_UsesDeployment",
736+
serviceName: "web",
737+
services: manifest.Services{
738+
{
739+
Name: "web",
740+
Image: "nginx:latest",
741+
Resources: types.Resources{
742+
Storage: []types.Storage{
743+
{
744+
Name: "tmp",
745+
Quantity: types.NewResourceValue(1073741824), // 1Gi
746+
Attributes: types.Attributes{
747+
{
748+
Key: sdl.StorageAttributePersistent,
749+
Value: "false",
750+
},
751+
},
752+
},
753+
},
754+
},
755+
Count: 2,
756+
},
757+
},
758+
expectedWorkload: "Deployment",
759+
},
760+
{
761+
name: "NoStorage_UsesDeployment",
762+
serviceName: "api",
763+
services: manifest.Services{
764+
{
765+
Name: "api",
766+
Image: "myapp:latest",
767+
Resources: types.Resources{},
768+
Count: 3,
769+
},
770+
},
771+
expectedWorkload: "Deployment",
772+
},
773+
}
774+
775+
for _, tc := range testCases {
776+
t.Run(tc.name, func(t *testing.T) {
777+
// Create the manifest
778+
mg := &manifest.Group{
779+
Name: "test-group",
780+
Services: tc.services,
781+
}
782+
783+
cparams := crd.ClusterSettings{
784+
SchedulerParams: make([]*crd.SchedulerParams, len(mg.Services)),
785+
}
786+
787+
m, err := crd.NewManifest(testKubeClientNs, lid, mg, cparams)
788+
require.NoError(t, err)
789+
790+
// Create the deployment or statefulset
791+
var kobjs []runtime.Object
792+
kobjs = append(kobjs, lns)
793+
794+
if tc.expectedWorkload == "StatefulSet" {
795+
ss := &appsv1.StatefulSet{
796+
ObjectMeta: metav1.ObjectMeta{
797+
Name: tc.serviceName,
798+
Namespace: ns,
799+
},
800+
Spec: appsv1.StatefulSetSpec{},
801+
Status: appsv1.StatefulSetStatus{
802+
AvailableReplicas: 1,
803+
Replicas: 1,
804+
},
805+
}
806+
kobjs = append(kobjs, ss)
807+
} else {
808+
depl := &appsv1.Deployment{
809+
ObjectMeta: metav1.ObjectMeta{
810+
Name: tc.serviceName,
811+
Namespace: ns,
812+
},
813+
Spec: appsv1.DeploymentSpec{},
814+
Status: appsv1.DeploymentStatus{
815+
AvailableReplicas: int32(tc.services[0].Count),
816+
Replicas: int32(tc.services[0].Count),
817+
},
818+
}
819+
kobjs = append(kobjs, depl)
820+
}
821+
822+
clientInterface := clientForTest(t, kobjs, []runtime.Object{m})
823+
824+
// Test ServiceStatus
825+
status, err := clientInterface.ServiceStatus(context.Background(), lid, tc.serviceName)
826+
require.NoError(t, err)
827+
require.NotNil(t, status)
828+
require.Equal(t, tc.serviceName, status.Name)
829+
830+
// Verify we can find the correct workload type
831+
if tc.expectedWorkload == "StatefulSet" {
832+
require.Equal(t, uint32(1), status.Available)
833+
require.Equal(t, uint32(1), status.Total)
834+
} else {
835+
require.Equal(t, tc.services[0].Count, status.Available)
836+
require.Equal(t, tc.services[0].Count, status.Total)
837+
}
838+
})
839+
}
840+
}

gateway/rest/router.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,9 @@ func leaseShellHandler(log log.Logger, cclient cluster.Client) http.HandlerFunc
372372
if cluster.ErrorIsOkToSendToClient(err) || errors.Is(err, kubeclienterrors.ErrNoServiceForLease) {
373373
responseData.Message = err.Error()
374374
} else {
375-
http.Error(rw, err.Error(), http.StatusInternalServerError)
375+
resultWriter = wsutil.NewWsWriterWrapper(shellWs, LeaseShellCodeFailure, l)
376+
encodeData = false
377+
localLog.Error("service status check failed", "err", err)
376378
}
377379
}
378380

0 commit comments

Comments
 (0)