Skip to content

Commit 621d893

Browse files
committed
fix: addressed MR feedback
1 parent 206826a commit 621d893

20 files changed

Lines changed: 506 additions & 136 deletions

docker/production/Dockerfile.carbide-nsm

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ COPY sdk/standard/go.mod ./sdk/standard/
3939
# Download dependencies
4040
RUN go mod download
4141

42+
COPY common/ ./common/
4243
COPY nvswitch-manager/ ./nvswitch-manager/
4344

4445
WORKDIR /workspace/nvswitch-manager

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ require (
3737
github.com/gogo/status v1.1.1
3838
github.com/golang-jwt/jwt/v5 v5.3.0
3939
github.com/golang/mock v1.6.0
40+
github.com/golang/protobuf v1.5.4
4041
github.com/google/uuid v1.6.0
4142
github.com/gorilla/mux v1.8.1
4243
github.com/grpc-ecosystem/go-grpc-middleware v1.4.0
@@ -150,7 +151,6 @@ require (
150151
github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
151152
github.com/gogo/googleapis v0.0.0-20180223154316-0cd9801be74a // indirect
152153
github.com/gogo/protobuf v1.3.2 // indirect
153-
github.com/golang/protobuf v1.5.4 // indirect
154154
github.com/golang/snappy v0.0.4 // indirect
155155
github.com/google/gnostic-models v0.7.0 // indirect
156156
github.com/google/go-cmp v0.7.0 // indirect

nvswitch-manager/internal/proto/v1/nvswitch-manager.pb.go

Lines changed: 24 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

nvswitch-manager/internal/proto/v1/nvswitch-manager.proto

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -278,25 +278,45 @@ message QueueUpdateResponse {
278278

279279
// QueueUpdatesRequest queues firmware updates for multiple switches.
280280
// Registered switches are identified by UUID; unregistered devices use FirmwareTarget.
281+
//
282+
// `switch_uuids` and `targets` are mutually exclusive: a single request must
283+
// populate exactly one of them. The handler does not deduplicate across the
284+
// two lists, so allowing both would risk queuing duplicate updates for any
285+
// device whose UUID and FirmwareTarget are both supplied. Callers with mixed
286+
// inventory should issue one RPC per registration state. A request that
287+
// populates neither is rejected for the same reason it would be a no-op.
288+
//
289+
// Whichever list is populated is processed best-effort, per item: a failure
290+
// for one switch (invalid UUID, validation, auto-registration, queue
291+
// rejection) does not abort processing of the remaining items and does not
292+
// produce a non-OK gRPC status. Callers must inspect each
293+
// `QueueUpdatesResponse.results[i].status` to determine per-item outcome.
294+
// The RPC only returns a top-level error on global failures (firmware
295+
// manager uninitialized, malformed `components` enum value, or violation of
296+
// the mutual-exclusivity contract above).
281297
message QueueUpdatesRequest {
282-
repeated string switch_uuids = 1; // UUIDs of registered NV-Switches to update
298+
repeated string switch_uuids = 1; // UUIDs of registered NV-Switches to update; mutually exclusive with `targets`. Processed best-effort.
283299
string bundle_version = 2; // Version of the firmware bundle
284300
repeated NVSwitchComponent components = 3; // Components to update (empty = all)
285-
repeated FirmwareTarget targets = 4; // Unregistered switches (auto-registered before update)
301+
repeated FirmwareTarget targets = 4; // Unregistered switches (auto-registered before update); mutually exclusive with `switch_uuids`. Processed best-effort.
286302
}
287303

288-
// QueueUpdatesResponse returns the results for each switch.
304+
// QueueUpdatesResponse returns the results for each switch. Length and
305+
// ordering correspond to whichever of `switch_uuids` or `targets` was
306+
// populated in the originating request (the two are mutually exclusive).
307+
// Each entry reports its own outcome; see QueueUpdatesRequest for the
308+
// best-effort contract.
289309
message QueueUpdatesResponse {
290310
repeated QueueUpdateResult results = 1;
291311
}
292312

293313
// QueueUpdateResult contains the result for a single switch update.
294314
message QueueUpdateResult {
295-
string switch_uuid = 1; // UUID of the switch
296-
StatusCode status = 2; // SUCCESS or error
297-
string error = 3; // Error message if status != SUCCESS
315+
string switch_uuid = 1; // UUID of the switch
316+
StatusCode status = 2; // SUCCESS or error; per-item outcome under QueueUpdatesRequest's best-effort contract
317+
string error = 3; // Error message if status != SUCCESS
298318
repeated FirmwareUpdateInfo updates = 4; // Queued updates if successful
299-
string bmc_ip = 5; // Set for FirmwareTarget responses; empty for UUID-based
319+
string bmc_ip = 5; // Set for FirmwareTarget responses; empty for UUID-based
300320
}
301321

302322
// GetUpdateRequest gets a specific update by ID.

nvswitch-manager/internal/service/server_impl.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,22 @@ func (s *NVSwitchManagerServerImpl) QueueUpdates(ctx context.Context, req *pb.Qu
422422
return nil, status.Error(codes.Unavailable, "firmware manager not initialized")
423423
}
424424

425+
// switch_uuids and targets are mutually exclusive: a request must populate
426+
// exactly one. Allowing both would risk queuing duplicate updates for any
427+
// device whose UUID is in switch_uuids and whose FirmwareTarget is in
428+
// targets, since the handler does not deduplicate across the two lists.
429+
// Empty requests are rejected as well — almost certainly a client bug.
430+
hasUUIDs := len(req.SwitchUuids) > 0
431+
hasTargets := len(req.Targets) > 0
432+
switch {
433+
case !hasUUIDs && !hasTargets:
434+
return nil, status.Error(codes.InvalidArgument,
435+
"request must populate either switch_uuids or targets")
436+
case hasUUIDs && hasTargets:
437+
return nil, status.Error(codes.InvalidArgument,
438+
"request must populate only one of switch_uuids or targets, not both")
439+
}
440+
425441
// Convert proto components to domain components
426442
var components []nvswitch.Component
427443
for _, c := range req.Components {
@@ -454,6 +470,14 @@ func (s *NVSwitchManagerServerImpl) QueueUpdates(ctx context.Context, req *pb.Qu
454470

455471
// Direct path: auto-register unregistered targets, then queue updates
456472
for _, target := range req.Targets {
473+
if target == nil {
474+
results = append(results, &pb.QueueUpdateResult{
475+
Status: pb.StatusCode_INVALID_ARGUMENT,
476+
Error: "target is nil",
477+
})
478+
continue
479+
}
480+
457481
result := &pb.QueueUpdateResult{
458482
BmcIp: target.BmcIp,
459483
}

nvswitch-manager/internal/service/server_impl_test.go

Lines changed: 59 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,13 @@ import (
2222
"testing"
2323

2424
pb "github.com/NVIDIA/ncx-infra-controller-rest/nvswitch-manager/internal/proto/v1"
25+
"github.com/NVIDIA/ncx-infra-controller-rest/nvswitch-manager/pkg/firmwaremanager"
2526
"github.com/NVIDIA/ncx-infra-controller-rest/nvswitch-manager/pkg/redfish"
2627

2728
"github.com/stretchr/testify/assert"
2829
"github.com/stretchr/testify/require"
30+
"google.golang.org/grpc/codes"
31+
"google.golang.org/grpc/status"
2932
)
3033

3134
func TestResetTarget_InvalidIP(t *testing.T) {
@@ -206,18 +209,63 @@ func TestFirmwareTargetToRegisterRequest(t *testing.T) {
206209
assert.Equal(t, int32(22), req.Nvos.Port)
207210
}
208211

209-
func TestQueueUpdates_InvalidTarget(t *testing.T) {
212+
// TestQueueUpdates_NilFirmwareManager guards the early-return in QueueUpdates
213+
// that protects against being called before the firmware manager has been
214+
// initialized (e.g. when persistent mode is configured but the firmware
215+
// packages directory is unset). Per-target validation is exercised by the
216+
// TestValidateFirmwareTarget_* tests; driving end-to-end validation through
217+
// the handler would require a non-nil fwm and is left for handler-level
218+
// tests with a mocked FirmwareManager.
219+
func TestQueueUpdates_NilFirmwareManager(t *testing.T) {
210220
srv := &NVSwitchManagerServerImpl{}
211-
// fwm is nil, but the target validation should fail before reaching fwm
212-
// We can't test the full flow without a fwm, but we can test validation errors
213-
// by checking that the handler returns results (not a gRPC error) for invalid targets.
214221

215-
// Without fwm, QueueUpdates returns "firmware manager not initialized"
216-
// So we only test validateFirmwareTarget directly here.
217-
target := validFirmwareTarget()
218-
target.BmcIp = "invalid"
219-
err := validateFirmwareTarget(target)
220-
assert.Error(t, err)
222+
resp, err := srv.QueueUpdates(context.Background(), &pb.QueueUpdatesRequest{})
223+
224+
require.Error(t, err)
225+
assert.Nil(t, resp)
226+
st, ok := status.FromError(err)
227+
require.True(t, ok, "expected a gRPC status error, got %T", err)
228+
assert.Equal(t, codes.Unavailable, st.Code())
229+
assert.Contains(t, st.Message(), "firmware manager not initialized")
230+
}
231+
232+
// TestQueueUpdates_MutualExclusivity locks in the contract that
233+
// QueueUpdates rejects requests violating the switch_uuids/targets
234+
// mutual-exclusivity constraint. See QueueUpdatesRequest in
235+
// nvswitch-manager.proto for the full contract documentation.
236+
func TestQueueUpdates_MutualExclusivity(t *testing.T) {
237+
cases := map[string]struct {
238+
req *pb.QueueUpdatesRequest
239+
wantMessage string
240+
}{
241+
"empty request rejected (no work to do)": {
242+
req: &pb.QueueUpdatesRequest{},
243+
wantMessage: "either switch_uuids or targets",
244+
},
245+
"both populated rejected (would risk duplicate updates)": {
246+
req: &pb.QueueUpdatesRequest{
247+
SwitchUuids: []string{"00000000-0000-0000-0000-000000000001"},
248+
Targets: []*pb.FirmwareTarget{validFirmwareTarget()},
249+
},
250+
wantMessage: "not both",
251+
},
252+
}
253+
254+
for name, tc := range cases {
255+
t.Run(name, func(t *testing.T) {
256+
// Non-nil fwm so we get past the Unavailable guard. The
257+
// mutual-exclusivity validation rejects before any fwm method
258+
// is invoked, so a zero-value pointer is sufficient.
259+
srv := &NVSwitchManagerServerImpl{fwm: &firmwaremanager.FirmwareManager{}}
260+
261+
resp, err := srv.QueueUpdates(context.Background(), tc.req)
221262

222-
_ = srv // suppress unused
263+
require.Error(t, err)
264+
assert.Nil(t, resp)
265+
st, ok := status.FromError(err)
266+
require.True(t, ok, "expected a gRPC status error, got %T", err)
267+
assert.Equal(t, codes.InvalidArgument, st.Code())
268+
assert.Contains(t, st.Message(), tc.wantMessage)
269+
})
270+
}
223271
}

nvswitch-manager/internal/service/service.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,10 @@ type Service struct {
5656
func New(ctx context.Context, c Config) (*Service, error) {
5757
// Connect to database first if configured (needed for persistent registry)
5858
var db *bun.DB
59-
if c.DataStoreType == nvswitchmanager.DatastoreTypePersistent && c.DBConf.Host != "" {
59+
if c.DataStoreType == nvswitchmanager.DatastoreTypePersistent {
60+
if c.DBConf.Host == "" {
61+
return nil, fmt.Errorf("DB host is required for persistent mode")
62+
}
6063
pg, err := postgres.New(ctx, c.DBConf)
6164
if err != nil {
6265
return nil, fmt.Errorf("database connection required for persistent mode: %w", err)

nvswitch-manager/pkg/credentials/inmemory.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ package credentials
2020
import (
2121
"context"
2222
"errors"
23+
"fmt"
2324
"net"
2425
"sync"
2526

@@ -86,6 +87,10 @@ func (m *InMemoryCredentialManager) GetBMC(ctx context.Context, mac net.Hardware
8687
// PutBMC stores the BMC credential for mac. If an identical entry exists, this is a no-op.
8788
// If a different entry exists, the new value overwrites (with a warning log).
8889
func (m *InMemoryCredentialManager) PutBMC(ctx context.Context, mac net.HardwareAddr, cred *credential.Credential) error {
90+
if cred == nil {
91+
return fmt.Errorf("BMC credential for %s is nil", mac)
92+
}
93+
8994
m.mu.Lock()
9095
defer m.mu.Unlock()
9196
key := m.bmcKey(mac)
@@ -102,6 +107,10 @@ func (m *InMemoryCredentialManager) PutBMC(ctx context.Context, mac net.Hardware
102107

103108
// PatchBMC updates the BMC credential for mac (replaces current value).
104109
func (m *InMemoryCredentialManager) PatchBMC(ctx context.Context, mac net.HardwareAddr, cred *credential.Credential) error {
110+
if cred == nil {
111+
return fmt.Errorf("BMC credential for %s is nil", mac)
112+
}
113+
105114
m.mu.Lock()
106115
defer m.mu.Unlock()
107116
key := m.bmcKey(mac)
@@ -144,6 +153,10 @@ func (m *InMemoryCredentialManager) GetNVOS(ctx context.Context, mac net.Hardwar
144153
// PutNVOS stores the NVOS credential for mac. If an identical entry exists, this is a no-op.
145154
// If a different entry exists, the new value overwrites (with a warning log).
146155
func (m *InMemoryCredentialManager) PutNVOS(ctx context.Context, mac net.HardwareAddr, cred *credential.Credential) error {
156+
if cred == nil {
157+
return fmt.Errorf("NVOS credential for %s is nil", mac)
158+
}
159+
147160
m.mu.Lock()
148161
defer m.mu.Unlock()
149162
key := m.nvosKey(mac)
@@ -160,6 +173,10 @@ func (m *InMemoryCredentialManager) PutNVOS(ctx context.Context, mac net.Hardwar
160173

161174
// PatchNVOS updates the NVOS credential for mac (replaces current value).
162175
func (m *InMemoryCredentialManager) PatchNVOS(ctx context.Context, mac net.HardwareAddr, cred *credential.Credential) error {
176+
if cred == nil {
177+
return fmt.Errorf("NVOS credential for %s is nil", mac)
178+
}
179+
163180
m.mu.Lock()
164181
defer m.mu.Unlock()
165182
key := m.nvosKey(mac)

nvswitch-manager/pkg/credentials/inmemory_test.go

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -61,14 +61,15 @@ func TestInMemoryStartStop(t *testing.T) {
6161

6262
func TestInMemoryBMCPutGet(t *testing.T) {
6363
testCases := map[string]struct {
64-
initialPut bool
65-
putMAC string
66-
putCred *credential.Credential
67-
getMAC string
68-
wantErr bool
69-
wantUser string
70-
wantPass string
71-
samePtr bool
64+
initialPut bool
65+
putMAC string
66+
putCred *credential.Credential
67+
putSameAgain bool // if true, immediately re-put a fresh-but-equal credential to exercise the idempotent skip path
68+
getMAC string
69+
wantErr bool
70+
wantUser string
71+
wantPass string
72+
samePtr bool
7273
}{
7374
"get existing valid BMC credential": {
7475
initialPut: true,
@@ -93,14 +94,15 @@ func TestInMemoryBMCPutGet(t *testing.T) {
9394
wantErr: true,
9495
},
9596
"put same credential is no-op": {
96-
initialPut: true,
97-
putMAC: "aa:bb:cc:dd:ee:ff",
98-
putCred: newCredential("user1", "p1"),
99-
getMAC: "aa:bb:cc:dd:ee:ff",
100-
wantErr: false,
101-
wantUser: "user1",
102-
wantPass: "p1",
103-
samePtr: true,
97+
initialPut: true,
98+
putMAC: "aa:bb:cc:dd:ee:ff",
99+
putCred: newCredential("user1", "p1"),
100+
putSameAgain: true,
101+
getMAC: "aa:bb:cc:dd:ee:ff",
102+
wantErr: false,
103+
wantUser: "user1",
104+
wantPass: "p1",
105+
samePtr: true, // second put with equal-but-fresh pointer must be skipped, leaving original pointer in place
104106
},
105107
}
106108

@@ -113,9 +115,12 @@ func TestInMemoryBMCPutGet(t *testing.T) {
113115
if tc.initialPut {
114116
mac := parseMAC(t, tc.putMAC)
115117
assert.NoError(t, mgr.PutBMC(ctx, mac, tc.putCred))
116-
// For the idempotent scenario, put the same credential again
117-
if name == "put same credential is no-op" {
118-
assert.NoError(t, mgr.PutBMC(ctx, mac, newCredential("user1", "p1")))
118+
// Exercise the idempotent skip path with a fresh-but-equal
119+
// credential pointer. samePtr below verifies the original
120+
// pointer survived (i.e. the second Put was actually skipped,
121+
// not just rewritten with the same values).
122+
if tc.putSameAgain {
123+
assert.NoError(t, mgr.PutBMC(ctx, mac, newCredential(tc.putCred.User, tc.putCred.Password.Value)))
119124
}
120125
}
121126

0 commit comments

Comments
 (0)