Skip to content

Commit 11a09a0

Browse files
authored
Move to json storage, address pblindex issue (#721)
This patch changes the way we persist the data on disk when running Calico/VPP. Instead of using struc and a binary format, we transition to JSON files. Size should not be an issue, as the number of pods per node is typically low (~100). This will make troubleshooting easier and errors clearer when parsing fails. We thus remove the /bin/debug troubleshooting utility, as the data format is now human readable. In doing this, we address an issue where PBL indexes were reused upon dataplane restart, as they were stored in a list. We now use a map to retain the containerIP mapping. We also split the configuration from the runtime spec in LocalPodSpec and add a step to clear it when the corresponding VRFs are not found in VPP. Finally, we address an issue where uRPF was not properly set up for IPv6. Signed-off-by: Nathan Skrzypczak <[email protected]>
1 parent bf40e29 commit 11a09a0

30 files changed

+1071
-1083
lines changed

calico-vpp-agent/Dockerfile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ FROM ubuntu:22.04
33
LABEL maintainer="[email protected]"
44

55
ADD bin/gobgp /bin/gobgp
6-
ADD bin/debug /bin/debug
76
ADD version /etc/calicovppversion
87
ADD bin/felix-api-proxy /bin/felix-api-proxy
98
ADD bin/calico-vpp-agent /bin/calico-vpp-agent

calico-vpp-agent/Makefile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ felix-api-proxy: bin
1818

1919
build: felix-api-proxy bin
2020
${DOCKER_RUN} go build -o ./bin/calico-vpp-agent ./cmd
21-
${DOCKER_RUN} go build -o ./bin/debug ./cmd/debug-state
2221

2322
gobgp: bin
2423
${DOCKER_RUN} go build -o ./bin/gobgp github.com/osrg/gobgp/v3/cmd/gobgp/

calico-vpp-agent/cmd/calico_vpp_dataplane.go

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"context"
2020
"os"
2121
"os/signal"
22+
"runtime/coverage"
2223
"syscall"
2324
"time"
2425

@@ -219,20 +220,41 @@ func main() {
219220

220221
log.Infof("Agent started")
221222

222-
interruptSignalChannel := make(chan os.Signal, 2)
223-
signal.Notify(interruptSignalChannel, os.Interrupt, syscall.SIGTERM)
224-
225-
usr1SignalChannel := make(chan os.Signal, 2)
226-
signal.Notify(usr1SignalChannel, syscall.SIGUSR1)
223+
sigChan := make(chan os.Signal, 2)
224+
signal.Notify(sigChan,
225+
os.Interrupt,
226+
syscall.SIGTERM,
227+
syscall.SIGUSR1,
228+
syscall.SIGUSR2,
229+
)
227230

228231
select {
229-
case <-usr1SignalChannel:
230-
/* vpp-manager pokes us with USR1 if VPP terminates */
231-
log.Warnf("Vpp stopped, exiting...")
232-
t.Kill(errors.Errorf("Caught signal USR1"))
233-
case <-interruptSignalChannel:
234-
log.Infof("SIG received, exiting")
235-
t.Kill(errors.Errorf("Caught INT signal"))
232+
case sig := <-sigChan:
233+
switch sig {
234+
case os.Interrupt:
235+
fallthrough
236+
case syscall.SIGTERM:
237+
log.Infof("SIG received, exiting")
238+
t.Kill(errors.Errorf("Caught INT signal"))
239+
case syscall.SIGUSR1:
240+
// vpp-manager pokes us with USR1 if VPP terminates
241+
log.Warnf("Vpp stopped, exiting...")
242+
t.Kill(errors.Errorf("Caught signal USR1"))
243+
case syscall.SIGUSR2:
244+
// the USR2 signal outputs the coverage data,
245+
// provided the binary is compiled with -cover and
246+
// GOCOVERDIR is set. This allows us to not require
247+
// a proper binary termination in order to get coverage data.
248+
log.Warn("Received SIGUSR2, writing coverage")
249+
err := coverage.WriteCountersDir(os.Getenv("GOCOVERDIR"))
250+
if err != nil {
251+
log.WithError(err).Error("Could not write counters dir")
252+
}
253+
err = coverage.WriteMetaDir(os.Getenv("GOCOVERDIR"))
254+
if err != nil {
255+
log.WithError(err).Error("Could not write meta dir")
256+
}
257+
}
236258
case <-t.Dying():
237259
log.Errorf("tomb Dying %s", t.Err())
238260
}

calico-vpp-agent/cmd/debug-state/debug-state.go

Lines changed: 0 additions & 43 deletions
This file was deleted.

calico-vpp-agent/cni/cni_pod_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ var _ = Describe("Pod-related functionality of CNI", func() {
182182
Workload: &cniproto.WorkloadIDs{
183183
Annotations: map[string]string{
184184
// needed just for setting up steering of traffic to default Tun/Tap and to secondary Memif
185-
cni.VppAnnotationPrefix + cni.MemifPortAnnotation: fmt.Sprintf("tcp:%d-%d,udp:%d-%d",
185+
config.MemifPortAnnotation: fmt.Sprintf("tcp:%d-%d,udp:%d-%d",
186186
memifTCPPortStart, memifTCPPortEnd, memifUDPPortStart, memifUDPPortEnd),
187187
},
188188
},
@@ -418,7 +418,7 @@ var _ = Describe("Pod-related functionality of CNI", func() {
418418
Workload: &cniproto.WorkloadIDs{
419419
Annotations: map[string]string{
420420
// needed just for setting up steering of traffic to default Tun/Tap and to secondary Memif
421-
cni.VppAnnotationPrefix + cni.MemifPortAnnotation: fmt.Sprintf("tcp:%d-%d,udp:%d-%d",
421+
config.MemifPortAnnotation: fmt.Sprintf("tcp:%d-%d,udp:%d-%d",
422422
memifTCPPortStart, memifTCPPortEnd, memifUDPPortStart, memifUDPPortEnd),
423423
},
424424
},

0 commit comments

Comments
 (0)