Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions pkg/controller/pxcbackup/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,17 +197,28 @@ func (r *ReconcilePerconaXtraDBClusterBackup) Reconcile(ctx context.Context, req
}()

if err := r.checkDeadlines(ctx, cluster, cr); err != nil {
// There was an error with checking the deadline itself, we must retry.
if !errors.Is(err, errDeadlineExceeded) {
return reconcile.Result{}, errors.Wrap(err, "check deadlines")
}
// If a deadline was exceeded, we must set the status to failed so the backup can stop.
if err := r.setFailedStatus(ctx, cr, err); err != nil {
return rr, errors.Wrap(err, "update status")
return reconcile.Result{}, errors.Wrap(err, "update status")
}

if errors.Is(err, errSuspendedDeadlineExceeded) {
log.Info("cleaning up suspended backup job")
if err := r.cleanUpSuspendedJob(ctx, cluster, cr); err != nil {
if err := r.cleanUpJob(ctx, cluster, cr); err != nil {
return reconcile.Result{}, errors.Wrap(err, "clean up suspended job")
}
}

if errors.Is(err, errRunningDeadlineExceeded) {
log.Info("running deadline exceeded, deleting the job and its pods")
if err := r.cleanUpJob(ctx, cluster, cr); err != nil {
return reconcile.Result{}, errors.Wrap(err, "clean up running deadline exceeded job")
}
}
return reconcile.Result{}, nil
}

Expand Down Expand Up @@ -863,7 +874,7 @@ func (r *ReconcilePerconaXtraDBClusterBackup) getBackupJob(
return job, nil
}

func (r *ReconcilePerconaXtraDBClusterBackup) cleanUpSuspendedJob(
func (r *ReconcilePerconaXtraDBClusterBackup) cleanUpJob(
ctx context.Context,
cluster *api.PerconaXtraDBCluster,
cr *api.PerconaXtraDBClusterBackup,
Expand All @@ -873,7 +884,8 @@ func (r *ReconcilePerconaXtraDBClusterBackup) cleanUpSuspendedJob(
return errors.Wrap(err, "get job")
}

if err := r.client.Delete(ctx, job); err != nil {
// Set propagationPolicy=background because default deletion preserves pods.
if err := r.client.Delete(ctx, job, client.PropagationPolicy(metav1.DeletePropagationBackground)); err != nil {
return errors.Wrap(err, "delete job")
}

Expand Down
276 changes: 276 additions & 0 deletions pkg/controller/pxcbackup/controller_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,276 @@
package pxcbackup

import (
"context"
"sync"
"time"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/percona/percona-xtradb-cluster-operator/clientcmd"
pxcv1 "github.com/percona/percona-xtradb-cluster-operator/pkg/apis/pxc/v1"
"github.com/percona/percona-xtradb-cluster-operator/pkg/pxc/backup"
"github.com/percona/percona-xtradb-cluster-operator/pkg/version"
corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

var _ = Describe("PerconaXtraDBClusterBackup", Ordered, func() {
ctx := context.Background()
const ns = "pxc"
namespace := &corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{
Name: ns,
Namespace: ns,
},
}
clusterName := "cluster1"
cluster := &pxcv1.PerconaXtraDBCluster{}

reconciler := &ReconcilePerconaXtraDBClusterBackup{}
BeforeAll(func() {
By("Creating the Namespace to perform the tests")
err := k8sClient.Create(ctx, namespace)
Expect(err).To(Not(HaveOccurred()))

By("Creating a PXC Cluster to perform the tests")
cluster, err = readDefaultCR(clusterName, ns)
Expect(err).To(Not(HaveOccurred()))

err = k8sClient.Create(ctx, cluster)
Expect(err).To(Not(HaveOccurred()))

mockPXCReadyStatus(ctx, cluster)
reconciler = newTestReconciler()
})

AfterAll(func() {
// TODO(user): Attention if you improve this code by adding other context test you MUST
// be aware of the current delete namespace limitations. More info: https://book.kubebuilder.io/reference/envtest.html#testing-considerations
By("Deleting the Namespace to perform the tests")
_ = k8sClient.Delete(ctx, namespace)

By("Deleting the PXC Cluster to perform the tests")
cluster, err := readDefaultCR(clusterName, ns)
Expect(err).To(Not(HaveOccurred()))
err = k8sClient.Delete(ctx, cluster)
Expect(err).To(Not(HaveOccurred()))
})

It("Should reconcile backup", func() {
pxcBackup, err := newBackup("backup1", ns)
Expect(err).To(Not(HaveOccurred()))

err = k8sClient.Create(ctx, pxcBackup)
Expect(err).To(Not(HaveOccurred()))

_, err = reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: types.NamespacedName{
Name: pxcBackup.Name,
Namespace: pxcBackup.Namespace,
}})
Expect(err).To(Succeed())

// Check that a job was created
bcp := backup.New(cluster)
job := bcp.Job(pxcBackup, cluster)
err = k8sClient.Get(ctx, types.NamespacedName{
Name: job.Name,
Namespace: job.Namespace,
}, job)
Expect(err).To(Not(HaveOccurred()))
})
})

var _ = Describe("Error checking deadlines", Ordered, func() {
ctx := context.Background()
const ns = "pxc-deadline-error"
namespace := &corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{
Name: ns,
Namespace: ns,
},
}
clusterName := "cluster1"
cluster := &pxcv1.PerconaXtraDBCluster{}
reconciler := &ReconcilePerconaXtraDBClusterBackup{}
BeforeAll(func() {
By("Creating the Namespace to perform the tests")
err := k8sClient.Create(ctx, namespace)
Expect(err).To(Not(HaveOccurred()))

By("Creating a PXC Cluster to perform the tests")
cluster, err = readDefaultCR(clusterName, ns)
Expect(err).To(Not(HaveOccurred()))

err = k8sClient.Create(ctx, cluster)
Expect(err).To(Not(HaveOccurred()))

mockPXCReadyStatus(ctx, cluster)
reconciler = newTestReconciler()
})

AfterAll(func() {
// TODO(user): Attention if you improve this code by adding other context test you MUST
// be aware of the current delete namespace limitations. More info: https://book.kubebuilder.io/reference/envtest.html#testing-considerations
By("Deleting the Namespace to perform the tests")
_ = k8sClient.Delete(ctx, namespace)

By("Deleting the PXC Cluster to perform the tests")
cluster, err := readDefaultCR(clusterName, ns)
Expect(err).To(Not(HaveOccurred()))
err = k8sClient.Delete(ctx, cluster)
Expect(err).To(Not(HaveOccurred()))
})

It("Should not fail when error checking deadline", func() {
pxcBackup, err := newBackup("backup1", ns)
Expect(err).To(Not(HaveOccurred()))
pxcBackup.Spec.RunningDeadlineSeconds = ptr.To(int64(300))

pxcBackupReq := reconcile.Request{NamespacedName: types.NamespacedName{
Name: pxcBackup.Name,
Namespace: pxcBackup.Namespace,
}}

err = k8sClient.Create(ctx, pxcBackup)
Expect(err).To(Not(HaveOccurred()))

_, err = reconciler.Reconcile(ctx, pxcBackupReq)
Expect(err).To(Succeed())

err = k8sClient.Get(ctx, pxcBackupReq.NamespacedName, pxcBackup)
Expect(err).To(Not(HaveOccurred()))
Expect(pxcBackup.Status.State).To(Equal(pxcv1.BackupStarting))

// We will delete the job, this will cause the deadline check to fail
job := backup.New(cluster).Job(pxcBackup, cluster)
err = k8sClient.Delete(ctx, job, client.PropagationPolicy(metav1.DeletePropagationBackground))
Expect(err).To(Not(HaveOccurred()))

_, err = reconciler.Reconcile(ctx, pxcBackupReq)
Expect(err).To((HaveOccurred()))
Expect(err.Error()).To(ContainSubstring("check deadlines"))

// Make sure that the backup is not marked as failed
err = k8sClient.Get(ctx, pxcBackupReq.NamespacedName, pxcBackup)
Expect(pxcBackup.Status.State).To(Equal(pxcv1.BackupStarting))
})
})

var _ = Describe("Backup Job deleted when running deadline is exceeded", Ordered, func() {
ctx := context.Background()
const ns = "pxc-job-running-deadline-exceeded"
namespace := &corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{
Name: ns,
Namespace: ns,
},
}
clusterName := "cluster1"
cluster := &pxcv1.PerconaXtraDBCluster{}
reconciler := &ReconcilePerconaXtraDBClusterBackup{}
BeforeAll(func() {
By("Creating the Namespace to perform the tests")
err := k8sClient.Create(ctx, namespace)
Expect(err).To(Not(HaveOccurred()))

By("Creating a PXC Cluster to perform the tests")
cluster, err = readDefaultCR(clusterName, ns)
Expect(err).To(Not(HaveOccurred()))

err = k8sClient.Create(ctx, cluster)
Expect(err).To(Not(HaveOccurred()))

mockPXCReadyStatus(ctx, cluster)
reconciler = newTestReconciler()
})

AfterAll(func() {
// TODO(user): Attention if you improve this code by adding other context test you MUST
// be aware of the current delete namespace limitations. More info: https://book.kubebuilder.io/reference/envtest.html#testing-considerations
By("Deleting the Namespace to perform the tests")
_ = k8sClient.Delete(ctx, namespace)

By("Deleting the PXC Cluster to perform the tests")
cluster, err := readDefaultCR(clusterName, ns)
Expect(err).To(Not(HaveOccurred()))
err = k8sClient.Delete(ctx, cluster)
Expect(err).To(Not(HaveOccurred()))
})

It("Should delete the backup job when running deadline is exceeded", func() {
pxcBackup, err := newBackup("backup1", ns)
Expect(err).To(Not(HaveOccurred()))
pxcBackup.Spec.RunningDeadlineSeconds = ptr.To(int64(5))

pxcBackupReq := reconcile.Request{NamespacedName: types.NamespacedName{
Name: pxcBackup.Name,
Namespace: pxcBackup.Namespace,
}}

err = k8sClient.Create(ctx, pxcBackup)
Expect(err).To(Not(HaveOccurred()))

_, err = reconciler.Reconcile(ctx, pxcBackupReq)
Expect(err).To(Succeed())

err = k8sClient.Get(ctx, pxcBackupReq.NamespacedName, pxcBackup)
Expect(err).To(Not(HaveOccurred()))
Expect(pxcBackup.Status.State).To(Equal(pxcv1.BackupStarting))

time.Sleep(6 * time.Second)

_, err = reconciler.Reconcile(ctx, pxcBackupReq)
Expect(err).To(Not(HaveOccurred()))

// Make sure that the backup is marked as failed
err = k8sClient.Get(ctx, pxcBackupReq.NamespacedName, pxcBackup)
Expect(err).To(Not(HaveOccurred()))
Expect(pxcBackup.Status.State).To(Equal(pxcv1.BackupFailed))
Expect(pxcBackup.Status.Error).To(ContainSubstring("running deadline seconds exceeded"))

// Make sure that the job is deleted
job := backup.New(cluster).Job(pxcBackup, cluster)
err = k8sClient.Get(ctx, types.NamespacedName{
Name: job.Name,
Namespace: job.Namespace,
}, job)
Expect(err).To(HaveOccurred())
Expect(k8serrors.IsNotFound(err)).To(BeTrue())
})
})

func newBackup(name, ns string) (*pxcv1.PerconaXtraDBClusterBackup, error) {
bkp, err := readDefaultBackup(name, ns)
if err != nil {
return nil, err
}
bkp.Spec.PXCCluster = "cluster1"
return bkp, nil
}

func newTestReconciler() *ReconcilePerconaXtraDBClusterBackup {
cli, _ := clientcmd.NewClient()
return &ReconcilePerconaXtraDBClusterBackup{
client: k8sClient,
scheme: k8sClient.Scheme(),
serverVersion: &version.ServerVersion{
Platform: version.PlatformKubernetes,
},
clientcmd: cli,
chLimit: make(chan struct{}, 10),
bcpDeleteInProgress: new(sync.Map),
}
}

func mockPXCReadyStatus(ctx context.Context, cluster *pxcv1.PerconaXtraDBCluster) {
cluster.Status.Status = pxcv1.AppStateReady
cluster.Status.PXC.Ready = cluster.Spec.PXC.Size
err := k8sClient.Status().Update(ctx, cluster)
Expect(err).To(Not(HaveOccurred()))
}
8 changes: 5 additions & 3 deletions pkg/controller/pxcbackup/deadline.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@ import (
api "github.com/percona/percona-xtradb-cluster-operator/pkg/apis/pxc/v1"
)

var errDeadlineExceeded = errors.New("deadline exceeded")

var (
errSuspendedDeadlineExceeded = errors.New("suspended deadline seconds exceeded")
errStartingDeadlineExceeded = errors.New("starting deadline seconds exceeded")
errRunningDeadlineExceeded = errors.New("running deadline seconds exceeded")
errSuspendedDeadlineExceeded = errors.Wrap(errDeadlineExceeded, "suspended deadline seconds exceeded")
errStartingDeadlineExceeded = errors.Wrap(errDeadlineExceeded, "starting deadline seconds exceeded")
errRunningDeadlineExceeded = errors.Wrap(errDeadlineExceeded, "running deadline seconds exceeded")
)

func (r *ReconcilePerconaXtraDBClusterBackup) checkDeadlines(ctx context.Context, cluster *api.PerconaXtraDBCluster, cr *api.PerconaXtraDBClusterBackup) error {
Expand Down
2 changes: 1 addition & 1 deletion pkg/controller/pxcbackup/deadline_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ var _ = Describe("Suspended deadline", func() {
err = r.checkSuspendedDeadline(context.Background(), cluster, cr)
Expect(err).To(HaveOccurred())

err = r.cleanUpSuspendedJob(context.Background(), cluster, cr)
err = r.cleanUpJob(context.Background(), cluster, cr)
Expect(err).NotTo(HaveOccurred())

j := new(batchv1.Job)
Expand Down
Loading
Loading