Skip to content

Commit d7540ec

Browse files
committed
Introduce RetryOnFailure lifecycle management strategy
Signed-off-by: Matheus Pimenta <[email protected]>
1 parent 079ae1b commit d7540ec

File tree

9 files changed

+493
-7
lines changed

9 files changed

+493
-7
lines changed

api/v2/helmrelease_types.go

Lines changed: 142 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,21 @@ type Remediation interface {
437437
RetriesExhausted(hr *HelmRelease) bool
438438
}
439439

440+
// Strategy defines a consistent interface for accessing the retry
441+
// configuration of the install and upgrade strategies. In this file it is
// satisfied by Install and Upgrade through their GetRetry methods.
442+
// +kubebuilder:object:generate=false
443+
type Strategy interface {
444+
GetRetry() Retry
445+
}
446+
447+
// Retry defines a consistent interface for retry strategies from
448+
// InstallStrategy and UpgradeStrategy.
449+
// +kubebuilder:object:generate=false
450+
type Retry interface {
451+
// GetRetryInterval returns the interval at which a failed action is retried.
GetRetryInterval() time.Duration
452+
// GetAction returns the release action to perform when retrying.
GetAction() ReleaseAction
453+
}
454+
440455
// Install holds the configuration for Helm install actions performed for this
441456
// HelmRelease.
442457
type Install struct {
@@ -448,6 +463,11 @@ type Install struct {
448463
// +optional
449464
Timeout *metav1.Duration `json:"timeout,omitempty"`
450465

466+
// Strategy defines the install strategy to use for this HelmRelease.
467+
// Defaults to 'RemediateOnFailure'.
468+
// +optional
469+
Strategy *InstallStrategy `json:"strategy,omitempty"`
470+
451471
// Remediation holds the remediation configuration for when the Helm install
452472
// action for the HelmRelease fails. The default is to not perform any action.
453473
// +optional
@@ -541,6 +561,51 @@ func (in Install) GetRemediation() Remediation {
541561
return *in.Remediation
542562
}
543563

564+
// GetRetry returns the configured retry strategy for the Helm install
565+
// action. It returns nil when no strategy is set, or when the strategy name
// is neither RetryOnFailure nor RetryWithUpgradeOnFailure.
566+
func (in Install) GetRetry() Retry {
567+
if in.Strategy == nil ||
568+
(in.Strategy.Name != string(ActionStrategyRetryOnFailure) &&
569+
in.Strategy.Name != string(ActionStrategyRetryWithUpgradeOnFailure)) {
570+
return nil
571+
}
572+
return in.Strategy
573+
}
574+
575+
// InstallStrategy holds the configuration for Helm install strategy.
576+
// +kubebuilder:validation:XValidation:rule="!has(self.retryInterval) || self.name != 'RemediateOnFailure'", message=".retryInterval cannot be set when .name is 'RemediateOnFailure'"
577+
type InstallStrategy struct {
578+
// Name of the install strategy.
579+
// +kubebuilder:validation:Enum=RemediateOnFailure;RetryOnFailure;RetryWithUpgradeOnFailure
580+
// +required
581+
Name string `json:"name"`
582+
583+
// RetryInterval is the interval at which to retry a failed install.
584+
// Can be used only when Name is set to RetryOnFailure or
// RetryWithUpgradeOnFailure.
585+
// Defaults to '5m'.
586+
// +kubebuilder:validation:Type=string
587+
// +kubebuilder:validation:Pattern="^([0-9]+(\\.[0-9]+)?(ms|s|m|h))+$"
588+
// +optional
589+
RetryInterval *metav1.Duration `json:"retryInterval,omitempty"`
590+
}
591+
592+
// GetRetryInterval returns the configured retry interval for the Helm install
593+
// action, or the default of 5 minutes when RetryInterval is not set.
594+
func (in InstallStrategy) GetRetryInterval() time.Duration {
595+
if in.RetryInterval == nil {
596+
return 5 * time.Minute
597+
}
598+
return in.RetryInterval.Duration
599+
}
600+
601+
// GetAction returns the retry action for the Helm install action:
// ReleaseActionUpgrade when Name is RetryWithUpgradeOnFailure, and
// ReleaseActionInstall for any other name.
602+
func (in InstallStrategy) GetAction() ReleaseAction {
603+
if in.Name == string(ActionStrategyRetryWithUpgradeOnFailure) {
604+
return ReleaseActionUpgrade
605+
}
606+
return ReleaseActionInstall
607+
}
608+
544609
// InstallRemediation holds the configuration for Helm install remediation.
545610
type InstallRemediation struct {
546611
// Retries is the number of retries that should be attempted on failures before
@@ -631,6 +696,11 @@ type Upgrade struct {
631696
// +optional
632697
Timeout *metav1.Duration `json:"timeout,omitempty"`
633698

699+
// Strategy defines the upgrade strategy to use for this HelmRelease.
700+
// Defaults to 'RemediateOnFailure'.
701+
// +optional
702+
Strategy *UpgradeStrategy `json:"strategy,omitempty"`
703+
634704
// Remediation holds the remediation configuration for when the Helm upgrade
635705
// action for the HelmRelease fails. The default is to not perform any action.
636706
// +optional
@@ -719,6 +789,46 @@ func (in Upgrade) GetRemediation() Remediation {
719789
return *in.Remediation
720790
}
721791

792+
// GetRetry returns the configured retry strategy for the Helm upgrade
793+
// action. It returns nil when no strategy is set, or when the strategy name
// is not RetryOnFailure.
794+
func (in Upgrade) GetRetry() Retry {
795+
if in.Strategy == nil || in.Strategy.Name != string(ActionStrategyRetryOnFailure) {
796+
return nil
797+
}
798+
return in.Strategy
799+
}
800+
801+
// UpgradeStrategy holds the configuration for the Helm upgrade strategy.
// Unlike InstallStrategy, its Name enum does not include
// RetryWithUpgradeOnFailure.
802+
// +kubebuilder:validation:XValidation:rule="!has(self.retryInterval) || self.name == 'RetryOnFailure'", message=".retryInterval can only be set when .name is 'RetryOnFailure'"
803+
type UpgradeStrategy struct {
804+
// Name of the upgrade strategy.
805+
// +kubebuilder:validation:Enum=RemediateOnFailure;RetryOnFailure
806+
// +required
807+
Name string `json:"name"`
808+
809+
// RetryInterval is the interval at which to retry a failed upgrade.
810+
// Can be used only when Name is set to RetryOnFailure.
811+
// Defaults to '5m'.
812+
// +kubebuilder:validation:Type=string
813+
// +kubebuilder:validation:Pattern="^([0-9]+(\\.[0-9]+)?(ms|s|m|h))+$"
814+
// +optional
815+
RetryInterval *metav1.Duration `json:"retryInterval,omitempty"`
816+
}
817+
818+
// GetRetryInterval returns the configured retry interval for the Helm upgrade
819+
// action, or the default of 5 minutes when RetryInterval is not set.
820+
func (in UpgradeStrategy) GetRetryInterval() time.Duration {
821+
if in.RetryInterval == nil {
822+
return 5 * time.Minute
823+
}
824+
return in.RetryInterval.Duration
825+
}
826+
827+
// GetAction returns the retry action for the Helm upgrade action, which is
// always ReleaseActionUpgrade regardless of the strategy name.
828+
func (in UpgradeStrategy) GetAction() ReleaseAction {
829+
return ReleaseActionUpgrade
830+
}
831+
722832
// UpgradeRemediation holds the configuration for Helm upgrade remediation.
723833
type UpgradeRemediation struct {
724834
// Retries is the number of retries that should be attempted on failures before
@@ -791,6 +901,23 @@ func (in UpgradeRemediation) RetriesExhausted(hr *HelmRelease) bool {
791901
return in.Retries >= 0 && in.GetFailureCount(hr) > int64(in.Retries)
792902
}
793903

904+
// ActionStrategyName is a valid name for an action strategy. The Name fields
// of InstallStrategy and UpgradeStrategy are plain strings, so values of this
// type are converted with string(...) when compared against them.
905+
type ActionStrategyName string
906+
907+
const (
908+
// ActionStrategyRemediateOnFailure is the action strategy name for
909+
// remediate on failure. It is accepted by both InstallStrategy and
// UpgradeStrategy.
910+
ActionStrategyRemediateOnFailure ActionStrategyName = "RemediateOnFailure"
911+
912+
// ActionStrategyRetryOnFailure is the action strategy name for retry on
913+
// failure. It is accepted by both InstallStrategy and UpgradeStrategy.
914+
ActionStrategyRetryOnFailure ActionStrategyName = "RetryOnFailure"
915+
916+
// ActionStrategyRetryWithUpgradeOnFailure is the action strategy name for retry
917+
// with an upgrade following an install failure. It is accepted only by
// InstallStrategy.
918+
ActionStrategyRetryWithUpgradeOnFailure ActionStrategyName = "RetryWithUpgradeOnFailure"
919+
)
920+
794921
// RemediationStrategy is the strategy to use to remediate a failed install
795922
// or upgrade.
796923
type RemediationStrategy string
@@ -1012,7 +1139,8 @@ type HelmReleaseStatus struct {
10121139
History Snapshots `json:"history,omitempty"`
10131140

10141141
// LastAttemptedReleaseAction is the last release action performed for this
1015-
// HelmRelease. It is used to determine the active remediation strategy.
1142+
// HelmRelease. It is used to determine the active retry or remediation
1143+
// strategy.
10161144
// +kubebuilder:validation:Enum=install;upgrade
10171145
// +optional
10181146
LastAttemptedReleaseAction ReleaseAction `json:"lastAttemptedReleaseAction,omitempty"`
@@ -1189,6 +1317,19 @@ func (in HelmRelease) GetActiveRemediation() Remediation {
11891317
}
11901318
}
11911319

1320+
// GetActiveRetry returns the active retry configuration for the
1321+
// HelmRelease, selected by Status.LastAttemptedReleaseAction: the install
// retry for ReleaseActionInstall, the upgrade retry for ReleaseActionUpgrade,
// and nil for any other value (including when no action was attempted yet).
1322+
func (in HelmRelease) GetActiveRetry() Retry {
1323+
switch in.Status.LastAttemptedReleaseAction {
1324+
case ReleaseActionInstall:
1325+
return in.GetInstall().GetRetry()
1326+
case ReleaseActionUpgrade:
1327+
return in.GetUpgrade().GetRetry()
1328+
default:
1329+
return nil
1330+
}
1331+
}
1332+
11921333
// GetRequeueAfter returns the duration after which the HelmRelease
11931334
// must be reconciled again.
11941335
func (in HelmRelease) GetRequeueAfter() time.Duration {

api/v2/zz_generated.deepcopy.go

Lines changed: 50 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/helm.toolkit.fluxcd.io_helmreleases.yaml

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,31 @@ spec:
445445
446446
Deprecated use CRD policy (`crds`) attribute with value `Skip` instead.
447447
type: boolean
448+
strategy:
449+
description: |-
450+
Strategy defines the install strategy to use for this HelmRelease.
451+
Defaults to 'RemediateOnFailure'.
452+
properties:
453+
name:
454+
description: Name of the install strategy.
455+
enum:
456+
- RemediateOnFailure
457+
- RetryOnFailure
458+
- RetryWithUpgradeOnFailure
459+
type: string
460+
retryInterval:
461+
description: |-
462+
RetryInterval is the interval at which to retry a failed install.
463+
Can be used only when Name is set to RetryOnFailure or RetryWithUpgradeOnFailure.
464+
Defaults to '5m'.
465+
pattern: ^([0-9]+(\.[0-9]+)?(ms|s|m|h))+$
466+
type: string
467+
required:
468+
- name
469+
type: object
470+
x-kubernetes-validations:
471+
- message: .retryInterval cannot be set when .name is 'RemediateOnFailure'
472+
rule: '!has(self.retryInterval) || self.name != ''RemediateOnFailure'''
448473
timeout:
449474
description: |-
450475
Timeout is the time to wait for any individual Kubernetes operation (like
@@ -912,6 +937,30 @@ spec:
912937
- uninstall
913938
type: string
914939
type: object
940+
strategy:
941+
description: |-
942+
Strategy defines the upgrade strategy to use for this HelmRelease.
943+
Defaults to 'RemediateOnFailure'.
944+
properties:
945+
name:
946+
description: Name of the upgrade strategy.
947+
enum:
948+
- RemediateOnFailure
949+
- RetryOnFailure
950+
type: string
951+
retryInterval:
952+
description: |-
953+
RetryInterval is the interval at which to retry a failed upgrade.
954+
Can be used only when Name is set to RetryOnFailure.
955+
Defaults to '5m'.
956+
pattern: ^([0-9]+(\.[0-9]+)?(ms|s|m|h))+$
957+
type: string
958+
required:
959+
- name
960+
type: object
961+
x-kubernetes-validations:
962+
- message: .retryInterval can only be set when .name is 'RetryOnFailure'
963+
rule: '!has(self.retryInterval) || self.name == ''RetryOnFailure'''
915964
timeout:
916965
description: |-
917966
Timeout is the time to wait for any individual Kubernetes operation (like
@@ -1178,7 +1227,8 @@ spec:
11781227
lastAttemptedReleaseAction:
11791228
description: |-
11801229
LastAttemptedReleaseAction is the last release action performed for this
1181-
HelmRelease. It is used to determine the active remediation strategy.
1230+
HelmRelease. It is used to determine the active retry or remediation
1231+
strategy.
11821232
enum:
11831233
- install
11841234
- upgrade

0 commit comments

Comments
 (0)