Skip to content

Commit fa2498f

Browse files
committed
upgrade: Add support for retrying automatically without remediations
Signed-off-by: Matheus Pimenta <[email protected]>
1 parent 8555152 commit fa2498f

File tree

7 files changed

+294
-11
lines changed

7 files changed

+294
-11
lines changed

api/v2/helmrelease_types.go

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,21 @@ type Remediation interface {
437437
RetriesExhausted(hr *HelmRelease) bool
438438
}
439439

440+
// Strategy defines a consistent interface for InstallStrategy and
441+
// UpgradeStrategy.
//
// NOTE(review): in the code visible in this change, GetRetry is implemented
// on Upgrade rather than on UpgradeStrategy, and no InstallStrategy type is
// shown; confirm which types are meant to satisfy this interface.
442+
// +kubebuilder:object:generate=false
443+
type Strategy interface {
// GetRetry returns the retry configuration to apply. The implementation
// visible here (Upgrade.GetRetry) returns nil when the 'RetryOnFailure'
// strategy is not selected, so callers should expect a nil result.
444+
GetRetry() Retry
445+
}
446+
447+
// Retry defines a consistent interface for retry strategies from
448+
// InstallStrategy and UpgradeStrategy.
//
// UpgradeStrategy implements both methods with value receivers.
449+
// +kubebuilder:object:generate=false
450+
type Retry interface {
// GetRetryInterval returns how long to wait before retrying a failed
// action.
451+
GetRetryInterval() time.Duration
// GetAction returns the release action the retry applies to
// (ReleaseActionUpgrade for UpgradeStrategy).
452+
GetAction() ReleaseAction
453+
}
454+
440455
// Install holds the configuration for Helm install actions performed for this
441456
// HelmRelease.
442457
type Install struct {
@@ -631,6 +646,11 @@ type Upgrade struct {
631646
// +optional
632647
Timeout *metav1.Duration `json:"timeout,omitempty"`
633648

649+
// Strategy defines the upgrade strategy to use for this HelmRelease.
650+
// Defaults to 'RemediateOnFailure'.
651+
// +optional
652+
Strategy *UpgradeStrategy `json:"strategy,omitempty"`
653+
634654
// Remediation holds the remediation configuration for when the Helm upgrade
635655
// action for the HelmRelease fails. The default is to not perform any action.
636656
// +optional
@@ -719,6 +739,46 @@ func (in Upgrade) GetRemediation() Remediation {
719739
return *in.Remediation
720740
}
721741

742+
// GetRetry returns the configured retry strategy for the Helm upgrade
743+
// action.
744+
func (in Upgrade) GetRetry() Retry {
745+
if in.Strategy == nil || in.Strategy.Name != string(ActionStrategyRetryOnFailure) {
746+
return nil
747+
}
748+
return in.Strategy
749+
}
750+
751+
// UpgradeStrategy holds the configuration for Helm upgrade strategy.
//
// Name selects between remediating a failed upgrade ('RemediateOnFailure')
// and retrying it at an interval ('RetryOnFailure'). The validation rule
// below rejects objects that set RetryInterval together with any Name other
// than 'RetryOnFailure'.
752+
// +kubebuilder:validation:XValidation:rule="!has(self.retryInterval) || self.name == 'RetryOnFailure'", message=".retryInterval can only be set when .name is 'RetryOnFailure'"
753+
type UpgradeStrategy struct {
754+
// Name of the upgrade strategy.
755+
// +kubebuilder:validation:Enum=RemediateOnFailure;RetryOnFailure
756+
// +required
757+
Name string `json:"name"`
758+
759+
// RetryInterval is the interval at which to retry a failed upgrade.
760+
// Can be used only when Name is set to RetryOnFailure.
761+
// Defaults to '5m'.
762+
// +kubebuilder:validation:Type=string
763+
// +kubebuilder:validation:Pattern="^([0-9]+(\\.[0-9]+)?(ms|s|m|h))+$"
764+
// +optional
765+
RetryInterval *metav1.Duration `json:"retryInterval,omitempty"`
766+
}
767+
768+
// GetRetryInterval returns the configured retry interval for the Helm upgrade
769+
// action, or the default.
770+
func (in UpgradeStrategy) GetRetryInterval() time.Duration {
771+
if in.RetryInterval == nil {
772+
return 5 * time.Minute
773+
}
774+
return in.RetryInterval.Duration
775+
}
776+
777+
// GetAction returns the retry action for the Helm upgrade action.
// It always returns ReleaseActionUpgrade, regardless of the configured Name
// or RetryInterval.
778+
func (in UpgradeStrategy) GetAction() ReleaseAction {
779+
return ReleaseActionUpgrade
780+
}
781+
722782
// UpgradeRemediation holds the configuration for Helm upgrade remediation.
723783
type UpgradeRemediation struct {
724784
// Retries is the number of retries that should be attempted on failures before
@@ -791,6 +851,19 @@ func (in UpgradeRemediation) RetriesExhausted(hr *HelmRelease) bool {
791851
return in.Retries >= 0 && in.GetFailureCount(hr) > int64(in.Retries)
792852
}
793853

854+
// ActionStrategyName is a valid name for an action strategy.
// The values declared for it match the Enum allowed for UpgradeStrategy.Name,
// which itself is a plain string field.
855+
type ActionStrategyName string
856+
857+
const (
858+
// ActionStrategyRemediateOnFailure is the action strategy name for
859+
// remediate on failure.
// It is the documented default when no upgrade strategy is specified.
860+
ActionStrategyRemediateOnFailure ActionStrategyName = "RemediateOnFailure"
861+
862+
// ActionStrategyRetryOnFailure is the action strategy name for retry on
863+
// failure.
// Upgrade.GetRetry only returns a retry configuration when the upgrade
// strategy name equals this value.
864+
ActionStrategyRetryOnFailure ActionStrategyName = "RetryOnFailure"
865+
)
866+
794867
// RemediationStrategy returns the strategy to use to remediate a failed install
795868
// or upgrade.
796869
type RemediationStrategy string
@@ -1012,7 +1085,8 @@ type HelmReleaseStatus struct {
10121085
History Snapshots `json:"history,omitempty"`
10131086

10141087
// LastAttemptedReleaseAction is the last release action performed for this
1015-
// HelmRelease. It is used to determine the active remediation strategy.
1088+
// HelmRelease. It is used to determine the active retry or remediation
1089+
// strategy.
10161090
// +kubebuilder:validation:Enum=install;upgrade
10171091
// +optional
10181092
LastAttemptedReleaseAction ReleaseAction `json:"lastAttemptedReleaseAction,omitempty"`
@@ -1189,6 +1263,17 @@ func (in HelmRelease) GetActiveRemediation() Remediation {
11891263
}
11901264
}
11911265

1266+
// GetActiveRetry returns the active retry configuration for the
1267+
// HelmRelease.
1268+
func (in HelmRelease) GetActiveRetry() Retry {
1269+
switch in.Status.LastAttemptedReleaseAction {
1270+
case ReleaseActionUpgrade:
1271+
return in.GetUpgrade().GetRetry()
1272+
default:
1273+
return nil
1274+
}
1275+
}
1276+
11921277
// GetRequeueAfter returns the duration after which the HelmRelease
11931278
// must be reconciled again.
11941279
func (in HelmRelease) GetRequeueAfter() time.Duration {

api/v2/zz_generated.deepcopy.go

Lines changed: 25 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/helm.toolkit.fluxcd.io_helmreleases.yaml

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -912,6 +912,30 @@ spec:
912912
- uninstall
913913
type: string
914914
type: object
915+
strategy:
916+
description: |-
917+
Strategy defines the upgrade strategy to use for this HelmRelease.
918+
Defaults to 'RemediateOnFailure'.
919+
properties:
920+
name:
921+
description: Name of the upgrade strategy.
922+
enum:
923+
- RemediateOnFailure
924+
- RetryOnFailure
925+
type: string
926+
retryInterval:
927+
description: |-
928+
RetryInterval is the interval at which to retry a failed upgrade.
929+
Can be used only when Name is set to RetryOnFailure.
930+
Defaults to '5m'.
931+
pattern: ^([0-9]+(\.[0-9]+)?(ms|s|m|h))+$
932+
type: string
933+
required:
934+
- name
935+
type: object
936+
x-kubernetes-validations:
937+
- message: .retryInterval can only be set when .name is 'RetryOnFailure'
938+
rule: '!has(self.retryInterval) || self.name == ''RetryOnFailure'''
915939
timeout:
916940
description: |-
917941
Timeout is the time to wait for any individual Kubernetes operation (like
@@ -1178,7 +1202,8 @@ spec:
11781202
lastAttemptedReleaseAction:
11791203
description: |-
11801204
LastAttemptedReleaseAction is the last release action performed for this
1181-
HelmRelease. It is used to determine the active remediation strategy.
1205+
HelmRelease. It is used to determine the active retry or remediation
1206+
strategy.
11821207
enum:
11831208
- install
11841209
- upgrade

docs/api/v2/helm.md

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,9 @@ HelmReleaseStatus
424424
</table>
425425
</div>
426426
</div>
427+
<h3 id="helm.toolkit.fluxcd.io/v2.ActionStrategyName">ActionStrategyName
428+
(<code>string</code> alias)</h3>
429+
<p>ActionStrategyName is a valid name for an action strategy.</p>
427430
<h3 id="helm.toolkit.fluxcd.io/v2.CRDsPolicy">CRDsPolicy
428431
(<code>string</code> alias)</h3>
429432
<p>
@@ -1676,7 +1679,8 @@ ReleaseAction
16761679
<td>
16771680
<em>(Optional)</em>
16781681
<p>LastAttemptedReleaseAction is the last release action performed for this
1679-
HelmRelease. It is used to determine the active remediation strategy.</p>
1682+
HelmRelease. It is used to determine the active retry or remediation
1683+
strategy.</p>
16801684
</td>
16811685
</tr>
16821686
<tr>
@@ -2247,6 +2251,10 @@ UpgradeRemediation.</p>
22472251
</p>
22482252
<p>RemediationStrategy returns the strategy to use to remediate a failed install
22492253
or upgrade.</p>
2254+
<h3 id="helm.toolkit.fluxcd.io/v2.Retry">Retry
2255+
</h3>
2256+
<p>Retry defines a consistent interface for retry strategies from
2257+
InstallStrategy and UpgradeStrategy.</p>
22502258
<h3 id="helm.toolkit.fluxcd.io/v2.Rollback">Rollback
22512259
</h3>
22522260
<p>
@@ -2570,6 +2578,10 @@ string
25702578
<a href="#helm.toolkit.fluxcd.io/v2.HelmReleaseStatus">HelmReleaseStatus</a>)
25712579
</p>
25722580
<p>Snapshots is a list of Snapshot objects.</p>
2581+
<h3 id="helm.toolkit.fluxcd.io/v2.Strategy">Strategy
2582+
</h3>
2583+
<p>Strategy defines a consistent interface for InstallStrategy and
2584+
UpgradeStrategy.</p>
25732585
<h3 id="helm.toolkit.fluxcd.io/v2.Test">Test
25742586
</h3>
25752587
<p>
@@ -2833,6 +2845,21 @@ Jobs for hooks) during the performance of a Helm upgrade action. Defaults to
28332845
</tr>
28342846
<tr>
28352847
<td>
2848+
<code>strategy</code><br>
2849+
<em>
2850+
<a href="#helm.toolkit.fluxcd.io/v2.UpgradeStrategy">
2851+
UpgradeStrategy
2852+
</a>
2853+
</em>
2854+
</td>
2855+
<td>
2856+
<em>(Optional)</em>
2857+
<p>Strategy defines the upgrade strategy to use for this HelmRelease.
2858+
Defaults to &lsquo;RemediateOnFailure&rsquo;.</p>
2859+
</td>
2860+
</tr>
2861+
<tr>
2862+
<td>
28362863
<code>remediation</code><br>
28372864
<em>
28382865
<a href="#helm.toolkit.fluxcd.io/v2.UpgradeRemediation">
@@ -3066,6 +3093,54 @@ RemediationStrategy
30663093
</table>
30673094
</div>
30683095
</div>
3096+
<h3 id="helm.toolkit.fluxcd.io/v2.UpgradeStrategy">UpgradeStrategy
3097+
</h3>
3098+
<p>
3099+
(<em>Appears on:</em>
3100+
<a href="#helm.toolkit.fluxcd.io/v2.Upgrade">Upgrade</a>)
3101+
</p>
3102+
<p>UpgradeStrategy holds the configuration for Helm upgrade strategy.</p>
3103+
<div class="md-typeset__scrollwrap">
3104+
<div class="md-typeset__table">
3105+
<table>
3106+
<thead>
3107+
<tr>
3108+
<th>Field</th>
3109+
<th>Description</th>
3110+
</tr>
3111+
</thead>
3112+
<tbody>
3113+
<tr>
3114+
<td>
3115+
<code>name</code><br>
3116+
<em>
3117+
string
3118+
</em>
3119+
</td>
3120+
<td>
3121+
<p>Name of the upgrade strategy.</p>
3122+
</td>
3123+
</tr>
3124+
<tr>
3125+
<td>
3126+
<code>retryInterval</code><br>
3127+
<em>
3128+
<a href="https://godoc.org/k8s.io/apimachinery/pkg/apis/meta/v1#Duration">
3129+
Kubernetes meta/v1.Duration
3130+
</a>
3131+
</em>
3132+
</td>
3133+
<td>
3134+
<em>(Optional)</em>
3135+
<p>RetryInterval is the interval at which to retry a failed upgrade.
3136+
Can be used only when Name is set to RetryOnFailure.
3137+
Defaults to &lsquo;5m&rsquo;.</p>
3138+
</td>
3139+
</tr>
3140+
</tbody>
3141+
</table>
3142+
</div>
3143+
</div>
30693144
<div class="admonition note">
30703145
<p class="last">This page was automatically generated with <code>gen-crd-api-reference-docs</code></p>
30713146
</div>

docs/spec/v2/helmreleases.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,30 @@ The field offers the following subfields:
606606
last release while merging in overrides from [values](#values). Setting
607607
this flag makes the HelmRelease non-declarative. Defaults to `false`.
608608

609+
#### Upgrade strategy
610+
611+
`.spec.upgrade.strategy` is an optional field to specify the strategy
612+
to use when running a Helm upgrade action.
613+
614+
The field offers the following subfields:
615+
616+
- `.name` (Required): The name of the upgrade strategy to use. One of
617+
`RemediateOnFailure` or `RetryOnFailure`. If the `.spec.upgrade.strategy`
618+
field is not specified, the HelmRelease reconciliation behaves as if
619+
`.spec.upgrade.strategy.name` was set to `RemediateOnFailure`.
620+
- `.retryInterval` (Optional): The time to wait between retries of failed
621+
releases when the upgrade strategy is set to `RetryOnFailure`. Defaults
622+
to `5m`. Cannot be used with `RemediateOnFailure`.
623+
624+
The default `RemediateOnFailure` strategy applies the rules defined by the
625+
`.spec.upgrade.remediation` field to the upgrade action, i.e. the same
626+
behavior as the controller had prior to the introduction of the `RetryOnFailure`
627+
strategy.
628+
629+
The `RetryOnFailure` strategy will retry failed releases at a regular
630+
interval defined by the `.spec.upgrade.strategy.retryInterval` field,
631+
without applying any remediation.
632+
609633
#### Upgrade remediation
610634

611635
`.spec.upgrade.remediation` is an optional field to configure the remediation

internal/controller/helmrelease_controller.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -392,10 +392,12 @@ func (r *HelmReleaseReconciler) reconcileRelease(ctx context.Context, patchHelpe
392392
Chart: loadedChart,
393393
Values: values,
394394
}); err != nil {
395-
if errors.Is(err, intreconcile.ErrMustRequeue) {
395+
switch {
396+
case errors.Is(err, intreconcile.ErrRetryAfterInterval):
397+
return jitter.JitteredRequeueInterval(ctrl.Result{RequeueAfter: obj.GetActiveRetry().GetRetryInterval()}), nil
398+
case errors.Is(err, intreconcile.ErrMustRequeue):
396399
return ctrl.Result{Requeue: true}, nil
397-
}
398-
if interrors.IsOneOf(err, intreconcile.ErrExceededMaxRetries, intreconcile.ErrMissingRollbackTarget) {
400+
case interrors.IsOneOf(err, intreconcile.ErrExceededMaxRetries, intreconcile.ErrMissingRollbackTarget):
399401
err = reconcile.TerminalError(err)
400402
}
401403
return ctrl.Result{}, err

0 commit comments

Comments
 (0)