From c059e51008cac4c0307f24952b361bced7da2e3c Mon Sep 17 00:00:00 2001 From: Matheus Pimenta Date: Sat, 9 Aug 2025 18:36:43 +0100 Subject: [PATCH 1/2] Remove tautology Signed-off-by: Matheus Pimenta --- internal/reconcile/atomic_release.go | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/internal/reconcile/atomic_release.go b/internal/reconcile/atomic_release.go index 457a28a30..07eacf2c7 100644 --- a/internal/reconcile/atomic_release.go +++ b/internal/reconcile/atomic_release.go @@ -283,16 +283,11 @@ func (r *AtomicRelease) Reconcile(ctx context.Context, req *Request) error { conditions.MarkReconciling(req.Object, meta.ProgressingWithRetryReason, "%s", conditions.GetMessage(req.Object, meta.ReadyCondition)) return ErrMustRequeue } - // Check if retries have exhausted after remediation for early - // stall condition detection. - if remediation != nil && remediation.RetriesExhausted(req.Object) { - conditions.MarkStalled(req.Object, "RetriesExceeded", "Failed to %s after %d attempt(s)", - req.Object.Status.LastAttemptedReleaseAction, req.Object.GetActiveRemediation().GetFailureCount(req.Object)) - return ErrExceededMaxRetries - } - conditions.Delete(req.Object, meta.ReconcilingCondition) - return nil + // Retries have exhausted after remediation for early stall condition detection. + conditions.MarkStalled(req.Object, "RetriesExceeded", "Failed to %s after %d attempt(s)", + req.Object.Status.LastAttemptedReleaseAction, req.Object.GetActiveRemediation().GetFailureCount(req.Object)) + return ErrExceededMaxRetries } // Append the type to the set of action types we have performed. From 0325b55b05c0756c2f7805eebb9a25f1b2ce6dba Mon Sep 17 00:00:00 2001 From: Matheus Pimenta Date: Fri, 8 Aug 2025 11:49:58 +0100 Subject: [PATCH 2/2] Introduce `RetryOnFailure` lifecycle management strategy Signed-off-by: Matheus Pimenta --- api/v2/helmrelease_types.go | 123 ++++++++- api/v2/zz_generated.deepcopy.go | 50 ++++ .../helm.toolkit.fluxcd.io_helmreleases.yaml | 51 +++- docs/api/v2/helm.md | 140 ++++++++++- docs/spec/v2/helmreleases.md | 49 ++++ internal/controller/helmrelease_controller.go | 8 +- .../controller/helmrelease_controller_test.go | 86 +++++++ internal/reconcile/atomic_release.go | 34 +++ internal/reconcile/atomic_release_test.go | 233 ++++++++++++++++++ internal/reconcile/install.go | 6 + internal/reconcile/install_test.go | 26 ++ internal/reconcile/upgrade.go | 6 + internal/reconcile/upgrade_test.go | 26 ++ 13 files changed, 832 insertions(+), 6 deletions(-) diff --git a/api/v2/helmrelease_types.go b/api/v2/helmrelease_types.go index ed34e5254..4e3efb7b5 100644 --- a/api/v2/helmrelease_types.go +++ b/api/v2/helmrelease_types.go @@ -437,6 +437,20 @@ type Remediation interface { RetriesExhausted(hr *HelmRelease) bool } +// Strategy defines a consistent interface for InstallStrategy and +// UpgradeStrategy. +// +kubebuilder:object:generate=false +type Strategy interface { + GetRetry() Retry +} + +// Retry defines a consistent interface for retry strategies from +// InstallStrategy and UpgradeStrategy. +// +kubebuilder:object:generate=false +type Retry interface { + GetRetryInterval() time.Duration +} + // Install holds the configuration for Helm install actions performed for this // HelmRelease. type Install struct { @@ -448,6 +462,11 @@ type Install struct { // +optional Timeout *metav1.Duration `json:"timeout,omitempty"` + // Strategy defines the install strategy to use for this HelmRelease. + // Defaults to 'RemediateOnFailure'. 
+ // +optional + Strategy *InstallStrategy `json:"strategy,omitempty"` + // Remediation holds the remediation configuration for when the Helm install // action for the HelmRelease fails. The default is to not perform any action. // +optional @@ -541,6 +560,41 @@ func (in Install) GetRemediation() Remediation { return *in.Remediation } +// GetRetry returns the configured retry strategy for the Helm install +// action. +func (in Install) GetRetry() Retry { + if in.Strategy == nil || in.Strategy.Name != string(ActionStrategyRetryOnFailure) { + return nil + } + return in.Strategy +} + +// InstallStrategy holds the configuration for Helm install strategy. +// +kubebuilder:validation:XValidation:rule="!has(self.retryInterval) || self.name != 'RemediateOnFailure'", message=".retryInterval cannot be set when .name is 'RemediateOnFailure'" +type InstallStrategy struct { + // Name of the install strategy. + // +kubebuilder:validation:Enum=RemediateOnFailure;RetryOnFailure + // +required + Name string `json:"name"` + + // RetryInterval is the interval at which to retry a failed install. + // Can be used only when Name is set to RetryOnFailure. + // Defaults to '5m'. + // +kubebuilder:validation:Type=string + // +kubebuilder:validation:Pattern="^([0-9]+(\\.[0-9]+)?(ms|s|m|h))+$" + // +optional + RetryInterval *metav1.Duration `json:"retryInterval,omitempty"` +} + +// GetRetryInterval returns the configured retry interval for the Helm install +// action, or the default. +func (in InstallStrategy) GetRetryInterval() time.Duration { + if in.RetryInterval == nil { + return 5 * time.Minute + } + return in.RetryInterval.Duration +} + // InstallRemediation holds the configuration for Helm install remediation. type InstallRemediation struct { // Retries is the number of retries that should be attempted on failures before @@ -631,6 +685,11 @@ type Upgrade struct { // +optional Timeout *metav1.Duration `json:"timeout,omitempty"` + // Strategy defines the upgrade strategy to use for this HelmRelease. + // Defaults to 'RemediateOnFailure'. + // +optional + Strategy *UpgradeStrategy `json:"strategy,omitempty"` + // Remediation holds the remediation configuration for when the Helm upgrade // action for the HelmRelease fails. The default is to not perform any action. // +optional @@ -719,6 +778,41 @@ func (in Upgrade) GetRemediation() Remediation { return *in.Remediation } +// GetRetry returns the configured retry strategy for the Helm upgrade +// action. +func (in Upgrade) GetRetry() Retry { + if in.Strategy == nil || in.Strategy.Name != string(ActionStrategyRetryOnFailure) { + return nil + } + return in.Strategy +} + +// UpgradeStrategy holds the configuration for Helm upgrade strategy. +// +kubebuilder:validation:XValidation:rule="!has(self.retryInterval) || self.name == 'RetryOnFailure'", message=".retryInterval can only be set when .name is 'RetryOnFailure'" +type UpgradeStrategy struct { + // Name of the upgrade strategy. + // +kubebuilder:validation:Enum=RemediateOnFailure;RetryOnFailure + // +required + Name string `json:"name"` + + // RetryInterval is the interval at which to retry a failed upgrade. + // Can be used only when Name is set to RetryOnFailure. + // Defaults to '5m'. + // +kubebuilder:validation:Type=string + // +kubebuilder:validation:Pattern="^([0-9]+(\\.[0-9]+)?(ms|s|m|h))+$" + // +optional + RetryInterval *metav1.Duration `json:"retryInterval,omitempty"` +} + +// GetRetryInterval returns the configured retry interval for the Helm upgrade +// action, or the default. 
+func (in UpgradeStrategy) GetRetryInterval() time.Duration { + if in.RetryInterval == nil { + return 5 * time.Minute + } + return in.RetryInterval.Duration +} + // UpgradeRemediation holds the configuration for Helm upgrade remediation. type UpgradeRemediation struct { // Retries is the number of retries that should be attempted on failures before @@ -791,6 +885,19 @@ func (in UpgradeRemediation) RetriesExhausted(hr *HelmRelease) bool { return in.Retries >= 0 && in.GetFailureCount(hr) > int64(in.Retries) } +// ActionStrategyName is a valid name for an action strategy. +type ActionStrategyName string + +const ( + // ActionStrategyRemediateOnFailure is the action strategy name for + // remediate on failure. + ActionStrategyRemediateOnFailure ActionStrategyName = "RemediateOnFailure" + + // ActionStrategyRetryOnFailure is the action strategy name for retry on + // failure. + ActionStrategyRetryOnFailure ActionStrategyName = "RetryOnFailure" +) + // RemediationStrategy returns the strategy to use to remediate a failed install // or upgrade. type RemediationStrategy string @@ -1012,7 +1119,8 @@ type HelmReleaseStatus struct { History Snapshots `json:"history,omitempty"` // LastAttemptedReleaseAction is the last release action performed for this - // HelmRelease. It is used to determine the active remediation strategy. + // HelmRelease. It is used to determine the active retry or remediation + // strategy. // +kubebuilder:validation:Enum=install;upgrade // +optional LastAttemptedReleaseAction ReleaseAction `json:"lastAttemptedReleaseAction,omitempty"` @@ -1195,6 +1303,19 @@ func (in HelmRelease) GetActiveRemediation() Remediation { } } +// GetActiveRetry returns the active retry configuration for the +// HelmRelease. +func (in HelmRelease) GetActiveRetry() Retry { + switch in.Status.LastAttemptedReleaseAction { + case ReleaseActionInstall: + return in.GetInstall().GetRetry() + case ReleaseActionUpgrade: + return in.GetUpgrade().GetRetry() + default: + return nil + } +} + // GetRequeueAfter returns the duration after which the HelmRelease // must be reconciled again. func (in HelmRelease) GetRequeueAfter() time.Duration { diff --git a/api/v2/zz_generated.deepcopy.go b/api/v2/zz_generated.deepcopy.go index c9f2b5076..57d818d3a 100644 --- a/api/v2/zz_generated.deepcopy.go +++ b/api/v2/zz_generated.deepcopy.go @@ -475,6 +475,11 @@ func (in *Install) DeepCopyInto(out *Install) { *out = new(v1.Duration) **out = **in } + if in.Strategy != nil { + in, out := &in.Strategy, &out.Strategy + *out = new(InstallStrategy) + (*in).DeepCopyInto(*out) + } if in.Remediation != nil { in, out := &in.Remediation, &out.Remediation *out = new(InstallRemediation) @@ -517,6 +522,26 @@ func (in *InstallRemediation) DeepCopy() *InstallRemediation { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InstallStrategy) DeepCopyInto(out *InstallStrategy) { + *out = *in + if in.RetryInterval != nil { + in, out := &in.RetryInterval, &out.RetryInterval + *out = new(v1.Duration) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InstallStrategy. +func (in *InstallStrategy) DeepCopy() *InstallStrategy { + if in == nil { + return nil + } + out := new(InstallStrategy) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *Kustomize) DeepCopyInto(out *Kustomize) { *out = *in @@ -726,6 +751,11 @@ func (in *Upgrade) DeepCopyInto(out *Upgrade) { *out = new(v1.Duration) **out = **in } + if in.Strategy != nil { + in, out := &in.Strategy, &out.Strategy + *out = new(UpgradeStrategy) + (*in).DeepCopyInto(*out) + } if in.Remediation != nil { in, out := &in.Remediation, &out.Remediation *out = new(UpgradeRemediation) @@ -772,3 +802,23 @@ func (in *UpgradeRemediation) DeepCopy() *UpgradeRemediation { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *UpgradeStrategy) DeepCopyInto(out *UpgradeStrategy) { + *out = *in + if in.RetryInterval != nil { + in, out := &in.RetryInterval, &out.RetryInterval + *out = new(v1.Duration) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UpgradeStrategy. +func (in *UpgradeStrategy) DeepCopy() *UpgradeStrategy { + if in == nil { + return nil + } + out := new(UpgradeStrategy) + in.DeepCopyInto(out) + return out +} diff --git a/config/crd/bases/helm.toolkit.fluxcd.io_helmreleases.yaml b/config/crd/bases/helm.toolkit.fluxcd.io_helmreleases.yaml index 5c04b97b9..91a406347 100644 --- a/config/crd/bases/helm.toolkit.fluxcd.io_helmreleases.yaml +++ b/config/crd/bases/helm.toolkit.fluxcd.io_helmreleases.yaml @@ -445,6 +445,30 @@ spec: Deprecated use CRD policy (`crds`) attribute with value `Skip` instead. type: boolean + strategy: + description: |- + Strategy defines the install strategy to use for this HelmRelease. + Defaults to 'RemediateOnFailure'. + properties: + name: + description: Name of the install strategy. + enum: + - RemediateOnFailure + - RetryOnFailure + type: string + retryInterval: + description: |- + RetryInterval is the interval at which to retry a failed install. + Can be used only when Name is set to RetryOnFailure. + Defaults to '5m'. + pattern: ^([0-9]+(\.[0-9]+)?(ms|s|m|h))+$ + type: string + required: + - name + type: object + x-kubernetes-validations: + - message: .retryInterval cannot be set when .name is 'RemediateOnFailure' + rule: '!has(self.retryInterval) || self.name != ''RemediateOnFailure''' timeout: description: |- Timeout is the time to wait for any individual Kubernetes operation (like @@ -912,6 +936,30 @@ spec: - uninstall type: string type: object + strategy: + description: |- + Strategy defines the upgrade strategy to use for this HelmRelease. + Defaults to 'RemediateOnFailure'. + properties: + name: + description: Name of the upgrade strategy. + enum: + - RemediateOnFailure + - RetryOnFailure + type: string + retryInterval: + description: |- + RetryInterval is the interval at which to retry a failed upgrade. + Can be used only when Name is set to RetryOnFailure. + Defaults to '5m'. + pattern: ^([0-9]+(\.[0-9]+)?(ms|s|m|h))+$ + type: string + required: + - name + type: object + x-kubernetes-validations: + - message: .retryInterval can only be set when .name is 'RetryOnFailure' + rule: '!has(self.retryInterval) || self.name == ''RetryOnFailure''' timeout: description: |- Timeout is the time to wait for any individual Kubernetes operation (like @@ -1178,7 +1226,8 @@ spec: lastAttemptedReleaseAction: description: |- LastAttemptedReleaseAction is the last release action performed for this - HelmRelease. It is used to determine the active remediation strategy. + HelmRelease. It is used to determine the active retry or remediation + strategy. 
enum: - install - upgrade diff --git a/docs/api/v2/helm.md b/docs/api/v2/helm.md index 7546dc581..1c2abf3c8 100644 --- a/docs/api/v2/helm.md +++ b/docs/api/v2/helm.md @@ -424,6 +424,9 @@ HelmReleaseStatus +
ActionStrategyName (string alias)
+
+ActionStrategyName is a valid name for an action strategy.
+
 CRDsPolicy (string alias)

@@ -1676,7 +1679,8 @@ ReleaseAction
 (Optional)
 LastAttemptedReleaseAction is the last release action performed for this
-HelmRelease. It is used to determine the active remediation strategy.
+HelmRelease. It is used to determine the active retry or remediation
+strategy.

@@ -1934,6 +1938,21 @@ Jobs for hooks) during the performance of a Helm install action. Defaults to
+strategy (InstallStrategy)
+(Optional)
+Strategy defines the install strategy to use for this HelmRelease.
+Defaults to ‘RemediateOnFailure’.
 remediation

@@ -2156,6 +2175,54 @@ no retries remain. Defaults to ‘false’.
+InstallStrategy
+
+(Appears on: Install)
+
+InstallStrategy holds the configuration for Helm install strategy.
+
+name (string)
+Name of the install strategy.
+
+retryInterval (Kubernetes meta/v1.Duration)
+(Optional)
+RetryInterval is the interval at which to retry a failed install.
+Can be used only when Name is set to RetryOnFailure.
+Defaults to ‘5m’.
 Kustomize

@@ -2262,6 +2329,10 @@ UpgradeRemediation
 RemediationStrategy returns the strategy to use to remediate a failed install
 or upgrade.
+
+Retry
+
+Retry defines a consistent interface for retry strategies from
+InstallStrategy and UpgradeStrategy.
 Rollback

@@ -2585,6 +2656,10 @@ HelmReleaseStatus
 Snapshots is a list of Snapshot objects.
+
+Strategy
+
+Strategy defines a consistent interface for InstallStrategy and
+UpgradeStrategy.
 Test

@@ -2848,6 +2923,21 @@ Jobs for hooks) during the performance of a Helm upgrade action. Defaults to
+strategy (UpgradeStrategy)
+(Optional)
+Strategy defines the upgrade strategy to use for this HelmRelease.
+Defaults to ‘RemediateOnFailure’.
 remediation

@@ -3081,6 +3171,54 @@ RemediationStrategy
+UpgradeStrategy
+
+(Appears on: Upgrade)
+
+UpgradeStrategy holds the configuration for Helm upgrade strategy.
+
+name (string)
+Name of the upgrade strategy.
+
+retryInterval (Kubernetes meta/v1.Duration)
+(Optional)
+RetryInterval is the interval at which to retry a failed upgrade.
+Can be used only when Name is set to RetryOnFailure.
+Defaults to ‘5m’.
This page was automatically generated with gen-crd-api-reference-docs
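For reference, the `name` and `retryInterval` fields described above admit only two shapes; the fragments below are an illustrative sketch of what would sit under `.spec.install` or `.spec.upgrade` (values are examples only):

```yaml
# Explicit default: remediation rules apply; retryInterval may not be set.
strategy:
  name: RemediateOnFailure
```

```yaml
# Retry the failed action on an interval instead of remediating.
strategy:
  name: RetryOnFailure
  retryInterval: 10m # optional, defaults to 5m
```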
diff --git a/docs/spec/v2/helmreleases.md b/docs/spec/v2/helmreleases.md index 4f3f769de..8e2093dbe 100644 --- a/docs/spec/v2/helmreleases.md +++ b/docs/spec/v2/helmreleases.md @@ -555,6 +555,31 @@ The field offers the following subfields: - `.disableWaitForJobs` (Optional): Disables waiting for any Jobs to complete after the installation of the chart. Defaults to `false`. +#### Install strategy + +`.spec.install.strategy` is an optional field to specify the strategy +to use when running a Helm install action. + +The field offers the following subfields: + +- `.name` (Required): The name of the install strategy to use. One of + `RemediateOnFailure` or `RetryOnFailure`. + If the `.spec.install.strategy` field is not specified, the HelmRelease + reconciliation behaves as if `.spec.install.strategy.name` was set to + `RemediateOnFailure`. +- `.retryInterval` (Optional): The time to wait between retries of failed + releases when the install strategy is set to `RetryOnFailure`. Defaults + to `5m`. Cannot be used with `RemediateOnFailure`. + +The default `RemediateOnFailure` strategy applies the rules defined by the +`.spec.install.remediation` field to the install action, i.e. the same +behavior of the controller prior to the introduction of the `RetryOnFailure` +strategy. + +The `RetryOnFailure` strategy will retry a failed install with an upgrade +after the interval defined by the `.spec.install.strategy.retryInterval` +field. + #### Install remediation `.spec.install.remediation` is an optional field to configure the remediation @@ -606,6 +631,30 @@ The field offers the following subfields: last release while merging in overrides from [values](#values). Setting this flag makes the HelmRelease non-declarative. Defaults to `false`. +#### Upgrade strategy + +`.spec.upgrade.strategy` is an optional field to specify the strategy +to use when running a Helm upgrade action. + +The field offers the following subfields: + +- `.name` (Required): The name of the upgrade strategy to use. One of + `RemediateOnFailure` or `RetryOnFailure`. If the `.spec.upgrade.strategy` + field is not specified, the HelmRelease reconciliation behaves as if + `.spec.upgrade.strategy.name` was set to `RemediateOnFailure`. +- `.retryInterval` (Optional): The time to wait between retries of failed + releases when the upgrade strategy is set to `RetryOnFailure`. Defaults + to `5m`. Cannot be used with `RemediateOnFailure`. + +The default `RemediateOnFailure` strategy applies the rules defined by the +`.spec.upgrade.remediation` field to the upgrade action, i.e. the same +behavior of the controller prior to the introduction of the `RetryOnFailure` +strategy. + +The `RetryOnFailure` strategy will retry failed upgrades in a regular +interval defined by the `.spec.upgrade.strategy.retryInterval` field, +without applying any remediation. 
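To tie the install and upgrade strategy sections above together, a minimal sketch of a HelmRelease using `RetryOnFailure` could look like this (the chart, names, and intervals are illustrative only):

```yaml
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: podinfo
  namespace: default
spec:
  interval: 10m
  chart:
    spec:
      chart: podinfo
      sourceRef:
        kind: HelmRepository
        name: podinfo
  install:
    strategy:
      name: RetryOnFailure
      retryInterval: 2m
  upgrade:
    strategy:
      name: RetryOnFailure
      retryInterval: 2m
```

With this configuration a failed install or upgrade is retried roughly every two minutes (the controller adds jitter to the requeue interval), instead of being rolled back or uninstalled by the remediation rules.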
+ #### Upgrade remediation `.spec.upgrade.remediation` is an optional field to configure the remediation diff --git a/internal/controller/helmrelease_controller.go b/internal/controller/helmrelease_controller.go index 519038ebb..e29739706 100644 --- a/internal/controller/helmrelease_controller.go +++ b/internal/controller/helmrelease_controller.go @@ -392,10 +392,12 @@ func (r *HelmReleaseReconciler) reconcileRelease(ctx context.Context, patchHelpe Chart: loadedChart, Values: values, }); err != nil { - if errors.Is(err, intreconcile.ErrMustRequeue) { + switch { + case errors.Is(err, intreconcile.ErrRetryAfterInterval): + return jitter.JitteredRequeueInterval(ctrl.Result{RequeueAfter: obj.GetActiveRetry().GetRetryInterval()}), nil + case errors.Is(err, intreconcile.ErrMustRequeue): return ctrl.Result{Requeue: true}, nil - } - if interrors.IsOneOf(err, intreconcile.ErrExceededMaxRetries, intreconcile.ErrMissingRollbackTarget) { + case interrors.IsOneOf(err, intreconcile.ErrExceededMaxRetries, intreconcile.ErrMissingRollbackTarget): err = reconcile.TerminalError(err) } return ctrl.Result{}, err diff --git a/internal/controller/helmrelease_controller_test.go b/internal/controller/helmrelease_controller_test.go index 8d24da9f1..b571e74ff 100644 --- a/internal/controller/helmrelease_controller_test.go +++ b/internal/controller/helmrelease_controller_test.go @@ -663,6 +663,92 @@ func TestHelmReleaseReconciler_reconcileRelease(t *testing.T) { g.Expect(obj.Status.Failures).To(Equal(int64(1))) }) + t.Run("uses retry interval when the error ErrRetryAfterInterval is returned", func(t *testing.T) { + g := NewWithT(t) + + chartMock := testutil.BuildChart() + chartArtifact, err := testutil.SaveChartAsArtifact(chartMock, digest.SHA256, testServer.URL(), testServer.Root()) + g.Expect(err).ToNot(HaveOccurred()) + + chart := &sourcev1.HelmChart{ + ObjectMeta: metav1.ObjectMeta{ + Name: "chart", + Namespace: "mock", + Generation: 1, + }, + Status: sourcev1.HelmChartStatus{ + ObservedGeneration: 1, + Artifact: chartArtifact, + Conditions: []metav1.Condition{ + { + Type: meta.ReadyCondition, + Status: metav1.ConditionTrue, + }, + }, + }, + } + + obj := &v2.HelmRelease{ + ObjectMeta: metav1.ObjectMeta{ + Name: "release", + Namespace: "mock", + Generation: 2, + }, + Spec: v2.HelmReleaseSpec{ + // Trigger a failure by setting an invalid storage namespace, + // preventing the release from actually being installed. + // This allows us to just test the failure count reset, without + // having to facilitate a full install. + StorageNamespace: "not-exist", + Install: &v2.Install{ + Strategy: &v2.InstallStrategy{ + Name: "RetryOnFailure", + RetryInterval: &metav1.Duration{Duration: time.Minute}, + }, + }, + }, + Status: v2.HelmReleaseStatus{ + HelmChart: "mock/chart", + InstallFailures: 2, + UpgradeFailures: 3, + Failures: 5, + LastAttemptedGeneration: 2, + }, + } + + c := fake.NewClientBuilder(). + WithScheme(NewTestScheme()). + WithStatusSubresource(&v2.HelmRelease{}). + WithObjects(chart, obj). 
+ Build() + + r := &HelmReleaseReconciler{ + Client: c, + APIReader: c, + GetClusterConfig: GetTestClusterConfig, + EventRecorder: record.NewFakeRecorder(32), + } + + res, err := r.reconcileRelease(context.TODO(), patch.NewSerialPatcher(obj, c), obj) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(res.RequeueAfter).To(BeNumerically("==", time.Minute)) + + g.Expect(obj.Status.Conditions).To(conditions.MatchConditions([]metav1.Condition{ + { + Type: "Ready", + Status: "False", + Reason: "RetryAfterInterval", + Message: "Will retry after 1m0s", + }, + { + Type: "Released", + Status: "False", + Reason: "InstallFailed", + Message: "Helm install failed for release mock/release with chart hello@0.1.0: create: failed to create: namespaces \"not-exist\" not found", + }, + })) + }) + t.Run("sets last attempted values", func(t *testing.T) { g := NewWithT(t) diff --git a/internal/reconcile/atomic_release.go b/internal/reconcile/atomic_release.go index 07eacf2c7..1399c4786 100644 --- a/internal/reconcile/atomic_release.go +++ b/internal/reconcile/atomic_release.go @@ -58,6 +58,11 @@ var ( // attempts for the provided release config. ErrExceededMaxRetries = errors.New("exceeded maximum retries") + // ErrRetryAfterInterval is returned when the action strategy is RetryOnFailure + // and the current AtomicRelease has already reconciled at least one action, + // in which case the action must be retried after the configured retry interval. + ErrRetryAfterInterval = errors.New("retry after interval") + // ErrMustRequeue is returned when the caller must requeue the object // to continue the reconciliation process. ErrMustRequeue = errors.New("must requeue") @@ -235,6 +240,13 @@ func (r *AtomicRelease) Reconcile(ctx context.Context, req *Request) error { fmt.Sprintf("instructed to stop before running %s action reconciler %s", next.Type(), next.Name()), ) + if retry := req.Object.GetActiveRetry(); retry != nil { + conditions.Delete(req.Object, meta.ReconcilingCondition) + conditions.MarkFalse(req.Object, meta.ReadyCondition, "RetryAfterInterval", + "Will retry after %s", retry.GetRetryInterval().String()) + return ErrRetryAfterInterval + } + if remediation := req.Object.GetActiveRemediation(); remediation == nil || !remediation.RetriesExhausted(req.Object) { conditions.MarkReconciling(req.Object, meta.ProgressingWithRetryReason, "%s", conditions.GetMessage(req.Object, meta.ReadyCondition)) return ErrMustRequeue @@ -266,6 +278,14 @@ func (r *AtomicRelease) Reconcile(ctx context.Context, req *Request) error { // Run the action sub-reconciler. 
log.Info(fmt.Sprintf("running '%s' action with timeout of %s", next.Name(), timeoutForAction(next, req.Object).String())) if err = next.Reconcile(ctx, req); err != nil { + if retry := req.Object.GetActiveRetry(); retry != nil { + log.Error(err, fmt.Sprintf("failed to run '%s' action", next.Name())) + conditions.Delete(req.Object, meta.ReconcilingCondition) + conditions.MarkFalse(req.Object, meta.ReadyCondition, "RetryAfterInterval", + "Will retry after %s", retry.GetRetryInterval().String()) + return ErrRetryAfterInterval + } + if conditions.IsReady(req.Object) { conditions.MarkFalse(req.Object, meta.ReadyCondition, "ReconcileError", "%s", err) } @@ -278,6 +298,13 @@ func (r *AtomicRelease) Reconcile(ctx context.Context, req *Request) error { "instructed to stop after running %s action reconciler %s", next.Type(), next.Name()), ) + if retry := req.Object.GetActiveRetry(); retry != nil { + conditions.Delete(req.Object, meta.ReconcilingCondition) + conditions.MarkFalse(req.Object, meta.ReadyCondition, "RetryAfterInterval", + "Will retry after %s", retry.GetRetryInterval().String()) + return ErrRetryAfterInterval + } + remediation := req.Object.GetActiveRemediation() if remediation == nil || !remediation.RetriesExhausted(req.Object) { conditions.MarkReconciling(req.Object, meta.ProgressingWithRetryReason, "%s", conditions.GetMessage(req.Object, meta.ReadyCondition)) @@ -429,6 +456,13 @@ func (r *AtomicRelease) actionForState(ctx context.Context, req *Request, state case ReleaseStatusFailed: log.Info(msgWithReason("release is in a failed state", state.Reason)) + // If the action strategy is to retry (and not remediate), we behave just like + // "flux reconcile hr --force" and .spec..remediation.retries set to 0. + if req.Object.GetActiveRetry() != nil { + log.V(logger.DebugLevel).Info("retrying upgrade for failed release") + return NewUpgrade(r.configFactory, r.eventRecorder), nil + } + remediation := req.Object.GetActiveRemediation() // If there is no active remediation strategy, we can only attempt to diff --git a/internal/reconcile/atomic_release_test.go b/internal/reconcile/atomic_release_test.go index fafee08ff..544cf8dac 100644 --- a/internal/reconcile/atomic_release_test.go +++ b/internal/reconcile/atomic_release_test.go @@ -633,6 +633,27 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) { }, wantErr: ErrExceededMaxRetries, }, + { + name: "install failure with retry", + spec: func(spec *v2.HelmReleaseSpec) { + spec.Install = &v2.Install{ + Strategy: &v2.InstallStrategy{ + Name: "RetryOnFailure", + RetryInterval: &metav1.Duration{Duration: time.Minute}, + }, + } + spec.Uninstall = &v2.Uninstall{ + KeepHistory: true, + } + }, + chart: testutil.BuildChart(testutil.ChartWithFailingHook()), + expectHistory: func(releases []*helmrelease.Release) v2.Snapshots { + return v2.Snapshots{ + release.ObservedToSnapshot(release.ObserveRelease(releases[0])), + } + }, + wantErr: ErrRetryAfterInterval, + }, { name: "install test failure with remediation", spec: func(spec *v2.HelmReleaseSpec) { @@ -659,6 +680,33 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) { }, wantErr: ErrExceededMaxRetries, }, + { + name: "install test failure with retry", + spec: func(spec *v2.HelmReleaseSpec) { + spec.Install = &v2.Install{ + Strategy: &v2.InstallStrategy{ + Name: "RetryOnFailure", + RetryInterval: &metav1.Duration{Duration: time.Minute}, + }, + } + spec.Test = &v2.Test{ + Enable: true, + } + spec.Uninstall = &v2.Uninstall{ + KeepHistory: true, + } + }, + chart: 
testutil.BuildChart(testutil.ChartWithFailingTestHook()), + expectHistory: func(releases []*helmrelease.Release) v2.Snapshots { + snap := release.ObservedToSnapshot(release.ObserveRelease(releases[0])) + snap.SetTestHooks(release.TestHooksFromRelease(releases[0])) + + return v2.Snapshots{ + snap, + } + }, + wantErr: ErrRetryAfterInterval, + }, { name: "install test failure with test ignore", spec: func(spec *v2.HelmReleaseSpec) { @@ -806,6 +854,43 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) { }, wantErr: ErrExceededMaxRetries, }, + { + name: "upgrade failure with retry", + releases: func(namespace string) []*helmrelease.Release { + return []*helmrelease.Release{ + testutil.BuildRelease(&helmrelease.MockReleaseOptions{ + Name: mockReleaseName, + Namespace: namespace, + Version: 1, + Chart: testutil.BuildChart(), + Status: helmrelease.StatusDeployed, + }), + } + }, + spec: func(spec *v2.HelmReleaseSpec) { + spec.Upgrade = &v2.Upgrade{ + Strategy: &v2.UpgradeStrategy{ + Name: "RetryOnFailure", + RetryInterval: &metav1.Duration{Duration: time.Minute}, + }, + } + }, + status: func(namespace string, releases []*helmrelease.Release) v2.HelmReleaseStatus { + return v2.HelmReleaseStatus{ + History: v2.Snapshots{ + release.ObservedToSnapshot(release.ObserveRelease(releases[0])), + }, + } + }, + chart: testutil.BuildChart(testutil.ChartWithFailingHook()), + expectHistory: func(releases []*helmrelease.Release) v2.Snapshots { + return v2.Snapshots{ + release.ObservedToSnapshot(release.ObserveRelease(releases[1])), + release.ObservedToSnapshot(release.ObserveRelease(releases[0])), + } + }, + wantErr: ErrRetryAfterInterval, + }, { name: "upgrade test failure with remediation", releases: func(namespace string) []*helmrelease.Release { @@ -849,6 +934,49 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) { }, wantErr: ErrExceededMaxRetries, }, + { + name: "upgrade test failure with retry", + releases: func(namespace string) []*helmrelease.Release { + return []*helmrelease.Release{ + testutil.BuildRelease(&helmrelease.MockReleaseOptions{ + Name: mockReleaseName, + Namespace: namespace, + Version: 1, + Chart: testutil.BuildChart(), + Status: helmrelease.StatusDeployed, + }), + } + }, + spec: func(spec *v2.HelmReleaseSpec) { + spec.Upgrade = &v2.Upgrade{ + Strategy: &v2.UpgradeStrategy{ + Name: "RetryOnFailure", + RetryInterval: &metav1.Duration{Duration: time.Minute}, + }, + } + spec.Test = &v2.Test{ + Enable: true, + } + }, + status: func(namespace string, releases []*helmrelease.Release) v2.HelmReleaseStatus { + return v2.HelmReleaseStatus{ + History: v2.Snapshots{ + release.ObservedToSnapshot(release.ObserveRelease(releases[0])), + }, + } + }, + chart: testutil.BuildChart(testutil.ChartWithFailingTestHook()), + expectHistory: func(releases []*helmrelease.Release) v2.Snapshots { + testedSnap := release.ObservedToSnapshot(release.ObserveRelease(releases[1])) + testedSnap.SetTestHooks(release.TestHooksFromRelease(releases[1])) + + return v2.Snapshots{ + testedSnap, + release.ObservedToSnapshot(release.ObserveRelease(releases[0])), + } + }, + wantErr: ErrRetryAfterInterval, + }, { name: "upgrade test failure with test ignore", releases: func(namespace string) []*helmrelease.Release { @@ -984,6 +1112,55 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) { chart: testutil.BuildChart(), wantErr: ErrExceededMaxRetries, }, + { + name: "upgrade retry does not result in exhausted retries", + releases: func(namespace string) []*helmrelease.Release { + return []*helmrelease.Release{ + 
testutil.BuildRelease(&helmrelease.MockReleaseOptions{ + Name: mockReleaseName, + Namespace: namespace, + Version: 1, + Chart: testutil.BuildChart(), + Status: helmrelease.StatusSuperseded, + }), + testutil.BuildRelease(&helmrelease.MockReleaseOptions{ + Name: mockReleaseName, + Namespace: namespace, + Version: 2, + Chart: testutil.BuildChart(), + Status: helmrelease.StatusSuperseded, + }), + testutil.BuildRelease(&helmrelease.MockReleaseOptions{ + Name: mockReleaseName, + Namespace: namespace, + Version: 3, + Chart: testutil.BuildChart(), + Status: helmrelease.StatusFailed, + }), + } + }, + spec: func(spec *v2.HelmReleaseSpec) { + spec.Upgrade = &v2.Upgrade{ + Strategy: &v2.UpgradeStrategy{ + Name: "RetryOnFailure", + RetryInterval: &metav1.Duration{Duration: time.Minute}, + }, + } + }, + status: func(namespace string, releases []*helmrelease.Release) v2.HelmReleaseStatus { + return v2.HelmReleaseStatus{ + History: v2.Snapshots{ + release.ObservedToSnapshot(release.ObserveRelease(releases[2])), + release.ObservedToSnapshot(release.ObserveRelease(releases[1])), + release.ObservedToSnapshot(release.ObserveRelease(releases[0])), + }, + LastAttemptedReleaseAction: v2.ReleaseActionUpgrade, + Failures: 2, + UpgradeFailures: 2, + } + }, + chart: testutil.BuildChart(), + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -1632,6 +1809,24 @@ func TestAtomicRelease_actionForState(t *testing.T) { }, want: &UninstallRemediation{}, }, + { + name: "failed release with active install retry triggers upgrade", + state: ReleaseState{Status: ReleaseStatusFailed}, + spec: func(spec *v2.HelmReleaseSpec) { + spec.Install = &v2.Install{ + Strategy: &v2.InstallStrategy{ + Name: "RetryOnFailure", + }, + } + }, + status: func(releases []*helmrelease.Release) v2.HelmReleaseStatus { + return v2.HelmReleaseStatus{ + LastAttemptedReleaseAction: v2.ReleaseActionInstall, + InstallFailures: 2, + } + }, + want: &Upgrade{}, + }, { name: "failed release with active upgrade remediation triggers rollback", state: ReleaseState{Status: ReleaseStatusFailed}, @@ -1670,6 +1865,44 @@ func TestAtomicRelease_actionForState(t *testing.T) { }, want: &RollbackRemediation{}, }, + { + name: "failed release with active upgrade retry triggers upgrade", + state: ReleaseState{Status: ReleaseStatusFailed}, + releases: []*helmrelease.Release{ + testutil.BuildRelease(&helmrelease.MockReleaseOptions{ + Name: mockReleaseName, + Namespace: mockReleaseNamespace, + Version: 1, + Status: helmrelease.StatusSuperseded, + Chart: testutil.BuildChart(), + }), + testutil.BuildRelease(&helmrelease.MockReleaseOptions{ + Name: mockReleaseName, + Namespace: mockReleaseNamespace, + Version: 2, + Status: helmrelease.StatusFailed, + Chart: testutil.BuildChart(), + }), + }, + spec: func(spec *v2.HelmReleaseSpec) { + spec.Upgrade = &v2.Upgrade{ + Strategy: &v2.UpgradeStrategy{ + Name: "RetryOnFailure", + }, + } + }, + status: func(releases []*helmrelease.Release) v2.HelmReleaseStatus { + return v2.HelmReleaseStatus{ + History: v2.Snapshots{ + release.ObservedToSnapshot(release.ObserveRelease(releases[1])), + release.ObservedToSnapshot(release.ObserveRelease(releases[0])), + }, + LastAttemptedReleaseAction: v2.ReleaseActionUpgrade, + UpgradeFailures: 1, + } + }, + want: &Upgrade{}, + }, { name: "failed release with active upgrade remediation and no previous release triggers error", state: ReleaseState{Status: ReleaseStatusFailed}, diff --git a/internal/reconcile/install.go b/internal/reconcile/install.go index 1dbfc827a..cbd3845ed 100644 --- 
a/internal/reconcile/install.go +++ b/internal/reconcile/install.go @@ -185,6 +185,12 @@ func (r *Install) success(req *Request) { cur.FullReleaseName(), cur.VersionedChartName()) } + // Failures are only relevant while the release is failed + // when a retry strategy is configured. + if req.Object.GetInstall().GetRetry() != nil { + req.Object.Status.ClearFailures() + } + // Record event. r.eventRecorder.AnnotatedEventf( req.Object, diff --git a/internal/reconcile/install_test.go b/internal/reconcile/install_test.go index d10962473..5f349b1fb 100644 --- a/internal/reconcile/install_test.go +++ b/internal/reconcile/install_test.go @@ -564,6 +564,32 @@ func TestInstall_success(t *testing.T) { })) }) + t.Run("clears failures if retry strategy is configured", func(t *testing.T) { + g := NewWithT(t) + + recorder := testutil.NewFakeRecorder(10, false) + r := &Install{ + eventRecorder: recorder, + } + + req := &Request{ + Object: obj.DeepCopy(), + } + req.Object.Spec.Install = &v2.Install{ + Strategy: &v2.InstallStrategy{ + Name: "RetryOnFailure", + }, + } + req.Object.Status.Failures = 3 + req.Object.Status.InstallFailures = 3 + req.Object.Status.UpgradeFailures = 3 + r.success(req) + + g.Expect(req.Object.Status.Failures).To(BeZero()) + g.Expect(req.Object.Status.InstallFailures).To(BeZero()) + g.Expect(req.Object.Status.UpgradeFailures).To(BeZero()) + }) + t.Run("records success with TestSuccess=False", func(t *testing.T) { g := NewWithT(t) diff --git a/internal/reconcile/upgrade.go b/internal/reconcile/upgrade.go index 7b29da4a4..ea4f580ef 100644 --- a/internal/reconcile/upgrade.go +++ b/internal/reconcile/upgrade.go @@ -175,6 +175,12 @@ func (r *Upgrade) success(req *Request) { cur.FullReleaseName(), cur.VersionedChartName()) } + // Failures are only relevant while the release is failed + // when a retry strategy is configured. + if req.Object.GetUpgrade().GetRetry() != nil { + req.Object.Status.ClearFailures() + } + // Record event. r.eventRecorder.AnnotatedEventf( req.Object, diff --git a/internal/reconcile/upgrade_test.go b/internal/reconcile/upgrade_test.go index 0133c572f..47ab11f08 100644 --- a/internal/reconcile/upgrade_test.go +++ b/internal/reconcile/upgrade_test.go @@ -724,6 +724,32 @@ func TestUpgrade_success(t *testing.T) { })) }) + t.Run("clears failures if retry strategy is configured", func(t *testing.T) { + g := NewWithT(t) + + recorder := testutil.NewFakeRecorder(10, false) + r := &Upgrade{ + eventRecorder: recorder, + } + + req := &Request{ + Object: obj.DeepCopy(), + } + req.Object.Spec.Upgrade = &v2.Upgrade{ + Strategy: &v2.UpgradeStrategy{ + Name: "RetryOnFailure", + }, + } + req.Object.Status.Failures = 3 + req.Object.Status.InstallFailures = 3 + req.Object.Status.UpgradeFailures = 3 + r.success(req) + + g.Expect(req.Object.Status.Failures).To(BeZero()) + g.Expect(req.Object.Status.InstallFailures).To(BeZero()) + g.Expect(req.Object.Status.UpgradeFailures).To(BeZero()) + }) + t.Run("records success with TestSuccess=False", func(t *testing.T) { g := NewWithT(t)