@@ -4,6 +4,7 @@ import is.hail.services.requests.ClientResponseException
44import is .hail .shadedazure .com .azure .storage .common .implementation .Constants
55import is .hail .utils ._
66
7+ import scala .annotation .tailrec
78import scala .util .Random
89
910import java .io ._
@@ -30,31 +31,24 @@ package object services {
3031
// Upper bound on the exponent used for the backoff multiplier, i.e. the multiplier never
// exceeds 2^30.
private[this] val LOG_2_MAX_MULTIPLIER =
  30 // do not set larger than 30 due to integer overflow calculating multiplier
// Default cap, in milliseconds, on the delay ceiling for a single retry.
private[this] val DEFAULT_MAX_DELAY_MS = 60000L
// Default delay ceiling, in milliseconds, for the first retry (tries == 0).
private[this] val DEFAULT_BASE_DELAY_MS = 1000L

/** Computes how long to wait, in milliseconds, before the next retry attempt.
  *
  * The delay ceiling doubles with every try (`baseDelayMs * 2^tries`) until it reaches
  * `maxDelayMs`; the returned delay is drawn uniformly from the upper half of that ceiling,
  * i.e. from `[ceiling / 2, ceiling]`.
  *
  * All arithmetic is done on `Long` values: the ceiling is never narrowed to `Int`, and the
  * product `baseDelayMs * 2^tries` saturates at `Long.MaxValue` instead of wrapping around.
  *
  * @param tries
  *   number of attempts made so far; values outside `[0, LOG_2_MAX_MULTIPLIER]` are clamped
  *   into that range
  * @param baseDelayMs
  *   delay ceiling for `tries == 0`; negative values are treated as 0
  * @param maxDelayMs
  *   largest delay ceiling ever used; negative values are treated as 0
  * @return
  *   a non-negative delay in milliseconds, never larger than `maxDelayMs`
  */
def delayMsForTry(
  tries: Int,
  baseDelayMs: Long = DEFAULT_BASE_DELAY_MS,
  maxDelayMs: Long = DEFAULT_MAX_DELAY_MS,
): Long = {
  // Based on AWS' recommendations:
  // - https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/
  /* -
   * https://github.com/aws/aws-sdk-java/blob/master/aws-java-sdk-core/src/main/java/com/amazonaws/retry/PredefinedBackoffStrategies.java */

  // A negative shift count would be masked by the JVM (1L << -1 == 1L << 63), so clamp both ends.
  val exponent = math.max(0, math.min(tries, LOG_2_MAX_MULTIPLIER))
  val multiplier = 1L << exponent

  // Saturate rather than overflow when the base delay is very large.
  val nonNegativeBaseMs = math.max(baseDelayMs, 0L)
  val scaledDelayMs =
    if (nonNegativeBaseMs > Long.MaxValue / multiplier) Long.MaxValue
    else nonNegativeBaseMs * multiplier

  val ceilingForDelayMs = math.max(0L, math.min(scaledDelayMs, maxDelayMs))
  val halfCeilingMs = ceilingForDelayMs / 2

  // Jitter in [0, halfCeilingMs]. nextDouble() is in [0, 1), so the product is below
  // halfCeilingMs + 1; the min guards against floating-point rounding for huge ceilings.
  val jitterMs =
    math.min((Random.nextDouble() * (halfCeilingMs + 1).toDouble).toLong, halfCeilingMs)

  halfCeilingMs + jitterMs
}
5050
51- def sleepBeforTry (
52- tries : Int ,
53- baseDelayMs : Int = DEFAULT_BASE_DELAY_MS ,
54- maxDelayMs : Int = DEFAULT_MAX_DELAY_MS ,
55- ) =
56- Thread .sleep(delayMsForTry(tries, baseDelayMs, maxDelayMs).toLong)
57-
51+ @ tailrec
5852 def isLimitedRetriesError (_e : Throwable ): Boolean = {
5953 // An exception is a "retry once error" if a rare, known bug in a dependency or in a cloud
6054 // provider can manifest as this exception *and* that manifestation is indistinguishable from a
@@ -94,6 +88,7 @@ package object services {
9488 }
9589 }
9690
91+ @ tailrec
9792 def isTransientError (_e : Throwable ): Boolean = {
9893 // ReactiveException is package private inside reactor.core.Exceptions so we cannot access
9994 // it directly for an isInstance check. AFAICT, this is the only way to check if we received
@@ -185,14 +180,11 @@ package object services {
185180 }
186181 }
187182
188- def retryTransientErrors [T ](f : => T , reset : Option [() => Unit ] = None ): T = {
189- var tries = 0
190- while (true ) {
191- try
192- return f
183+ def retryTransientErrors [T ](f : => T , reset : Option [() => Unit ] = None ): T =
184+ retryable { tries =>
185+ try f
193186 catch {
194187 case e : Exception =>
195- tries += 1
196188 val delay = delayMsForTry(tries)
197189 if (tries <= 5 && isLimitedRetriesError(e)) {
198190 log.warn(
@@ -205,14 +197,12 @@ package object services {
205197 } else if (tries % 10 == 0 ) {
206198 log.warn(s " Encountered $tries transient errors, most recent one was $e. " )
207199 }
208- Thread .sleep(delay.toLong)
200+ Thread .sleep(delay)
201+ reset.foreach(_())
202+ retry
209203 }
210- reset.foreach(_())
211204 }
212205
213- throw new AssertionError (" unreachable" )
214- }
215-
216206 def formatException (e : Throwable ): String = {
217207 using(new StringWriter ()) { sw =>
218208 using(new PrintWriter (sw)) { pw =>
0 commit comments