diff --git a/src/main/groovy/de/dkfz/roddy/execution/jobs/cluster/lsf/LSFJobManager.groovy b/src/main/groovy/de/dkfz/roddy/execution/jobs/cluster/lsf/LSFJobManager.groovy index 605091c6..f444eec8 100644 --- a/src/main/groovy/de/dkfz/roddy/execution/jobs/cluster/lsf/LSFJobManager.groovy +++ b/src/main/groovy/de/dkfz/roddy/execution/jobs/cluster/lsf/LSFJobManager.groovy @@ -45,8 +45,16 @@ class LSFJobManager extends AbstractLSFJobManager { static final DateTimeHelper dateTimeHelper = new DateTimeHelper() + /** + * LSF supports retrying the submission command multiple times. The default is to retry for a very long time, + * which also blocks the execution of the thread. A single attempt usually works but fails + * too frequently, in particular if there is load on the LSF system. The current value of LSB_NTRIES is a + * compromise between blocking endlessly and having no failover. + * + * @return a Bash environment variable declaration affecting LSF commands. + */ final static String getEnvironmentString() { - return "LSB_NTRIES=1" + return "LSB_NTRIES=5" } LSFJobManager(BEExecutionService executionService, JobManagerOptions parms) {