Skip to content

Commit dae32f5

Browse files
edmundmillerpditommasoclaude
committed
refactor(aws): Replace custom sanitization config with strict mode validation
Replace the custom `aws.batch.sanitizeTags` configuration option with validation logic that integrates with Nextflow's existing `nextflow.enable.strict` feature flag for consistent platform behavior. Changes: - Remove `sanitizeTags` config option from AwsBatchConfig and AwsOptions - Replace `sanitizeAwsBatchLabels()` with `validateAwsBatchLabels()` - Implement strict mode detection using `nextflow.enable.strict` - Normal mode: warns about invalid AWS Batch tags but passes them through - Strict mode: throws ProcessUnrecoverableException for invalid tags - Filter out null keys/values with appropriate logging - Update all tests from sanitization to validation approach This provides better integration with Nextflow's platform-wide strict mode behavior while maintaining backward compatibility. Ref: #6211 (comment) 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Paolo Di Tommaso <[email protected]> Co-Authored-By: Claude <[email protected]> Signed-off-by: Edmund Miller <[email protected]>
1 parent ef84c99 commit dae32f5

File tree

4 files changed

+216
-211
lines changed

4 files changed

+216
-211
lines changed

plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy

Lines changed: 65 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -779,7 +779,7 @@ class AwsBatchTaskHandler extends TaskHandler implements BatchHandler<String,Job
779779
builder.jobQueue(getJobQueue(task))
780780
builder.jobDefinition(getJobDefinition(task))
781781
if( labels ) {
782-
final tags = opts.sanitizeTags() ? sanitizeAwsBatchLabels(labels) : labels
782+
final tags = validateAwsBatchLabels(labels)
783783
builder.tags(tags)
784784
builder.propagateTags(true)
785785
}
@@ -866,76 +866,95 @@ class AwsBatchTaskHandler extends TaskHandler implements BatchHandler<String,Job
866866
}
867867

868868
/**
869-
* Sanitize resource labels to comply with AWS Batch tag requirements.
870-
* AWS Batch tags have specific constraints:
871-
* - Keys and values can contain: letters, numbers, spaces, and characters: _ . : / = + - @
872-
* - Maximum key length: 128 characters
869+
* Validate AWS Batch labels for compliance with AWS naming requirements.
870+
* This method validates resource labels against AWS Batch tag constraints and
871+
* handles violations based on the nextflow.enable.strict setting:
872+
*
873+
* - When strict mode is disabled (default): logs warnings for invalid tags but allows them through
874+
* - When strict mode is enabled: throws ProcessUnrecoverableException for invalid tags
875+
*
876+
* AWS Batch tag constraints validated:
877+
* - Keys and values cannot be null
878+
* - Maximum key length: 128 characters
873879
* - Maximum value length: 256 characters
880+
* - Allowed characters: letters, numbers, spaces, and: _ . : / = + - @
874881
*
875882
* @param labels The original resource labels map
876-
* @return A new map with sanitized labels suitable for AWS Batch tags
883+
* @return The labels map (unchanged in validation mode)
884+
* @throws ProcessUnrecoverableException when strict mode is enabled and labels are invalid
877885
*/
878-
protected Map<String, String> sanitizeAwsBatchLabels(Map<String, String> labels) {
886+
protected Map<String, String> validateAwsBatchLabels(Map<String, String> labels) {
879887
if (!labels) return labels
880888

881-
final result = new LinkedHashMap<String, String>()
889+
final strictMode = executor.session.config.navigate('nextflow.enable.strict', false)
890+
final violations = []
891+
final result = new HashMap<String, String>()
882892

883893
for (Map.Entry<String, String> entry : labels.entrySet()) {
884894
final key = entry.getKey()
885895
final value = entry.getValue()
886896

887-
// Handle null keys or values
888-
if (key == null || value == null) {
889-
log.warn "AWS Batch label dropped due to null ${key == null ? 'key' : 'value'}: key=${key}, value=${value}"
897+
// Check for null keys or values and filter them out (not validation violations)
898+
if (key == null) {
899+
log.warn "AWS Batch label dropped due to null key: key=null, value=${value}"
890900
continue
891901
}
892-
893-
final originalKey = key.toString()
894-
final originalValue = value.toString()
895-
final sanitizedKey = sanitizeAwsBatchLabel(originalKey, 128)
896-
final sanitizedValue = sanitizeAwsBatchLabel(originalValue, 256)
897-
898-
// Check if sanitization resulted in empty strings
899-
if (!sanitizedKey || !sanitizedValue) {
900-
log.warn "AWS Batch label dropped after sanitization - key: '${originalKey}' -> '${sanitizedKey ?: ''}', value: '${originalValue}' -> '${sanitizedValue ?: ''}'"
902+
if (value == null) {
903+
log.warn "AWS Batch label dropped due to null value: key=${key}, value=null"
901904
continue
902905
}
903906

904-
// Log if values were modified during sanitization
905-
if (sanitizedKey != originalKey || sanitizedValue != originalValue) {
906-
log.warn "AWS Batch label sanitized - key: '${originalKey}' -> '${sanitizedKey}', value: '${originalValue}' -> '${sanitizedValue}'"
907+
final keyStr = key.toString()
908+
final valueStr = value.toString()
909+
910+
// Validate key length
911+
if (keyStr.length() > 128) {
912+
violations << "Label key exceeds 128 characters: '${keyStr}' (${keyStr.length()} chars)"
913+
}
914+
915+
// Validate value length
916+
if (valueStr.length() > 256) {
917+
violations << "Label value exceeds 256 characters: '${keyStr}' = '${valueStr}' (${valueStr.length()} chars)"
918+
}
919+
920+
// Validate key characters
921+
if (!isValidAwsBatchTagString(keyStr)) {
922+
violations << "Label key contains invalid characters: '${keyStr}' - only letters, numbers, spaces, and _ . : / = + - @ are allowed"
923+
}
924+
925+
// Validate value characters
926+
if (!isValidAwsBatchTagString(valueStr)) {
927+
violations << "Label value contains invalid characters: '${keyStr}' = '${valueStr}' - only letters, numbers, spaces, and _ . : / = + - @ are allowed"
907928
}
908929

909-
result.put(sanitizedKey, sanitizedValue)
930+
// Add valid entries to result
931+
result[keyStr] = valueStr
932+
}
933+
934+
// Handle violations based on strict mode (but only for constraint violations, not null filtering)
935+
if (violations) {
936+
final message = "AWS Batch tag validation failed:\n${violations.collect{ ' - ' + it }.join('\n')}"
937+
if (strictMode) {
938+
throw new ProcessUnrecoverableException(message)
939+
} else {
940+
log.warn "${message}\nTags will be used as-is but may cause AWS Batch submission failures"
941+
}
910942
}
911943

912944
return result
913945
}
914946

915947
/**
916-
* Sanitize a single label key or value for AWS Batch tags.
917-
* Replaces invalid characters with underscores and truncates if necessary.
918-
*
919-
* @param input The input string to sanitize
920-
* @param maxLength The maximum allowed length
921-
* @return The sanitized string
948+
* Check if a string contains only characters allowed in AWS Batch tags.
949+
* AWS Batch allows: letters, numbers, spaces, and: _ . : / = + - @
950+
*
951+
* @param input The string to validate
952+
* @return true if the string contains only valid characters
922953
*/
923-
protected String sanitizeAwsBatchLabel(String input, int maxLength) {
924-
if (!input) return input
925-
926-
// Replace invalid characters and clean up the string
927-
// AWS Batch allows: letters, numbers, spaces, and: _ . : / = + - @
928-
final sanitized = input
929-
.replaceAll(/[^a-zA-Z0-9\s_.\:\/=+\-@]/, '_') // Replace invalid chars with underscores
930-
.replaceAll(/[_\s]{2,}/, '_') // Replace multiple consecutive underscores/spaces
931-
.replaceAll(/^[_\s]+|[_\s]+$/, '') // Remove leading/trailing underscores and spaces
932-
933-
// Truncate if necessary and clean up any trailing underscores/spaces
934-
final result = sanitized.size() > maxLength
935-
? sanitized.substring(0, maxLength).replaceAll(/[_\s]+$/, '')
936-
: sanitized
937-
938-
return result ?: null
954+
protected boolean isValidAwsBatchTagString(String input, int maxLength = 128) {
955+
if (!input) return false
956+
if (input.length() > maxLength) return false
957+
return input ==~ /^[a-zA-Z0-9\s_.\:\/=+\-@]*$/
939958
}
940959

941960
/**

plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsOptions.groovy

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -164,8 +164,4 @@ class AwsOptions implements CloudTransferOptions {
164164
return awsConfig.batchConfig.terminateUnschedulableJobs
165165
}
166166

167-
Boolean sanitizeTags() {
168-
return awsConfig.batchConfig.sanitizeTags
169-
}
170-
171167
}

plugins/nf-amazon/src/main/nextflow/cloud/aws/config/AwsBatchConfig.groovy

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -125,12 +125,6 @@ class AwsBatchConfig implements CloudTransferOptions, ConfigScope {
125125
""")
126126
final List<String> volumes
127127

128-
@ConfigOption
129-
@Description("""
130-
When `true`, enables automatic sanitization of AWS Batch job tags to ensure compliance with AWS naming requirements (default: `false`).
131-
""")
132-
final Boolean sanitizeTags
133-
134128
/**
135129
* The path for the `s5cmd` tool as an alternative to `aws s3` CLI to upload/download files
136130
*/
@@ -157,7 +151,6 @@ class AwsBatchConfig implements CloudTransferOptions, ConfigScope {
157151
schedulingPriority = opts.schedulingPriority as Integer ?: 0
158152
executionRole = opts.executionRole
159153
terminateUnschedulableJobs = opts.terminateUnschedulableJobs as boolean
160-
sanitizeTags = opts.sanitizeTags as Boolean ?: false
161154
if( retryMode == 'built-in' )
162155
retryMode = null // this force falling back on NF built-in retry mode instead of delegating to AWS CLI tool
163156
if( retryMode && retryMode !in AwsOptions.VALID_RETRY_MODES )

0 commit comments

Comments
 (0)