Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -58,23 +58,42 @@ class AzFileAttributes implements BasicFileAttributes {
objectId = "/${client.containerName}/${client.blobName}"
creationTime = time(props.getCreationTime())
updateTime = time(props.getLastModified())
directory = client.blobName.endsWith('/')
size = props.getBlobSize()

// Support for Azure Data Lake Storage Gen2 with hierarchical namespace enabled

// Determine if this is a directory using metadata only (most reliable):
final meta = props.getMetadata()
if( meta.containsKey("hdi_isfolder") && size == 0 ){
directory = meta.get("hdi_isfolder")
if( meta != null && meta.containsKey("hdi_isfolder") && meta.get("hdi_isfolder") == "true" ){
directory = true
size = 0
}
else {
// Without metadata, default to treating as file
// This aligns with Azure SDK's approach where explicit directory markers are required
directory = false
size = props.getBlobSize()
}
}

AzFileAttributes(String containerName, BlobItem item) {
objectId = "/${containerName}/${item.name}"
directory = item.name.endsWith('/')
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bentsherman what will break if we incorrectly classify something as a file when it's a pseudo-directory? What's the impact of getting it wrong?

if( !directory ) {
creationTime = time(item.properties.getCreationTime())
updateTime = time(item.properties.getLastModified())
size = item.properties.getContentLength()

// Determine if this is a directory using reliable methods only:
// 1. Check if it's marked as a prefix (virtual directory) - Most reliable
if( item.isPrefix() != null && item.isPrefix() ) {
directory = true
// Virtual directories don't have properties like creation time
size = 0
}
// 2. Check metadata for hierarchical namespace (ADLS Gen2)
else if( item.getMetadata() != null && item.getMetadata().containsKey("hdi_isfolder") && item.getMetadata().get("hdi_isfolder") == "true" ) {
directory = true
size = 0
}
// 3. Default: treat as file
else {
directory = false
creationTime = time(item.getProperties().getCreationTime())
updateTime = time(item.getProperties().getLastModified())
size = item.getProperties().getContentLength()
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

Original file line number Diff line number Diff line change
Expand Up @@ -928,4 +928,51 @@ class AzNioTest extends Specification implements AzBaseSpec {
deleteBucket(bucket1)
}

/*
 * Verifies that a blob carrying the `hdi_isfolder=true` metadata entry
 * is reported as a directory (and not as a regular file) when its
 * attributes are read through the NIO `Files` API.
 */
def 'should detect directory with hdi_isfolder metadata' () {
    given:
    def container = createBucket()
    def target = "$container/test-dir"

    when:
    // Upload a zero-length blob named "test-dir/" and then tag it with
    // the hdi_isfolder marker so that it acts as a directory placeholder
    def marker = storageClient
            .getBlobContainerClient(container)
            .getBlobClient("test-dir/")
    def nothing = new ByteArrayInputStream(new byte[0])
    marker.upload(nothing, 0)
    marker.setMetadata(['hdi_isfolder': 'true'])

    and:
    // Resolve the same location through the az:// file system and read its attributes
    def location = new URI("az://$target/")
    def attributes = Files.readAttributes(Paths.get(location), BasicFileAttributes)

    then:
    attributes.isDirectory()
    !attributes.isRegularFile()

    cleanup:
    deleteBucket(container)
}

/*
 * Verifies that a non-empty blob whose name ends with a slash, but which
 * carries no directory metadata, is reported as a regular file (and not
 * as a directory) when its attributes are read through the NIO `Files` API.
 */
def 'should not treat file with trailing slash as directory without metadata' () {
    given:
    def bucketName = createBucket()
    // Encode the payload once with an explicit charset, so that the uploaded
    // stream and the declared length are always derived from the same bytes
    // (String.length() counts UTF-16 chars, not encoded bytes)
    def payload = "content".getBytes('UTF-8')

    when:
    // Create a file with trailing slash but no directory metadata
    def containerClient = storageClient.getBlobContainerClient(bucketName)
    def blobClient = containerClient.getBlobClient("test-file/")
    blobClient.upload(new ByteArrayInputStream(payload), payload.length)

    and:
    def path = Paths.get(new URI("az://$bucketName/test-file/"))
    def attrs = Files.readAttributes(path, BasicFileAttributes)

    then:
    // Without metadata or isPrefix, it should be treated as a file
    attrs.isRegularFile()
    !attrs.isDirectory()

    cleanup:
    deleteBucket(bucketName)
}

}