Skip to content

fix: allow Unicode object names #605

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions migrations/tenant/0026-unicode-object-names.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
ALTER TABLE "storage"."objects"
ADD CONSTRAINT objects_name_check
CHECK (name SIMILAR TO '[\x09\x0A\x0D\x20-\xD7FF\xE000-\xFFFD\x00010000-\x0010ffff]+');

4 changes: 4 additions & 0 deletions src/http/plugins/xml.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ export const xmlParser = fastifyPlugin(
isArray: (_: string, jpath: string) => {
return opts.parseAsArray?.includes(jpath)
},
tagValueProcessor: (name: string, value: string) =>
value.replace(/&#x([0-9a-fA-F]{1,6});/g, (_, str: string) =>
String.fromCharCode(Number.parseInt(str, 16))
),
})
}

Expand Down
1 change: 1 addition & 0 deletions src/internal/database/migrations/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@ export const DBMigration = {
'optimize-search-function': 22,
'operation-function': 23,
'custom-metadata': 24,
'unicode-object-names': 25,
} as const
2 changes: 1 addition & 1 deletion src/internal/errors/codes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ export const ERRORS = {
code: ErrorCode.InvalidKey,
resource: key,
httpStatusCode: 400,
message: `Invalid key: ${key}`,
message: `Invalid key: ${encodeURIComponent(key)}`,
originalError: e,
}),

Expand Down
6 changes: 5 additions & 1 deletion src/storage/database/knex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -877,7 +877,11 @@ export class DBError extends StorageBackendError implements RenderableError {
code: pgError.code,
})
default:
return ERRORS.DatabaseError(pgError.message, pgError).withMetadata({
const errorMessage =
pgError.code === '23514' && pgError.constraint === 'objects_name_check'
? 'Invalid object name'
: pgError.message
return ERRORS.DatabaseError(errorMessage, pgError).withMetadata({
query,
code: pgError.code,
})
Expand Down
12 changes: 9 additions & 3 deletions src/storage/limits.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,15 @@ export async function isImageTransformationEnabled(tenantId: string) {
* @param key
*/
export function isValidKey(key: string): boolean {
// only allow s3 safe characters and characters which require special handling for now
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
return key.length > 0 && /^(\w|\/|!|-|\.|\*|'|\(|\)| |&|\$|@|=|;|:|\+|,|\?)*$/.test(key)
// Allow any sequence of Unicode characters with UTF-8 encoding,
// except characters not allowed in XML 1.0.
// See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
// See: https://www.w3.org/TR/REC-xml/#charsets
//
const regex =
/[\0-\x08\x0B\f\x0E-\x1F\uFFFE\uFFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]/

return key.length > 0 && !regex.test(key)
}

/**
Expand Down
36 changes: 36 additions & 0 deletions src/test/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,42 @@ export const adminApp = app({})

const ENV = process.env

/**
* Should support all Unicode characters with UTF-8 encoding according to AWS S3 object naming guide, including:
* - Safe characters: 0-9 a-z A-Z !-_.*'()
* - Characters that might require special handling: &$@=;/:+,? and Space and ASCII characters \t, \n, and \r.
* - Characters: \{}^%`[]"<>~#| and non-printable ASCII characters (128–255 decimal characters).
*
* The following characters are not allowed:
* - ASCII characters 0x00–0x1F, except 0x09, 0x0A, and 0x0D.
* - Unicode \u{FFFE} and \u{FFFF}.
* - Lone surrogates characters.
* See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
* See: https://www.w3.org/TR/REC-xml/#charsets
*/
export function getUnicodeObjectName(): string {
const objectName = 'test'
.concat("!-_*.'()")
// Characters that might require special handling
.concat('&$@=;:+,? \x09\x0A\x0D')
// Characters to avoid
.concat('\\{}^%`[]"<>~#|\xFF')
// MinIO max. length for each '/' separated segment is 255
.concat('/')
.concat([...Array(127).keys()].map((i) => String.fromCodePoint(i + 128)).join(''))
.concat('/')
// Some special Unicode characters
.concat('\u2028\u202F\u{0001FFFF}')
// Some other Unicode characters
.concat('일이삼\u{0001f642}')

return objectName
}

export function getInvalidObjectName(): string {
return 'test\x01\x02\x03.txt'
}

export function useMockQueue() {
const queueSpy: jest.SpyInstance | undefined = undefined
beforeEach(() => {
Expand Down
90 changes: 89 additions & 1 deletion src/test/object.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@ import app from '../app'
import { getConfig, mergeConfig } from '../config'
import { signJWT } from '@internal/auth'
import { Obj, backends } from '../storage'
import { useMockObject, useMockQueue } from './common'
import { getInvalidObjectName, getUnicodeObjectName, useMockObject, useMockQueue } from './common'
import { getServiceKeyUser, getPostgresConnection } from '@internal/database'
import { Knex } from 'knex'
import { ErrorCode, StorageBackendError } from '@internal/errors'
import { randomUUID } from 'crypto'

const { jwtSecret, serviceKey, tenantId } = getConfig()
const anonKey = process.env.ANON_KEY || ''
Expand Down Expand Up @@ -2456,3 +2457,90 @@ describe('testing list objects', () => {
expect(responseJSON[1].name).toBe('sadcat-upload23.png')
})
})

describe('Object key names with Unicode characters', () => {
test('can upload, get, list, and delete', async () => {
const prefix = `test-utf8-${randomUUID()}`
const objectName = getUnicodeObjectName()

const form = new FormData()
form.append('file', fs.createReadStream(`./src/test/assets/sadcat.jpg`))
const headers = Object.assign({}, form.getHeaders(), {
authorization: `Bearer ${serviceKey}`,
'x-upsert': 'true',
})

const uploadResponse = await app().inject({
method: 'POST',
url: `/object/bucket2/${prefix}/${encodeURIComponent(objectName)}`,
headers: {
...headers,
...form.getHeaders(),
},
payload: form,
})
expect(uploadResponse.statusCode).toBe(200)

const getResponse = await app().inject({
method: 'GET',
url: `/object/bucket2/${prefix}/${encodeURIComponent(objectName)}`,
headers: {
authorization: `Bearer ${serviceKey}`,
},
})
expect(getResponse.statusCode).toBe(200)
expect(getResponse.headers['etag']).toBe('abc')
expect(getResponse.headers['last-modified']).toBe('Thu, 12 Aug 2021 16:00:00 GMT')
expect(getResponse.headers['cache-control']).toBe('no-cache')

const listResponse = await app().inject({
method: 'POST',
url: `/object/list/bucket2`,
headers: {
authorization: `Bearer ${serviceKey}`,
},
payload: { prefix },
})
expect(listResponse.statusCode).toBe(200)
const listResponseJSON = JSON.parse(listResponse.body)
expect(listResponseJSON).toHaveLength(1)
expect(listResponseJSON[0].name).toBe(objectName.split('/')[0])

const deleteResponse = await app().inject({
method: 'DELETE',
url: `/object/bucket2/${prefix}/${encodeURIComponent(objectName)}`,
headers: {
authorization: `Bearer ${serviceKey}`,
},
})
expect(deleteResponse.statusCode).toBe(200)
})

test('should not upload if the name contains invalid characters', async () => {
const invalidObjectName = getInvalidObjectName()
const form = new FormData()
form.append('file', fs.createReadStream(`./src/test/assets/sadcat.jpg`))
const headers = Object.assign({}, form.getHeaders(), {
authorization: `Bearer ${serviceKey}`,
'x-upsert': 'true',
})
const uploadResponse = await app().inject({
method: 'POST',
url: `/object/bucket2/${encodeURIComponent(invalidObjectName)}`,
headers: {
...headers,
...form.getHeaders(),
},
payload: form,
})
expect(uploadResponse.statusCode).toBe(400)
expect(S3Backend.prototype.uploadObject).not.toHaveBeenCalled()
expect(uploadResponse.body).toBe(
JSON.stringify({
statusCode: '400',
error: 'InvalidKey',
message: `Invalid key: ${encodeURIComponent(invalidObjectName)}`,
})
)
})
})
146 changes: 146 additions & 0 deletions src/test/s3-protocol.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import { randomUUID } from 'crypto'
import { getSignedUrl } from '@aws-sdk/s3-request-presigner'
import axios from 'axios'
import { createPresignedPost } from '@aws-sdk/s3-presigned-post'
import { getInvalidObjectName, getUnicodeObjectName } from './common'

const { s3ProtocolAccessKeySecret, s3ProtocolAccessKeyId, storageS3Region } = getConfig()

Expand Down Expand Up @@ -1402,5 +1403,150 @@ describe('S3 Protocol', () => {
expect(resp.ok).toBeTruthy()
})
})

describe('Object key names with Unicode characters', () => {
it('can be used with MultipartUpload commands', async () => {
const bucketName = await createBucket(client)
const objectName = getUnicodeObjectName()
const createMultiPartUpload = new CreateMultipartUploadCommand({
Bucket: bucketName,
Key: objectName,
ContentType: 'image/jpg',
CacheControl: 'max-age=2000',
})
const createMultipartResp = await client.send(createMultiPartUpload)
expect(createMultipartResp.UploadId).toBeTruthy()
const uploadId = createMultipartResp.UploadId

const listMultipartUploads = new ListMultipartUploadsCommand({
Bucket: bucketName,
})
const listMultipartResp = await client.send(listMultipartUploads)
expect(listMultipartResp.Uploads?.length).toBe(1)
expect(listMultipartResp.Uploads?.[0].Key).toBe(objectName)

const data = Buffer.alloc(1024 * 1024 * 2)
const uploadPart = new UploadPartCommand({
Bucket: bucketName,
Key: objectName,
ContentLength: data.length,
UploadId: uploadId,
Body: data,
PartNumber: 1,
})
const uploadPartResp = await client.send(uploadPart)
expect(uploadPartResp.ETag).toBeTruthy()

const listParts = new ListPartsCommand({
Bucket: bucketName,
Key: objectName,
UploadId: uploadId,
})
const listPartsResp = await client.send(listParts)
expect(listPartsResp.Parts?.length).toBe(1)

const completeMultiPartUpload = new CompleteMultipartUploadCommand({
Bucket: bucketName,
Key: objectName,
UploadId: uploadId,
MultipartUpload: {
Parts: [
{
PartNumber: 1,
ETag: uploadPartResp.ETag,
},
],
},
})
const completeMultipartResp = await client.send(completeMultiPartUpload)
expect(completeMultipartResp.$metadata.httpStatusCode).toBe(200)
expect(completeMultipartResp.Key).toEqual(objectName)
})

it('can be used with Put, List, and Delete Object commands', async () => {
const bucketName = await createBucket(client)
const objectName = getUnicodeObjectName()
const putObject = new PutObjectCommand({
Bucket: bucketName,
Key: objectName,
Body: Buffer.alloc(1024 * 1024 * 1),
})
const putObjectResp = await client.send(putObject)
expect(putObjectResp.$metadata.httpStatusCode).toEqual(200)

const listObjects = new ListObjectsCommand({
Bucket: bucketName,
})
const listObjectsResp = await client.send(listObjects)
expect(listObjectsResp.Contents?.length).toBe(1)
expect(listObjectsResp.Contents?.[0].Key).toBe(objectName)

const listObjectsV2 = new ListObjectsV2Command({
Bucket: bucketName,
})
const listObjectsV2Resp = await client.send(listObjectsV2)
expect(listObjectsV2Resp.Contents?.length).toBe(1)
expect(listObjectsV2Resp.Contents?.[0].Key).toBe(objectName)

const getObject = new GetObjectCommand({
Bucket: bucketName,
Key: objectName,
})
const getObjectResp = await client.send(getObject)
const getObjectRespData = await getObjectResp.Body?.transformToByteArray()
expect(getObjectRespData).toBeTruthy()
expect(getObjectResp.ETag).toBeTruthy()

const deleteObjects = new DeleteObjectsCommand({
Bucket: bucketName,
Delete: {
Objects: [
{
Key: objectName,
},
],
},
})
const deleteObjectsResp = await client.send(deleteObjects)
expect(deleteObjectsResp.Errors).toBeFalsy()
expect(deleteObjectsResp.Deleted).toEqual([
{
Key: objectName,
},
])

const getObjectDeleted = new GetObjectCommand({
Bucket: bucketName,
Key: objectName,
})
try {
await client.send(getObjectDeleted)
throw new Error('Should not reach here')
} catch (e) {
expect((e as S3ServiceException).$metadata.httpStatusCode).toEqual(404)
}
})

it('should not upload if the name contains invalid characters', async () => {
const bucketName = await createBucket(client)
const invalidObjectName = getInvalidObjectName()
try {
const putObject = new PutObjectCommand({
Bucket: bucketName,
Key: invalidObjectName,
Body: Buffer.alloc(1024 * 1024 * 1),
})
await client.send(putObject)
throw new Error('Should not reach here')
} catch (e) {
expect((e as Error).message).not.toBe('Should not reach here')
expect((e as S3ServiceException).$metadata.httpStatusCode).toEqual(400)
expect((e as S3ServiceException).name).toEqual('InvalidKey')
expect((e as S3ServiceException).message).toEqual(
`Invalid key: ${encodeURIComponent(invalidObjectName)}`
)
}
})
})
})
})
4 changes: 2 additions & 2 deletions src/test/tenant.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ const payload = {
serviceKey: 'd',
jwks: { keys: [] },
migrationStatus: 'COMPLETED',
migrationVersion: 'custom-metadata',
migrationVersion: 'unicode-object-names',
tracingMode: 'basic',
features: {
imageTransformation: {
Expand All @@ -40,7 +40,7 @@ const payload2 = {
serviceKey: 'h',
jwks: null,
migrationStatus: 'COMPLETED',
migrationVersion: 'custom-metadata',
migrationVersion: 'unicode-object-names',
tracingMode: 'basic',
features: {
imageTransformation: {
Expand Down
Loading