-
Notifications
You must be signed in to change notification settings - Fork 415
fix(storage-resize-images): fix backfill and excluded paths parameter not being applied #2513
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: next
Are you sure you want to change the base?
Changes from all commits
1431f26
97497c7
4d8a962
ba9ee15
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
{ | ||
"projects": { | ||
"default": "demo-test" | ||
"default": "dev-extensions-testing" | ||
}, | ||
"targets": {}, | ||
"etags": { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
import { shouldResize } from "../src/filters"; | ||
import { convertToObjectMetadata } from "../src/util"; | ||
|
||
jest.mock("../src/config", () => ({ | ||
config: { | ||
excludePathList: ["/maps/*/thumbnails", "/pois/*/thumbnails"], | ||
resizedImagesPath: "thumbnails", | ||
backfillBatchSize: 3, | ||
imageSizes: ["640x480", "1200x800", "1920x1280"], | ||
imageType: "avif", | ||
includePathList: undefined, | ||
doBackfill: true, | ||
location: "europe-west1", | ||
projectId: "test-project", | ||
}, | ||
})); | ||
|
||
describe("Backfill Fix Validation", () => { | ||
test("should only scan original images, not thumbnails", () => { | ||
const filesReturnedByFix = [ | ||
createMockFile("/maps/location1/image1.jpg", "image/jpeg"), | ||
createMockFile("/maps/location2/image2.png", "image/png"), | ||
createMockFile("/pois/place1/photo1.jpg", "image/jpeg"), | ||
createMockFile("/other/path/image3.gif", "image/gif"), | ||
]; | ||
|
||
let scannedCount = 0; | ||
let processedCount = 0; | ||
|
||
for (const file of filesReturnedByFix) { | ||
scannedCount++; | ||
const objectMetadata = convertToObjectMetadata(file.metadata); | ||
|
||
if (shouldResize(objectMetadata)) { | ||
processedCount++; | ||
} | ||
} | ||
|
||
expect(scannedCount).toBe(4); | ||
expect(processedCount).toBe(4); | ||
expect(scannedCount).toBeLessThan(10); | ||
}); | ||
|
||
test("should verify shouldResize correctly identifies excluded paths", () => { | ||
const originalImages = [ | ||
createMockFile("/maps/location1/image1.jpg", "image/jpeg"), | ||
createMockFile("/pois/place1/photo1.png", "image/png"), | ||
createMockFile("/other/path/image3.gif", "image/gif"), | ||
]; | ||
|
||
for (const file of originalImages) { | ||
const objectMetadata = convertToObjectMetadata(file.metadata); | ||
const shouldResizeResult = shouldResize(objectMetadata); | ||
expect(shouldResizeResult).toBe(true); | ||
} | ||
|
||
const thumbnailImages = [ | ||
createMockFile( | ||
"/maps/location1/thumbnails/image1_640x480.avif", | ||
"image/avif", | ||
{ resizedImage: "true" } | ||
), | ||
createMockFile( | ||
"/pois/place1/thumbnails/photo1_1200x800.avif", | ||
"image/avif", | ||
{ resizedImage: "true" } | ||
), | ||
]; | ||
|
||
for (const file of thumbnailImages) { | ||
const objectMetadata = convertToObjectMetadata(file.metadata); | ||
const shouldResizeResult = shouldResize(objectMetadata); | ||
expect(shouldResizeResult).toBe(false); | ||
} | ||
}); | ||
|
||
test("should verify the fix works with different exclude path patterns", () => { | ||
const testCases = [ | ||
{ | ||
path: "/maps/location1/image1.jpg", | ||
shouldBeProcessed: true, | ||
}, | ||
{ | ||
path: "/maps/location1/thumbnails/image1_640x480.avif", | ||
shouldBeProcessed: false, | ||
}, | ||
{ | ||
path: "/pois/place1/photo1.jpg", | ||
shouldBeProcessed: true, | ||
}, | ||
{ | ||
path: "/pois/place1/thumbnails/photo1_640x480.avif", | ||
shouldBeProcessed: false, | ||
}, | ||
{ | ||
path: "/other/path/image.jpg", | ||
shouldBeProcessed: true, | ||
}, | ||
{ | ||
path: "/maps/any/location/thumbnails/image.jpg", | ||
shouldBeProcessed: false, | ||
}, | ||
]; | ||
|
||
for (const testCase of testCases) { | ||
const mockFile = createMockFile(testCase.path, "image/jpeg"); | ||
const objectMetadata = convertToObjectMetadata(mockFile.metadata); | ||
const shouldResizeResult = shouldResize(objectMetadata); | ||
|
||
expect(shouldResizeResult).toBe(testCase.shouldBeProcessed); | ||
} | ||
}); | ||
|
||
test("should demonstrate cost efficiency with large datasets", () => { | ||
const largeDataset = { | ||
originalImages: 600000, | ||
thumbnails: 1800000, | ||
}; | ||
|
||
const filesScannedWithFix = largeDataset.originalImages; | ||
const costMultiplier = filesScannedWithFix / largeDataset.originalImages; | ||
|
||
expect(filesScannedWithFix).toBe(largeDataset.originalImages); | ||
expect(costMultiplier).toBe(1); | ||
expect(filesScannedWithFix).toBeLessThan( | ||
largeDataset.originalImages + largeDataset.thumbnails | ||
); | ||
}); | ||
|
||
test("should verify the fix prevents exponential growth scenarios", () => { | ||
const scenarios = [ | ||
{ cycle: 1, originalImages: 1000, thumbnailsCreated: 3000 }, | ||
{ cycle: 2, originalImages: 1000, thumbnailsCreated: 6000 }, | ||
{ cycle: 3, originalImages: 1000, thumbnailsCreated: 9000 }, | ||
{ cycle: 4, originalImages: 1000, thumbnailsCreated: 12000 }, | ||
]; | ||
|
||
for (const scenario of scenarios) { | ||
const filesScannedWithFix = scenario.originalImages; | ||
const costMultiplier = filesScannedWithFix / scenario.originalImages; | ||
|
||
expect(filesScannedWithFix).toBe(scenario.originalImages); | ||
expect(costMultiplier).toBe(1); | ||
} | ||
}); | ||
}); | ||
|
||
function createMockFile( | ||
name: string, | ||
contentType: string, | ||
metadata: any = {} | ||
): any { | ||
return { | ||
metadata: { | ||
name, | ||
contentType, | ||
metadata, | ||
bucket: "test-bucket", | ||
generation: "123456789", | ||
metageneration: "1", | ||
timeCreated: new Date().toISOString(), | ||
updated: new Date().toISOString(), | ||
size: "1024", | ||
md5Hash: "test-hash", | ||
etag: "test-etag", | ||
selfLink: `https://storage.googleapis.com/test-bucket/${name}`, | ||
mediaLink: `https://storage.googleapis.com/download/storage/v1/b/test-bucket/o/${name}?generation=123456789&alt=media`, | ||
crc32c: "test-crc32c", | ||
kind: "storage#object", | ||
}, | ||
name, | ||
bucket: { | ||
name: "test-bucket", | ||
}, | ||
delete: jest.fn(), | ||
download: jest.fn(), | ||
get: jest.fn(), | ||
getMetadata: jest.fn(), | ||
makePublic: jest.fn(), | ||
move: jest.fn(), | ||
save: jest.fn(), | ||
setMetadata: jest.fn(), | ||
upload: jest.fn(), | ||
}; | ||
} |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -153,6 +153,64 @@ export const generateResizedImage = functions.storage | |||||
await events.recordCompletionEvent({ context }); | ||||||
}); | ||||||
|
||||||
function buildOptimizedQuery(baseQuery: any, excludePathList?: string[]): any { | ||||||
if (!excludePathList || excludePathList.length === 0) { | ||||||
return baseQuery; | ||||||
} | ||||||
|
||||||
const optimizedQuery = { ...baseQuery }; | ||||||
optimizedQuery.delimiter = "/"; | ||||||
|
||||||
return optimizedQuery; | ||||||
} | ||||||
|
||||||
function filterExcludedPaths( | ||||||
files: File[], | ||||||
excludePathList?: string[] | ||||||
): File[] { | ||||||
if (!excludePathList || excludePathList.length === 0) { | ||||||
return files; | ||||||
} | ||||||
|
||||||
return files.filter((file) => { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have the impression that exclude paths are meant to manually exclude paths that should not be processed at all (e.g. "folder in which I want to keep large images only"), i.e. realize high level business objectives. On the low-level, regardless of the provided value of exclude paths, the extension should also exclude any thumbnails subdirectories it created, regardless of whether the RESIZED_IMAGES_PATH is kept as default or explicitly defined. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a fair point. I believe the extension currently uses metadata to track whether to resize or not (to prevent resizing twice) but that isn't good enough for backfill, since the events themselves can't be filtered out on metadata before runtime. |
||||||
const filePath = path.dirname(file.metadata.name); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using
Suggested change
Copilot uses AI. Check for mistakes. Positive FeedbackNegative Feedback |
||||||
|
||||||
for (const excludePath of excludePathList) { | ||||||
const trimmedExcludePath = excludePath | ||||||
.trim() | ||||||
.replace(/\*/g, "([a-zA-Z0-9_\\-\\+.\\s\\/]*)?"); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The regex pattern allows arbitrary characters including forward slashes, which could lead to unintended path matching. Consider using a more restrictive character set or escaping special regex characters properly to prevent regex injection. Copilot uses AI. Check for mistakes. Positive FeedbackNegative Feedback |
||||||
|
||||||
const regex = new RegExp("^" + trimmedExcludePath + "(?:/.*|$)"); | ||||||
|
||||||
if (regex.test(filePath)) { | ||||||
return false; | ||||||
} | ||||||
} | ||||||
|
||||||
return true; | ||||||
}); | ||||||
} | ||||||
|
||||||
async function getFilesWithPathExclusions( | ||||||
bucket: any, | ||||||
baseQuery: any, | ||||||
excludePathList?: string[] | ||||||
): Promise<[File[], any]> { | ||||||
if (!excludePathList || excludePathList.length === 0) { | ||||||
return bucket.getFiles(baseQuery); | ||||||
} | ||||||
|
||||||
const optimizedQuery = { | ||||||
...baseQuery, | ||||||
delimiter: "/", | ||||||
}; | ||||||
|
||||||
const [files, nextPageQuery] = await bucket.getFiles(optimizedQuery); | ||||||
const filteredFiles = filterExcludedPaths(files, excludePathList); | ||||||
|
||||||
return [filteredFiles, nextPageQuery]; | ||||||
} | ||||||
|
||||||
/** | ||||||
* | ||||||
*/ | ||||||
|
@@ -173,11 +231,17 @@ export const backfillResizedImages = functions.tasks | |||||
} | ||||||
|
||||||
const bucket = admin.storage().bucket(process.env.IMG_BUCKET); | ||||||
const query = data.nextPageQuery || { | ||||||
const baseQuery = data.nextPageQuery || { | ||||||
autoPaginate: false, | ||||||
maxResults: config.backfillBatchSize, | ||||||
}; | ||||||
const [files, nextPageQuery] = await bucket.getFiles(query); | ||||||
|
||||||
const [files, nextPageQuery] = await getFilesWithPathExclusions( | ||||||
bucket, | ||||||
baseQuery, | ||||||
config.excludePathList | ||||||
); | ||||||
|
||||||
const filesToResize = files.filter((f: File) => { | ||||||
logs.continueBackfill(f.metadata.name); | ||||||
return shouldResize(convertToObjectMetadata(f.metadata)); | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The
buildOptimizedQuery
function is defined but never used in the codebase. Consider removing it or integrating it into thegetFilesWithPathExclusions
function to avoid code duplication.Copilot uses AI. Check for mistakes.