Skip to content

Commit 6f97516

Browse files
committed
perf(bench): split instantiation from decode/encode kernel measurement
The previous codec benches conflated four costs into one number: constructor + buffer fill + decode/encode + destructor. CodSpeed runs each bench body exactly once under Cachegrind, so the per-iteration construct/destroy cycle was sitting inside the measurement window — and the variance analysis on a5b8ee6..fa2db51 showed exactly that mixing was the source of the 18–110% run-to-run spread on openjpeg encode CT1.RAW, openjpeg decode CT1.j2k, openjphjs encode CT1.RAW, and libjpeg decode jpeg400jfif (the only four noisy benches in the suite). Refactor: hoist new Decoder()/Encoder() + getEncodedBuffer().set() / getDecodedBuffer().set() / setNearLossless() to module scope, one instance per fixture (wasm decoders advance internal state on decode() and can't be reused across multiple bench bodies). Each bench body is now a single decoder.decode() or encoder.encode() call. Two new "instantiate+destroy" benches per codec measure the lifecycle cost in isolation — that's where the noise was hiding and now it shows up as its own number rather than smearing into the kernel measurement. Net effect: the 4 noisy benches should drop to <1% spread (matching the dispatcher benches which already cache codec instances), and the lifecycle cost gets its own signal line that catches regressions in wasm setup paths. Affected packages: charls, libjpeg-turbo-8bit, openjpeg, openjphjs. big-endian/little-endian have no decoder class and dicom-codec's dispatch.bench.js already caches codec instances via runProcess → initialize → codecConfig.codec, so neither needs the same change.
1 parent fa2db51 commit 6f97516

4 files changed

Lines changed: 179 additions & 98 deletions

File tree

Lines changed: 50 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
// Decoder/encoder construction is hoisted to module scope so the bench
2+
// body only measures the decode/encode kernel itself. Each fixture gets
3+
// its own pre-constructed instance because the underlying wasm
4+
// JpegLSDecoder advances internal state on decode() and can't be reused
5+
// across multiple bench bodies. A separate "instantiate+destroy" bench
6+
// measures the lifecycle cost that the old monolithic bench was
7+
// conflating with kernel time.
8+
19
import { bench, describe } from "vitest"
210
import { existsSync, readFileSync } from "node:fs"
311
import { fileURLToPath } from "node:url"
@@ -18,45 +26,59 @@ const ctNearLossless = !skip
1826
: null
1927

2028
let codec
29+
let decCT1
30+
let decCT2
31+
let decNL
32+
let encCT2
2133
if (!skip) {
2234
const factory = (await import(distPath)).default ?? (await import(distPath))
2335
codec = await factory()
36+
37+
decCT1 = new codec.JpegLSDecoder()
38+
decCT1.getEncodedBuffer(ct1Encoded.length).set(ct1Encoded)
39+
40+
decCT2 = new codec.JpegLSDecoder()
41+
decCT2.getEncodedBuffer(ct2Encoded.length).set(ct2Encoded)
42+
43+
decNL = new codec.JpegLSDecoder()
44+
decNL.getEncodedBuffer(ctNearLossless.length).set(ctNearLossless)
45+
46+
encCT2 = new codec.JpegLSEncoder()
47+
encCT2
48+
.getDecodedBuffer({
49+
width: 512,
50+
height: 512,
51+
bitsPerSample: 16,
52+
componentCount: 1,
53+
})
54+
.set(ct2Raw)
55+
encCT2.setNearLossless(0)
2456
}
2557

2658
describe.skipIf(skip)("charls JPEG-LS (wasm)", () => {
27-
bench("decode CT1.JLS (.80 lossless, 512x512x16bit)", () => {
28-
const decoder = new codec.JpegLSDecoder()
29-
decoder.getEncodedBuffer(ct1Encoded.length).set(ct1Encoded)
30-
decoder.decode()
31-
decoder.delete()
59+
bench("instantiate+destroy JpegLSDecoder", () => {
60+
const d = new codec.JpegLSDecoder()
61+
d.delete()
62+
})
63+
64+
bench("instantiate+destroy JpegLSEncoder", () => {
65+
const e = new codec.JpegLSEncoder()
66+
e.delete()
67+
})
68+
69+
bench("decode CT1.JLS (.80 lossless, 512x512x16bit) — kernel", () => {
70+
decCT1.decode()
3271
})
3372

34-
bench("decode CT2.JLS (.80 lossless, 512x512x16bit)", () => {
35-
const decoder = new codec.JpegLSDecoder()
36-
decoder.getEncodedBuffer(ct2Encoded.length).set(ct2Encoded)
37-
decoder.decode()
38-
decoder.delete()
73+
bench("decode CT2.JLS (.80 lossless, 512x512x16bit) — kernel", () => {
74+
decCT2.decode()
3975
})
4076

41-
bench("decode CT-512x512-near-lossless.JLS (.81 near-lossless)", () => {
42-
const decoder = new codec.JpegLSDecoder()
43-
decoder.getEncodedBuffer(ctNearLossless.length).set(ctNearLossless)
44-
decoder.decode()
45-
decoder.delete()
77+
bench("decode CT-512x512-near-lossless.JLS (.81 near-lossless) — kernel", () => {
78+
decNL.decode()
4679
})
4780

48-
bench("encode CT2.RAW (lossless near=0)", () => {
49-
const encoder = new codec.JpegLSEncoder()
50-
encoder
51-
.getDecodedBuffer({
52-
width: 512,
53-
height: 512,
54-
bitsPerSample: 16,
55-
componentCount: 1,
56-
})
57-
.set(ct2Raw)
58-
encoder.setNearLossless(0)
59-
encoder.encode()
60-
encoder.delete()
81+
bench("encode CT2.RAW (lossless near=0) — kernel", () => {
82+
encCT2.encode()
6183
})
6284
})
Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
// Decoder/encoder construction is hoisted to module scope so the bench
2+
// body only measures the decode/encode kernel itself. A separate
3+
// "instantiate+destroy" bench measures the lifecycle cost that the old
4+
// monolithic bench was conflating with kernel time.
5+
16
import { bench, describe } from "vitest"
27
import { existsSync, readFileSync } from "node:fs"
38
import { fileURLToPath } from "node:url"
@@ -18,31 +23,43 @@ const rawDecoded = !skip
1823
: null
1924

2025
let codec
26+
let dec
27+
let enc
2128
if (!skip) {
2229
const factory = (await import(distPath)).default ?? (await import(distPath))
2330
codec = await factory()
31+
32+
dec = new codec.JPEGDecoder()
33+
dec.getEncodedBuffer(jpegEncoded.length).set(jpegEncoded)
34+
35+
enc = new codec.JPEGEncoder()
36+
enc
37+
.getDecodedBuffer({
38+
width: 600,
39+
height: 800,
40+
bitsPerSample: 8,
41+
componentCount: 1,
42+
isSigned: false,
43+
})
44+
.set(rawDecoded)
2445
}
2546

2647
describe.skipIf(skip)("libjpeg-turbo-8bit (wasm)", () => {
27-
bench("decode jpeg400jfif.jpg (600x800x8bit)", () => {
28-
const decoder = new codec.JPEGDecoder()
29-
decoder.getEncodedBuffer(jpegEncoded.length).set(jpegEncoded)
30-
decoder.decode()
31-
decoder.delete()
48+
bench("instantiate+destroy JPEGDecoder", () => {
49+
const d = new codec.JPEGDecoder()
50+
d.delete()
51+
})
52+
53+
bench("instantiate+destroy JPEGEncoder", () => {
54+
const e = new codec.JPEGEncoder()
55+
e.delete()
56+
})
57+
58+
bench("decode jpeg400jfif.jpg (600x800x8bit) — kernel", () => {
59+
dec.decode()
3260
})
3361

34-
bench("encode raw 600x800x8bit (lossy default)", () => {
35-
const encoder = new codec.JPEGEncoder()
36-
encoder
37-
.getDecodedBuffer({
38-
width: 600,
39-
height: 800,
40-
bitsPerSample: 8,
41-
componentCount: 1,
42-
isSigned: false,
43-
})
44-
.set(rawDecoded)
45-
encoder.encode()
46-
encoder.delete()
62+
bench("encode raw 600x800x8bit (lossy default) — kernel", () => {
63+
enc.encode()
4764
})
4865
})
Lines changed: 52 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
// Decoder/encoder construction is hoisted to module scope so the bench
2+
// body only measures the decode/encode kernel itself. Each fixture gets
3+
// its own pre-constructed instance because the underlying wasm
4+
// J2KDecoder advances internal state on decode() and can't be reused
5+
// across multiple bench bodies. A separate "instantiate+destroy" bench
6+
// measures the lifecycle cost that the old monolithic bench was
7+
// conflating with kernel time — the openjpeg encode CT1.RAW bench had
8+
// the worst variance in the suite (110% spread across 3 runs) and this
9+
// split is the fix.
10+
111
import { bench, describe } from "vitest"
212
import { existsSync, readFileSync } from "node:fs"
313
import { fileURLToPath } from "node:url"
@@ -18,45 +28,59 @@ const ctLossy = !skip
1828
: null
1929

2030
let codec
31+
let decCT1
32+
let decCT2
33+
let decLossy
34+
let encCT1
2135
if (!skip) {
2236
const factory = (await import(distPath)).default ?? (await import(distPath))
2337
codec = await factory()
38+
39+
decCT1 = new codec.J2KDecoder()
40+
decCT1.getEncodedBuffer(ct1Encoded.length).set(ct1Encoded)
41+
42+
decCT2 = new codec.J2KDecoder()
43+
decCT2.getEncodedBuffer(ct2Encoded.length).set(ct2Encoded)
44+
45+
decLossy = new codec.J2KDecoder()
46+
decLossy.getEncodedBuffer(ctLossy.length).set(ctLossy)
47+
48+
encCT1 = new codec.J2KEncoder()
49+
encCT1
50+
.getDecodedBuffer({
51+
width: 512,
52+
height: 512,
53+
bitsPerSample: 16,
54+
componentCount: 1,
55+
isSigned: true,
56+
})
57+
.set(ct1Raw)
2458
}
2559

2660
describe.skipIf(skip)("openjpeg J2K (wasm)", () => {
27-
bench("decode CT1.j2k (.90 lossless 5-3, 512x512x16bit)", () => {
28-
const decoder = new codec.J2KDecoder()
29-
decoder.getEncodedBuffer(ct1Encoded.length).set(ct1Encoded)
30-
decoder.decode()
31-
decoder.delete()
61+
bench("instantiate+destroy J2KDecoder", () => {
62+
const d = new codec.J2KDecoder()
63+
d.delete()
64+
})
65+
66+
bench("instantiate+destroy J2KEncoder", () => {
67+
const e = new codec.J2KEncoder()
68+
e.delete()
69+
})
70+
71+
bench("decode CT1.j2k (.90 lossless 5-3, 512x512x16bit) — kernel", () => {
72+
decCT1.decode()
3273
})
3374

34-
bench("decode CT2.j2k (.90 lossless 5-3, 512x512x16bit)", () => {
35-
const decoder = new codec.J2KDecoder()
36-
decoder.getEncodedBuffer(ct2Encoded.length).set(ct2Encoded)
37-
decoder.decode()
38-
decoder.delete()
75+
bench("decode CT2.j2k (.90 lossless 5-3, 512x512x16bit) — kernel", () => {
76+
decCT2.decode()
3977
})
4078

41-
bench("decode CT-512x512-lossy.j2k (.91 irreversible 9-7)", () => {
42-
const decoder = new codec.J2KDecoder()
43-
decoder.getEncodedBuffer(ctLossy.length).set(ctLossy)
44-
decoder.decode()
45-
decoder.delete()
79+
bench("decode CT-512x512-lossy.j2k (.91 irreversible 9-7) — kernel", () => {
80+
decLossy.decode()
4681
})
4782

48-
bench("encode CT1.RAW (lossless)", () => {
49-
const encoder = new codec.J2KEncoder()
50-
encoder
51-
.getDecodedBuffer({
52-
width: 512,
53-
height: 512,
54-
bitsPerSample: 16,
55-
componentCount: 1,
56-
isSigned: true,
57-
})
58-
.set(ct1Raw)
59-
encoder.encode()
60-
encoder.delete()
83+
bench("encode CT1.RAW (lossless) — kernel", () => {
84+
encCT1.encode()
6185
})
6286
})
Lines changed: 42 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
// Decoder/encoder construction is hoisted to module scope so the bench
2+
// body only measures the decode/encode kernel itself. A separate
3+
// "instantiate+destroy" bench measures the lifecycle cost that the old
4+
// monolithic bench was conflating with kernel time.
5+
16
import { bench, describe } from "vitest"
27
import { existsSync, readFileSync } from "node:fs"
38
import { fileURLToPath } from "node:url"
@@ -15,39 +20,52 @@ const ct2Encoded = !skip ? readFileSync(resolve(fixturesDir, "j2c/CT2.j2c")) : n
1520
const ct1Raw = !skip ? readFileSync(resolve(fixturesDir, "raw/CT1.RAW")) : null
1621

1722
let codec
23+
let decCT1
24+
let decCT2
25+
let encCT1
1826
if (!skip) {
1927
const factory = (await import(distPath)).default ?? (await import(distPath))
2028
codec = await factory()
29+
30+
decCT1 = new codec.HTJ2KDecoder()
31+
decCT1.getEncodedBuffer(ct1Encoded.length).set(ct1Encoded)
32+
33+
decCT2 = new codec.HTJ2KDecoder()
34+
decCT2.getEncodedBuffer(ct2Encoded.length).set(ct2Encoded)
35+
36+
encCT1 = new codec.HTJ2KEncoder()
37+
encCT1
38+
.getDecodedBuffer({
39+
width: 512,
40+
height: 512,
41+
bitsPerSample: 16,
42+
componentCount: 1,
43+
isSigned: true,
44+
isUsingColorTransform: false,
45+
})
46+
.set(ct1Raw)
2147
}
2248

2349
describe.skipIf(skip)("openjphjs HTJ2K (wasm)", () => {
24-
bench("decode CT1.j2c (.201 lossless, 512x512x16bit)", () => {
25-
const decoder = new codec.HTJ2KDecoder()
26-
decoder.getEncodedBuffer(ct1Encoded.length).set(ct1Encoded)
27-
decoder.decode()
28-
decoder.delete()
50+
bench("instantiate+destroy HTJ2KDecoder", () => {
51+
const d = new codec.HTJ2KDecoder()
52+
d.delete()
53+
})
54+
55+
bench("instantiate+destroy HTJ2KEncoder", () => {
56+
const e = new codec.HTJ2KEncoder()
57+
e.delete()
58+
})
59+
60+
bench("decode CT1.j2c (.201 lossless, 512x512x16bit) — kernel", () => {
61+
decCT1.decode()
2962
})
3063

31-
bench("decode CT2.j2c (.201 lossless, 512x512x16bit)", () => {
32-
const decoder = new codec.HTJ2KDecoder()
33-
decoder.getEncodedBuffer(ct2Encoded.length).set(ct2Encoded)
34-
decoder.decode()
35-
decoder.delete()
64+
bench("decode CT2.j2c (.201 lossless, 512x512x16bit) — kernel", () => {
65+
decCT2.decode()
3666
})
3767

38-
bench("encode CT1.RAW (HTJ2K lossless)", () => {
39-
const encoder = new codec.HTJ2KEncoder()
40-
encoder
41-
.getDecodedBuffer({
42-
width: 512,
43-
height: 512,
44-
bitsPerSample: 16,
45-
componentCount: 1,
46-
isSigned: true,
47-
isUsingColorTransform: false,
48-
})
49-
.set(ct1Raw)
50-
encoder.encode()
51-
encoder.delete()
68+
bench("encode CT1.RAW (HTJ2K lossless) — kernel", () => {
69+
encCT1.encode()
5270
})
5371
})

0 commit comments

Comments
 (0)