Skip to content

Commit

Permalink
chore: benchmark optimize (#348)
Browse files Browse the repository at this point in the history
* feat: add benchmark hot runs mode and release reports

* feat: update benchmark reports

* feat: hot start will clear atn cache every benchmark test and add version limit

* feat: hot start support benchmark results

---------

Co-authored-by: jialan <[email protected]>
  • Loading branch information
JackWang032 and jialan authored Sep 10, 2024
1 parent b835c4b commit cb69bab
Show file tree
Hide file tree
Showing 18 changed files with 682 additions and 50 deletions.
4 changes: 3 additions & 1 deletion benchmark/run.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import { Table } from 'console-table-printer';

const argv = argsParser(process.argv.slice(2));
const isChooseAll = argv.lang === 'all';
const isHot = argv.hot !== undefined;
const isRelease = argv.release !== undefined;
const testFiles = config.testFiles;

let benchmarkResults: SqlBenchmark[] = [];
Expand Down Expand Up @@ -115,7 +117,7 @@ const printSummaryReport = () => {
};

const benchmark = (lang: Language) => {
const sqlBenchmark = new SqlBenchmark(lang);
const sqlBenchmark = new SqlBenchmark(lang, isHot, isRelease);
const originalParams = readParams();

testFiles.forEach((testInfo) => {
Expand Down
131 changes: 98 additions & 33 deletions benchmark/sqlBenchmark.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@ export type BenchmarkResult = {
};

// 过滤掉异常数据,m为判断为异常值的标准差倍数
const removeOutliers = (data, m = 2) => {
const removeOutliers = (data, m = 1) => {
if (data.length <= 2) return data;
const mean = data.reduce((a, b) => a + b, 0) / data.length;
const standardDeviation = Math.sqrt(
data.map((x) => Math.pow(x - mean, 2)).reduce((a, b) => a + b) / data.length
);

return data.filter((x) => Math.abs(x - mean) < m * standardDeviation);
return data.filter((x) => Math.abs(x - mean) <= m * standardDeviation);
};

const tableColumns = [
Expand Down Expand Up @@ -57,6 +57,14 @@ const tableColumns = [
},
];

const releaseColumns = tableColumns.filter(
(column) => !['lastCostTime', 'costTimes', 'loopTimes'].includes(column.name)
);

const hotRunColumns = tableColumns.filter(
(column) => !['avgTime', 'lastCostTime'].includes(column.name)
);

/**
* Key is sql directory name, value is module export name.
*/
Expand All @@ -75,17 +83,26 @@ export type Language = keyof typeof languageNameMap;
export const languages = Object.keys(languageNameMap) as Language[];

class SqlBenchmark {
constructor(language: string) {
constructor(language: string, isHot: boolean, isRelease: boolean) {
this.language = language;
this.isHot = isHot;
this.isRelease = isRelease;

this._lastResultsCache = this.getLastResults();
}

public results: BenchmarkResult[] = [];

public readonly language: string;

public readonly isHot: boolean;

public readonly isRelease: boolean;

private _DEFAULT_LOOP_TIMES = 5;

private _RELEASE_LOOP_TIMES = 15;

/**
* If current average time difference from last time grater than DIFF_RATIO, we will highlight that record.
*/
Expand All @@ -95,25 +112,39 @@ class SqlBenchmark {

/**
* Returns SqlParser instance of specific language.
*
* Due to the presence of ATN cache in antlr4, we will clear the module cache to ensure that each parser is brand new and with no cache.
*/
getSqlParser(): BasicSQL {
const caches = Object.keys(require.cache);
caches.forEach((moduleName) => {
const module = require.cache[moduleName]!;
// Fix Memory Leak
if (module.parent) {
module.parent.children = [];
}
delete require.cache[moduleName];
});
if (!this.isHot) {
this.clearATNCache();
}
const Parser = require(path.resolve(`src/parser/${this.language}/index.ts`))[
languageNameMap[this.language]
];
return new Parser();
}

/**
* Due to the presence of ATN cache in antlr4, we will clear the module cache to ensure that each parser is brand new and with no cache.
*/
clearATNCache() {
const caches = Object.keys(require.cache);
const sourcePath = path.join(__dirname, '../src');
caches
.filter((cache) => cache.includes(sourcePath))
.forEach((moduleName) => {
const module = require.cache[moduleName]!;
// Fix Memory Leak
if (module.parent) {
module.parent.children = [];
}
delete require.cache[moduleName];
});
}

getColumns = () => {
return this.isRelease ? releaseColumns : this.isHot ? hotRunColumns : tableColumns;
};

/**
* @param type Which parser method you want to run
* @param name Benchmark name
Expand All @@ -126,11 +157,22 @@ class SqlBenchmark {
name: string,
params: any[],
sqlRows: number,
loopTimes: number = this._DEFAULT_LOOP_TIMES
loops: number = this._DEFAULT_LOOP_TIMES
) {
let avgTime = 0;
let loopTimes = this.isRelease ? this._RELEASE_LOOP_TIMES : loops;
const costTimes: number[] = [];
const lastResult =
this._lastResultsCache?.find((item) => item.type === type && item.name === name) ?? {};

if (this.isHot) {
this.clearATNCache();
}

if (this.isHot && loopTimes < 2) {
throw new Error('Hot start should run at least 2 times');
}

for (let i = 0; i < loopTimes; i++) {
const parser = this.getSqlParser();
if (!parser[type] || typeof parser[type] !== 'function') return;
Expand All @@ -141,10 +183,12 @@ class SqlBenchmark {

costTimes.push(Math.round(costTime));
}
const filteredData = removeOutliers(costTimes);
const avgTime = Math.round(

const filteredData = removeOutliers(this.isHot ? costTimes.slice(1) : costTimes);
avgTime = Math.round(
filteredData.reduce((prev, curr) => prev + curr, 0) / filteredData.length
);

const result = {
name,
avgTime,
Expand All @@ -159,11 +203,18 @@ class SqlBenchmark {
}

getLastResults() {
const reportPath = path.join(__dirname, `./reports/${this.language}.benchmark.md`);
if (!fs.existsSync(reportPath)) return null;
const reportPath = path.join(
__dirname,
'./reports',
this.isHot ? 'hot_start' : 'cold_start',
`${this.language}.benchmark.md`
);
if (this.isRelease || !fs.existsSync(reportPath)) return null;

const report = fs.readFileSync(reportPath, { encoding: 'utf-8' });
const pattern = /<input .*? value=['"](.+?)['"]\s*\/>/;
const match = pattern.exec(report);

if (match) {
const lastResultsStr = match[1];
try {
Expand All @@ -174,6 +225,7 @@ class SqlBenchmark {
return null;
}
}

return null;
}

Expand All @@ -188,11 +240,12 @@ class SqlBenchmark {
lastCostTime !== undefined &&
Math.abs(lastCostTime - currentCostTime) / currentCostTime >
this._HIGHLIGHT_DIFF_RATIO;
const [color, icon] = isSignificantDiff
? currentCostTime > lastCostTime
? ['red', '↓']
: ['green', '↑']
: ['#FFF', ' '];
const [color, icon] =
isSignificantDiff && !this.isHot
? currentCostTime > lastCostTime
? ['red', '↑']
: ['green', '↓']
: ['#FFF', ' '];

table.addRow(
{
Expand All @@ -218,12 +271,17 @@ class SqlBenchmark {
);
const currentVersion = require('../package.json').version;
const parsedEnvInfo = JSON.parse(envInfo);
const baseDir = path.join(
__dirname,
this.isRelease ? '../benchmark_reports' : './reports',
this.isHot ? 'hot_start' : 'cold_start'
);

if (!fs.existsSync(path.join(__dirname, `./reports`))) {
fs.mkdirSync(path.join(__dirname, `./reports`), { recursive: true });
if (!fs.existsSync(baseDir)) {
fs.mkdirSync(baseDir, { recursive: true });
}

const writePath = path.join(__dirname, `./reports/${this.language}.benchmark.md`);
const writePath = path.join(baseDir, `./${this.language}.benchmark.md`);
const writter = new MarkdownWritter(writePath);
writter.writeHeader('Benchmark', 2);
writter.writeLine();
Expand All @@ -239,21 +297,28 @@ class SqlBenchmark {
writter.writeHeader('Device', 3);
writter.writeText(parsedEnvInfo.System.OS);
writter.writeText(parsedEnvInfo.System.CPU);
writter.writeText(parsedEnvInfo.System.Memory);
writter.writeText(parsedEnvInfo.System.Memory?.split('/')[1]?.trim());
writter.writeLine();

writter.writeHeader('Version', 3);
writter.writeText(`dt-sql-parser: ${currentVersion}`);
writter.writeText(`antlr4-c3: ${parsedEnvInfo.npmPackages['antlr4-c3']?.installed}`);
writter.writeText(`antlr4ng: ${parsedEnvInfo.npmPackages['antlr4ng']?.installed}`);
writter.writeText(`\`nodejs\`: ${process.version}`);
writter.writeText(`\`dt-sql-parser\`: v${currentVersion}`);
writter.writeText(`\`antlr4-c3\`: v${parsedEnvInfo.npmPackages['antlr4-c3']?.installed}`);
writter.writeText(`\`antlr4ng\`: v${parsedEnvInfo.npmPackages['antlr4ng']?.installed}`);
writter.writeLine();

writter.writeHeader('Running Mode', 3);
writter.writeText(this.isHot ? 'Hot Start' : 'Cold Start');
writter.writeLine();

writter.writeHeader('Report', 3);
writter.writeTable(tableColumns, this.results);

const columns = this.getColumns();
writter.writeTable(columns, this.results);
writter.writeLine();

// Save original data via hidden input
writter.writeHiddenInput(this.results);
!this.isRelease && writter.writeHiddenInput(this.results);

writter.save();

Expand Down
36 changes: 36 additions & 0 deletions benchmark_reports/cold_start/flink.benchmark.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
## Benchmark

### Language
FlinkSQL

### Report Time
2024/9/9 19:55:03

### Device
macOS 14.4.1
(8) arm64 Apple M1 Pro
16.00 GB

### Version
`nodejs`: v21.6.1
`dt-sql-parser`: v4.0.2
`antlr4-c3`: v3.3.7
`antlr4ng`: v2.0.11

### Running Mode
Cold Start

### Report
| Benchmark Name | Method Name |SQL Rows|Average Time(ms)|
|----------------|----------------------------|--------|----------------|
|Query Collection| getAllTokens | 1015 | 227 |
|Query Collection| validate | 1015 | 221 |
| Insert Columns | getAllTokens | 1001 | 65 |
| Insert Columns | validate | 1001 | 65 |
| Create Table | getAllTokens | 1004 | 27 |
| Create Table | validate | 1004 | 26 |
| Split SQL | splitSQLByStatement | 999 | 52 |
|Collect Entities| getAllEntities | 1056 | 141 |
| Suggestion |getSuggestionAtCaretPosition| 1056 | 131 |


38 changes: 38 additions & 0 deletions benchmark_reports/cold_start/hive.benchmark.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
## Benchmark

### Language
HiveSQL

### Report Time
2024/9/9 19:55:03

### Device
macOS 14.4.1
(8) arm64 Apple M1 Pro
16.00 GB

### Version
`nodejs`: v21.6.1
`dt-sql-parser`: v4.0.2
`antlr4-c3`: v3.3.7
`antlr4ng`: v2.0.11

### Running Mode
Cold Start

### Report
| Benchmark Name | Method Name |SQL Rows|Average Time(ms)|
|----------------|----------------------------|--------|----------------|
|Query Collection| getAllTokens | 1015 | 185 |
|Query Collection| validate | 1015 | 179 |
| Update Table | getAllTokens | 1011 | 112 |
| Update Table | validate | 1011 | 109 |
| Insert Columns | getAllTokens | 1001 | 329 |
| Insert Columns | validate | 1001 | 329 |
| Create Table | getAllTokens | 1002 | 21 |
| Create Table | validate | 1002 | 20 |
| Split SQL | splitSQLByStatement | 1001 | 72 |
|Collect Entities| getAllEntities | 1066 | 106 |
| Suggestion |getSuggestionAtCaretPosition| 1066 | 100 |


38 changes: 38 additions & 0 deletions benchmark_reports/cold_start/impala.benchmark.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
## Benchmark

### Language
ImpalaSQL

### Report Time
2024/9/9 19:55:03

### Device
macOS 14.4.1
(8) arm64 Apple M1 Pro
16.00 GB

### Version
`nodejs`: v21.6.1
`dt-sql-parser`: v4.0.2
`antlr4-c3`: v3.3.7
`antlr4ng`: v2.0.11

### Running Mode
Cold Start

### Report
| Benchmark Name | Method Name |SQL Rows|Average Time(ms)|
|----------------|----------------------------|--------|----------------|
|Query Collection| getAllTokens | 1015 | 71 |
|Query Collection| validate | 1015 | 71 |
| Update Table | getAllTokens | 1011 | 113 |
| Update Table | validate | 1011 | 108 |
| Insert Columns | getAllTokens | 1001 | 208 |
| Insert Columns | validate | 1001 | 213 |
| Create Table | getAllTokens | 1002 | 23 |
| Create Table | validate | 1002 | 23 |
| Split SQL | splitSQLByStatement | 1001 | 65 |
|Collect Entities| getAllEntities | 1066 | 82 |
| Suggestion |getSuggestionAtCaretPosition| 1066 | 83 |


Loading

0 comments on commit cb69bab

Please sign in to comment.