From a3f4526a6684ac3aca4e7057541ab9a8160ee894 Mon Sep 17 00:00:00 2001 From: Yang Wang <elainewy@meta.com> Date: Mon, 7 Apr 2025 16:42:51 -0700 Subject: [PATCH 01/14] format --- aws/lambda/job_queue_times_snapshot.json | 0 .../llms/components/LLMsSummaryPanel.tsx | 58 +++++++++++++++++ torchci/lib/benchmark/llms/utils/llmUtils.ts | 63 +++++++++---------- 3 files changed, 86 insertions(+), 35 deletions(-) create mode 100644 aws/lambda/job_queue_times_snapshot.json diff --git a/aws/lambda/job_queue_times_snapshot.json b/aws/lambda/job_queue_times_snapshot.json new file mode 100644 index 0000000000..e69de29bb2 diff --git a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx index 1a132a6a45..74a1d44844 100644 --- a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx +++ b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx @@ -1,3 +1,4 @@ +import { Co2Sharp } from "@mui/icons-material"; import { Grid2 } from "@mui/material"; import { GridCellParams, GridRenderCellParams } from "@mui/x-data-grid"; import styles from "components/metrics.module.css"; @@ -62,6 +63,8 @@ export default function LLMsSummaryPanel({ lPerfData, rPerfData ); + + const columns: any[] = [ { field: "metadata", @@ -187,6 +190,45 @@ export default function LLMsSummaryPanel({ }); } + const handleModelBackendFailure = ( + field: string, + unit: string, + showTarget: string, + isLFailure: boolean, + isRFailure: boolean, + lactual: number, + ractual:number, + lPercent:string, + rPercent:string) => { + if (field === "FAILURE_REPORT"){ + if (lCommit === rCommit){ + return `Detected Failure on commit`; + } + + if (isLFailure && isRFailure){ + return `Detected Failure on both base commit and new commit`; + } + if (isLFailure){ + return `Detected Failure on base commit`; + } + if (isRFailure){ + return `Detected Failure on new commit`; + } + } + + if (isLFailure && isRFailure){ + if (lCommit === rCommit){ + return `Failure`; + } + return `Failure -> Fialure`; + } else if (isLFailure){ + return `Failure ->${ractual}${unit} ${rPercent} ${showTarget}`; + } else if (isRFailure){ + return `${lactual}${unit} ${lPercent} -> Failure`; + } + } + + const hasBackend = data.length > 0 && "backend" in data[0] ? true : false; if (hasBackend && benchmarkName !== "TorchCache Benchmark") { columns.push({ @@ -233,6 +275,7 @@ export default function LLMsSummaryPanel({ return params.value; }, }, + // add all other metrics as columns ...metricNames .filter((metric: string) => { // TODO (huydhn): Just a temp fix, remove this after a few weeks @@ -256,6 +299,10 @@ export default function LLMsSummaryPanel({ return ""; } + if(metric === "FAILURE_REPORT"){ + return styles.error; + } + // l is the old (base) value, r is the new value const l = v.l.actual; const r = v.r.actual; @@ -329,6 +376,17 @@ export default function LLMsSummaryPanel({ const showTarget = target && target != 0 ? `[target = ${target}]` : ""; + if (params.field == "FAILURE_REPORT"){ + console.log(params); + } + + // A Failure is detected for a model and backend + if (params.row.FAILURE_REPORT){ + const isLFailure = params.row.FAILURE_REPORT?.l.actual == -1? false : true; + const isRFailure = params.row.FAILURE_REPORT?.r.actual == -1? false : true; + return handleModelBackendFailure(params.field, unit, showTarget, isLFailure, isRFailure, l, r, lPercent, rPercent); + } + if (lCommit === rCommit || !v.highlight) { return `${r}${unit} ${rPercent} ${showTarget}`; } else { diff --git a/torchci/lib/benchmark/llms/utils/llmUtils.ts b/torchci/lib/benchmark/llms/utils/llmUtils.ts index b8ebc04b47..65bed17c6a 100644 --- a/torchci/lib/benchmark/llms/utils/llmUtils.ts +++ b/torchci/lib/benchmark/llms/utils/llmUtils.ts @@ -143,14 +143,6 @@ export function combineLeftAndRight( dataGroupedByModel[key][metric]["l"] = record; }); } - - // NB: This is a hack to keep track of valid devices. The problem is that the records - // in the benchmark database alone don't have the information to differentiate between - // benchmarks that are failed to run and benchmarks that are not run. Both show up as - // 0 on the dashboard. Note that we can do a join with workflow_job table to get this - // information, but it's a rather slow and expensive route - const validDevices = new Set<string>(); - const validBackends = new Set<string>(); // First round to get all the valid devices Object.keys(dataGroupedByModel).forEach((key: string) => { const [model, backend, mode, dtype, device, arch, extra] = key.split(";"); @@ -159,16 +151,6 @@ export function combineLeftAndRight( name: `${model} ${backend} (${mode} / ${dtype} / ${device} / ${arch})`, }; - for (const metric in dataGroupedByModel[key]) { - const record = dataGroupedByModel[key][metric]; - const hasL = "l" in record; - const hasR = "r" in record; - - if (hasL && hasR) { - validDevices.add(device); - validBackends.add(`${model} ${backend}`); - } - } }); // Transform the data into a displayable format @@ -185,24 +167,10 @@ export function combineLeftAndRight( const hasL = "l" in record; const hasR = "r" in record; - // Skip devices and models that weren't run in this commit - if ( - (validDevices.size !== 0 && !validDevices.has(device)) || - (validBackends.size !== 0 && !validBackends.has(`${model} ${backend}`)) - ) { - continue; - } - - // No overlapping between left and right commits, just show what it's on the - // right commit instead of showing a blank page - if (!hasR) { - continue; - } - if (!("metadata" in row)) { row["metadata"] = { model: model, - origins: record["r"].origins, + origins: hasR? record["r"].origins : [], backend: backend, mode: mode, dtype: dtype, @@ -281,6 +249,31 @@ export function combineLeftAndRight( row["is_dynamic"] = extraInfo["is_dynamic"]; } + if (metric == "FAILURE_REPORT"){ + row[metric] = { + l: hasL + ? { + actual: record["l"].actual, + target: record["l"].target, + } + : { + actual: -1, // indicate the failure on left side + target: 0, + }, + r: hasR + ? { + actual: record["r"].actual, + target: record["r"].target, + } + : { + actual: -1,// indicate the failure on right side + target: 0, + }, + highlight: + hasL && + hasR, + }; + } else{ row[metric] = { l: hasL ? { @@ -301,13 +294,13 @@ export function combineLeftAndRight( target: 0, }, highlight: - validDevices.size !== 0 && - validBackends.has(`${model} ${backend}`) && hasL && hasR, }; } + } + if ("metadata" in row) { data.push(row); } From e8095dfe9f09e3214c79ad3e92f250389512f883 Mon Sep 17 00:00:00 2001 From: Yang Wang <elainewy@meta.com> Date: Mon, 7 Apr 2025 18:05:05 -0700 Subject: [PATCH 02/14] addDeviceLevelFailure --- .../llms/components/LLMsSummaryPanel.tsx | 94 +++++++++++-------- torchci/components/metrics.module.css | 5 + torchci/lib/benchmark/llms/utils/llmUtils.ts | 68 ++++++-------- 3 files changed, 86 insertions(+), 81 deletions(-) diff --git a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx index 74a1d44844..555545d811 100644 --- a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx +++ b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx @@ -1,4 +1,3 @@ -import { Co2Sharp } from "@mui/icons-material"; import { Grid2 } from "@mui/material"; import { GridCellParams, GridRenderCellParams } from "@mui/x-data-grid"; import styles from "components/metrics.module.css"; @@ -64,7 +63,6 @@ export default function LLMsSummaryPanel({ rPerfData ); - const columns: any[] = [ { field: "metadata", @@ -190,6 +188,7 @@ export default function LLMsSummaryPanel({ }); } + // handle failure report for a row. const handleModelBackendFailure = ( field: string, unit: string, @@ -197,37 +196,39 @@ export default function LLMsSummaryPanel({ isLFailure: boolean, isRFailure: boolean, lactual: number, - ractual:number, - lPercent:string, - rPercent:string) => { - if (field === "FAILURE_REPORT"){ - if (lCommit === rCommit){ - return `Detected Failure on commit`; - } - - if (isLFailure && isRFailure){ - return `Detected Failure on both base commit and new commit`; - } - if (isLFailure){ - return `Detected Failure on base commit`; - } - if (isRFailure){ - return `Detected Failure on new commit`; - } + ractual: number, + lPercent: string, + rPercent: string + ) => { + // Indicate the failure details in Failure Report column + if (field === "FAILURE_REPORT") { + if (lCommit === rCommit) { + return `Detected Failure on commit`; } - if (isLFailure && isRFailure){ - if (lCommit === rCommit){ - return `Failure`; - } - return `Failure -> Fialure`; - } else if (isLFailure){ - return `Failure ->${ractual}${unit} ${rPercent} ${showTarget}`; - } else if (isRFailure){ - return `${lactual}${unit} ${lPercent} -> Failure`; + if (isLFailure && isRFailure) { + return `Detected Failure on both base commit and new commit`; + } + if (isLFailure) { + return `Detected Failure on base commit`; + } + if (isRFailure) { + return `Detected Failure on new commit`; } } + // render the row's value in other metric columns + if (isLFailure && isRFailure) { + if (lCommit === rCommit) { + return `Failure`; + } + return `Failure -> Fialure`; + } else if (isLFailure) { + return `Failure ->${ractual}${unit} ${rPercent} ${showTarget}`; + } else if (isRFailure) { + return `${lactual}${unit} ${lPercent} -> Failure`; + } + }; const hasBackend = data.length > 0 && "backend" in data[0] ? true : false; if (hasBackend && benchmarkName !== "TorchCache Benchmark") { @@ -295,12 +296,14 @@ export default function LLMsSummaryPanel({ flex: 1, cellClassName: (params: GridCellParams<any, any>) => { const v = params.value; - if (v === undefined) { - return ""; + + // If the row data has failure, we render it in grey color + if (params.row.FAILURE_REPORT) { + return styles.failure; } - if(metric === "FAILURE_REPORT"){ - return styles.error; + if (v === undefined) { + return ""; } // l is the old (base) value, r is the new value @@ -354,6 +357,9 @@ export default function LLMsSummaryPanel({ renderCell: (params: GridRenderCellParams<any>) => { const v = params.value; if (v === undefined) { + if (params.row.FAILURE_REPORT) { + return "N/A"; + } return ""; } @@ -376,15 +382,23 @@ export default function LLMsSummaryPanel({ const showTarget = target && target != 0 ? `[target = ${target}]` : ""; - if (params.field == "FAILURE_REPORT"){ - console.log(params); - } - // A Failure is detected for a model and backend - if (params.row.FAILURE_REPORT){ - const isLFailure = params.row.FAILURE_REPORT?.l.actual == -1? false : true; - const isRFailure = params.row.FAILURE_REPORT?.r.actual == -1? false : true; - return handleModelBackendFailure(params.field, unit, showTarget, isLFailure, isRFailure, l, r, lPercent, rPercent); + if (params.row.FAILURE_REPORT) { + const isLFailure = + params.row.FAILURE_REPORT?.l.actual == -1 ? false : true; + const isRFailure = + params.row.FAILURE_REPORT?.r.actual == -1 ? false : true; + return handleModelBackendFailure( + params.field, + unit, + showTarget, + isLFailure, + isRFailure, + l, + r, + lPercent, + rPercent + ); } if (lCommit === rCommit || !v.highlight) { diff --git a/torchci/components/metrics.module.css b/torchci/components/metrics.module.css index 1a7d9ff914..bf775b2fab 100644 --- a/torchci/components/metrics.module.css +++ b/torchci/components/metrics.module.css @@ -8,6 +8,11 @@ color: var(--text-color); } +.failure { + background-color: var(--workflow-box-none-bg, lightgray); + color: var(--text-color); +} + .error { background-color: var(--workflow-box-fail-bg, lightpink); color: var(--text-color); diff --git a/torchci/lib/benchmark/llms/utils/llmUtils.ts b/torchci/lib/benchmark/llms/utils/llmUtils.ts index 65bed17c6a..3eeb5353c4 100644 --- a/torchci/lib/benchmark/llms/utils/llmUtils.ts +++ b/torchci/lib/benchmark/llms/utils/llmUtils.ts @@ -143,15 +143,6 @@ export function combineLeftAndRight( dataGroupedByModel[key][metric]["l"] = record; }); } - // First round to get all the valid devices - Object.keys(dataGroupedByModel).forEach((key: string) => { - const [model, backend, mode, dtype, device, arch, extra] = key.split(";"); - const row: { [k: string]: any } = { - // Keep the name as as the row ID as DataGrid requires it - name: `${model} ${backend} (${mode} / ${dtype} / ${device} / ${arch})`, - }; - - }); // Transform the data into a displayable format const data: { [k: string]: any }[] = []; @@ -170,7 +161,7 @@ export function combineLeftAndRight( if (!("metadata" in row)) { row["metadata"] = { model: model, - origins: hasR? record["r"].origins : [], + origins: hasR ? record["r"].origins : [], backend: backend, mode: mode, dtype: dtype, @@ -249,7 +240,7 @@ export function combineLeftAndRight( row["is_dynamic"] = extraInfo["is_dynamic"]; } - if (metric == "FAILURE_REPORT"){ + if (metric == "FAILURE_REPORT") { row[metric] = { l: hasL ? { @@ -266,39 +257,34 @@ export function combineLeftAndRight( target: record["r"].target, } : { - actual: -1,// indicate the failure on right side + actual: -1, // indicate the failure on right side target: 0, }, - highlight: - hasL && - hasR, + highlight: hasL && hasR, }; - } else{ - row[metric] = { - l: hasL - ? { - actual: record["l"].actual, - target: record["l"].target, - } - : { - actual: 0, - target: 0, - }, - r: hasR - ? { - actual: record["r"].actual, - target: record["r"].target, - } - : { - actual: 0, - target: 0, - }, - highlight: - hasL && - hasR, - }; - } - + } else { + row[metric] = { + l: hasL + ? { + actual: record["l"].actual, + target: record["l"].target, + } + : { + actual: 0, + target: 0, + }, + r: hasR + ? { + actual: record["r"].actual, + target: record["r"].target, + } + : { + actual: 0, + target: 0, + }, + highlight: hasL && hasR, + }; + } } if ("metadata" in row) { From 1e9af85abfc52be2ee5ae5efc85efcb598e8d0b8 Mon Sep 17 00:00:00 2001 From: Yang Wang <elainewy@meta.com> Date: Mon, 7 Apr 2025 20:28:07 -0700 Subject: [PATCH 03/14] addDeviceLevelFailure --- torchci/lib/benchmark/llms/utils/llmUtils.ts | 162 +++++++++++++++---- 1 file changed, 134 insertions(+), 28 deletions(-) diff --git a/torchci/lib/benchmark/llms/utils/llmUtils.ts b/torchci/lib/benchmark/llms/utils/llmUtils.ts index 3eeb5353c4..ddbb3fceef 100644 --- a/torchci/lib/benchmark/llms/utils/llmUtils.ts +++ b/torchci/lib/benchmark/llms/utils/llmUtils.ts @@ -17,6 +17,9 @@ import { } from "../common"; import { LLMsBenchmarkProps } from "../types/dashboardProps"; import { TORCHAO_BASELINE } from "./aoUtils"; +import { startsWith } from "lodash"; +import JobArtifact from "components/JobArtifact"; +import { map } from "d3"; export function useBenchmark( queryParams: { [key: string]: any }, @@ -95,6 +98,7 @@ export function combineLeftAndRight( const rData = rPerfData.data; const dataGroupedByModel: { [k: string]: any } = {}; + rData.forEach((record: LLMsBenchmarkData) => { const model = record.model; const backend = record.backend; @@ -155,13 +159,14 @@ export function combineLeftAndRight( for (const metric in dataGroupedByModel[key]) { const record = dataGroupedByModel[key][metric]; + const hasL = "l" in record; const hasR = "r" in record; if (!("metadata" in row)) { row["metadata"] = { model: model, - origins: hasR ? record["r"].origins : [], + origins: hasR? record["r"].origins : [], backend: backend, mode: mode, dtype: dtype, @@ -239,8 +244,7 @@ export function combineLeftAndRight( const extraInfo = JSON.parse(extra); row["is_dynamic"] = extraInfo["is_dynamic"]; } - - if (metric == "FAILURE_REPORT") { + if (metric == "FAILURE_REPORT"){ row[metric] = { l: hasL ? { @@ -257,34 +261,38 @@ export function combineLeftAndRight( target: record["r"].target, } : { - actual: -1, // indicate the failure on right side - target: 0, - }, - highlight: hasL && hasR, - }; - } else { - row[metric] = { - l: hasL - ? { - actual: record["l"].actual, - target: record["l"].target, - } - : { - actual: 0, - target: 0, - }, - r: hasR - ? { - actual: record["r"].actual, - target: record["r"].target, - } - : { - actual: 0, + actual: -1,// indicate the failure on right side target: 0, }, - highlight: hasL && hasR, + highlight: + hasL && + hasR, }; - } + } else{ + row[metric] = { + l: hasL + ? { + actual: record["l"].actual, + target: record["l"].target, + } + : { + actual: 0, + target: 0, + }, + r: hasR + ? { + actual: record["r"].actual, + target: record["r"].target, + } + : { + actual: 0, + target: 0, + }, + highlight: + hasL && + hasR, + }; + } } if ("metadata" in row) { @@ -347,3 +355,101 @@ export function computeGeomean(data: LLMsBenchmarkData[], metricName: string) { return returnedGeomean; } + +function processDateGroupedByModel(repoName:string, dataGroupedByModel: { [k: string]: any }){ + const {failure_rows, failure_mapping} = mapDevicesForFailureReport(repoName, dataGroupedByModel); + + failure_mapping.forEach((key: string) => { + const obj = dataGroupedByModel[key] + obj["FAILURE_REPORT"] = { + l: { + } + } + }) + + + +} + + function mapDevicesForFailureReport(repo:string, maps: { [k: string]: any }){ + let prefixSet: Set<string> = new Set() + let failureIndicatorRows: { [k: string]: any } = {} + let failureMapping = new Set<string>() + + + if(!(repo in GIT_JOB_FAILURE_MAPPING_CONFIG)){ + return { + failure_rows:failureIndicatorRows, + failure_mapping:failureMapping + } + } + + const device_pools = GIT_JOB_FAILURE_MAPPING_CONFIG[repo]["device_pools"] + const device_names = device_pools.map((d:any) => d.name) + + + + Object.keys(maps).forEach((key: string) => { + const [model, backend, mode, dtype, device, arch, extra] = key.split(";"); + const extraInfo = JSON.parse(extra); + + const metrics = maps[key] + if ("FAILURE_REPORT" in metrics && extraInfo["failure_type"]=='GIT_JOB' && device in device_names){ + if (!(key in failureIndicatorRows)) { + const record = metrics["FAILURE_REPORT"] + const hasLFailure = "l" in record; + const hasRFailure= "r" in record; + failureIndicatorRows[key] = { + "l": hasLFailure, + "r": hasRFailure + } + } + const prefix = device_pools.find((item:any) => item.name === device).prefix + const res_key = `${model};${backend};${mode};${dtype};${prefix};`; + if (!(device in prefixSet)) { + prefixSet.add(res_key) + } + } + }) + + Object.keys(maps).forEach((key: string) => { + for (const prefix of prefixSet) { + if (key.startsWith(prefix)) { + failureMapping.add(key) + } + } +}) + +return { + failure_rows:failureIndicatorRows, + failure_mapping:failureMapping +} + +} + +const GIT_JOB_FAILURE_MAPPING_CONFIG:{ [k: string]: any } = { + "pytorch/excutorch":{ + "device_pools":[{ + name:"apple_iphone_15", + prefix: "Apple iPhone 15" + }, + { + name:"samsung_galaxy_s22", + prefix:"Samsung Galaxy S22" + }, + { + name: "samsung_galaxy_s24", + prefix: "Samsung Galaxy S24" + }, + { + name: "google_pixel_8_pro", + prefix: "Google Pixel 8" + + }], + } +} + +function removeFieldsByKey<T extends object, K extends keyof T>(obj: T, keysToRemove: K[]): Omit<T, K> { + const filteredEntries = Object.entries(obj).filter(([key]) => !keysToRemove.includes(key as K)); + return Object.fromEntries(filteredEntries) as Omit<T, K>; +} From be1e6674d2b3f8b420de9012a3feb9dce901f87e Mon Sep 17 00:00:00 2001 From: Yang Wang <elainewy@meta.com> Date: Mon, 7 Apr 2025 20:30:23 -0700 Subject: [PATCH 04/14] addDeviceLevelFailure --- torchci/lib/benchmark/llms/utils/llmUtils.ts | 205 ++++++++++--------- 1 file changed, 104 insertions(+), 101 deletions(-) diff --git a/torchci/lib/benchmark/llms/utils/llmUtils.ts b/torchci/lib/benchmark/llms/utils/llmUtils.ts index ddbb3fceef..fe8a5c3407 100644 --- a/torchci/lib/benchmark/llms/utils/llmUtils.ts +++ b/torchci/lib/benchmark/llms/utils/llmUtils.ts @@ -17,9 +17,6 @@ import { } from "../common"; import { LLMsBenchmarkProps } from "../types/dashboardProps"; import { TORCHAO_BASELINE } from "./aoUtils"; -import { startsWith } from "lodash"; -import JobArtifact from "components/JobArtifact"; -import { map } from "d3"; export function useBenchmark( queryParams: { [key: string]: any }, @@ -166,7 +163,7 @@ export function combineLeftAndRight( if (!("metadata" in row)) { row["metadata"] = { model: model, - origins: hasR? record["r"].origins : [], + origins: hasR ? record["r"].origins : [], backend: backend, mode: mode, dtype: dtype, @@ -244,7 +241,7 @@ export function combineLeftAndRight( const extraInfo = JSON.parse(extra); row["is_dynamic"] = extraInfo["is_dynamic"]; } - if (metric == "FAILURE_REPORT"){ + if (metric == "FAILURE_REPORT") { row[metric] = { l: hasL ? { @@ -261,38 +258,34 @@ export function combineLeftAndRight( target: record["r"].target, } : { - actual: -1,// indicate the failure on right side + actual: -1, // indicate the failure on right side target: 0, }, - highlight: - hasL && - hasR, + highlight: hasL && hasR, }; - } else{ - row[metric] = { - l: hasL - ? { - actual: record["l"].actual, - target: record["l"].target, - } - : { - actual: 0, - target: 0, - }, - r: hasR - ? { - actual: record["r"].actual, - target: record["r"].target, - } - : { - actual: 0, - target: 0, - }, - highlight: - hasL && - hasR, - }; - } + } else { + row[metric] = { + l: hasL + ? { + actual: record["l"].actual, + target: record["l"].target, + } + : { + actual: 0, + target: 0, + }, + r: hasR + ? { + actual: record["r"].actual, + target: record["r"].target, + } + : { + actual: 0, + target: 0, + }, + highlight: hasL && hasR, + }; + } } if ("metadata" in row) { @@ -356,100 +349,110 @@ export function computeGeomean(data: LLMsBenchmarkData[], metricName: string) { return returnedGeomean; } -function processDateGroupedByModel(repoName:string, dataGroupedByModel: { [k: string]: any }){ - const {failure_rows, failure_mapping} = mapDevicesForFailureReport(repoName, dataGroupedByModel); +function processDateGroupedByModel( + repoName: string, + dataGroupedByModel: { [k: string]: any } +) { + const { failure_rows, failure_mapping } = mapDevicesForFailureReport( + repoName, + dataGroupedByModel + ); failure_mapping.forEach((key: string) => { - const obj = dataGroupedByModel[key] + const obj = dataGroupedByModel[key]; obj["FAILURE_REPORT"] = { - l: { - } - } - }) - - - + l: {}, + }; + }); } - function mapDevicesForFailureReport(repo:string, maps: { [k: string]: any }){ - let prefixSet: Set<string> = new Set() - let failureIndicatorRows: { [k: string]: any } = {} - let failureMapping = new Set<string>() +function mapDevicesForFailureReport(repo: string, maps: { [k: string]: any }) { + let prefixSet: Set<string> = new Set(); + let failureIndicatorRows: { [k: string]: any } = {}; + let failureMapping = new Set<string>(); - - if(!(repo in GIT_JOB_FAILURE_MAPPING_CONFIG)){ + if (!(repo in GIT_JOB_FAILURE_MAPPING_CONFIG)) { return { - failure_rows:failureIndicatorRows, - failure_mapping:failureMapping - } + failure_rows: failureIndicatorRows, + failure_mapping: failureMapping, + }; } - const device_pools = GIT_JOB_FAILURE_MAPPING_CONFIG[repo]["device_pools"] - const device_names = device_pools.map((d:any) => d.name) - - + const device_pools = GIT_JOB_FAILURE_MAPPING_CONFIG[repo]["device_pools"]; + const device_names = device_pools.map((d: any) => d.name); Object.keys(maps).forEach((key: string) => { const [model, backend, mode, dtype, device, arch, extra] = key.split(";"); const extraInfo = JSON.parse(extra); - const metrics = maps[key] - if ("FAILURE_REPORT" in metrics && extraInfo["failure_type"]=='GIT_JOB' && device in device_names){ + const metrics = maps[key]; + if ( + "FAILURE_REPORT" in metrics && + extraInfo["failure_type"] == "GIT_JOB" && + device in device_names + ) { if (!(key in failureIndicatorRows)) { - const record = metrics["FAILURE_REPORT"] + const record = metrics["FAILURE_REPORT"]; const hasLFailure = "l" in record; - const hasRFailure= "r" in record; + const hasRFailure = "r" in record; failureIndicatorRows[key] = { - "l": hasLFailure, - "r": hasRFailure - } + l: hasLFailure, + r: hasRFailure, + }; } - const prefix = device_pools.find((item:any) => item.name === device).prefix + const prefix = device_pools.find( + (item: any) => item.name === device + ).prefix; const res_key = `${model};${backend};${mode};${dtype};${prefix};`; if (!(device in prefixSet)) { - prefixSet.add(res_key) + prefixSet.add(res_key); } } - }) + }); - Object.keys(maps).forEach((key: string) => { - for (const prefix of prefixSet) { - if (key.startsWith(prefix)) { - failureMapping.add(key) + Object.keys(maps).forEach((key: string) => { + for (const prefix of prefixSet) { + if (key.startsWith(prefix)) { + failureMapping.add(key); + } } - } -}) - -return { - failure_rows:failureIndicatorRows, - failure_mapping:failureMapping -} + }); + return { + failure_rows: failureIndicatorRows, + failure_mapping: failureMapping, + }; } -const GIT_JOB_FAILURE_MAPPING_CONFIG:{ [k: string]: any } = { - "pytorch/excutorch":{ - "device_pools":[{ - name:"apple_iphone_15", - prefix: "Apple iPhone 15" - }, - { - name:"samsung_galaxy_s22", - prefix:"Samsung Galaxy S22" - }, - { - name: "samsung_galaxy_s24", - prefix: "Samsung Galaxy S24" - }, - { - name: "google_pixel_8_pro", - prefix: "Google Pixel 8" - - }], - } -} +const GIT_JOB_FAILURE_MAPPING_CONFIG: { [k: string]: any } = { + "pytorch/excutorch": { + device_pools: [ + { + name: "apple_iphone_15", + prefix: "Apple iPhone 15", + }, + { + name: "samsung_galaxy_s22", + prefix: "Samsung Galaxy S22", + }, + { + name: "samsung_galaxy_s24", + prefix: "Samsung Galaxy S24", + }, + { + name: "google_pixel_8_pro", + prefix: "Google Pixel 8", + }, + ], + }, +}; -function removeFieldsByKey<T extends object, K extends keyof T>(obj: T, keysToRemove: K[]): Omit<T, K> { - const filteredEntries = Object.entries(obj).filter(([key]) => !keysToRemove.includes(key as K)); +function removeFieldsByKey<T extends object, K extends keyof T>( + obj: T, + keysToRemove: K[] +): Omit<T, K> { + const filteredEntries = Object.entries(obj).filter( + ([key]) => !keysToRemove.includes(key as K) + ); return Object.fromEntries(filteredEntries) as Omit<T, K>; } From be785f812751c93b54fb054a8bbe46b1c87ce082 Mon Sep 17 00:00:00 2001 From: Yang Wang <elainewy@meta.com> Date: Mon, 7 Apr 2025 21:19:59 -0700 Subject: [PATCH 05/14] addDeviceLevelFailure --- torchci/lib/benchmark/llms/utils/llmUtils.ts | 108 ------------------- 1 file changed, 108 deletions(-) diff --git a/torchci/lib/benchmark/llms/utils/llmUtils.ts b/torchci/lib/benchmark/llms/utils/llmUtils.ts index fe8a5c3407..fe4470bba3 100644 --- a/torchci/lib/benchmark/llms/utils/llmUtils.ts +++ b/torchci/lib/benchmark/llms/utils/llmUtils.ts @@ -348,111 +348,3 @@ export function computeGeomean(data: LLMsBenchmarkData[], metricName: string) { return returnedGeomean; } - -function processDateGroupedByModel( - repoName: string, - dataGroupedByModel: { [k: string]: any } -) { - const { failure_rows, failure_mapping } = mapDevicesForFailureReport( - repoName, - dataGroupedByModel - ); - - failure_mapping.forEach((key: string) => { - const obj = dataGroupedByModel[key]; - obj["FAILURE_REPORT"] = { - l: {}, - }; - }); -} - -function mapDevicesForFailureReport(repo: string, maps: { [k: string]: any }) { - let prefixSet: Set<string> = new Set(); - let failureIndicatorRows: { [k: string]: any } = {}; - let failureMapping = new Set<string>(); - - if (!(repo in GIT_JOB_FAILURE_MAPPING_CONFIG)) { - return { - failure_rows: failureIndicatorRows, - failure_mapping: failureMapping, - }; - } - - const device_pools = GIT_JOB_FAILURE_MAPPING_CONFIG[repo]["device_pools"]; - const device_names = device_pools.map((d: any) => d.name); - - Object.keys(maps).forEach((key: string) => { - const [model, backend, mode, dtype, device, arch, extra] = key.split(";"); - const extraInfo = JSON.parse(extra); - - const metrics = maps[key]; - if ( - "FAILURE_REPORT" in metrics && - extraInfo["failure_type"] == "GIT_JOB" && - device in device_names - ) { - if (!(key in failureIndicatorRows)) { - const record = metrics["FAILURE_REPORT"]; - const hasLFailure = "l" in record; - const hasRFailure = "r" in record; - failureIndicatorRows[key] = { - l: hasLFailure, - r: hasRFailure, - }; - } - const prefix = device_pools.find( - (item: any) => item.name === device - ).prefix; - const res_key = `${model};${backend};${mode};${dtype};${prefix};`; - if (!(device in prefixSet)) { - prefixSet.add(res_key); - } - } - }); - - Object.keys(maps).forEach((key: string) => { - for (const prefix of prefixSet) { - if (key.startsWith(prefix)) { - failureMapping.add(key); - } - } - }); - - return { - failure_rows: failureIndicatorRows, - failure_mapping: failureMapping, - }; -} - -const GIT_JOB_FAILURE_MAPPING_CONFIG: { [k: string]: any } = { - "pytorch/excutorch": { - device_pools: [ - { - name: "apple_iphone_15", - prefix: "Apple iPhone 15", - }, - { - name: "samsung_galaxy_s22", - prefix: "Samsung Galaxy S22", - }, - { - name: "samsung_galaxy_s24", - prefix: "Samsung Galaxy S24", - }, - { - name: "google_pixel_8_pro", - prefix: "Google Pixel 8", - }, - ], - }, -}; - -function removeFieldsByKey<T extends object, K extends keyof T>( - obj: T, - keysToRemove: K[] -): Omit<T, K> { - const filteredEntries = Object.entries(obj).filter( - ([key]) => !keysToRemove.includes(key as K) - ); - return Object.fromEntries(filteredEntries) as Omit<T, K>; -} From 067deec1c97c62df6ac839e1ca229481d54544d2 Mon Sep 17 00:00:00 2001 From: Yang Wang <elainewy@meta.com> Date: Mon, 7 Apr 2025 21:25:44 -0700 Subject: [PATCH 06/14] addDeviceLevelFailure --- aws/lambda/job_queue_times_snapshot.json | 0 .../benchmark/llms/components/LLMsSummaryPanel.tsx | 4 ++-- torchci/lib/benchmark/llms/utils/llmUtils.ts | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) delete mode 100644 aws/lambda/job_queue_times_snapshot.json diff --git a/aws/lambda/job_queue_times_snapshot.json b/aws/lambda/job_queue_times_snapshot.json deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx index 555545d811..5371a55047 100644 --- a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx +++ b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx @@ -385,9 +385,9 @@ export default function LLMsSummaryPanel({ // A Failure is detected for a model and backend if (params.row.FAILURE_REPORT) { const isLFailure = - params.row.FAILURE_REPORT?.l.actual == -1 ? false : true; + params.row.FAILURE_REPORT?.l.actual == 1 ? true : false; const isRFailure = - params.row.FAILURE_REPORT?.r.actual == -1 ? false : true; + params.row.FAILURE_REPORT?.r.actual == 1 ? true : false; return handleModelBackendFailure( params.field, unit, diff --git a/torchci/lib/benchmark/llms/utils/llmUtils.ts b/torchci/lib/benchmark/llms/utils/llmUtils.ts index fe4470bba3..e44feb4269 100644 --- a/torchci/lib/benchmark/llms/utils/llmUtils.ts +++ b/torchci/lib/benchmark/llms/utils/llmUtils.ts @@ -245,20 +245,20 @@ export function combineLeftAndRight( row[metric] = { l: hasL ? { - actual: record["l"].actual, - target: record["l"].target, + actual: 1, + target: 0, } : { - actual: -1, // indicate the failure on left side + actual: 0, // indicate the failure on left side target: 0, }, r: hasR ? { - actual: record["r"].actual, - target: record["r"].target, + actual: 1, + target: 0, } : { - actual: -1, // indicate the failure on right side + actual: 0, // indicate the failure on right side target: 0, }, highlight: hasL && hasR, From 12e0fe8f7787e72d209cb68ce23abe9f0b06cc68 Mon Sep 17 00:00:00 2001 From: Yang Wang <elainewy@meta.com> Date: Mon, 7 Apr 2025 21:27:39 -0700 Subject: [PATCH 07/14] addDeviceLevelFailure --- torchci/lib/benchmark/llms/utils/llmUtils.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/torchci/lib/benchmark/llms/utils/llmUtils.ts b/torchci/lib/benchmark/llms/utils/llmUtils.ts index e44feb4269..d0f852c4cc 100644 --- a/torchci/lib/benchmark/llms/utils/llmUtils.ts +++ b/torchci/lib/benchmark/llms/utils/llmUtils.ts @@ -245,20 +245,20 @@ export function combineLeftAndRight( row[metric] = { l: hasL ? { - actual: 1, + actual: 1, // indicate the failure on left side target: 0, } : { - actual: 0, // indicate the failure on left side + actual: 0, target: 0, }, r: hasR ? { - actual: 1, + actual: 1, // indicate the failure on right side target: 0, } : { - actual: 0, // indicate the failure on right side + actual: 0, target: 0, }, highlight: hasL && hasR, From f3ab8251148c3aa154b74d62428ebf43b81804e0 Mon Sep 17 00:00:00 2001 From: Yang Wang <elainewy@meta.com> Date: Tue, 8 Apr 2025 09:40:33 -0700 Subject: [PATCH 08/14] Update torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx Co-authored-by: Huy Do <huydhn@gmail.com> --- .../components/benchmark/llms/components/LLMsSummaryPanel.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx index 5371a55047..3f543328d7 100644 --- a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx +++ b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx @@ -226,7 +226,7 @@ export default function LLMsSummaryPanel({ } else if (isLFailure) { return `Failure ->${ractual}${unit} ${rPercent} ${showTarget}`; } else if (isRFailure) { - return `${lactual}${unit} ${lPercent} -> Failure`; + return `${lactual}${unit} ${lPercent} → Failure`; } }; From 6d6b4157fa90a84db63e040285f17932be567acc Mon Sep 17 00:00:00 2001 From: Yang Wang <elainewy@meta.com> Date: Tue, 8 Apr 2025 09:40:42 -0700 Subject: [PATCH 09/14] Update torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx Co-authored-by: Huy Do <huydhn@gmail.com> --- .../components/benchmark/llms/components/LLMsSummaryPanel.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx index 3f543328d7..6f3977099b 100644 --- a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx +++ b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx @@ -224,7 +224,7 @@ export default function LLMsSummaryPanel({ } return `Failure -> Fialure`; } else if (isLFailure) { - return `Failure ->${ractual}${unit} ${rPercent} ${showTarget}`; + return `Failure → ${ractual}${unit} ${rPercent} ${showTarget}`; } else if (isRFailure) { return `${lactual}${unit} ${lPercent} → Failure`; } From 99c3f831bb1feb60cfe1880ca4b33813387a9a3e Mon Sep 17 00:00:00 2001 From: Yang Wang <elainewy@meta.com> Date: Tue, 8 Apr 2025 12:17:51 -0700 Subject: [PATCH 10/14] addDeviceLevelFailure --- .../llms/components/LLMsSummaryPanel.tsx | 160 +++++++++++++----- 1 file changed, 117 insertions(+), 43 deletions(-) diff --git a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx index 6f3977099b..f15bd69f25 100644 --- a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx +++ b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx @@ -1,4 +1,4 @@ -import { Grid2 } from "@mui/material"; +import { Grid2, styled, Tooltip } from "@mui/material"; import { GridCellParams, GridRenderCellParams } from "@mui/x-data-grid"; import styles from "components/metrics.module.css"; import { TablePanelWithData } from "components/metrics/panels/TablePanel"; @@ -12,6 +12,23 @@ import { UNIT_FOR_METRIC, } from "lib/benchmark/llms/common"; import { combineLeftAndRight } from "lib/benchmark/llms/utils/llmUtils"; +import { RiAlarmWarningFill } from "react-icons/ri"; +import { VscError } from "react-icons/vsc"; + +const FlexDiv = styled("div")({ + display: "flex", + flexDirection: "row", + justifyContent: "flex-start", + alignItems: "center", +}); + +const FlexDivCenter = styled("div")({ + display: "flex", + flexDirection: "row", + justifyContent: "center", + alignItems: "center", + margin: "3px", +}); const getDeviceArch = ( device: string | undefined, @@ -188,48 +205,6 @@ export default function LLMsSummaryPanel({ }); } - // handle failure report for a row. - const handleModelBackendFailure = ( - field: string, - unit: string, - showTarget: string, - isLFailure: boolean, - isRFailure: boolean, - lactual: number, - ractual: number, - lPercent: string, - rPercent: string - ) => { - // Indicate the failure details in Failure Report column - if (field === "FAILURE_REPORT") { - if (lCommit === rCommit) { - return `Detected Failure on commit`; - } - - if (isLFailure && isRFailure) { - return `Detected Failure on both base commit and new commit`; - } - if (isLFailure) { - return `Detected Failure on base commit`; - } - if (isRFailure) { - return `Detected Failure on new commit`; - } - } - - // render the row's value in other metric columns - if (isLFailure && isRFailure) { - if (lCommit === rCommit) { - return `Failure`; - } - return `Failure -> Fialure`; - } else if (isLFailure) { - return `Failure → ${ractual}${unit} ${rPercent} ${showTarget}`; - } else if (isRFailure) { - return `${lactual}${unit} ${lPercent} → Failure`; - } - }; - const hasBackend = data.length > 0 && "backend" in data[0] ? true : false; if (hasBackend && benchmarkName !== "TorchCache Benchmark") { columns.push({ @@ -389,6 +364,8 @@ export default function LLMsSummaryPanel({ const isRFailure = params.row.FAILURE_REPORT?.r.actual == 1 ? true : false; return handleModelBackendFailure( + lCommit, + rCommit, params.field, unit, showTarget, @@ -431,3 +408,100 @@ export default function LLMsSummaryPanel({ </Grid2> ); } + +// handle failure report for a row. +const handleModelBackendFailure = ( + lCommit: string, + rCommit: string, + field: string, + unit: string, + showTarget: string, + isLFailure: boolean, + isRFailure: boolean, + lactual: number, + ractual: number, + lPercent: string, + rPercent: string +) => { + // Indicate the failure details in Failure Report column + if (field === "FAILURE_REPORT") { + if (lCommit === rCommit) { + return ( + <WarningElementWithTooltip message="Detected Failure on commit"></WarningElementWithTooltip> + ); + } + + if (isLFailure && isRFailure) { + return ( + <WarningElementWithTooltip message="Detected Failure on both base commit and new commit"></WarningElementWithTooltip> + ); + } + if (isLFailure) { + return ( + <WarningElementWithTooltip message="Detected Failure on base commit"></WarningElementWithTooltip> + ); + } + if (isRFailure) { + return ( + <WarningElementWithTooltip message="Detected Failure on new commit"></WarningElementWithTooltip> + ); + } + } + + // render the row's value in other metric columns + if (isLFailure && isRFailure) { + if (lCommit === rCommit) { + return ( + <FailureElementWithTooltip message="device job failed on commit"></FailureElementWithTooltip> + ); + } + return ( + <div> + <FailureElementWithTooltip message="device job failed on both commit" /> + ; + </div> + ); + } else if (isLFailure) { + return ( + <FlexDiv> + <FailureElementWithTooltip message="device job failed on base commit"></FailureElementWithTooltip> + <span> → </span> + <span> + {ractual} + {unit} + {rPercent} {showTarget} + </span> + </FlexDiv> + ); + } else if (isRFailure) { + return ( + <FlexDiv> + <span> + {lactual} + {unit} + {lPercent} + </span> + <span> → </span> + <FailureElementWithTooltip message="device job failed on new commit"></FailureElementWithTooltip> + </FlexDiv> + ); + } +}; + +const FailureElementWithTooltip = ({ message = "" }) => ( + <Tooltip title={message}> + <div style={{ display: "flex", alignItems: "center", color: "red" }}> + <VscError /> + </div> + </Tooltip> +); + +const WarningElementWithTooltip = ({ message = "" }) => ( + <FlexDivCenter> + <Tooltip title={message}> + <div style={{ display: "flex", alignItems: "center", color: "red" }}> + <RiAlarmWarningFill size={20} /> + </div> + </Tooltip> + </FlexDivCenter> +); From dac485a7d97f36d788e6d9e2d10bd72a23c0baf7 Mon Sep 17 00:00:00 2001 From: Yang Wang <elainewy@meta.com> Date: Tue, 8 Apr 2025 12:42:08 -0700 Subject: [PATCH 11/14] addDeviceLevelFailure --- .../llms/components/LLMsSummaryPanel.tsx | 95 +++++++++++-------- 1 file changed, 56 insertions(+), 39 deletions(-) diff --git a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx index f15bd69f25..c30113c77e 100644 --- a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx +++ b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx @@ -12,7 +12,7 @@ import { UNIT_FOR_METRIC, } from "lib/benchmark/llms/common"; import { combineLeftAndRight } from "lib/benchmark/llms/utils/llmUtils"; -import { RiAlarmWarningFill } from "react-icons/ri"; +import { MdError } from "react-icons/md"; import { VscError } from "react-icons/vsc"; const FlexDiv = styled("div")({ @@ -139,9 +139,18 @@ export default function LLMsSummaryPanel({ ? `${model} (${metadata.origins.join(",")})` : model; return ( - <a href={url}> - <b>{displayName}</b> - </a> + <FlexDiv> + {params.row.FAILURE_REPORT && ( + <RenderWarningOnNameForFailure + lCommit={lCommit} + rCommit={rCommit} + row={params.row} + ></RenderWarningOnNameForFailure> + )} + <a href={url}> + <b>{displayName}</b> + </a> + </FlexDiv> ); }, }, @@ -261,6 +270,9 @@ export default function LLMsSummaryPanel({ (metric !== "speedup" && metric !== "Speedup") ); }) + .filter((metric: string) => { + return metric !== "FAILURE_REPORT"; + }) .map((metric: string) => { return { field: metric, @@ -359,18 +371,12 @@ export default function LLMsSummaryPanel({ // A Failure is detected for a model and backend if (params.row.FAILURE_REPORT) { - const isLFailure = - params.row.FAILURE_REPORT?.l.actual == 1 ? true : false; - const isRFailure = - params.row.FAILURE_REPORT?.r.actual == 1 ? true : false; return handleModelBackendFailure( + params.row, lCommit, rCommit, - params.field, unit, showTarget, - isLFailure, - isRFailure, l, r, lPercent, @@ -411,42 +417,18 @@ export default function LLMsSummaryPanel({ // handle failure report for a row. const handleModelBackendFailure = ( + row: any, lCommit: string, rCommit: string, - field: string, unit: string, showTarget: string, - isLFailure: boolean, - isRFailure: boolean, lactual: number, ractual: number, lPercent: string, rPercent: string ) => { - // Indicate the failure details in Failure Report column - if (field === "FAILURE_REPORT") { - if (lCommit === rCommit) { - return ( - <WarningElementWithTooltip message="Detected Failure on commit"></WarningElementWithTooltip> - ); - } - - if (isLFailure && isRFailure) { - return ( - <WarningElementWithTooltip message="Detected Failure on both base commit and new commit"></WarningElementWithTooltip> - ); - } - if (isLFailure) { - return ( - <WarningElementWithTooltip message="Detected Failure on base commit"></WarningElementWithTooltip> - ); - } - if (isRFailure) { - return ( - <WarningElementWithTooltip message="Detected Failure on new commit"></WarningElementWithTooltip> - ); - } - } + const isLFailure = row.FAILURE_REPORT?.l.actual == 1 ? true : false; + const isRFailure = row.FAILURE_REPORT?.r.actual == 1 ? true : false; // render the row's value in other metric columns if (isLFailure && isRFailure) { @@ -488,6 +470,41 @@ const handleModelBackendFailure = ( } }; +const RenderWarningOnNameForFailure = ({ + lCommit, + rCommit, + row, +}: { + lCommit: string; + rCommit: string; + row: any; +}) => { + const isLFailure = row.FAILURE_REPORT?.l.actual == 1 ? true : false; + const isRFailure = row.FAILURE_REPORT?.r.actual == 1 ? true : false; + // Indicate the failure details in Failure Report column + if (lCommit === rCommit) { + return ( + <WarningElementWithTooltip message="Detected Failure on commit"></WarningElementWithTooltip> + ); + } + if (isLFailure && isRFailure) { + return ( + <WarningElementWithTooltip message="Detected Failure on both base commit and new commit"></WarningElementWithTooltip> + ); + } + if (isLFailure) { + return ( + <WarningElementWithTooltip message="Detected Failure on base commit"></WarningElementWithTooltip> + ); + } + if (isRFailure) { + return ( + <WarningElementWithTooltip message="Detected Failure on new commit"></WarningElementWithTooltip> + ); + } + return <></>; +}; + const FailureElementWithTooltip = ({ message = "" }) => ( <Tooltip title={message}> <div style={{ display: "flex", alignItems: "center", color: "red" }}> @@ -500,7 +517,7 @@ const WarningElementWithTooltip = ({ message = "" }) => ( <FlexDivCenter> <Tooltip title={message}> <div style={{ display: "flex", alignItems: "center", color: "red" }}> - <RiAlarmWarningFill size={20} /> + <MdError size={20} /> </div> </Tooltip> </FlexDivCenter> From 6c9a22503425d487585188e139d1f49e8ba00c61 Mon Sep 17 00:00:00 2001 From: Yang Wang <elainewy@meta.com> Date: Tue, 8 Apr 2025 13:20:19 -0700 Subject: [PATCH 12/14] addDeviceLevelFailure --- .../benchmark/llms/components/LLMsSummaryPanel.tsx | 10 +++++----- torchci/lib/benchmark/llms/utils/llmUtils.ts | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx index c30113c77e..5ed38d031d 100644 --- a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx +++ b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx @@ -271,7 +271,7 @@ export default function LLMsSummaryPanel({ ); }) .filter((metric: string) => { - return metric !== "FAILURE_REPORT"; + return metric !== "FAILURE_REPO"; }) .map((metric: string) => { return { @@ -427,8 +427,8 @@ const handleModelBackendFailure = ( lPercent: string, rPercent: string ) => { - const isLFailure = row.FAILURE_REPORT?.l.actual == 1 ? true : false; - const isRFailure = row.FAILURE_REPORT?.r.actual == 1 ? true : false; + const isLFailure = row.FAILURE_REPORT?.l.actual == Number.MAX_SAFE_INTEGER ? true : false; + const isRFailure = row.FAILURE_REPORT?.r.actual == Number.MAX_SAFE_INTEGER ? true : false; // render the row's value in other metric columns if (isLFailure && isRFailure) { @@ -479,8 +479,8 @@ const RenderWarningOnNameForFailure = ({ rCommit: string; row: any; }) => { - const isLFailure = row.FAILURE_REPORT?.l.actual == 1 ? true : false; - const isRFailure = row.FAILURE_REPORT?.r.actual == 1 ? true : false; + const isLFailure = row.FAILURE_REPORT?.l.actual == Number.MAX_SAFE_INTEGER ? true : false; + const isRFailure = row.FAILURE_REPORT?.r.actual == Number.MAX_SAFE_INTEGER ? true : false; // Indicate the failure details in Failure Report column if (lCommit === rCommit) { return ( diff --git a/torchci/lib/benchmark/llms/utils/llmUtils.ts b/torchci/lib/benchmark/llms/utils/llmUtils.ts index d0f852c4cc..2c12f85bb2 100644 --- a/torchci/lib/benchmark/llms/utils/llmUtils.ts +++ b/torchci/lib/benchmark/llms/utils/llmUtils.ts @@ -245,7 +245,7 @@ export function combineLeftAndRight( row[metric] = { l: hasL ? { - actual: 1, // indicate the failure on left side + actual: Number.MAX_SAFE_INTEGER, // indicate the failure on left side target: 0, } : { @@ -254,7 +254,7 @@ export function combineLeftAndRight( }, r: hasR ? { - actual: 1, // indicate the failure on right side + actual: Number.MAX_SAFE_INTEGER, // indicate the failure on right side target: 0, } : { From c572d870a1ff89bca636fe7c44189eac4e14fcf4 Mon Sep 17 00:00:00 2001 From: Yang Wang <elainewy@meta.com> Date: Tue, 8 Apr 2025 13:24:07 -0700 Subject: [PATCH 13/14] addDeviceLevelFailure --- .../llms/components/LLMsSummaryPanel.tsx | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx index 5ed38d031d..dad86ba48b 100644 --- a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx +++ b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx @@ -271,7 +271,7 @@ export default function LLMsSummaryPanel({ ); }) .filter((metric: string) => { - return metric !== "FAILURE_REPO"; + return metric !== "FAILURE_REPORT"; }) .map((metric: string) => { return { @@ -371,6 +371,7 @@ export default function LLMsSummaryPanel({ // A Failure is detected for a model and backend if (params.row.FAILURE_REPORT) { + console.log("yang here"); return handleModelBackendFailure( params.row, lCommit, @@ -427,8 +428,10 @@ const handleModelBackendFailure = ( lPercent: string, rPercent: string ) => { - const isLFailure = row.FAILURE_REPORT?.l.actual == Number.MAX_SAFE_INTEGER ? true : false; - const isRFailure = row.FAILURE_REPORT?.r.actual == Number.MAX_SAFE_INTEGER ? true : false; + const isLFailure = + row.FAILURE_REPORT?.l.actual == Number.MAX_SAFE_INTEGER ? true : false; + const isRFailure = + row.FAILURE_REPORT?.r.actual == Number.MAX_SAFE_INTEGER ? true : false; // render the row's value in other metric columns if (isLFailure && isRFailure) { @@ -479,8 +482,10 @@ const RenderWarningOnNameForFailure = ({ rCommit: string; row: any; }) => { - const isLFailure = row.FAILURE_REPORT?.l.actual == Number.MAX_SAFE_INTEGER ? true : false; - const isRFailure = row.FAILURE_REPORT?.r.actual == Number.MAX_SAFE_INTEGER ? true : false; + const isLFailure = + row.FAILURE_REPORT?.l.actual == Number.MAX_SAFE_INTEGER ? true : false; + const isRFailure = + row.FAILURE_REPORT?.r.actual == Number.MAX_SAFE_INTEGER ? true : false; // Indicate the failure details in Failure Report column if (lCommit === rCommit) { return ( From d8122ff0549f17917302b9addbd60ff26e39ab31 Mon Sep 17 00:00:00 2001 From: Yang Wang <elainewy@meta.com> Date: Tue, 8 Apr 2025 15:07:41 -0700 Subject: [PATCH 14/14] typo --- .../components/benchmark/llms/components/LLMsSummaryPanel.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx index dad86ba48b..5c6773a348 100644 --- a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx +++ b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx @@ -371,7 +371,6 @@ export default function LLMsSummaryPanel({ // A Failure is detected for a model and backend if (params.row.FAILURE_REPORT) { - console.log("yang here"); return handleModelBackendFailure( params.row, lCommit,