From a3f4526a6684ac3aca4e7057541ab9a8160ee894 Mon Sep 17 00:00:00 2001
From: Yang Wang <elainewy@meta.com>
Date: Mon, 7 Apr 2025 16:42:51 -0700
Subject: [PATCH 01/14] format

---
 aws/lambda/job_queue_times_snapshot.json      |  0
 .../llms/components/LLMsSummaryPanel.tsx      | 58 +++++++++++++++++
 torchci/lib/benchmark/llms/utils/llmUtils.ts  | 63 +++++++++----------
 3 files changed, 86 insertions(+), 35 deletions(-)
 create mode 100644 aws/lambda/job_queue_times_snapshot.json

diff --git a/aws/lambda/job_queue_times_snapshot.json b/aws/lambda/job_queue_times_snapshot.json
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
index 1a132a6a45..74a1d44844 100644
--- a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
+++ b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
@@ -1,3 +1,4 @@
+import { Co2Sharp } from "@mui/icons-material";
 import { Grid2 } from "@mui/material";
 import { GridCellParams, GridRenderCellParams } from "@mui/x-data-grid";
 import styles from "components/metrics.module.css";
@@ -62,6 +63,8 @@ export default function LLMsSummaryPanel({
     lPerfData,
     rPerfData
   );
+
+
   const columns: any[] = [
     {
       field: "metadata",
@@ -187,6 +190,45 @@ export default function LLMsSummaryPanel({
     });
   }
 
+  const handleModelBackendFailure = (
+    field: string,
+    unit: string,
+    showTarget: string,
+    isLFailure: boolean,
+    isRFailure: boolean,
+    lactual: number,
+    ractual:number,
+    lPercent:string,
+    rPercent:string) => {
+      if (field === "FAILURE_REPORT"){
+        if (lCommit === rCommit){
+          return `Detected Failure on commit`;
+        }
+
+        if (isLFailure && isRFailure){
+          return `Detected Failure on both base commit and new commit`;
+        }
+        if (isLFailure){
+          return `Detected Failure on base commit`;
+        }
+        if (isRFailure){
+          return `Detected Failure on new commit`;
+        }
+      }
+
+      if (isLFailure && isRFailure){
+        if (lCommit === rCommit){
+          return `Failure`;
+        }
+        return `Failure -> Fialure`;
+      } else if (isLFailure){
+        return `Failure ->${ractual}${unit} ${rPercent} ${showTarget}`;
+      } else if (isRFailure){
+        return `${lactual}${unit} ${lPercent} -> Failure`;
+      }
+    }
+
+
   const hasBackend = data.length > 0 && "backend" in data[0] ? true : false;
   if (hasBackend && benchmarkName !== "TorchCache Benchmark") {
     columns.push({
@@ -233,6 +275,7 @@ export default function LLMsSummaryPanel({
           return params.value;
         },
       },
+      // add all other metrics as columns
       ...metricNames
         .filter((metric: string) => {
           // TODO (huydhn): Just a temp fix, remove this after a few weeks
@@ -256,6 +299,10 @@ export default function LLMsSummaryPanel({
                 return "";
               }
 
+              if(metric === "FAILURE_REPORT"){
+                return styles.error;
+              }
+
               // l is the old (base) value, r is the new value
               const l = v.l.actual;
               const r = v.r.actual;
@@ -329,6 +376,17 @@ export default function LLMsSummaryPanel({
               const showTarget =
                 target && target != 0 ? `[target = ${target}]` : "";
 
+              if (params.field == "FAILURE_REPORT"){
+                console.log(params);
+              }
+
+              // A Failure is detected for a model and backend
+              if (params.row.FAILURE_REPORT){
+                const isLFailure = params.row.FAILURE_REPORT?.l.actual == -1? false : true;
+                const isRFailure = params.row.FAILURE_REPORT?.r.actual == -1? false : true;
+                return handleModelBackendFailure(params.field, unit, showTarget, isLFailure, isRFailure, l, r, lPercent, rPercent);
+              }
+
               if (lCommit === rCommit || !v.highlight) {
                 return `${r}${unit} ${rPercent} ${showTarget}`;
               } else {
diff --git a/torchci/lib/benchmark/llms/utils/llmUtils.ts b/torchci/lib/benchmark/llms/utils/llmUtils.ts
index b8ebc04b47..65bed17c6a 100644
--- a/torchci/lib/benchmark/llms/utils/llmUtils.ts
+++ b/torchci/lib/benchmark/llms/utils/llmUtils.ts
@@ -143,14 +143,6 @@ export function combineLeftAndRight(
       dataGroupedByModel[key][metric]["l"] = record;
     });
   }
-
-  // NB: This is a hack to keep track of valid devices. The problem is that the records
-  // in the benchmark database alone don't have the information to differentiate between
-  // benchmarks that are failed to run and benchmarks that are not run. Both show up as
-  // 0 on the dashboard. Note that we can do a join with workflow_job table to get this
-  // information, but it's a rather slow and expensive route
-  const validDevices = new Set<string>();
-  const validBackends = new Set<string>();
   // First round to get all the valid devices
   Object.keys(dataGroupedByModel).forEach((key: string) => {
     const [model, backend, mode, dtype, device, arch, extra] = key.split(";");
@@ -159,16 +151,6 @@ export function combineLeftAndRight(
       name: `${model} ${backend} (${mode} / ${dtype} / ${device} / ${arch})`,
     };
 
-    for (const metric in dataGroupedByModel[key]) {
-      const record = dataGroupedByModel[key][metric];
-      const hasL = "l" in record;
-      const hasR = "r" in record;
-
-      if (hasL && hasR) {
-        validDevices.add(device);
-        validBackends.add(`${model} ${backend}`);
-      }
-    }
   });
 
   // Transform the data into a displayable format
@@ -185,24 +167,10 @@ export function combineLeftAndRight(
       const hasL = "l" in record;
       const hasR = "r" in record;
 
-      // Skip devices and models that weren't run in this commit
-      if (
-        (validDevices.size !== 0 && !validDevices.has(device)) ||
-        (validBackends.size !== 0 && !validBackends.has(`${model} ${backend}`))
-      ) {
-        continue;
-      }
-
-      // No overlapping between left and right commits, just show what it's on the
-      // right commit instead of showing a blank page
-      if (!hasR) {
-        continue;
-      }
-
       if (!("metadata" in row)) {
         row["metadata"] = {
           model: model,
-          origins: record["r"].origins,
+          origins: hasR? record["r"].origins : [],
           backend: backend,
           mode: mode,
           dtype: dtype,
@@ -281,6 +249,31 @@ export function combineLeftAndRight(
         row["is_dynamic"] = extraInfo["is_dynamic"];
       }
 
+      if (metric == "FAILURE_REPORT"){
+        row[metric] = {
+          l: hasL
+            ? {
+                actual: record["l"].actual,
+                target: record["l"].target,
+              }
+            : {
+                actual: -1, // indicate the failure on left side
+                target: 0,
+              },
+          r: hasR
+            ? {
+                actual: record["r"].actual,
+                target: record["r"].target,
+              }
+            : {
+                actual: -1,// indicate the failure on right side
+                target: 0,
+              },
+          highlight:
+            hasL &&
+            hasR,
+        };
+     } else{
       row[metric] = {
         l: hasL
           ? {
@@ -301,13 +294,13 @@ export function combineLeftAndRight(
               target: 0,
             },
         highlight:
-          validDevices.size !== 0 &&
-          validBackends.has(`${model} ${backend}`) &&
           hasL &&
           hasR,
       };
     }
 
+    }
+
     if ("metadata" in row) {
       data.push(row);
     }

From e8095dfe9f09e3214c79ad3e92f250389512f883 Mon Sep 17 00:00:00 2001
From: Yang Wang <elainewy@meta.com>
Date: Mon, 7 Apr 2025 18:05:05 -0700
Subject: [PATCH 02/14] addDeviceLevelFailure

---
 .../llms/components/LLMsSummaryPanel.tsx      | 94 +++++++++++--------
 torchci/components/metrics.module.css         |  5 +
 torchci/lib/benchmark/llms/utils/llmUtils.ts  | 68 ++++++--------
 3 files changed, 86 insertions(+), 81 deletions(-)

diff --git a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
index 74a1d44844..555545d811 100644
--- a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
+++ b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
@@ -1,4 +1,3 @@
-import { Co2Sharp } from "@mui/icons-material";
 import { Grid2 } from "@mui/material";
 import { GridCellParams, GridRenderCellParams } from "@mui/x-data-grid";
 import styles from "components/metrics.module.css";
@@ -64,7 +63,6 @@ export default function LLMsSummaryPanel({
     rPerfData
   );
 
-
   const columns: any[] = [
     {
       field: "metadata",
@@ -190,6 +188,7 @@ export default function LLMsSummaryPanel({
     });
   }
 
+  // handle failure report for a row.
   const handleModelBackendFailure = (
     field: string,
     unit: string,
@@ -197,37 +196,39 @@ export default function LLMsSummaryPanel({
     isLFailure: boolean,
     isRFailure: boolean,
     lactual: number,
-    ractual:number,
-    lPercent:string,
-    rPercent:string) => {
-      if (field === "FAILURE_REPORT"){
-        if (lCommit === rCommit){
-          return `Detected Failure on commit`;
-        }
-
-        if (isLFailure && isRFailure){
-          return `Detected Failure on both base commit and new commit`;
-        }
-        if (isLFailure){
-          return `Detected Failure on base commit`;
-        }
-        if (isRFailure){
-          return `Detected Failure on new commit`;
-        }
+    ractual: number,
+    lPercent: string,
+    rPercent: string
+  ) => {
+    // Indicate the failure details in Failure Report column
+    if (field === "FAILURE_REPORT") {
+      if (lCommit === rCommit) {
+        return `Detected Failure on commit`;
       }
 
-      if (isLFailure && isRFailure){
-        if (lCommit === rCommit){
-          return `Failure`;
-        }
-        return `Failure -> Fialure`;
-      } else if (isLFailure){
-        return `Failure ->${ractual}${unit} ${rPercent} ${showTarget}`;
-      } else if (isRFailure){
-        return `${lactual}${unit} ${lPercent} -> Failure`;
+      if (isLFailure && isRFailure) {
+        return `Detected Failure on both base commit and new commit`;
+      }
+      if (isLFailure) {
+        return `Detected Failure on base commit`;
+      }
+      if (isRFailure) {
+        return `Detected Failure on new commit`;
       }
     }
 
+    // render the row's value in other metric columns
+    if (isLFailure && isRFailure) {
+      if (lCommit === rCommit) {
+        return `Failure`;
+      }
+      return `Failure -> Fialure`;
+    } else if (isLFailure) {
+      return `Failure ->${ractual}${unit} ${rPercent} ${showTarget}`;
+    } else if (isRFailure) {
+      return `${lactual}${unit} ${lPercent} -> Failure`;
+    }
+  };
 
   const hasBackend = data.length > 0 && "backend" in data[0] ? true : false;
   if (hasBackend && benchmarkName !== "TorchCache Benchmark") {
@@ -295,12 +296,14 @@ export default function LLMsSummaryPanel({
             flex: 1,
             cellClassName: (params: GridCellParams<any, any>) => {
               const v = params.value;
-              if (v === undefined) {
-                return "";
+
+              // If the row data has failure, we render it in grey color
+              if (params.row.FAILURE_REPORT) {
+                return styles.failure;
               }
 
-              if(metric === "FAILURE_REPORT"){
-                return styles.error;
+              if (v === undefined) {
+                return "";
               }
 
               // l is the old (base) value, r is the new value
@@ -354,6 +357,9 @@ export default function LLMsSummaryPanel({
             renderCell: (params: GridRenderCellParams<any>) => {
               const v = params.value;
               if (v === undefined) {
+                if (params.row.FAILURE_REPORT) {
+                  return "N/A";
+                }
                 return "";
               }
 
@@ -376,15 +382,23 @@ export default function LLMsSummaryPanel({
               const showTarget =
                 target && target != 0 ? `[target = ${target}]` : "";
 
-              if (params.field == "FAILURE_REPORT"){
-                console.log(params);
-              }
-
               // A Failure is detected for a model and backend
-              if (params.row.FAILURE_REPORT){
-                const isLFailure = params.row.FAILURE_REPORT?.l.actual == -1? false : true;
-                const isRFailure = params.row.FAILURE_REPORT?.r.actual == -1? false : true;
-                return handleModelBackendFailure(params.field, unit, showTarget, isLFailure, isRFailure, l, r, lPercent, rPercent);
+              if (params.row.FAILURE_REPORT) {
+                const isLFailure =
+                  params.row.FAILURE_REPORT?.l.actual == -1 ? false : true;
+                const isRFailure =
+                  params.row.FAILURE_REPORT?.r.actual == -1 ? false : true;
+                return handleModelBackendFailure(
+                  params.field,
+                  unit,
+                  showTarget,
+                  isLFailure,
+                  isRFailure,
+                  l,
+                  r,
+                  lPercent,
+                  rPercent
+                );
               }
 
               if (lCommit === rCommit || !v.highlight) {
diff --git a/torchci/components/metrics.module.css b/torchci/components/metrics.module.css
index 1a7d9ff914..bf775b2fab 100644
--- a/torchci/components/metrics.module.css
+++ b/torchci/components/metrics.module.css
@@ -8,6 +8,11 @@
   color: var(--text-color);
 }
 
+.failure {
+  background-color: var(--workflow-box-none-bg, lightgray);
+  color: var(--text-color);
+}
+
 .error {
   background-color: var(--workflow-box-fail-bg, lightpink);
   color: var(--text-color);
diff --git a/torchci/lib/benchmark/llms/utils/llmUtils.ts b/torchci/lib/benchmark/llms/utils/llmUtils.ts
index 65bed17c6a..3eeb5353c4 100644
--- a/torchci/lib/benchmark/llms/utils/llmUtils.ts
+++ b/torchci/lib/benchmark/llms/utils/llmUtils.ts
@@ -143,15 +143,6 @@ export function combineLeftAndRight(
       dataGroupedByModel[key][metric]["l"] = record;
     });
   }
-  // First round to get all the valid devices
-  Object.keys(dataGroupedByModel).forEach((key: string) => {
-    const [model, backend, mode, dtype, device, arch, extra] = key.split(";");
-    const row: { [k: string]: any } = {
-      // Keep the name as as the row ID as DataGrid requires it
-      name: `${model} ${backend} (${mode} / ${dtype} / ${device} / ${arch})`,
-    };
-
-  });
 
   // Transform the data into a displayable format
   const data: { [k: string]: any }[] = [];
@@ -170,7 +161,7 @@ export function combineLeftAndRight(
       if (!("metadata" in row)) {
         row["metadata"] = {
           model: model,
-          origins: hasR? record["r"].origins : [],
+          origins: hasR ? record["r"].origins : [],
           backend: backend,
           mode: mode,
           dtype: dtype,
@@ -249,7 +240,7 @@ export function combineLeftAndRight(
         row["is_dynamic"] = extraInfo["is_dynamic"];
       }
 
-      if (metric == "FAILURE_REPORT"){
+      if (metric == "FAILURE_REPORT") {
         row[metric] = {
           l: hasL
             ? {
@@ -266,39 +257,34 @@ export function combineLeftAndRight(
                 target: record["r"].target,
               }
             : {
-                actual: -1,// indicate the failure on right side
+                actual: -1, // indicate the failure on right side
                 target: 0,
               },
-          highlight:
-            hasL &&
-            hasR,
+          highlight: hasL && hasR,
         };
-     } else{
-      row[metric] = {
-        l: hasL
-          ? {
-              actual: record["l"].actual,
-              target: record["l"].target,
-            }
-          : {
-              actual: 0,
-              target: 0,
-            },
-        r: hasR
-          ? {
-              actual: record["r"].actual,
-              target: record["r"].target,
-            }
-          : {
-              actual: 0,
-              target: 0,
-            },
-        highlight:
-          hasL &&
-          hasR,
-      };
-    }
-
+      } else {
+        row[metric] = {
+          l: hasL
+            ? {
+                actual: record["l"].actual,
+                target: record["l"].target,
+              }
+            : {
+                actual: 0,
+                target: 0,
+              },
+          r: hasR
+            ? {
+                actual: record["r"].actual,
+                target: record["r"].target,
+              }
+            : {
+                actual: 0,
+                target: 0,
+              },
+          highlight: hasL && hasR,
+        };
+      }
     }
 
     if ("metadata" in row) {

From 1e9af85abfc52be2ee5ae5efc85efcb598e8d0b8 Mon Sep 17 00:00:00 2001
From: Yang Wang <elainewy@meta.com>
Date: Mon, 7 Apr 2025 20:28:07 -0700
Subject: [PATCH 03/14] addDeviceLevelFailure

---
 torchci/lib/benchmark/llms/utils/llmUtils.ts | 162 +++++++++++++++----
 1 file changed, 134 insertions(+), 28 deletions(-)

diff --git a/torchci/lib/benchmark/llms/utils/llmUtils.ts b/torchci/lib/benchmark/llms/utils/llmUtils.ts
index 3eeb5353c4..ddbb3fceef 100644
--- a/torchci/lib/benchmark/llms/utils/llmUtils.ts
+++ b/torchci/lib/benchmark/llms/utils/llmUtils.ts
@@ -17,6 +17,9 @@ import {
 } from "../common";
 import { LLMsBenchmarkProps } from "../types/dashboardProps";
 import { TORCHAO_BASELINE } from "./aoUtils";
+import { startsWith } from "lodash";
+import JobArtifact from "components/JobArtifact";
+import { map } from "d3";
 
 export function useBenchmark(
   queryParams: { [key: string]: any },
@@ -95,6 +98,7 @@ export function combineLeftAndRight(
   const rData = rPerfData.data;
 
   const dataGroupedByModel: { [k: string]: any } = {};
+
   rData.forEach((record: LLMsBenchmarkData) => {
     const model = record.model;
     const backend = record.backend;
@@ -155,13 +159,14 @@ export function combineLeftAndRight(
 
     for (const metric in dataGroupedByModel[key]) {
       const record = dataGroupedByModel[key][metric];
+
       const hasL = "l" in record;
       const hasR = "r" in record;
 
       if (!("metadata" in row)) {
         row["metadata"] = {
           model: model,
-          origins: hasR ? record["r"].origins : [],
+          origins: hasR? record["r"].origins : [],
           backend: backend,
           mode: mode,
           dtype: dtype,
@@ -239,8 +244,7 @@ export function combineLeftAndRight(
         const extraInfo = JSON.parse(extra);
         row["is_dynamic"] = extraInfo["is_dynamic"];
       }
-
-      if (metric == "FAILURE_REPORT") {
+      if (metric == "FAILURE_REPORT"){
         row[metric] = {
           l: hasL
             ? {
@@ -257,34 +261,38 @@ export function combineLeftAndRight(
                 target: record["r"].target,
               }
             : {
-                actual: -1, // indicate the failure on right side
-                target: 0,
-              },
-          highlight: hasL && hasR,
-        };
-      } else {
-        row[metric] = {
-          l: hasL
-            ? {
-                actual: record["l"].actual,
-                target: record["l"].target,
-              }
-            : {
-                actual: 0,
-                target: 0,
-              },
-          r: hasR
-            ? {
-                actual: record["r"].actual,
-                target: record["r"].target,
-              }
-            : {
-                actual: 0,
+                actual: -1,// indicate the failure on right side
                 target: 0,
               },
-          highlight: hasL && hasR,
+          highlight:
+            hasL &&
+            hasR,
         };
-      }
+     } else{
+      row[metric] = {
+        l: hasL
+          ? {
+              actual: record["l"].actual,
+              target: record["l"].target,
+            }
+          : {
+              actual: 0,
+              target: 0,
+            },
+        r: hasR
+          ? {
+              actual: record["r"].actual,
+              target: record["r"].target,
+            }
+          : {
+              actual: 0,
+              target: 0,
+            },
+        highlight:
+          hasL &&
+          hasR,
+      };
+    }
     }
 
     if ("metadata" in row) {
@@ -347,3 +355,101 @@ export function computeGeomean(data: LLMsBenchmarkData[], metricName: string) {
 
   return returnedGeomean;
 }
+
+function processDateGroupedByModel(repoName:string, dataGroupedByModel: { [k: string]: any }){
+  const {failure_rows, failure_mapping} = mapDevicesForFailureReport(repoName, dataGroupedByModel);
+
+  failure_mapping.forEach((key: string) => {
+    const obj =  dataGroupedByModel[key]
+    obj["FAILURE_REPORT"] = {
+      l: {
+      }
+    }
+  })
+
+
+
+}
+
+ function mapDevicesForFailureReport(repo:string, maps: { [k: string]: any }){
+  let prefixSet: Set<string> = new Set()
+  let failureIndicatorRows: { [k: string]: any } = {}
+  let failureMapping = new Set<string>()
+
+
+  if(!(repo in GIT_JOB_FAILURE_MAPPING_CONFIG)){
+    return {
+      failure_rows:failureIndicatorRows,
+      failure_mapping:failureMapping
+    }
+  }
+
+  const device_pools = GIT_JOB_FAILURE_MAPPING_CONFIG[repo]["device_pools"]
+  const device_names = device_pools.map((d:any) => d.name)
+
+
+
+  Object.keys(maps).forEach((key: string) => {
+    const [model, backend, mode, dtype, device, arch, extra] = key.split(";");
+    const extraInfo = JSON.parse(extra);
+
+    const metrics = maps[key]
+    if ("FAILURE_REPORT" in metrics && extraInfo["failure_type"]=='GIT_JOB' && device in device_names){
+      if (!(key in failureIndicatorRows)) {
+        const record =  metrics["FAILURE_REPORT"]
+        const hasLFailure = "l" in record;
+        const hasRFailure= "r" in record;
+        failureIndicatorRows[key] = {
+          "l": hasLFailure,
+          "r": hasRFailure
+        }
+      }
+      const prefix = device_pools.find((item:any) => item.name === device).prefix
+      const res_key = `${model};${backend};${mode};${dtype};${prefix};`;
+      if (!(device in prefixSet)) {
+        prefixSet.add(res_key)
+      }
+    }
+  })
+
+ Object.keys(maps).forEach((key: string) => {
+  for (const prefix of prefixSet) {
+    if (key.startsWith(prefix)) {
+      failureMapping.add(key)
+    }
+  }
+})
+
+return {
+  failure_rows:failureIndicatorRows,
+  failure_mapping:failureMapping
+}
+
+}
+
+const GIT_JOB_FAILURE_MAPPING_CONFIG:{ [k: string]: any } = {
+  "pytorch/excutorch":{
+    "device_pools":[{
+      name:"apple_iphone_15",
+      prefix: "Apple iPhone 15"
+    },
+    {
+      name:"samsung_galaxy_s22",
+      prefix:"Samsung Galaxy S22"
+    },
+    {
+       name: "samsung_galaxy_s24",
+       prefix: "Samsung Galaxy S24"
+    },
+    {
+      name: "google_pixel_8_pro",
+      prefix: "Google Pixel 8"
+
+    }],
+  }
+}
+
+function removeFieldsByKey<T extends object, K extends keyof T>(obj: T, keysToRemove: K[]): Omit<T, K> {
+  const filteredEntries = Object.entries(obj).filter(([key]) => !keysToRemove.includes(key as K));
+  return Object.fromEntries(filteredEntries) as Omit<T, K>;
+}

From be1e6674d2b3f8b420de9012a3feb9dce901f87e Mon Sep 17 00:00:00 2001
From: Yang Wang <elainewy@meta.com>
Date: Mon, 7 Apr 2025 20:30:23 -0700
Subject: [PATCH 04/14] addDeviceLevelFailure

---
 torchci/lib/benchmark/llms/utils/llmUtils.ts | 205 ++++++++++---------
 1 file changed, 104 insertions(+), 101 deletions(-)

diff --git a/torchci/lib/benchmark/llms/utils/llmUtils.ts b/torchci/lib/benchmark/llms/utils/llmUtils.ts
index ddbb3fceef..fe8a5c3407 100644
--- a/torchci/lib/benchmark/llms/utils/llmUtils.ts
+++ b/torchci/lib/benchmark/llms/utils/llmUtils.ts
@@ -17,9 +17,6 @@ import {
 } from "../common";
 import { LLMsBenchmarkProps } from "../types/dashboardProps";
 import { TORCHAO_BASELINE } from "./aoUtils";
-import { startsWith } from "lodash";
-import JobArtifact from "components/JobArtifact";
-import { map } from "d3";
 
 export function useBenchmark(
   queryParams: { [key: string]: any },
@@ -166,7 +163,7 @@ export function combineLeftAndRight(
       if (!("metadata" in row)) {
         row["metadata"] = {
           model: model,
-          origins: hasR? record["r"].origins : [],
+          origins: hasR ? record["r"].origins : [],
           backend: backend,
           mode: mode,
           dtype: dtype,
@@ -244,7 +241,7 @@ export function combineLeftAndRight(
         const extraInfo = JSON.parse(extra);
         row["is_dynamic"] = extraInfo["is_dynamic"];
       }
-      if (metric == "FAILURE_REPORT"){
+      if (metric == "FAILURE_REPORT") {
         row[metric] = {
           l: hasL
             ? {
@@ -261,38 +258,34 @@ export function combineLeftAndRight(
                 target: record["r"].target,
               }
             : {
-                actual: -1,// indicate the failure on right side
+                actual: -1, // indicate the failure on right side
                 target: 0,
               },
-          highlight:
-            hasL &&
-            hasR,
+          highlight: hasL && hasR,
         };
-     } else{
-      row[metric] = {
-        l: hasL
-          ? {
-              actual: record["l"].actual,
-              target: record["l"].target,
-            }
-          : {
-              actual: 0,
-              target: 0,
-            },
-        r: hasR
-          ? {
-              actual: record["r"].actual,
-              target: record["r"].target,
-            }
-          : {
-              actual: 0,
-              target: 0,
-            },
-        highlight:
-          hasL &&
-          hasR,
-      };
-    }
+      } else {
+        row[metric] = {
+          l: hasL
+            ? {
+                actual: record["l"].actual,
+                target: record["l"].target,
+              }
+            : {
+                actual: 0,
+                target: 0,
+              },
+          r: hasR
+            ? {
+                actual: record["r"].actual,
+                target: record["r"].target,
+              }
+            : {
+                actual: 0,
+                target: 0,
+              },
+          highlight: hasL && hasR,
+        };
+      }
     }
 
     if ("metadata" in row) {
@@ -356,100 +349,110 @@ export function computeGeomean(data: LLMsBenchmarkData[], metricName: string) {
   return returnedGeomean;
 }
 
-function processDateGroupedByModel(repoName:string, dataGroupedByModel: { [k: string]: any }){
-  const {failure_rows, failure_mapping} = mapDevicesForFailureReport(repoName, dataGroupedByModel);
+function processDateGroupedByModel(
+  repoName: string,
+  dataGroupedByModel: { [k: string]: any }
+) {
+  const { failure_rows, failure_mapping } = mapDevicesForFailureReport(
+    repoName,
+    dataGroupedByModel
+  );
 
   failure_mapping.forEach((key: string) => {
-    const obj =  dataGroupedByModel[key]
+    const obj = dataGroupedByModel[key];
     obj["FAILURE_REPORT"] = {
-      l: {
-      }
-    }
-  })
-
-
-
+      l: {},
+    };
+  });
 }
 
- function mapDevicesForFailureReport(repo:string, maps: { [k: string]: any }){
-  let prefixSet: Set<string> = new Set()
-  let failureIndicatorRows: { [k: string]: any } = {}
-  let failureMapping = new Set<string>()
+function mapDevicesForFailureReport(repo: string, maps: { [k: string]: any }) {
+  let prefixSet: Set<string> = new Set();
+  let failureIndicatorRows: { [k: string]: any } = {};
+  let failureMapping = new Set<string>();
 
-
-  if(!(repo in GIT_JOB_FAILURE_MAPPING_CONFIG)){
+  if (!(repo in GIT_JOB_FAILURE_MAPPING_CONFIG)) {
     return {
-      failure_rows:failureIndicatorRows,
-      failure_mapping:failureMapping
-    }
+      failure_rows: failureIndicatorRows,
+      failure_mapping: failureMapping,
+    };
   }
 
-  const device_pools = GIT_JOB_FAILURE_MAPPING_CONFIG[repo]["device_pools"]
-  const device_names = device_pools.map((d:any) => d.name)
-
-
+  const device_pools = GIT_JOB_FAILURE_MAPPING_CONFIG[repo]["device_pools"];
+  const device_names = device_pools.map((d: any) => d.name);
 
   Object.keys(maps).forEach((key: string) => {
     const [model, backend, mode, dtype, device, arch, extra] = key.split(";");
     const extraInfo = JSON.parse(extra);
 
-    const metrics = maps[key]
-    if ("FAILURE_REPORT" in metrics && extraInfo["failure_type"]=='GIT_JOB' && device in device_names){
+    const metrics = maps[key];
+    if (
+      "FAILURE_REPORT" in metrics &&
+      extraInfo["failure_type"] == "GIT_JOB" &&
+      device in device_names
+    ) {
       if (!(key in failureIndicatorRows)) {
-        const record =  metrics["FAILURE_REPORT"]
+        const record = metrics["FAILURE_REPORT"];
         const hasLFailure = "l" in record;
-        const hasRFailure= "r" in record;
+        const hasRFailure = "r" in record;
         failureIndicatorRows[key] = {
-          "l": hasLFailure,
-          "r": hasRFailure
-        }
+          l: hasLFailure,
+          r: hasRFailure,
+        };
       }
-      const prefix = device_pools.find((item:any) => item.name === device).prefix
+      const prefix = device_pools.find(
+        (item: any) => item.name === device
+      ).prefix;
       const res_key = `${model};${backend};${mode};${dtype};${prefix};`;
       if (!(device in prefixSet)) {
-        prefixSet.add(res_key)
+        prefixSet.add(res_key);
       }
     }
-  })
+  });
 
- Object.keys(maps).forEach((key: string) => {
-  for (const prefix of prefixSet) {
-    if (key.startsWith(prefix)) {
-      failureMapping.add(key)
+  Object.keys(maps).forEach((key: string) => {
+    for (const prefix of prefixSet) {
+      if (key.startsWith(prefix)) {
+        failureMapping.add(key);
+      }
     }
-  }
-})
-
-return {
-  failure_rows:failureIndicatorRows,
-  failure_mapping:failureMapping
-}
+  });
 
+  return {
+    failure_rows: failureIndicatorRows,
+    failure_mapping: failureMapping,
+  };
 }
 
-const GIT_JOB_FAILURE_MAPPING_CONFIG:{ [k: string]: any } = {
-  "pytorch/excutorch":{
-    "device_pools":[{
-      name:"apple_iphone_15",
-      prefix: "Apple iPhone 15"
-    },
-    {
-      name:"samsung_galaxy_s22",
-      prefix:"Samsung Galaxy S22"
-    },
-    {
-       name: "samsung_galaxy_s24",
-       prefix: "Samsung Galaxy S24"
-    },
-    {
-      name: "google_pixel_8_pro",
-      prefix: "Google Pixel 8"
-
-    }],
-  }
-}
+const GIT_JOB_FAILURE_MAPPING_CONFIG: { [k: string]: any } = {
+  "pytorch/excutorch": {
+    device_pools: [
+      {
+        name: "apple_iphone_15",
+        prefix: "Apple iPhone 15",
+      },
+      {
+        name: "samsung_galaxy_s22",
+        prefix: "Samsung Galaxy S22",
+      },
+      {
+        name: "samsung_galaxy_s24",
+        prefix: "Samsung Galaxy S24",
+      },
+      {
+        name: "google_pixel_8_pro",
+        prefix: "Google Pixel 8",
+      },
+    ],
+  },
+};
 
-function removeFieldsByKey<T extends object, K extends keyof T>(obj: T, keysToRemove: K[]): Omit<T, K> {
-  const filteredEntries = Object.entries(obj).filter(([key]) => !keysToRemove.includes(key as K));
+function removeFieldsByKey<T extends object, K extends keyof T>(
+  obj: T,
+  keysToRemove: K[]
+): Omit<T, K> {
+  const filteredEntries = Object.entries(obj).filter(
+    ([key]) => !keysToRemove.includes(key as K)
+  );
   return Object.fromEntries(filteredEntries) as Omit<T, K>;
 }

From be785f812751c93b54fb054a8bbe46b1c87ce082 Mon Sep 17 00:00:00 2001
From: Yang Wang <elainewy@meta.com>
Date: Mon, 7 Apr 2025 21:19:59 -0700
Subject: [PATCH 05/14] addDeviceLevelFailure

---
 torchci/lib/benchmark/llms/utils/llmUtils.ts | 108 -------------------
 1 file changed, 108 deletions(-)

diff --git a/torchci/lib/benchmark/llms/utils/llmUtils.ts b/torchci/lib/benchmark/llms/utils/llmUtils.ts
index fe8a5c3407..fe4470bba3 100644
--- a/torchci/lib/benchmark/llms/utils/llmUtils.ts
+++ b/torchci/lib/benchmark/llms/utils/llmUtils.ts
@@ -348,111 +348,3 @@ export function computeGeomean(data: LLMsBenchmarkData[], metricName: string) {
 
   return returnedGeomean;
 }
-
-function processDateGroupedByModel(
-  repoName: string,
-  dataGroupedByModel: { [k: string]: any }
-) {
-  const { failure_rows, failure_mapping } = mapDevicesForFailureReport(
-    repoName,
-    dataGroupedByModel
-  );
-
-  failure_mapping.forEach((key: string) => {
-    const obj = dataGroupedByModel[key];
-    obj["FAILURE_REPORT"] = {
-      l: {},
-    };
-  });
-}
-
-function mapDevicesForFailureReport(repo: string, maps: { [k: string]: any }) {
-  let prefixSet: Set<string> = new Set();
-  let failureIndicatorRows: { [k: string]: any } = {};
-  let failureMapping = new Set<string>();
-
-  if (!(repo in GIT_JOB_FAILURE_MAPPING_CONFIG)) {
-    return {
-      failure_rows: failureIndicatorRows,
-      failure_mapping: failureMapping,
-    };
-  }
-
-  const device_pools = GIT_JOB_FAILURE_MAPPING_CONFIG[repo]["device_pools"];
-  const device_names = device_pools.map((d: any) => d.name);
-
-  Object.keys(maps).forEach((key: string) => {
-    const [model, backend, mode, dtype, device, arch, extra] = key.split(";");
-    const extraInfo = JSON.parse(extra);
-
-    const metrics = maps[key];
-    if (
-      "FAILURE_REPORT" in metrics &&
-      extraInfo["failure_type"] == "GIT_JOB" &&
-      device in device_names
-    ) {
-      if (!(key in failureIndicatorRows)) {
-        const record = metrics["FAILURE_REPORT"];
-        const hasLFailure = "l" in record;
-        const hasRFailure = "r" in record;
-        failureIndicatorRows[key] = {
-          l: hasLFailure,
-          r: hasRFailure,
-        };
-      }
-      const prefix = device_pools.find(
-        (item: any) => item.name === device
-      ).prefix;
-      const res_key = `${model};${backend};${mode};${dtype};${prefix};`;
-      if (!(device in prefixSet)) {
-        prefixSet.add(res_key);
-      }
-    }
-  });
-
-  Object.keys(maps).forEach((key: string) => {
-    for (const prefix of prefixSet) {
-      if (key.startsWith(prefix)) {
-        failureMapping.add(key);
-      }
-    }
-  });
-
-  return {
-    failure_rows: failureIndicatorRows,
-    failure_mapping: failureMapping,
-  };
-}
-
-const GIT_JOB_FAILURE_MAPPING_CONFIG: { [k: string]: any } = {
-  "pytorch/excutorch": {
-    device_pools: [
-      {
-        name: "apple_iphone_15",
-        prefix: "Apple iPhone 15",
-      },
-      {
-        name: "samsung_galaxy_s22",
-        prefix: "Samsung Galaxy S22",
-      },
-      {
-        name: "samsung_galaxy_s24",
-        prefix: "Samsung Galaxy S24",
-      },
-      {
-        name: "google_pixel_8_pro",
-        prefix: "Google Pixel 8",
-      },
-    ],
-  },
-};
-
-function removeFieldsByKey<T extends object, K extends keyof T>(
-  obj: T,
-  keysToRemove: K[]
-): Omit<T, K> {
-  const filteredEntries = Object.entries(obj).filter(
-    ([key]) => !keysToRemove.includes(key as K)
-  );
-  return Object.fromEntries(filteredEntries) as Omit<T, K>;
-}

From 067deec1c97c62df6ac839e1ca229481d54544d2 Mon Sep 17 00:00:00 2001
From: Yang Wang <elainewy@meta.com>
Date: Mon, 7 Apr 2025 21:25:44 -0700
Subject: [PATCH 06/14] addDeviceLevelFailure: flag failure as actual == 1

---
 aws/lambda/job_queue_times_snapshot.json             |  0
 .../benchmark/llms/components/LLMsSummaryPanel.tsx   |  4 ++--
 torchci/lib/benchmark/llms/utils/llmUtils.ts         | 12 ++++++------
 3 files changed, 8 insertions(+), 8 deletions(-)
 delete mode 100644 aws/lambda/job_queue_times_snapshot.json

diff --git a/aws/lambda/job_queue_times_snapshot.json b/aws/lambda/job_queue_times_snapshot.json
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
index 555545d811..5371a55047 100644
--- a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
+++ b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
@@ -385,9 +385,9 @@ export default function LLMsSummaryPanel({
               // A Failure is detected for a model and backend
               if (params.row.FAILURE_REPORT) {
                 const isLFailure =
-                  params.row.FAILURE_REPORT?.l.actual == -1 ? false : true;
+                  params.row.FAILURE_REPORT?.l.actual == 1 ? true : false;
                 const isRFailure =
-                  params.row.FAILURE_REPORT?.r.actual == -1 ? false : true;
+                  params.row.FAILURE_REPORT?.r.actual == 1 ? true : false;
                 return handleModelBackendFailure(
                   params.field,
                   unit,
diff --git a/torchci/lib/benchmark/llms/utils/llmUtils.ts b/torchci/lib/benchmark/llms/utils/llmUtils.ts
index fe4470bba3..e44feb4269 100644
--- a/torchci/lib/benchmark/llms/utils/llmUtils.ts
+++ b/torchci/lib/benchmark/llms/utils/llmUtils.ts
@@ -245,20 +245,20 @@ export function combineLeftAndRight(
         row[metric] = {
           l: hasL
             ? {
-                actual: record["l"].actual,
-                target: record["l"].target,
+                actual: 1,
+                target: 0,
               }
             : {
-                actual: -1, // indicate the failure on left side
+                actual: 0, // indicate the failure on left side
                 target: 0,
               },
           r: hasR
             ? {
-                actual: record["r"].actual,
-                target: record["r"].target,
+                actual: 1,
+                target: 0,
               }
             : {
-                actual: -1, // indicate the failure on right side
+                actual: 0, // indicate the failure on right side
                 target: 0,
               },
           highlight: hasL && hasR,

From 12e0fe8f7787e72d209cb68ce23abe9f0b06cc68 Mon Sep 17 00:00:00 2001
From: Yang Wang <elainewy@meta.com>
Date: Mon, 7 Apr 2025 21:27:39 -0700
Subject: [PATCH 07/14] addDeviceLevelFailure: fix failure-side comments

---
 torchci/lib/benchmark/llms/utils/llmUtils.ts | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/torchci/lib/benchmark/llms/utils/llmUtils.ts b/torchci/lib/benchmark/llms/utils/llmUtils.ts
index e44feb4269..d0f852c4cc 100644
--- a/torchci/lib/benchmark/llms/utils/llmUtils.ts
+++ b/torchci/lib/benchmark/llms/utils/llmUtils.ts
@@ -245,20 +245,20 @@ export function combineLeftAndRight(
         row[metric] = {
           l: hasL
             ? {
-                actual: 1,
+                actual: 1, // indicate the failure on left side
                 target: 0,
               }
             : {
-                actual: 0, // indicate the failure on left side
+                actual: 0,
                 target: 0,
               },
           r: hasR
             ? {
-                actual: 1,
+                actual: 1, // indicate the failure on right side
                 target: 0,
               }
             : {
-                actual: 0, // indicate the failure on right side
+                actual: 0,
                 target: 0,
               },
           highlight: hasL && hasR,

From f3ab8251148c3aa154b74d62428ebf43b81804e0 Mon Sep 17 00:00:00 2001
From: Yang Wang <elainewy@meta.com>
Date: Tue, 8 Apr 2025 09:40:33 -0700
Subject: [PATCH 08/14] Update
 torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx

Co-authored-by: Huy Do <huydhn@gmail.com>
---
 .../components/benchmark/llms/components/LLMsSummaryPanel.tsx   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
index 5371a55047..3f543328d7 100644
--- a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
+++ b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
@@ -226,7 +226,7 @@ export default function LLMsSummaryPanel({
     } else if (isLFailure) {
       return `Failure ->${ractual}${unit} ${rPercent} ${showTarget}`;
     } else if (isRFailure) {
-      return `${lactual}${unit} ${lPercent} -> Failure`;
+      return `${lactual}${unit} ${lPercent} → Failure`;
     }
   };
 

From 6d6b4157fa90a84db63e040285f17932be567acc Mon Sep 17 00:00:00 2001
From: Yang Wang <elainewy@meta.com>
Date: Tue, 8 Apr 2025 09:40:42 -0700
Subject: [PATCH 09/14] Update
 torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx

Co-authored-by: Huy Do <huydhn@gmail.com>
---
 .../components/benchmark/llms/components/LLMsSummaryPanel.tsx   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
index 3f543328d7..6f3977099b 100644
--- a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
+++ b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
@@ -224,7 +224,7 @@ export default function LLMsSummaryPanel({
       }
       return `Failure -> Fialure`;
     } else if (isLFailure) {
-      return `Failure ->${ractual}${unit} ${rPercent} ${showTarget}`;
+      return `Failure → ${ractual}${unit} ${rPercent} ${showTarget}`;
     } else if (isRFailure) {
       return `${lactual}${unit} ${lPercent} → Failure`;
     }

From 99c3f831bb1feb60cfe1880ca4b33813387a9a3e Mon Sep 17 00:00:00 2001
From: Yang Wang <elainewy@meta.com>
Date: Tue, 8 Apr 2025 12:17:51 -0700
Subject: [PATCH 10/14] addDeviceLevelFailure: render failures as icons

---
 .../llms/components/LLMsSummaryPanel.tsx      | 160 +++++++++++++-----
 1 file changed, 117 insertions(+), 43 deletions(-)

diff --git a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
index 6f3977099b..f15bd69f25 100644
--- a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
+++ b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
@@ -1,4 +1,4 @@
-import { Grid2 } from "@mui/material";
+import { Grid2, styled, Tooltip } from "@mui/material";
 import { GridCellParams, GridRenderCellParams } from "@mui/x-data-grid";
 import styles from "components/metrics.module.css";
 import { TablePanelWithData } from "components/metrics/panels/TablePanel";
@@ -12,6 +12,23 @@ import {
   UNIT_FOR_METRIC,
 } from "lib/benchmark/llms/common";
 import { combineLeftAndRight } from "lib/benchmark/llms/utils/llmUtils";
+import { RiAlarmWarningFill } from "react-icons/ri";
+import { VscError } from "react-icons/vsc";
+
+const FlexDiv = styled("div")({
+  display: "flex",
+  flexDirection: "row",
+  justifyContent: "flex-start",
+  alignItems: "center",
+});
+
+const FlexDivCenter = styled("div")({
+  display: "flex",
+  flexDirection: "row",
+  justifyContent: "center",
+  alignItems: "center",
+  margin: "3px",
+});
 
 const getDeviceArch = (
   device: string | undefined,
@@ -188,48 +205,6 @@ export default function LLMsSummaryPanel({
     });
   }
 
-  // handle failure report for a row.
-  const handleModelBackendFailure = (
-    field: string,
-    unit: string,
-    showTarget: string,
-    isLFailure: boolean,
-    isRFailure: boolean,
-    lactual: number,
-    ractual: number,
-    lPercent: string,
-    rPercent: string
-  ) => {
-    // Indicate the failure details in Failure Report column
-    if (field === "FAILURE_REPORT") {
-      if (lCommit === rCommit) {
-        return `Detected Failure on commit`;
-      }
-
-      if (isLFailure && isRFailure) {
-        return `Detected Failure on both base commit and new commit`;
-      }
-      if (isLFailure) {
-        return `Detected Failure on base commit`;
-      }
-      if (isRFailure) {
-        return `Detected Failure on new commit`;
-      }
-    }
-
-    // render the row's value in other metric columns
-    if (isLFailure && isRFailure) {
-      if (lCommit === rCommit) {
-        return `Failure`;
-      }
-      return `Failure -> Fialure`;
-    } else if (isLFailure) {
-      return `Failure → ${ractual}${unit} ${rPercent} ${showTarget}`;
-    } else if (isRFailure) {
-      return `${lactual}${unit} ${lPercent} → Failure`;
-    }
-  };
-
   const hasBackend = data.length > 0 && "backend" in data[0] ? true : false;
   if (hasBackend && benchmarkName !== "TorchCache Benchmark") {
     columns.push({
@@ -389,6 +364,8 @@ export default function LLMsSummaryPanel({
                 const isRFailure =
                   params.row.FAILURE_REPORT?.r.actual == 1 ? true : false;
                 return handleModelBackendFailure(
+                  lCommit,
+                  rCommit,
                   params.field,
                   unit,
                   showTarget,
@@ -431,3 +408,100 @@ export default function LLMsSummaryPanel({
     </Grid2>
   );
 }
+
+// handle failure report for a row.
+const handleModelBackendFailure = (
+  lCommit: string,
+  rCommit: string,
+  field: string,
+  unit: string,
+  showTarget: string,
+  isLFailure: boolean,
+  isRFailure: boolean,
+  lactual: number,
+  ractual: number,
+  lPercent: string,
+  rPercent: string
+) => {
+  // Indicate the failure details in Failure Report column
+  if (field === "FAILURE_REPORT") {
+    if (lCommit === rCommit) {
+      return (
+        <WarningElementWithTooltip message="Detected Failure on commit"></WarningElementWithTooltip>
+      );
+    }
+
+    if (isLFailure && isRFailure) {
+      return (
+        <WarningElementWithTooltip message="Detected Failure on both base commit and new commit"></WarningElementWithTooltip>
+      );
+    }
+    if (isLFailure) {
+      return (
+        <WarningElementWithTooltip message="Detected Failure on base commit"></WarningElementWithTooltip>
+      );
+    }
+    if (isRFailure) {
+      return (
+        <WarningElementWithTooltip message="Detected Failure on new commit"></WarningElementWithTooltip>
+      );
+    }
+  }
+
+  // render the row's value in other metric columns
+  if (isLFailure && isRFailure) {
+    if (lCommit === rCommit) {
+      return (
+        <FailureElementWithTooltip message="device job failed on commit"></FailureElementWithTooltip>
+      );
+    }
+    return (
+      <div>
+        <FailureElementWithTooltip message="device job failed on both commit" />
+        ;
+      </div>
+    );
+  } else if (isLFailure) {
+    return (
+      <FlexDiv>
+        <FailureElementWithTooltip message="device job failed on base commit"></FailureElementWithTooltip>
+        <span> → </span>
+        <span>
+          {ractual}
+          {unit}
+          {rPercent} {showTarget}
+        </span>
+      </FlexDiv>
+    );
+  } else if (isRFailure) {
+    return (
+      <FlexDiv>
+        <span>
+          {lactual}
+          {unit}
+          {lPercent}
+        </span>
+        <span> → </span>
+        <FailureElementWithTooltip message="device job failed on new commit"></FailureElementWithTooltip>
+      </FlexDiv>
+    );
+  }
+};
+
+const FailureElementWithTooltip = ({ message = "" }) => (
+  <Tooltip title={message}>
+    <div style={{ display: "flex", alignItems: "center", color: "red" }}>
+      <VscError />
+    </div>
+  </Tooltip>
+);
+
+const WarningElementWithTooltip = ({ message = "" }) => (
+  <FlexDivCenter>
+    <Tooltip title={message}>
+      <div style={{ display: "flex", alignItems: "center", color: "red" }}>
+        <RiAlarmWarningFill size={20} />
+      </div>
+    </Tooltip>
+  </FlexDivCenter>
+);

From dac485a7d97f36d788e6d9e2d10bd72a23c0baf7 Mon Sep 17 00:00:00 2001
From: Yang Wang <elainewy@meta.com>
Date: Tue, 8 Apr 2025 12:42:08 -0700
Subject: [PATCH 11/14] addDeviceLevelFailure: warn next to model name

---
 .../llms/components/LLMsSummaryPanel.tsx      | 95 +++++++++++--------
 1 file changed, 56 insertions(+), 39 deletions(-)

diff --git a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
index f15bd69f25..c30113c77e 100644
--- a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
+++ b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
@@ -12,7 +12,7 @@ import {
   UNIT_FOR_METRIC,
 } from "lib/benchmark/llms/common";
 import { combineLeftAndRight } from "lib/benchmark/llms/utils/llmUtils";
-import { RiAlarmWarningFill } from "react-icons/ri";
+import { MdError } from "react-icons/md";
 import { VscError } from "react-icons/vsc";
 
 const FlexDiv = styled("div")({
@@ -139,9 +139,18 @@ export default function LLMsSummaryPanel({
             ? `${model} (${metadata.origins.join(",")})`
             : model;
         return (
-          <a href={url}>
-            <b>{displayName}</b>
-          </a>
+          <FlexDiv>
+            {params.row.FAILURE_REPORT && (
+              <RenderWarningOnNameForFailure
+                lCommit={lCommit}
+                rCommit={rCommit}
+                row={params.row}
+              ></RenderWarningOnNameForFailure>
+            )}
+            <a href={url}>
+              <b>{displayName}</b>
+            </a>
+          </FlexDiv>
         );
       },
     },
@@ -261,6 +270,9 @@ export default function LLMsSummaryPanel({
             (metric !== "speedup" && metric !== "Speedup")
           );
         })
+        .filter((metric: string) => {
+          return metric !== "FAILURE_REPORT";
+        })
         .map((metric: string) => {
           return {
             field: metric,
@@ -359,18 +371,12 @@ export default function LLMsSummaryPanel({
 
               // A Failure is detected for a model and backend
               if (params.row.FAILURE_REPORT) {
-                const isLFailure =
-                  params.row.FAILURE_REPORT?.l.actual == 1 ? true : false;
-                const isRFailure =
-                  params.row.FAILURE_REPORT?.r.actual == 1 ? true : false;
                 return handleModelBackendFailure(
+                  params.row,
                   lCommit,
                   rCommit,
-                  params.field,
                   unit,
                   showTarget,
-                  isLFailure,
-                  isRFailure,
                   l,
                   r,
                   lPercent,
@@ -411,42 +417,18 @@ export default function LLMsSummaryPanel({
 
 // handle failure report for a row.
 const handleModelBackendFailure = (
+  row: any,
   lCommit: string,
   rCommit: string,
-  field: string,
   unit: string,
   showTarget: string,
-  isLFailure: boolean,
-  isRFailure: boolean,
   lactual: number,
   ractual: number,
   lPercent: string,
   rPercent: string
 ) => {
-  // Indicate the failure details in Failure Report column
-  if (field === "FAILURE_REPORT") {
-    if (lCommit === rCommit) {
-      return (
-        <WarningElementWithTooltip message="Detected Failure on commit"></WarningElementWithTooltip>
-      );
-    }
-
-    if (isLFailure && isRFailure) {
-      return (
-        <WarningElementWithTooltip message="Detected Failure on both base commit and new commit"></WarningElementWithTooltip>
-      );
-    }
-    if (isLFailure) {
-      return (
-        <WarningElementWithTooltip message="Detected Failure on base commit"></WarningElementWithTooltip>
-      );
-    }
-    if (isRFailure) {
-      return (
-        <WarningElementWithTooltip message="Detected Failure on new commit"></WarningElementWithTooltip>
-      );
-    }
-  }
+  const isLFailure = row.FAILURE_REPORT?.l.actual == 1 ? true : false;
+  const isRFailure = row.FAILURE_REPORT?.r.actual == 1 ? true : false;
 
   // render the row's value in other metric columns
   if (isLFailure && isRFailure) {
@@ -488,6 +470,41 @@ const handleModelBackendFailure = (
   }
 };
 
+const RenderWarningOnNameForFailure = ({
+  lCommit,
+  rCommit,
+  row,
+}: {
+  lCommit: string;
+  rCommit: string;
+  row: any;
+}) => {
+  const isLFailure = row.FAILURE_REPORT?.l.actual == 1 ? true : false;
+  const isRFailure = row.FAILURE_REPORT?.r.actual == 1 ? true : false;
+  // Indicate the failure details in Failure Report column
+  if (lCommit === rCommit) {
+    return (
+      <WarningElementWithTooltip message="Detected Failure on commit"></WarningElementWithTooltip>
+    );
+  }
+  if (isLFailure && isRFailure) {
+    return (
+      <WarningElementWithTooltip message="Detected Failure on both base commit and new commit"></WarningElementWithTooltip>
+    );
+  }
+  if (isLFailure) {
+    return (
+      <WarningElementWithTooltip message="Detected Failure on base commit"></WarningElementWithTooltip>
+    );
+  }
+  if (isRFailure) {
+    return (
+      <WarningElementWithTooltip message="Detected Failure on new commit"></WarningElementWithTooltip>
+    );
+  }
+  return <></>;
+};
+
 const FailureElementWithTooltip = ({ message = "" }) => (
   <Tooltip title={message}>
     <div style={{ display: "flex", alignItems: "center", color: "red" }}>
@@ -500,7 +517,7 @@ const WarningElementWithTooltip = ({ message = "" }) => (
   <FlexDivCenter>
     <Tooltip title={message}>
       <div style={{ display: "flex", alignItems: "center", color: "red" }}>
-        <RiAlarmWarningFill size={20} />
+        <MdError size={20} />
       </div>
     </Tooltip>
   </FlexDivCenter>

From 6c9a22503425d487585188e139d1f49e8ba00c61 Mon Sep 17 00:00:00 2001
From: Yang Wang <elainewy@meta.com>
Date: Tue, 8 Apr 2025 13:20:19 -0700
Subject: [PATCH 12/14] addDeviceLevelFailure: MAX_SAFE_INTEGER marker

---
 .../benchmark/llms/components/LLMsSummaryPanel.tsx     | 10 +++++-----
 torchci/lib/benchmark/llms/utils/llmUtils.ts           |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
index c30113c77e..5ed38d031d 100644
--- a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
+++ b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
@@ -271,7 +271,7 @@ export default function LLMsSummaryPanel({
           );
         })
         .filter((metric: string) => {
-          return metric !== "FAILURE_REPORT";
+          return metric !== "FAILURE_REPO";
         })
         .map((metric: string) => {
           return {
@@ -427,8 +427,8 @@ const handleModelBackendFailure = (
   lPercent: string,
   rPercent: string
 ) => {
-  const isLFailure = row.FAILURE_REPORT?.l.actual == 1 ? true : false;
-  const isRFailure = row.FAILURE_REPORT?.r.actual == 1 ? true : false;
+  const isLFailure = row.FAILURE_REPORT?.l.actual == Number.MAX_SAFE_INTEGER ? true : false;
+  const isRFailure = row.FAILURE_REPORT?.r.actual == Number.MAX_SAFE_INTEGER ? true : false;
 
   // render the row's value in other metric columns
   if (isLFailure && isRFailure) {
@@ -479,8 +479,8 @@ const RenderWarningOnNameForFailure = ({
   rCommit: string;
   row: any;
 }) => {
-  const isLFailure = row.FAILURE_REPORT?.l.actual == 1 ? true : false;
-  const isRFailure = row.FAILURE_REPORT?.r.actual == 1 ? true : false;
+  const isLFailure = row.FAILURE_REPORT?.l.actual == Number.MAX_SAFE_INTEGER ? true : false;
+  const isRFailure = row.FAILURE_REPORT?.r.actual == Number.MAX_SAFE_INTEGER ? true : false;
   // Indicate the failure details in Failure Report column
   if (lCommit === rCommit) {
     return (
diff --git a/torchci/lib/benchmark/llms/utils/llmUtils.ts b/torchci/lib/benchmark/llms/utils/llmUtils.ts
index d0f852c4cc..2c12f85bb2 100644
--- a/torchci/lib/benchmark/llms/utils/llmUtils.ts
+++ b/torchci/lib/benchmark/llms/utils/llmUtils.ts
@@ -245,7 +245,7 @@ export function combineLeftAndRight(
         row[metric] = {
           l: hasL
             ? {
-                actual: 1, // indicate the failure on left side
+                actual: Number.MAX_SAFE_INTEGER, // indicate the failure on left side
                 target: 0,
               }
             : {
@@ -254,7 +254,7 @@ export function combineLeftAndRight(
               },
           r: hasR
             ? {
-                actual: 1, // indicate the failure on right side
+                actual: Number.MAX_SAFE_INTEGER, // indicate the failure on right side
                 target: 0,
               }
             : {

From c572d870a1ff89bca636fe7c44189eac4e14fcf4 Mon Sep 17 00:00:00 2001
From: Yang Wang <elainewy@meta.com>
Date: Tue, 8 Apr 2025 13:24:07 -0700
Subject: [PATCH 13/14] addDeviceLevelFailure: restore FAILURE_REPORT filter

---
 .../llms/components/LLMsSummaryPanel.tsx          | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
index 5ed38d031d..dad86ba48b 100644
--- a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
+++ b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
@@ -271,7 +271,7 @@ export default function LLMsSummaryPanel({
           );
         })
         .filter((metric: string) => {
-          return metric !== "FAILURE_REPO";
+          return metric !== "FAILURE_REPORT";
         })
         .map((metric: string) => {
           return {
@@ -371,6 +371,7 @@ export default function LLMsSummaryPanel({
 
               // A Failure is detected for a model and backend
               if (params.row.FAILURE_REPORT) {
+                console.log("yang here");
                 return handleModelBackendFailure(
                   params.row,
                   lCommit,
@@ -427,8 +428,10 @@ const handleModelBackendFailure = (
   lPercent: string,
   rPercent: string
 ) => {
-  const isLFailure = row.FAILURE_REPORT?.l.actual == Number.MAX_SAFE_INTEGER ? true : false;
-  const isRFailure = row.FAILURE_REPORT?.r.actual == Number.MAX_SAFE_INTEGER ? true : false;
+  const isLFailure =
+    row.FAILURE_REPORT?.l.actual == Number.MAX_SAFE_INTEGER ? true : false;
+  const isRFailure =
+    row.FAILURE_REPORT?.r.actual == Number.MAX_SAFE_INTEGER ? true : false;
 
   // render the row's value in other metric columns
   if (isLFailure && isRFailure) {
@@ -479,8 +482,10 @@ const RenderWarningOnNameForFailure = ({
   rCommit: string;
   row: any;
 }) => {
-  const isLFailure = row.FAILURE_REPORT?.l.actual == Number.MAX_SAFE_INTEGER ? true : false;
-  const isRFailure = row.FAILURE_REPORT?.r.actual == Number.MAX_SAFE_INTEGER ? true : false;
+  const isLFailure =
+    row.FAILURE_REPORT?.l.actual == Number.MAX_SAFE_INTEGER ? true : false;
+  const isRFailure =
+    row.FAILURE_REPORT?.r.actual == Number.MAX_SAFE_INTEGER ? true : false;
   // Indicate the failure details in Failure Report column
   if (lCommit === rCommit) {
     return (

From d8122ff0549f17917302b9addbd60ff26e39ab31 Mon Sep 17 00:00:00 2001
From: Yang Wang <elainewy@meta.com>
Date: Tue, 8 Apr 2025 15:07:41 -0700
Subject: [PATCH 14/14] remove leftover debug console.log

---
 .../components/benchmark/llms/components/LLMsSummaryPanel.tsx    | 1 -
 1 file changed, 1 deletion(-)

diff --git a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
index dad86ba48b..5c6773a348 100644
--- a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
+++ b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
@@ -371,7 +371,6 @@ export default function LLMsSummaryPanel({
 
               // A Failure is detected for a model and backend
               if (params.row.FAILURE_REPORT) {
-                console.log("yang here");
                 return handleModelBackendFailure(
                   params.row,
                   lCommit,