diff --git a/torchci/clickhouse_queries/autorevert_events_with_commits/params.json b/torchci/clickhouse_queries/autorevert_events_with_commits/params.json new file mode 100644 index 0000000000..3d67400a8a --- /dev/null +++ b/torchci/clickhouse_queries/autorevert_events_with_commits/params.json @@ -0,0 +1,8 @@ +{ + "params": { + "startTime": "DateTime64(3)", + "stopTime": "DateTime64(3)", + "workflowNames": "Array(String)" + }, + "tests": [] +} diff --git a/torchci/clickhouse_queries/autorevert_events_with_commits/query.sql b/torchci/clickhouse_queries/autorevert_events_with_commits/query.sql new file mode 100644 index 0000000000..be73db7e80 --- /dev/null +++ b/torchci/clickhouse_queries/autorevert_events_with_commits/query.sql @@ -0,0 +1,139 @@ +-- Autorevert Events with Linked Revert Commits +-- Links autorevert_events_v2 to the actual revert commits via PR number and timestamp +-- Used for false positive detection + +WITH autorevert_events AS ( + SELECT + toString(a.commit_sha) AS reverted_sha, + a.ts AS autorevert_time, + a.workflows, + a.source_signal_keys + FROM misc.autorevert_events_v2 a FINAL + WHERE + a.repo = 'pytorch/pytorch' + AND a.action = 'revert' + AND a.dry_run = 0 + AND a.failed = 0 + AND a.ts >= toDateTime({startTime: DateTime64(3)}) - INTERVAL 1 DAY + AND a.ts < toDateTime({stopTime: DateTime64(3)}) + INTERVAL 1 DAY + -- Filter by workflow intersection + AND hasAny(a.workflows, {workflowNames: Array(String)}) +), + +-- Get PR number from the reverted commit's message +autorevert_with_pr AS ( + SELECT + a.reverted_sha, + a.autorevert_time, + a.workflows, + a.source_signal_keys, + p.head_commit.'message' AS reverted_message, + -- Extract PR number from "Pull Request resolved: https://github.com/pytorch/pytorch/pull/XXXXX" + arrayElement( + extractAll( + p.head_commit.'message', + 'Pull Request resolved: https://github.com/pytorch/pytorch/pull/(\\d+)' + ), 1 + ) AS pr_number + FROM autorevert_events a + JOIN push p ON p.head_commit.'id' = 
a.reverted_sha + WHERE p.repository.'full_name' = 'pytorch/pytorch' +), + +-- Find revert commits in the time range +revert_commits AS ( + SELECT + push.head_commit.'id' AS revert_sha, + push.head_commit.'timestamp' AS revert_time, + push.head_commit.'message' AS revert_message, + -- Extract mentioned PR numbers from revert message + -- For nested reverts like 'Reapply "Back out "..." (#164939)" (#165910)" (#166812)', + -- the actual PR is the LAST one mentioned in the title line (before newline) + if( + arrayElement( + extractAll( + push.head_commit.'message', + 'Reverted https://github.com/pytorch/pytorch/pull/(\\d+)' + ), 1 + ) != '', + arrayElement( + extractAll( + push.head_commit.'message', + 'Reverted https://github.com/pytorch/pytorch/pull/(\\d+)' + ), 1 + ), + -- Get the LAST PR number from title (use -1 for last element) + arrayElement( + extractAll( + -- Extract just the first line (title) to get the correct PR + arrayElement( + splitByChar('\n', push.head_commit.'message'), 1 + ), + '#(\\d+)' + ), -1 + ) + ) AS pr_reference + FROM push + WHERE + push.ref IN ('refs/heads/master', 'refs/heads/main') + AND push.repository.'full_name' = 'pytorch/pytorch' + AND push.head_commit.'timestamp' >= {startTime: DateTime64(3)} + AND push.head_commit.'timestamp' < {stopTime: DateTime64(3)} + AND ( + push.head_commit.'message' LIKE 'Revert %' + OR push.head_commit.'message' LIKE 'Reapply %' + OR push.head_commit.'message' LIKE 'Back out%' + ) +), + +-- Join on PR number only, then filter by time in WHERE clause +-- This avoids ClickHouse JOIN ON restrictions; NOTE(review): an autorevert whose PR matches only reverts outside the 1-hour window is still dropped entirely +matched_reverts AS ( + SELECT + a.reverted_sha, + a.autorevert_time, + a.workflows, + a.source_signal_keys, + a.pr_number, + substring(a.reverted_message, 1, 100) AS reverted_message_snippet, + r.revert_sha, + r.revert_time, + substring(r.revert_message, 1, 100) AS revert_message_snippet + FROM autorevert_with_pr a + LEFT JOIN revert_commits r ON r.pr_reference = a.pr_number + WHERE + a.pr_number != '' + AND ( + 
-- Keep autoreverts with no PR-matched revert: unmatched LEFT JOIN rows are NULL only when join_use_nulls = 1, otherwise they hold the default empty string + (r.revert_sha IS NULL OR r.revert_sha = '') + OR ( + r.revert_time > a.autorevert_time + AND r.revert_time < a.autorevert_time + INTERVAL 1 HOUR + ) + ) +), + +-- Take the earliest linked revert commit for each autoreverted commit (partition is by reverted_sha) +linked_autoreverts AS ( + SELECT + *, + row_number() OVER ( + PARTITION BY reverted_sha + ORDER BY revert_time ASC NULLS LAST + ) AS rn + FROM matched_reverts +) + +SELECT + reverted_sha, + autorevert_time, + workflows, + source_signal_keys, + pr_number, + reverted_message_snippet, + revert_sha, + revert_time, + revert_message_snippet +FROM linked_autoreverts +WHERE rn = 1 +ORDER BY autorevert_time DESC diff --git a/torchci/clickhouse_queries/autorevert_false_positives/params.json b/torchci/clickhouse_queries/autorevert_false_positives/params.json new file mode 100644 index 0000000000..4e48ef1960 --- /dev/null +++ b/torchci/clickhouse_queries/autorevert_false_positives/params.json @@ -0,0 +1,7 @@ +{ + "params": { + "startTime": "DateTime64(3)", + "stopTime": "DateTime64(3)" + }, + "tests": [] +} diff --git a/torchci/clickhouse_queries/autorevert_false_positives/query.sql b/torchci/clickhouse_queries/autorevert_false_positives/query.sql new file mode 100644 index 0000000000..06304456db --- /dev/null +++ b/torchci/clickhouse_queries/autorevert_false_positives/query.sql @@ -0,0 +1,107 @@ +-- Autorevert False Positive Candidates +-- Finds autoreverted commits that were later re-landed +-- NOTE: This is a preliminary list. 
True false positives require GitHub API verification: +-- - If PR is still OPEN (not relanded) → revert was LEGIT +-- - If PR had commits after revert before reland → revert was LEGIT (author fixed something) +-- - If PR was relanded with NO changes → revert was FALSE POSITIVE + +WITH autorevert_events AS ( + SELECT + toString(a.commit_sha) AS reverted_sha, + min(a.ts) AS revert_time + FROM misc.autorevert_events_v2 a FINAL + WHERE + a.repo = 'pytorch/pytorch' + AND a.action = 'revert' + AND a.dry_run = 0 + AND a.failed = 0 + AND a.ts >= toDateTime({startTime: DateTime64(3)}) - INTERVAL 7 DAY + AND a.ts < toDateTime({stopTime: DateTime64(3)}) + INTERVAL 7 DAY + GROUP BY reverted_sha +), + +-- Look up each autoreverted commit in push and take the PR number from its "Pull Request resolved:" trailer (empty string when absent) +autorevert_with_pr AS ( + SELECT + a.reverted_sha, + a.revert_time, + p.head_commit.'message' AS original_message, + arrayElement( + extractAll( + p.head_commit.'message', + 'Pull Request resolved: https://github.com/pytorch/pytorch/pull/(\\d+)' + ), 1 + ) AS pr_number + FROM autorevert_events a + JOIN push p ON p.head_commit.'id' = a.reverted_sha + WHERE p.repository.'full_name' = 'pytorch/pytorch' +), + +-- Head commits pushed to main/master of pytorch/pytorch with a timestamp in [startTime, stopTime) +all_commits AS ( + SELECT + push.head_commit.'id' AS sha, + push.head_commit.'timestamp' AS time, + push.head_commit.'message' AS message + FROM push + WHERE + push.ref IN ('refs/heads/master', 'refs/heads/main') + AND push.repository.'full_name' = 'pytorch/pytorch' + AND push.head_commit.'timestamp' >= {startTime: DateTime64(3)} + AND push.head_commit.'timestamp' < {stopTime: DateTime64(3)} +), + +-- Commits whose message starts with a reland prefix (Reland, [Reland], Re-land) +reland_commits AS ( + SELECT + sha AS reland_sha, + time AS reland_time, + message AS reland_message, + -- First #NNN found anywhere in the message, taken as the original PR being relanded + arrayElement(extractAll(message, '#(\\d+)'), 1) AS primary_mentioned_pr + FROM all_commits + WHERE + message LIKE 'Reland%' + OR message LIKE '[Reland]%' + OR message LIKE 'Re-land%' +), + +-- 
Match autoreverts to relands, deduplicated by original PR +-- Keep the earliest reland that lands within 30 days after the autorevert, one row per PR +matched_relands AS ( + SELECT + a.reverted_sha, + a.revert_time, + a.pr_number AS original_pr, + substring(a.original_message, 1, 100) AS original_message_snippet, + r.reland_sha, + r.reland_time, + substring(r.reland_message, 1, 100) AS reland_message_snippet, + dateDiff('hour', a.revert_time, r.reland_time) AS hours_to_reland, + row_number() + OVER ( + PARTITION BY a.pr_number + ORDER BY r.reland_time ASC + ) + AS rn + FROM autorevert_with_pr a + JOIN reland_commits r ON r.primary_mentioned_pr = a.pr_number + WHERE + a.pr_number != '' + AND r.reland_time > a.revert_time + AND r.reland_time < a.revert_time + INTERVAL 30 DAY +) + +SELECT + original_pr, + reverted_sha, + revert_time, + original_message_snippet, + reland_sha, + reland_time, + reland_message_snippet, + hours_to_reland, + 'needs_verification' AS status +FROM matched_relands +WHERE rn = 1 -- Only first reland per PR +ORDER BY revert_time DESC diff --git a/torchci/clickhouse_queries/autorevert_signal_recovery/params.json b/torchci/clickhouse_queries/autorevert_signal_recovery/params.json new file mode 100644 index 0000000000..b281649c87 --- /dev/null +++ b/torchci/clickhouse_queries/autorevert_signal_recovery/params.json @@ -0,0 +1,29 @@ +{ + "params": { + "startTime": "DateTime64(3)", + "stopTime": "DateTime64(3)", + "workflowNames": "Array(String)", + "minRedCommits": "UInt8", + "minGreenCommits": "UInt8" + }, + "tests": [ + { + "startTime": "2024-12-01 00:00:00.000", + "stopTime": "2024-12-29 00:00:00.000", + "workflowNames": ["Lint", "pull", "trunk", "linux-aarch64"], + "minRedCommits": 2, + "minGreenCommits": 2 + }, + { + "startTime": { + "from_now": -14 + }, + "stopTime": { + "from_now": 0 + }, + "workflowNames": ["Lint", "pull", "trunk", "linux-aarch64"], + "minRedCommits": 2, + "minGreenCommits": 2 + } + ] +} diff --git a/torchci/clickhouse_queries/autorevert_signal_recovery/query.sql 
b/torchci/clickhouse_queries/autorevert_signal_recovery/query.sql new file mode 100644 index 0000000000..d53aff79aa --- /dev/null +++ b/torchci/clickhouse_queries/autorevert_signal_recovery/query.sql @@ -0,0 +1,200 @@ +-- Signal Recovery Detection Query +-- Finds instances where a signal (job group) recovers: at least minRedCommits red commits followed by at least minGreenCommits green commits +-- Used for autorevert metrics to identify significant recovery events + +WITH commits AS ( + SELECT + push.head_commit.'timestamp' AS time, + push.head_commit.'id' AS sha, + push.head_commit.'message' AS message + FROM push + WHERE + push.ref IN ('refs/heads/master', 'refs/heads/main') + AND push.repository.'owner'.'name' = 'pytorch' + AND push.repository.'name' = 'pytorch' + AND push.head_commit.'timestamp' >= {startTime: DateTime64(3)} + AND push.head_commit.'timestamp' < {stopTime: DateTime64(3)} +), + +all_runs AS ( + SELECT + workflow_run.id AS id, + workflow_run.head_commit.'id' AS sha, + workflow_run.name AS workflow_name, + commit.time AS time, + commit.message AS message + FROM workflow_run FINAL + JOIN commits commit ON workflow_run.head_commit.'id' = commit.sha + WHERE + workflow_run.name IN ({workflowNames: Array(String)}) + AND workflow_run.event != 'workflow_run' + AND workflow_run.id IN ( + SELECT id FROM materialized_views.workflow_run_by_head_sha + WHERE head_sha IN (SELECT sha FROM commits) + ) +), + +all_jobs AS ( + SELECT + all_runs.time AS time, + all_runs.sha AS sha, + all_runs.message AS message, + all_runs.workflow_name AS workflow_name, + job.run_attempt AS run_attempt, + job.conclusion AS raw_conclusion, + -- Normalize job name to group shards together (same as auto-revert logic) + trim( + replaceRegexpAll( + replaceRegexpAll( + replaceRegexpAll(job.name, '\\s*\\(.*\\)$', ''), + ', \\d+, \\d+, ', ', ' + ), + '\\s+', ' ' + ) + ) AS base_name + FROM default.workflow_job job FINAL + JOIN all_runs ON all_runs.id = job.run_id + WHERE + job.name != 'ciflow_should_run' + AND job.name != 
'generate-test-matrix' + AND job.name NOT LIKE '%rerun_disabled_tests%' + AND job.name NOT LIKE '%unstable%' + AND job.id IN ( + SELECT id FROM materialized_views.workflow_job_by_head_sha + WHERE head_sha IN (SELECT sha FROM commits) + ) +), + +-- Step 1: For each (sha, base_name, run_attempt), flag whether any job failed/timed out/was cancelled and whether any job has an empty conclusion +attempt_status AS ( + SELECT + time, + sha, + message, + base_name, + workflow_name, + run_attempt, + MAX(raw_conclusion IN ('failure', 'timed_out', 'cancelled')) + AS attempt_has_failure, + MAX(raw_conclusion = '') AS attempt_has_pending + FROM all_jobs + GROUP BY time, sha, message, base_name, workflow_name, run_attempt +), + +-- Step 2: For each (sha, base_name): pending if any attempt is pending, red if every attempt failed, flaky if only some failed, else green +signal_status AS ( + SELECT + time, + sha, + message, + base_name, + any(workflow_name) AS workflow_name, + CASE + WHEN MAX(attempt_has_pending) = 1 THEN 'pending' + WHEN MIN(attempt_has_failure) = 1 THEN 'red' + WHEN MAX(attempt_has_failure) = 1 THEN 'flaky' + ELSE 'green' + END AS status + FROM attempt_status + GROUP BY time, sha, message, base_name +), + +-- Step 3: Mark status changes per signal in commit-time order (streak IDs are derived from these markers in Step 4) +signal_with_streaks AS ( + SELECT + base_name, + workflow_name, + sha, + time, + message, + status, + -- Change marker: 1 when status differs from previous + if(status != lagInFrame(status, 1, status) OVER w, 1, 0) AS is_change + FROM signal_status + WHERE status IN ('red', 'green') -- pending/flaky rows are removed here, so they neither start nor break a streak + WINDOW w AS ( + PARTITION BY base_name + ORDER BY time ASC + ) +), + +-- Step 4: Compute streak ID (cumulative sum of changes) +signal_with_streak_ids AS ( + SELECT + *, + sum(is_change) + OVER ( + PARTITION BY base_name + ORDER BY time ASC ROWS UNBOUNDED PRECEDING + ) + AS streak_id + FROM signal_with_streaks +), + +-- Step 5: Count streak lengths and find boundaries +streak_lengths AS ( + SELECT + base_name, + streak_id, + status, + count(*) AS streak_length, + min(time) AS streak_start, + max(time) AS streak_end, + argMin(sha, 
time) AS first_sha, + argMax(sha, time) AS last_sha, + argMin(message, time) AS first_message + FROM signal_with_streak_ids + GROUP BY base_name, streak_id, status +), + +-- Step 6: Recovery events: a green streak directly following a red streak (streak_id + 1), both meeting the minimum lengths +recovery_events AS ( + SELECT + green.base_name AS signal_key, + red.streak_length AS red_streak_length, + green.streak_length AS green_streak_length, + green.first_sha AS recovery_sha, + green.streak_start AS recovery_time, + green.first_message AS recovery_message, + red.last_sha AS last_red_sha, + red.streak_end AS last_red_time + FROM streak_lengths green + JOIN streak_lengths red + ON + green.base_name = red.base_name + AND green.streak_id = red.streak_id + 1 + WHERE + green.status = 'green' AND red.status = 'red' + AND red.streak_length >= {minRedCommits: UInt8} + AND green.streak_length >= {minGreenCommits: UInt8} +) + +-- Final output +SELECT + signal_key, + recovery_sha, + recovery_time, + recovery_message, + last_red_sha, + last_red_time, + red_streak_length, + green_streak_length, + -- Check if recovery commit is a revert + ( + recovery_message LIKE 'Revert %' + OR recovery_message LIKE 'Reapply %' + OR recovery_message LIKE 'Back out%' + ) + AS is_revert, + -- Extract reverted PR number if it's a revert + extractAll( + recovery_message, + 'Reverted https://github.com/pytorch/pytorch/pull/(\\d+)' + ) AS reverted_pr_numbers, + -- Extract PR number from merge commit message + extractAll( + recovery_message, + 'Pull Request resolved: https://github.com/pytorch/pytorch/pull/(\\d+)' + ) AS merge_pr_numbers +FROM recovery_events +ORDER BY recovery_time DESC diff --git a/torchci/clickhouse_queries/autorevert_significant_reverts/params.json b/torchci/clickhouse_queries/autorevert_significant_reverts/params.json new file mode 100644 index 0000000000..b281649c87 --- /dev/null +++ b/torchci/clickhouse_queries/autorevert_significant_reverts/params.json @@ -0,0 +1,29 @@ +{ + "params": { + "startTime": "DateTime64(3)", + 
"stopTime": "DateTime64(3)", + "workflowNames": "Array(String)", + "minRedCommits": "UInt8", + "minGreenCommits": "UInt8" + }, + "tests": [ + { + "startTime": "2024-12-01 00:00:00.000", + "stopTime": "2024-12-29 00:00:00.000", + "workflowNames": ["Lint", "pull", "trunk", "linux-aarch64"], + "minRedCommits": 2, + "minGreenCommits": 2 + }, + { + "startTime": { + "from_now": -14 + }, + "stopTime": { + "from_now": 0 + }, + "workflowNames": ["Lint", "pull", "trunk", "linux-aarch64"], + "minRedCommits": 2, + "minGreenCommits": 2 + } + ] +} diff --git a/torchci/clickhouse_queries/autorevert_significant_reverts/query.sql b/torchci/clickhouse_queries/autorevert_significant_reverts/query.sql new file mode 100644 index 0000000000..5b8d72d90d --- /dev/null +++ b/torchci/clickhouse_queries/autorevert_significant_reverts/query.sql @@ -0,0 +1,279 @@ +-- Significant Reverts Query +-- Finds recovery events that are reverts and attributes them to autorevert vs human +-- Used for autorevert metrics precision/recall calculations + +WITH commits AS ( + SELECT + push.head_commit.'timestamp' AS time, + push.head_commit.'id' AS sha, + push.head_commit.'message' AS message + FROM push + WHERE + push.ref IN ('refs/heads/master', 'refs/heads/main') + AND push.repository.'owner'.'name' = 'pytorch' + AND push.repository.'name' = 'pytorch' + AND push.head_commit.'timestamp' >= {startTime: DateTime64(3)} + AND push.head_commit.'timestamp' < {stopTime: DateTime64(3)} +), + +all_runs AS ( + SELECT + workflow_run.id AS id, + workflow_run.head_commit.'id' AS sha, + workflow_run.name AS workflow_name, + commit.time AS time, + commit.message AS message + FROM workflow_run FINAL + JOIN commits commit ON workflow_run.head_commit.'id' = commit.sha + WHERE + workflow_run.name IN ({workflowNames: Array(String)}) + AND workflow_run.event != 'workflow_run' + AND workflow_run.id IN ( + SELECT id FROM materialized_views.workflow_run_by_head_sha + WHERE head_sha IN (SELECT sha FROM commits) + ) +), + +all_jobs 
AS ( + SELECT + all_runs.time AS time, + all_runs.sha AS sha, + all_runs.message AS message, + all_runs.workflow_name AS workflow_name, + job.run_attempt AS run_attempt, + job.conclusion AS raw_conclusion, + -- Normalize job name to group shards together (same as auto-revert logic) + trim( + replaceRegexpAll( + replaceRegexpAll( + replaceRegexpAll(job.name, '\\s*\\(.*\\)$', ''), + ', \\d+, \\d+, ', ', ' + ), + '\\s+', ' ' + ) + ) AS base_name + FROM default.workflow_job job FINAL + JOIN all_runs ON all_runs.id = job.run_id + WHERE + job.name != 'ciflow_should_run' + AND job.name != 'generate-test-matrix' + AND job.name NOT LIKE '%rerun_disabled_tests%' + AND job.name NOT LIKE '%unstable%' + AND job.id IN ( + SELECT id FROM materialized_views.workflow_job_by_head_sha + WHERE head_sha IN (SELECT sha FROM commits) + ) +), + +-- Step 1: For each (sha, base_name, run_attempt), determine attempt status +attempt_status AS ( + SELECT + time, + sha, + message, + base_name, + workflow_name, + run_attempt, + MAX(raw_conclusion IN ('failure', 'timed_out', 'cancelled')) + AS attempt_has_failure, + MAX(raw_conclusion = '') AS attempt_has_pending + FROM all_jobs + GROUP BY time, sha, message, base_name, workflow_name, run_attempt +), + +-- Step 2: For each (sha, base_name), aggregate across all attempts +signal_status AS ( + SELECT + time, + sha, + message, + base_name, + any(workflow_name) AS workflow_name, + CASE + WHEN MAX(attempt_has_pending) = 1 THEN 'pending' + WHEN MIN(attempt_has_failure) = 1 THEN 'red' + WHEN MAX(attempt_has_failure) = 1 THEN 'flaky' + ELSE 'green' + END AS status + FROM attempt_status + GROUP BY time, sha, message, base_name +), + +-- Step 3: Assign streak IDs using cumulative status changes +signal_with_streaks AS ( + SELECT + base_name, + workflow_name, + sha, + time, + message, + status, + -- Change marker: 1 when status differs from previous + if(status != lagInFrame(status, 1, status) OVER w, 1, 0) AS is_change + FROM signal_status + WHERE status 
IN ('red', 'green') -- Focus on definitive states + WINDOW w AS ( + PARTITION BY base_name + ORDER BY time ASC + ) +), + +-- Step 4: Compute streak ID (cumulative sum of changes) +signal_with_streak_ids AS ( + SELECT + *, + sum(is_change) + OVER ( + PARTITION BY base_name + ORDER BY time ASC ROWS UNBOUNDED PRECEDING + ) + AS streak_id + FROM signal_with_streaks +), + +-- Step 5: Count streak lengths and find boundaries +streak_lengths AS ( + SELECT + base_name, + streak_id, + status, + count(*) AS streak_length, + min(time) AS streak_start, + max(time) AS streak_end, + argMin(sha, time) AS first_sha, + argMax(sha, time) AS last_sha, + argMin(message, time) AS first_message + FROM signal_with_streak_ids + GROUP BY base_name, streak_id, status +), + +-- Step 6: Find recovery events: green streak that follows a red streak +recovery_events AS ( + SELECT + green.base_name AS signal_key, + red.streak_length AS red_streak_length, + green.streak_length AS green_streak_length, + green.first_sha AS recovery_sha, + green.streak_start AS recovery_time, + green.first_message AS recovery_message, + red.last_sha AS last_red_sha, + red.streak_end AS last_red_time, + red.first_sha AS first_red_sha, + red.streak_start AS first_red_time + FROM streak_lengths green + JOIN streak_lengths red + ON + green.base_name = red.base_name + AND green.streak_id = red.streak_id + 1 + WHERE + green.status = 'green' AND red.status = 'red' + AND red.streak_length >= {minRedCommits: UInt8} + AND green.streak_length >= {minGreenCommits: UInt8} +), + +-- Step 7: Get autorevert events for attribution +autorevert_events AS ( + SELECT + toString(commit_sha) AS reverted_sha, + ts AS autorevert_time, + source_signal_keys + FROM misc.autorevert_events_v2 FINAL + WHERE + repo = 'pytorch/pytorch' + AND action = 'revert' + AND dry_run = 0 + AND failed = 0 + -- Convert DateTime64 params to DateTime for comparison + AND ts >= toDateTime({startTime: DateTime64(3)}) - INTERVAL 1 DAY + AND ts < toDateTime({stopTime: 
DateTime64(3)}) + INTERVAL 1 DAY +), + +-- Step 8: Extract reverted commit SHA from recovery message +recovery_with_reverted_sha AS ( + SELECT + r.*, + -- Check if recovery commit is a revert + ( + r.recovery_message LIKE 'Revert %' + OR r.recovery_message LIKE 'Reapply %' + OR r.recovery_message LIKE 'Back out%' + ) AS is_revert, + -- Extract reverted PR number if it's a revert + extractAll( + r.recovery_message, + 'Reverted https://github.com/pytorch/pytorch/pull/(\\d+)' + ) AS reverted_pr_numbers, + -- Extract PR number from merge commit message + extractAll( + r.recovery_message, + 'Pull Request resolved: https://github.com/pytorch/pytorch/pull/(\\d+)' + ) AS merge_pr_numbers, + -- Extract the actual reverted commit SHA from message (e.g., "This reverts commit abc123...") + -- The regex captures the full 40-char SHA since commit messages include full SHAs + arrayElement( + extractAll(r.recovery_message, 'reverts commit ([a-f0-9]+)'), 1 + ) AS reverted_commit_sha + FROM recovery_events r +), + +-- Step 9: Join with autorevert events on full SHA match +recovery_with_attribution AS ( + SELECT + r.signal_key, + r.red_streak_length, + r.green_streak_length, + r.recovery_sha, + r.recovery_time, + r.recovery_message, + r.last_red_sha, + r.last_red_time, + r.first_red_sha, + r.first_red_time, + r.is_revert, + r.reverted_pr_numbers, + r.merge_pr_numbers, + r.reverted_commit_sha, + -- Check for autorevert attribution by matching the reverted commit SHA + a.reverted_sha IS NOT NULL AND a.reverted_sha != '' AS is_autorevert, + a.autorevert_time, + a.source_signal_keys AS autorevert_signal_keys + FROM recovery_with_reverted_sha r + LEFT JOIN autorevert_events a ON r.reverted_commit_sha = a.reverted_sha +), + +-- Filter to only actual reverts before aggregating +reverts_only AS ( + SELECT * FROM recovery_with_attribution + WHERE is_revert = 1 +), + +-- Aggregate by recovery_sha (one row per unique revert commit) +aggregated_reverts AS ( + SELECT + recovery_sha, + 
any(recovery_time) AS recovery_time, + any(recovery_message) AS recovery_message, + groupArray(signal_key) AS signal_keys, + count() AS signals_fixed, + any(last_red_sha) AS last_red_sha, + any(last_red_time) AS last_red_time, + any(first_red_sha) AS first_red_sha, + any(first_red_time) AS first_red_time, + max(red_streak_length) AS max_red_streak_length, + any(reverted_commit_sha) AS reverted_commit_sha, + any(reverted_pr_numbers) AS reverted_pr_numbers, + any(merge_pr_numbers) AS merge_pr_numbers, + max(is_autorevert) AS is_autorevert, + any(autorevert_time) AS autorevert_time, + any(autorevert_signal_keys) AS autorevert_signal_keys + FROM reverts_only + GROUP BY recovery_sha +) + +-- Final output with recovery type classification +SELECT + *, + if(is_autorevert, 'autorevert_recovery', 'human_revert_recovery') + AS recovery_type +FROM aggregated_reverts +ORDER BY recovery_time DESC diff --git a/torchci/clickhouse_queries/autorevert_weekly_metrics/params.json b/torchci/clickhouse_queries/autorevert_weekly_metrics/params.json new file mode 100644 index 0000000000..13dc6a8421 --- /dev/null +++ b/torchci/clickhouse_queries/autorevert_weekly_metrics/params.json @@ -0,0 +1,29 @@ +{ + "params": { + "startTime": "DateTime64(3)", + "stopTime": "DateTime64(3)", + "workflowNames": "Array(String)", + "minRedCommits": "UInt8", + "minGreenCommits": "UInt8" + }, + "tests": [ + { + "startTime": "2025-10-01 00:00:00.000", + "stopTime": "2025-12-29 00:00:00.000", + "workflowNames": ["Lint", "pull", "trunk", "linux-aarch64"], + "minRedCommits": 2, + "minGreenCommits": 2 + }, + { + "startTime": { + "from_now": -90 + }, + "stopTime": { + "from_now": 0 + }, + "workflowNames": ["Lint", "pull", "trunk", "linux-aarch64"], + "minRedCommits": 2, + "minGreenCommits": 2 + } + ] +} diff --git a/torchci/clickhouse_queries/autorevert_weekly_metrics/query.sql b/torchci/clickhouse_queries/autorevert_weekly_metrics/query.sql new file mode 100644 index 0000000000..ed19f5b223 --- /dev/null +++ 
b/torchci/clickhouse_queries/autorevert_weekly_metrics/query.sql @@ -0,0 +1,252 @@ +-- Weekly Autorevert Metrics Query +-- Aggregates signal recovery and revert data by week for trend analysis +-- Computes key metrics: total recoveries, autorevert rate, human revert rate + +WITH commits AS ( + SELECT + push.head_commit.'timestamp' AS time, + push.head_commit.'id' AS sha, + push.head_commit.'message' AS message + FROM push + WHERE + push.ref IN ('refs/heads/master', 'refs/heads/main') + AND push.repository.'owner'.'name' = 'pytorch' + AND push.repository.'name' = 'pytorch' + AND push.head_commit.'timestamp' >= {startTime: DateTime64(3)} + AND push.head_commit.'timestamp' < {stopTime: DateTime64(3)} +), + +all_runs AS ( + SELECT + workflow_run.id AS id, + workflow_run.head_commit.'id' AS sha, + workflow_run.name AS workflow_name, + commit.time AS time, + commit.message AS message + FROM workflow_run FINAL + JOIN commits commit ON workflow_run.head_commit.'id' = commit.sha + WHERE + workflow_run.name IN ({workflowNames: Array(String)}) + AND workflow_run.event != 'workflow_run' + AND workflow_run.id IN ( + SELECT id FROM materialized_views.workflow_run_by_head_sha + WHERE head_sha IN (SELECT sha FROM commits) + ) +), + +all_jobs AS ( + SELECT + all_runs.time AS time, + all_runs.sha AS sha, + all_runs.message AS message, + all_runs.workflow_name AS workflow_name, + job.run_attempt AS run_attempt, + job.conclusion AS raw_conclusion, + trim( + replaceRegexpAll( + replaceRegexpAll( + replaceRegexpAll(job.name, '\\s*\\(.*\\)$', ''), + ', \\d+, \\d+, ', ', ' + ), + '\\s+', ' ' + ) + ) AS base_name + FROM default.workflow_job job FINAL + JOIN all_runs ON all_runs.id = job.run_id + WHERE + job.name != 'ciflow_should_run' + AND job.name != 'generate-test-matrix' + AND job.name NOT LIKE '%rerun_disabled_tests%' + AND job.name NOT LIKE '%unstable%' + AND job.id IN ( + SELECT id FROM materialized_views.workflow_job_by_head_sha + WHERE head_sha IN (SELECT sha FROM commits) + ) +), 
+ +attempt_status AS ( + SELECT + time, + sha, + message, + base_name, + workflow_name, + run_attempt, + MAX(raw_conclusion IN ('failure', 'timed_out', 'cancelled')) + AS attempt_has_failure, + MAX(raw_conclusion = '') AS attempt_has_pending + FROM all_jobs + GROUP BY time, sha, message, base_name, workflow_name, run_attempt +), + +signal_status AS ( + SELECT + time, + sha, + message, + base_name, + any(workflow_name) AS workflow_name, + CASE + WHEN MAX(attempt_has_pending) = 1 THEN 'pending' + WHEN MIN(attempt_has_failure) = 1 THEN 'red' + WHEN MAX(attempt_has_failure) = 1 THEN 'flaky' + ELSE 'green' + END AS status + FROM attempt_status + GROUP BY time, sha, message, base_name +), + +signal_with_streaks AS ( + SELECT + base_name, + workflow_name, + sha, + time, + message, + status, + if(status != lagInFrame(status, 1, status) OVER w, 1, 0) AS is_change + FROM signal_status + WHERE status IN ('red', 'green') + WINDOW w AS ( + PARTITION BY base_name + ORDER BY time ASC + ) +), + +signal_with_streak_ids AS ( + SELECT + *, + sum(is_change) OVER ( + PARTITION BY base_name + ORDER BY time ASC ROWS UNBOUNDED PRECEDING + ) AS streak_id + FROM signal_with_streaks +), + +streak_lengths AS ( + SELECT + base_name, + streak_id, + status, + count(*) AS streak_length, + min(time) AS streak_start, + max(time) AS streak_end, + argMin(sha, time) AS first_sha, + argMax(sha, time) AS last_sha, + argMin(message, time) AS first_message + FROM signal_with_streak_ids + GROUP BY base_name, streak_id, status +), + +recovery_events AS ( + SELECT + green.base_name AS signal_key, + red.streak_length AS red_streak_length, + green.streak_length AS green_streak_length, + green.first_sha AS recovery_sha, + green.streak_start AS recovery_time, + green.first_message AS recovery_message, + red.last_sha AS last_red_sha, + red.streak_end AS last_red_time + FROM streak_lengths green + JOIN streak_lengths red + ON + green.base_name = red.base_name + AND green.streak_id = red.streak_id + 1 + WHERE + 
green.status = 'green' AND red.status = 'red' + AND red.streak_length >= {minRedCommits: UInt8} + AND green.streak_length >= {minGreenCommits: UInt8} +), + +-- Autorevert events (real, successful reverts) from one day before startTime to one day after stopTime, used for attribution +autorevert_events AS ( + SELECT + toString(commit_sha) AS reverted_sha, + ts AS autorevert_time, + source_signal_keys + FROM misc.autorevert_events_v2 FINAL + WHERE + repo = 'pytorch/pytorch' + AND action = 'revert' + AND dry_run = 0 + AND failed = 0 + AND ts >= toDateTime({startTime: DateTime64(3)}) - INTERVAL 1 DAY + AND ts < toDateTime({stopTime: DateTime64(3)}) + INTERVAL 1 DAY +), + +-- Extract reverted commit SHA from recovery message +recovery_with_reverted_sha AS ( + SELECT + r.*, + ( + r.recovery_message LIKE 'Revert %' + OR r.recovery_message LIKE 'Reapply %' + OR r.recovery_message LIKE 'Back out%' + ) AS is_revert, + -- Extract the actual reverted commit SHA from message (e.g., "This reverts commit abc123...") + -- The regex takes the whole hex run after "reverts commit" (empty string when absent); it only equals commit_sha if the message carries the full SHA + arrayElement( + extractAll(r.recovery_message, 'reverts commit ([a-f0-9]+)'), 1 + ) AS reverted_commit_sha + FROM recovery_events r +), + +-- Join with autorevert events on full SHA match +recovery_with_attribution AS ( + SELECT + r.signal_key, + r.red_streak_length, + r.green_streak_length, + r.recovery_sha, + r.recovery_time, + r.recovery_message, + r.last_red_sha, + r.last_red_time, + r.is_revert, + r.reverted_commit_sha, + -- Check for autorevert attribution by matching the reverted commit SHA + a.reverted_sha IS NOT NULL AND a.reverted_sha != '' AS is_autorevert + FROM recovery_with_reverted_sha r + LEFT JOIN autorevert_events a ON r.reverted_commit_sha = a.reverted_sha +), + +-- Deduplicate by recovery_sha to count unique revert commits +-- A single revert can fix multiple signals, but we count it as one revert event +unique_recoveries AS ( + SELECT + recovery_sha, + any(recovery_time) AS recovery_time, + max(is_revert) AS is_revert, + max(is_autorevert) AS 
is_autorevert + FROM recovery_with_attribution + GROUP BY recovery_sha +) + +-- Aggregate by week +SELECT + toString(toStartOfWeek(recovery_time, 1)) AS week, + -- Total signal recovery events (where recovery was via revert) + countIf(is_revert) AS total_revert_recoveries, + -- Autorevert-triggered recoveries (True Positives) + countIf(is_revert AND is_autorevert) AS autorevert_recoveries, + -- Human-initiated revert recoveries (potential False Negatives - should have been autoreverted) + countIf(is_revert AND NOT is_autorevert) AS human_revert_recoveries, + -- All signal recoveries (including non-revert like fixes) + count() AS total_signal_recoveries, + -- Non-revert recoveries (signal fixed by new commit, not a revert) + countIf(NOT is_revert) AS non_revert_recoveries, + -- Percentages of revert recoveries, rounded to one decimal; 0 when the week has no revert recoveries + round(if( + countIf(is_revert) > 0, + countIf(is_revert AND is_autorevert) * 100.0 / countIf(is_revert), + 0 + ), 1) AS autorevert_rate, + round(if( + countIf(is_revert) > 0, + countIf(is_revert AND NOT is_autorevert) * 100.0 / countIf(is_revert), + 0 + ), 1) AS human_revert_rate +FROM unique_recoveries +GROUP BY week +ORDER BY week ASC diff --git a/torchci/clickhouse_queries/autorevert_workflows/params.json b/torchci/clickhouse_queries/autorevert_workflows/params.json new file mode 100644 index 0000000000..541279d0f7 --- /dev/null +++ b/torchci/clickhouse_queries/autorevert_workflows/params.json @@ -0,0 +1,4 @@ +{ + "params": {}, + "tests": [] +} diff --git a/torchci/clickhouse_queries/autorevert_workflows/query.sql b/torchci/clickhouse_queries/autorevert_workflows/query.sql new file mode 100644 index 0000000000..fb86ef1fd1 --- /dev/null +++ b/torchci/clickhouse_queries/autorevert_workflows/query.sql @@ -0,0 +1,13 @@ +-- Get distinct workflow names from autorevert events +-- Used for workflow selector in autorevert metrics page +-- Only shows workflows that autorevert actually monitors +SELECT + workflow AS workflow_name, + count() AS run_count +FROM 
misc.autorevert_events_v2 a FINAL +ARRAY JOIN a.workflows AS workflow +WHERE + a.repo = 'pytorch/pytorch' + AND a.ts >= now() - INTERVAL 90 DAY +GROUP BY workflow +ORDER BY run_count DESC diff --git a/torchci/components/layout/NavBar.tsx b/torchci/components/layout/NavBar.tsx index acc9a99b35..7eaf126525 100644 --- a/torchci/components/layout/NavBar.tsx +++ b/torchci/components/layout/NavBar.tsx @@ -105,6 +105,10 @@ function NavBar() { name: "vLLM CI metrics", href: "/metrics/vllm", }, + { + name: "Autorevert Metrics", + href: "/metrics/autorevert", + }, ].map((item) => ({ label: item.name, route: item.href, diff --git a/torchci/pages/api/autorevert/false-positives.ts b/torchci/pages/api/autorevert/false-positives.ts new file mode 100644 index 0000000000..55d3b683e6 --- /dev/null +++ b/torchci/pages/api/autorevert/false-positives.ts @@ -0,0 +1,202 @@ +import { queryClickhouseSaved } from "lib/clickhouse"; +import { getOctokit } from "lib/github"; +import type { NextApiRequest, NextApiResponse } from "next"; + +// Simple in-memory cache with TTL +interface CacheEntry { + data: any; + timestamp: number; +} + +const cache = new Map(); +const CACHE_TTL_MS = 10 * 60 * 1000; // 10 minutes + +function getCached(key: string): any | null { + const entry = cache.get(key); + if (!entry) return null; + if (Date.now() - entry.timestamp > CACHE_TTL_MS) { + cache.delete(key); + return null; + } + return entry.data; +} + +function setCache(key: string, data: any): void { + cache.set(key, { data, timestamp: Date.now() }); +} + +interface FalsePositiveCandidate { + original_pr: string; + reverted_sha: string; + revert_time: string; + original_message_snippet: string; + reland_sha: string; + reland_time: string; + reland_message_snippet: string; + hours_to_reland: number; + status: string; +} + +interface VerifiedFalsePositive extends FalsePositiveCandidate { + pr_state: string; + commits_after_revert: number; + verification_status: "confirmed_fp" | "legit_revert" | "unknown"; + 
verification_reason: string; +} + +async function verifyFalsePositive( + octokit: any, + candidate: FalsePositiveCandidate +): Promise { + const prNumber = parseInt(candidate.original_pr); + const revertTime = new Date(candidate.revert_time); + + try { + // Fetch PR details + const { data: pr } = await octokit.rest.pulls.get({ + owner: "pytorch", + repo: "pytorch", + pull_number: prNumber, + }); + + // Fetch commits on the PR + const commits = await octokit.paginate(octokit.rest.pulls.listCommits, { + owner: "pytorch", + repo: "pytorch", + pull_number: prNumber, + per_page: 100, + }); + + // Count commits after the revert time + const commitsAfterRevert = commits.filter((commit: any) => { + const commitTime = new Date( + commit.commit.committer?.date || commit.commit.author?.date + ); + return commitTime > revertTime; + }).length; + + // Determine verification status + let verificationStatus: "confirmed_fp" | "legit_revert" | "unknown"; + let verificationReason: string; + + if (pr.state === "open") { + // PR is still open - revert was legit, author hasn't relanded yet + verificationStatus = "legit_revert"; + verificationReason = "PR is still open (not relanded)"; + } else if (commitsAfterRevert > 0) { + // PR had commits after revert - author fixed something + verificationStatus = "legit_revert"; + verificationReason = `PR had ${commitsAfterRevert} commit(s) after revert (author fixed issues)`; + } else { + // PR was merged and had no commits after revert - likely false positive + verificationStatus = "confirmed_fp"; + verificationReason = "PR relanded with no changes after revert"; + } + + return { + ...candidate, + pr_state: pr.state, + commits_after_revert: commitsAfterRevert, + verification_status: verificationStatus, + verification_reason: verificationReason, + }; + } catch (error: any) { + console.error(`Error verifying PR #${prNumber}:`, error.message); + return { + ...candidate, + pr_state: "unknown", + commits_after_revert: -1, + verification_status: 
"unknown", + verification_reason: `Failed to fetch PR data: ${error.message}`, + }; + } +} + +export default async function handler( + req: NextApiRequest, + res: NextApiResponse +) { + if (req.method !== "GET") { + return res.status(405).json({ error: "Method not allowed" }); + } + + const { startTime, stopTime } = req.query; + + if (!startTime || !stopTime) { + return res + .status(400) + .json({ error: "startTime and stopTime are required" }); + } + + // Create cache key from parameters + const cacheKey = `fp-${startTime}-${stopTime}`; + + // Check cache first + const cached = getCached(cacheKey); + if (cached) { + return res.status(200).json(cached); + } + + try { + // Fetch candidates from ClickHouse + const candidates: FalsePositiveCandidate[] = await queryClickhouseSaved( + "autorevert_false_positives", + { + startTime: startTime as string, + stopTime: stopTime as string, + } + ); + + if (candidates.length === 0) { + const result = { candidates: [], verified: [] }; + setCache(cacheKey, result); + return res.status(200).json(result); + } + + // Get Octokit instance + const octokit = await getOctokit("pytorch", "pytorch"); + + // Verify each candidate (with rate limiting consideration) + const verified: VerifiedFalsePositive[] = []; + for (const candidate of candidates) { + // Check per-PR cache + const prCacheKey = `pr-${candidate.original_pr}-${candidate.revert_time}`; + const cachedVerification = getCached(prCacheKey); + + if (cachedVerification) { + verified.push(cachedVerification); + } else { + const verifiedCandidate = await verifyFalsePositive(octokit, candidate); + setCache(prCacheKey, verifiedCandidate); + verified.push(verifiedCandidate); + } + } + + // Separate confirmed false positives from legit reverts + const confirmedFPs = verified.filter( + (v) => v.verification_status === "confirmed_fp" + ); + const legitReverts = verified.filter( + (v) => v.verification_status === "legit_revert" + ); + const unknown = verified.filter((v) => 
v.verification_status === "unknown"); + + const result = { + summary: { + total_candidates: candidates.length, + confirmed_false_positives: confirmedFPs.length, + legit_reverts: legitReverts.length, + unknown: unknown.length, + }, + confirmed_false_positives: confirmedFPs, + legit_reverts: legitReverts, + unknown: unknown, + }; + + setCache(cacheKey, result); + return res.status(200).json(result); + } catch (error: any) { + console.error("Error fetching false positives:", error); + return res.status(500).json({ error: error.message }); + } +} diff --git a/torchci/pages/api/autorevert/metrics.ts b/torchci/pages/api/autorevert/metrics.ts new file mode 100644 index 0000000000..a0cb758724 --- /dev/null +++ b/torchci/pages/api/autorevert/metrics.ts @@ -0,0 +1,392 @@ +import { queryClickhouseSaved } from "lib/clickhouse"; +import { getOctokit } from "lib/github"; +import type { NextApiRequest, NextApiResponse } from "next"; + +// Simple in-memory cache with TTL +interface CacheEntry { + data: any; + timestamp: number; +} + +const cache = new Map(); +const CACHE_TTL_MS = 10 * 60 * 1000; // 10 minutes + +function getCached(key: string): any | null { + const entry = cache.get(key); + if (!entry) return null; + if (Date.now() - entry.timestamp > CACHE_TTL_MS) { + cache.delete(key); + return null; + } + return entry.data; +} + +function setCache(key: string, data: any): void { + cache.set(key, { data, timestamp: Date.now() }); +} + +// Types +interface SignificantRevert { + recovery_sha: string; + recovery_time: string; + recovery_message: string; + signal_keys: string[]; + signals_fixed: number; + max_red_streak_length: number; + reverted_commit_sha: string; + reverted_pr_numbers: string[]; + is_autorevert: boolean; + recovery_type: string; +} + +interface AutorevertEvent { + reverted_sha: string; + autorevert_time: string; + workflows: string[]; + source_signal_keys: string[]; + pr_number: string; + reverted_message_snippet: string; + revert_sha: string | null; + 
revert_time: string | null; + revert_message_snippet: string | null; +} + +interface FalsePositiveCandidate { + reverted_sha: string; + autorevert_time: string; + pr_number: string; + revert_sha: string | null; + revert_time: string | null; + workflows: string[]; + source_signal_keys: string[]; +} + +interface VerifiedFalsePositive extends FalsePositiveCandidate { + pr_state: string; + pr_merged: boolean; + commits_after_revert: number; + verification_status: "confirmed_fp" | "legit_revert" | "unknown"; + verification_reason: string; +} + +interface WeeklyMetric { + week: string; + total_revert_recoveries: number; + autorevert_recoveries: number; + human_revert_recoveries: number; + total_signal_recoveries: number; + non_revert_recoveries: number; + autorevert_rate: number; + human_revert_rate: number; + // New metrics + false_positives: number; + precision: number; + recall: number; +} + +async function verifyFalsePositive( + octokit: any, + candidate: FalsePositiveCandidate +): Promise { + const prNumber = parseInt(candidate.pr_number); + const revertTime = new Date(candidate.autorevert_time); + + try { + // Fetch PR details + const { data: pr } = await octokit.rest.pulls.get({ + owner: "pytorch", + repo: "pytorch", + pull_number: prNumber, + }); + + // Fetch commits on the PR + const commits = await octokit.paginate(octokit.rest.pulls.listCommits, { + owner: "pytorch", + repo: "pytorch", + pull_number: prNumber, + per_page: 100, + }); + + // Count commits after the revert time + const commitsAfterRevert = commits.filter((commit: any) => { + const commitTime = new Date( + commit.commit.committer?.date || commit.commit.author?.date + ); + return commitTime > revertTime; + }).length; + + // Determine verification status + let verificationStatus: "confirmed_fp" | "legit_revert" | "unknown"; + let verificationReason: string; + + // Get PR labels + const labelNames = (pr.labels || []).map((l: any) => l.name); + + // Check for "Merged" label - PyTorch uses cherry-pick 
merging via merge bot, + // so GitHub's merged_at won't be set. The "Merged" label indicates actual merge. + const hasMergedLabel = labelNames.includes("Merged"); + + // Check for "autorevert: disable" label - clear signal that autorevert was wrong + const hasAutorevertDisable = labelNames.includes("autorevert: disable"); + + if (hasAutorevertDisable) { + // Author explicitly disabled autorevert - clear false positive + verificationStatus = "confirmed_fp"; + verificationReason = "PR has 'autorevert: disable' label"; + } else if (pr.state === "open") { + // PR is still open - revert was legit, author hasn't relanded + verificationStatus = "legit_revert"; + verificationReason = "PR is still open (not relanded)"; + } else if (commitsAfterRevert > 0) { + // PR had commits after revert - author fixed something + verificationStatus = "legit_revert"; + verificationReason = `PR had ${commitsAfterRevert} commit(s) after revert (author fixed issues)`; + } else if (hasMergedLabel) { + // PR has "Merged" label and no commits after revert - false positive + verificationStatus = "confirmed_fp"; + verificationReason = + "PR was merged (has 'Merged' label) with no changes after revert"; + } else { + // PR was closed but not merged (abandoned) + verificationStatus = "legit_revert"; + verificationReason = "PR was closed without merging (abandoned)"; + } + + return { + ...candidate, + pr_state: pr.state, + pr_merged: hasMergedLabel, + commits_after_revert: commitsAfterRevert, + verification_status: verificationStatus, + verification_reason: verificationReason, + }; + } catch (error: any) { + console.error(`Error verifying PR #${prNumber}:`, error.message); + return { + ...candidate, + pr_state: "unknown", + pr_merged: false, + commits_after_revert: -1, + verification_status: "unknown", + verification_reason: `Failed to fetch PR data: ${error.message}`, + }; + } +} + +export default async function handler( + req: NextApiRequest, + res: NextApiResponse +) { + if (req.method !== "GET") 
{ + return res.status(405).json({ error: "Method not allowed" }); + } + + const { + startTime, + stopTime, + workflowNames, + minRedCommits = "2", + minGreenCommits = "2", + } = req.query; + + if (!startTime || !stopTime || !workflowNames) { + return res.status(400).json({ + error: "startTime, stopTime, and workflowNames are required", + }); + } + + // Parse workflowNames from JSON string + let workflows: string[]; + try { + workflows = JSON.parse(workflowNames as string); + } catch { + return res + .status(400) + .json({ error: "workflowNames must be valid JSON array" }); + } + + // JSON.parse accepts any JSON value; only an array of strings can be bound to Array(String) + if ( + !Array.isArray(workflows) || + !workflows.every((w) => typeof w === "string") + ) { + return res + .status(400) + .json({ error: "workflowNames must be valid JSON array" }); + } + + // Parse streak thresholds with an explicit radix; the saved queries bind them as UInt8, + // so reject NaN and anything outside 0-255 here instead of failing inside ClickHouse + const minRed = Number.parseInt(minRedCommits as string, 10); + const minGreen = Number.parseInt(minGreenCommits as string, 10); + const isUInt8 = (v: number) => Number.isInteger(v) && v >= 0 && v <= 255; + if (!isUInt8(minRed) || !isUInt8(minGreen)) { + return res.status(400).json({ + error: + "minRedCommits and minGreenCommits must be integers between 0 and 255", + }); + } + + // Create cache key from parameters + const cacheKey = `metrics-${startTime}-${stopTime}-${JSON.stringify( + workflows + )}-${minRedCommits}-${minGreenCommits}`; + + // Check cache first + const cached = getCached(cacheKey); + if (cached) { + return res.status(200).json(cached); + } + + try { + const queryParams = { + startTime: startTime as string, + stopTime: stopTime as string, + workflowNames: workflows, + minRedCommits: minRed, + minGreenCommits: minGreen, + }; + + // Run queries in parallel + const [significantReverts, autorevertEvents, weeklyMetricsRaw] = + await Promise.all([ + queryClickhouseSaved( + "autorevert_significant_reverts", + queryParams + ) as Promise, + queryClickhouseSaved( + "autorevert_events_with_commits", + queryParams + ) as Promise, + queryClickhouseSaved( + "autorevert_weekly_metrics", + queryParams + ) as Promise, + ]); + + // Build set of recovery SHAs (reverts that fixed signals) + const recoveryShaSet = new Set( + significantReverts.map((r) => r.recovery_sha) + ); + + // Classify autorevert events + const truePositives: AutorevertEvent[] = []; + const falsePositiveCandidates: FalsePositiveCandidate[] = []; + + for (const event of autorevertEvents) { + if (event.revert_sha && recoveryShaSet.has(event.revert_sha)) { + // Autorevert's revert commit has signal recovery - True Positive + 
truePositives.push(event); + } else { + // No signal recovery found - False Positive Candidate + falsePositiveCandidates.push({ + reverted_sha: event.reverted_sha, + autorevert_time: event.autorevert_time, + pr_number: event.pr_number, + revert_sha: event.revert_sha, + revert_time: event.revert_time, + workflows: event.workflows, + source_signal_keys: event.source_signal_keys, + }); + } + } + + // Count False Negatives: human reverts with signal recovery + const falseNegatives = significantReverts.filter( + (r) => !r.is_autorevert && r.recovery_type === "human_revert_recovery" + ); + + // Verify false positive candidates via GitHub API + let verifiedFPs: VerifiedFalsePositive[] = []; + if (falsePositiveCandidates.length > 0) { + const octokit = await getOctokit("pytorch", "pytorch"); + + for (const candidate of falsePositiveCandidates) { + // Check per-PR cache + const prCacheKey = `pr-verify-${candidate.pr_number}-${candidate.autorevert_time}`; + const cachedVerification = getCached(prCacheKey); + + if (cachedVerification) { + verifiedFPs.push(cachedVerification); + } else { + const verified = await verifyFalsePositive(octokit, candidate); + setCache(prCacheKey, verified); + verifiedFPs.push(verified); + } + } + } + + // Separate confirmed FPs from legit reverts + const confirmedFPs = verifiedFPs.filter( + (v) => v.verification_status === "confirmed_fp" + ); + const legitReverts = verifiedFPs.filter( + (v) => v.verification_status === "legit_revert" + ); + + // Calculate overall precision/recall + // TP = autoreverts with signal recovery + autoreverts without signal recovery but verified as legit + // (legit reverts without signal recovery are still valid autoreverts, just didn't fix our tracked signals) + const tpWithSignalRecovery = truePositives.length; + const tpWithoutSignalRecovery = legitReverts.length; + const tp = tpWithSignalRecovery + tpWithoutSignalRecovery; + const fp = confirmedFPs.length; + const fn = falseNegatives.length; + + const precision = tp 
+ fp > 0 ? (tp / (tp + fp)) * 100 : 100; + const recall = tp + fn > 0 ? (tp / (tp + fn)) * 100 : 100; + + // Enhance weekly metrics with precision/recall + // Group FPs by week for weekly precision calculation + const fpByWeek = new Map(); + for (const fp of confirmedFPs) { + const week = getWeekStart(new Date(fp.autorevert_time)); + fpByWeek.set(week, (fpByWeek.get(week) || 0) + 1); + } + + const weeklyMetrics: WeeklyMetric[] = weeklyMetricsRaw.map((w) => { + const weekFPs = fpByWeek.get(w.week) || 0; + const weekTP = w.autorevert_recoveries; + const weekFN = w.human_revert_recoveries; + + return { + ...w, + false_positives: weekFPs, + precision: + weekTP + weekFPs > 0 + ? Math.round((weekTP / (weekTP + weekFPs)) * 1000) / 10 + : 100, + recall: + weekTP + weekFN > 0 + ? Math.round((weekTP / (weekTP + weekFN)) * 1000) / 10 + : 100, + }; + }); + + const result = { + summary: { + // Counts + total_autoreverts: autorevertEvents.length, + true_positives: tp, + tp_with_signal_recovery: tpWithSignalRecovery, + tp_without_signal_recovery: tpWithoutSignalRecovery, + confirmed_false_positives: fp, + false_negatives: fn, + // Rates + precision: Math.round(precision * 10) / 10, + recall: Math.round(recall * 10) / 10, + // For weekly metrics aggregation + total_revert_recoveries: significantReverts.filter( + (r) => r.recovery_type !== "non_revert_recovery" + ).length, + }, + weeklyMetrics, + significantReverts, + falsePositives: { + candidates_checked: falsePositiveCandidates.length, + confirmed: confirmedFPs, + legit_reverts: legitReverts, + unknown: verifiedFPs.filter((v) => v.verification_status === "unknown"), + }, + falseNegatives: falseNegatives.map((r) => ({ + recovery_sha: r.recovery_sha, + recovery_time: r.recovery_time, + signals_fixed: r.signals_fixed, + reverted_pr_numbers: r.reverted_pr_numbers, + })), + }; + + setCache(cacheKey, result); + return res.status(200).json(result); + } catch (error: any) { + console.error("Error fetching autorevert metrics:", error); + 
return res.status(500).json({ error: error.message }); + } +} + +// Returns the Monday that starts the week containing `date`, formatted as YYYY-MM-DD. +// All arithmetic is done in UTC because toISOString() always renders UTC; mixing it with +// local-time getters/setters shifts the result by a day on hosts whose timezone is not UTC. +function getWeekStart(date: Date): string { + const d = new Date(date); + const day = d.getUTCDay(); + const diff = d.getUTCDate() - day + (day === 0 ? -6 : 1); // Adjust for Monday start + d.setUTCDate(diff); + d.setUTCHours(0, 0, 0, 0); + return d.toISOString().split("T")[0]; +} diff --git a/torchci/pages/metrics/autorevert-spec.md b/torchci/pages/metrics/autorevert-spec.md new file mode 100644 index 0000000000..bbffbde296 --- /dev/null +++ b/torchci/pages/metrics/autorevert-spec.md @@ -0,0 +1,260 @@ +# Autorevert Metrics Page Specification + +## Overview + +The autorevert metrics page (`/metrics/autorevert`) provides performance analytics for the PyTorch autorevert system. It tracks signal recovery events, attributes reverts to autorevert vs human, and identifies false positives. + +## Key Concepts + +### Signal Recovery Event + +A signal recovery occurs when a job group (workflow + normalized job name) transitions from: + +- **2+ consecutive red commits** → **2+ consecutive green commits** + +The first green commit in the green streak is the "recovery commit". + +### Revert Attribution + +Recovery commits that are reverts (message starts with "Revert" or "Back out") are attributed to: + +- **Autorevert**: The reverted commit SHA matches an entry in `autorevert_events_v2` +- **Human**: No matching autorevert event found + +### False Positive Detection + +A false positive is an autorevert that should NOT have happened. Detection criteria: + +1. Autorevert event exists (commit was reverted by the system) +2. No signal recovery associated with the revert (the revert didn't fix any signals) +3. GitHub API verification confirms the original PR was merged without changes after revert + +## Architecture + +### Backend Endpoint: `/api/autorevert/metrics` + +Single endpoint that provides all data for the page. 
+ +**Parameters:** + +- `startTime` - Start of time range (DateTime64) +- `stopTime` - End of time range (DateTime64) +- `workflowNames` - Array of workflow names to analyze +- `minRedCommits` - Minimum red streak length (default: 2) +- `minGreenCommits` - Minimum green streak length (default: 2) + +**Response:** + +```typescript +{ + weeklyMetrics: WeeklyMetric[]; + significantReverts: SignificantRevert[]; + falsePositiveCandidates: FalsePositiveCandidate[]; + summary: { + totalRevertRecoveries: number; + autorevertRecoveries: number; + humanRevertRecoveries: number; + confirmedFalsePositives: number; + autorevertRate: number; + precision: number; + recall: number; + }; +} +``` + +### Data Flow + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ ClickHouse Queries │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ │ +│ 1. Base Query: Signal Recovery Events │ +│ - Identifies red→green transitions per job group │ +│ - Filters by workflow names │ +│ - Returns recovery_sha for each recovery event │ +│ │ +│ 2. Autorevert Events Query │ +│ - Gets all autorevert events where hasAny(workflows, workflowNames) │ +│ - Links to revert commit via PR number + timestamp │ +│ - Returns: reverted_sha, revert_commit_sha, pr_number, event_time │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ Backend Processing │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ │ +│ 3. Join & Classify │ +│ - Match autorevert revert_commit_sha against recovery_sha list │ +│ - If match: autorevert has signal recovery (True Positive) │ +│ - If no match: False Positive Candidate │ +│ │ +│ 4. 
GitHub API Verification (for FP candidates only) │ +│ - Fetch original PR state and commit history │ +│ - Classify as: │ +│ - PR still open → Legit revert (author hasn't relanded) │ +│ - PR has commits after revert → Legit revert (author fixed issues) │ +│ - PR merged with no changes → Confirmed False Positive │ +│ │ +│ 5. Calculate Metrics │ +│ - Aggregate by week for charts │ +│ - Calculate precision/recall │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +## Linking Autorevert Events to Revert Commits + +The `autorevert_events_v2` table records when the system decides to revert, but doesn't directly store the resulting revert commit SHA. + +**Linking Strategy:** + +1. Extract PR number from the reverted commit's message (via push table) +2. Find revert commits in push table where: + - Message matches `Revert%` or `Back out%` + - Message mentions the PR number (e.g., `#12345` or `/pull/12345`) + - Commit timestamp > autorevert event timestamp +3. 
Take the first (earliest) matching revert commit + +```sql +-- Pseudocode for linking +WITH autorevert_with_pr AS ( + SELECT + a.commit_sha AS reverted_sha, + a.ts AS autorevert_time, + a.workflows, + -- Extract PR from reverted commit message + extractPR(p.head_commit.message) AS pr_number + FROM autorevert_events_v2 a + JOIN push p ON p.head_commit.id = a.commit_sha + WHERE hasAny(a.workflows, {workflowNames}) +), +revert_commits AS ( + SELECT + sha, + timestamp, + message, + extractPR(message) AS mentioned_pr + FROM push + WHERE message LIKE 'Revert%' OR message LIKE 'Back out%' +), +linked AS ( + SELECT + a.*, + r.sha AS revert_commit_sha, + row_number() OVER ( + PARTITION BY a.reverted_sha + ORDER BY r.timestamp ASC + ) AS rn + FROM autorevert_with_pr a + JOIN revert_commits r + ON r.mentioned_pr = a.pr_number + AND r.timestamp > a.autorevert_time +) +SELECT * FROM linked WHERE rn = 1 +``` + +## Precision/Recall Metrics + +### Definitions + +**True Positive (TP):** Autorevert that successfully recovered a signal + +- Autorevert event exists +- Revert commit appears in significant_reverts (has signal recovery) + +**False Positive (FP):** Autorevert that didn't need to happen + +- Autorevert event exists +- No signal recovery for the revert +- GitHub verification confirms: PR merged with no changes after revert + +**False Negative (FN):** Human revert with signal recovery that autorevert should have caught + +- Recovery event is a revert (human-initiated) +- Revert has signal recovery (appears in significant_reverts) +- No matching autorevert event +- This represents missed opportunities + +**True Negative (TN):** Not directly measurable + +- Commits that were correctly NOT reverted + +### Formulas + +``` +Precision = TP / (TP + FP) + = Autoreverts with signal recovery / All autoreverts + +Recall = TP / (TP + FN) + = Autorevert recoveries / All revert recoveries +``` + +### Weekly Metrics + +For the chart, calculate per week: + +- `autorevert_recoveries` (TP) +- 
`human_revert_recoveries` (potential FN) +- `non_revert_recoveries` (not relevant to autorevert) +- `false_positives` (FP - autoreverts without recovery) +- `precision` = TP / (TP + FP) +- `recall` = TP / (TP + FN) + +## Database Tables + +### `misc.autorevert_events_v2` + +```sql +ts DateTime -- When autorevert decided to act +repo String -- 'pytorch/pytorch' +action Enum -- 'revert' | 'restart' | 'none' +commit_sha FixedString(40) -- The reverted (bad) commit +workflows Array(String) -- Workflows that triggered the action +source_signal_keys Array(String) -- Signal keys that caused the action +dry_run UInt8 -- 1 if dry run +failed UInt8 -- 1 if action failed +``` + +### `push` (for commit data) + +```sql +head_commit.id String -- Commit SHA +head_commit.timestamp DateTime -- Commit time +head_commit.message String -- Commit message +repository.full_name String -- 'pytorch/pytorch' +ref String -- 'refs/heads/main' +``` + +### `workflow_run` / `workflow_job` (for signal status) + +Used to determine job pass/fail status per commit. + +## UI Components + +1. **Scalar Metrics** - Summary cards showing totals and rates +2. **Weekly Trend Chart** - Stacked bar chart with precision/recall overlay +3. **Significant Reverts Table** - List of reverts with signal recovery +4. 
**False Positives Table** - Verified false positives with GitHub data + +## Caching Strategy + +- ClickHouse queries: Rely on SWR client-side caching (5 min refresh) +- GitHub API calls: In-memory server-side cache with 10 min TTL +- Cache key includes all query parameters + +## Implementation Status + +- [x] Basic signal recovery detection +- [x] Revert attribution (autorevert vs human) +- [x] Weekly metrics aggregation +- [x] Significant reverts table +- [x] Workflow selection UI +- [x] Unified backend endpoint (`/api/autorevert/metrics`) +- [x] Autorevert → revert commit linking (via PR number + timestamp) +- [x] False positive detection (autoreverts without signal recovery) +- [x] GitHub API verification (PR state, merged status, commits after revert) +- [x] Precision/recall calculation +- [x] Weekly precision/recall chart diff --git a/torchci/pages/metrics/autorevert.tsx b/torchci/pages/metrics/autorevert.tsx new file mode 100644 index 0000000000..b7adb27ed9 --- /dev/null +++ b/torchci/pages/metrics/autorevert.tsx @@ -0,0 +1,691 @@ +import { + Autocomplete, + Box, + Button, + Chip, + Grid, + Paper, + Skeleton, + Stack, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + TextField, + Tooltip, + Typography, +} from "@mui/material"; +import dayjs from "dayjs"; +import { EChartsOption } from "echarts"; +import ReactECharts from "echarts-for-react"; +import { useDarkMode } from "lib/DarkModeContext"; +import { fetcher } from "lib/GeneralUtils"; +import { TimeRangePicker } from "pages/metrics"; +import { useState } from "react"; +import useSWR from "swr"; + +// Viable/strict workflows that block merges - these are the default selection +const VIABLE_STRICT_WORKFLOWS = ["Lint", "pull", "trunk", "linux-aarch64"]; +const MIN_RED_COMMITS = 2; +const MIN_GREEN_COMMITS = 2; + +function ScalarMetric({ + title, + value, + tooltip, + color, +}: { + title: string; + value: string | number | undefined; + tooltip?: string; + color?: string; +}) { + 
return ( + + + + + {title} + + + {value === undefined ? "-" : value} + + + + + ); +} + +// Legend component explaining TP/FN/FP +function MetricsLegend() { + return ( + + + Metrics Legend + + + + + + + TP = True Positive (correct autorevert) + + + + + + + + FN = False Negative (missed by autorevert) + + + + + + + + FP = False Positive (incorrect autorevert) + + + + + + + Non-revert recoveries + + + + + ); +} + +function WeeklyTrendChart({ data }: { data: any[] | undefined }) { + const { darkMode } = useDarkMode(); + + if (data === undefined) { + return ; + } + + const options: EChartsOption = { + title: { + text: "Weekly Autorevert Metrics", + subtext: "Signal recovery events with precision/recall", + }, + grid: { top: 80, right: 140, bottom: 60, left: 60 }, + xAxis: { + type: "category", + data: data.map((d) => d.week), + axisLabel: { rotate: 45 }, + }, + yAxis: [ + { + type: "value", + name: "Count", + position: "left", + }, + { + type: "value", + name: "Rate %", + position: "right", + max: 100, + }, + ], + legend: { + data: [ + "TP (Autorevert)", + "FN (Human Revert)", + "FP (Wrong Revert)", + "Non-Revert Fix", + "Precision %", + "Recall %", + ], + top: 30, + }, + tooltip: { + trigger: "axis", + formatter: (params: any) => { + if (!Array.isArray(params)) return ""; + const week = params[0]?.axisValue || ""; + let html = `${week}
`; + params.forEach((p: any) => { + const marker = ``; + const value = typeof p.value === "number" ? p.value : 0; + const suffix = p.seriesName.includes("%") ? "%" : ""; + html += `${marker}${p.seriesName}: ${value}${suffix}
`; + }); + return html; + }, + }, + series: [ + { + name: "TP (Autorevert)", + type: "bar", + stack: "counts", + data: data.map((d) => d.autorevert_recoveries), + itemStyle: { color: "#3ba272" }, + }, + { + name: "FN (Human Revert)", + type: "bar", + stack: "counts", + data: data.map((d) => d.human_revert_recoveries), + itemStyle: { color: "#ed6c02" }, + }, + { + name: "FP (Wrong Revert)", + type: "bar", + stack: "counts", + data: data.map((d) => d.false_positives || 0), + itemStyle: { color: "#d32f2f" }, + }, + { + name: "Non-Revert Fix", + type: "bar", + stack: "counts", + data: data.map((d) => d.non_revert_recoveries || 0), + itemStyle: { color: "#8c8c8c" }, + }, + { + name: "Precision %", + type: "line", + yAxisIndex: 1, + data: data.map((d) => d.precision), + itemStyle: { color: "#5470c6" }, + lineStyle: { width: 2 }, + }, + { + name: "Recall %", + type: "line", + yAxisIndex: 1, + data: data.map((d) => d.recall), + itemStyle: { color: "#91cc75" }, + lineStyle: { width: 2, type: "dashed" }, + }, + ], + }; + + return ( + + + + ); +} + +function FalsePositivesTable({ data }: { data: any | undefined }) { + if (data === undefined) { + return ; + } + + const confirmedFPs = data.confirmed || []; + const legitReverts = data.legit_reverts || []; + const candidatesChecked = data.candidates_checked || 0; + + if (candidatesChecked === 0) { + return ( + + + False Positive Analysis + + + No autoreverts without signal recovery found in this time range. + + + ); + } + + const renderRow = (row: any, idx: number) => ( + + + {dayjs(row.autorevert_time).format("YYYY-MM-DD HH:mm")} + + + + #{row.pr_number} + + + + + + + + + + + {row.commits_after_revert >= 0 ? 
row.commits_after_revert : "?"} + + + + + + {row.reverted_sha?.substring(0, 7)} + + + + + {row.source_signal_keys?.length || 0} signals + + + + ); + + return ( + + + False Positive Analysis + + + Analyzed {candidatesChecked} autoreverts without signal recovery.{" "} + + {confirmedFPs.length} confirmed false positive(s) + {" "} + (PR merged with no changes after revert),{" "} + + {legitReverts.length} legit revert(s) + {" "} + (PR still open or had commits after revert). + + + {confirmedFPs.length > 0 && ( + <> + + Confirmed False Positives ({confirmedFPs.length}) + + + + + + Autorevert Time + PR + Status + Commits After + Reverted SHA + Signals + + + + {confirmedFPs.map((row: any, idx: number) => + renderRow(row, idx) + )} + +
+
+ + )} + + {legitReverts.length > 0 && ( + <> + + Legit Reverts ({legitReverts.length}) - No signal recovery but PR + not relanded unchanged + + + + + + Autorevert Time + PR + Status + Commits After + Reverted SHA + Signals + + + + {legitReverts.map((row: any, idx: number) => + renderRow(row, idx) + )} + +
+
+ + )} +
+ ); +} + +function SignificantRevertsTable({ data }: { data: any[] | undefined }) { + if (data === undefined) { + return ; + } + + return ( + + + Significant Reverts ({data.length} unique reverts with signal recovery) + + + + + + Time + Signals Fixed + Type + Max Red Streak + Recovery SHA + Reverted PR + + + + {data.map((row, idx) => ( + + + {dayjs(row.recovery_time).format("YYYY-MM-DD HH:mm")} + + + + {row.signal_keys?.map((sig: string, i: number) => ( +
{sig}
+ ))} + + } + > + + {row.signals_fixed} signal + {row.signals_fixed !== 1 ? "s" : ""} + +
+
+ + + + {row.max_red_streak_length} + + + {row.recovery_sha?.substring(0, 7)} + + + + {row.reverted_pr_numbers?.length > 0 + ? row.reverted_pr_numbers.map((pr: string, i: number) => ( + + #{pr} + + )) + : "-"} + +
+ ))} +
+
+
+
+ ); +} + +export default function AutorevertMetricsPage() { + const [startTime, setStartTime] = useState(dayjs().subtract(90, "day")); + const [stopTime, setStopTime] = useState(dayjs()); + const [timeRange, setTimeRange] = useState(90); + const [selectedWorkflows, setSelectedWorkflows] = useState( + VIABLE_STRICT_WORKFLOWS + ); + + // Fetch available workflows + const workflowsUrl = `/api/clickhouse/autorevert_workflows?parameters=${encodeURIComponent( + JSON.stringify({}) + )}`; + const { data: availableWorkflows } = useSWR< + { workflow_name: string; run_count: number }[] + >(workflowsUrl, fetcher); + + const workflowOptions = + availableWorkflows?.map((w) => w.workflow_name) || VIABLE_STRICT_WORKFLOWS; + + // Use unified metrics endpoint + const metricsUrl = `/api/autorevert/metrics?startTime=${encodeURIComponent( + startTime.utc().format("YYYY-MM-DDTHH:mm:ss.SSS") + )}&stopTime=${encodeURIComponent( + stopTime.utc().format("YYYY-MM-DDTHH:mm:ss.SSS") + )}&workflowNames=${encodeURIComponent( + JSON.stringify(selectedWorkflows) + )}&minRedCommits=${MIN_RED_COMMITS}&minGreenCommits=${MIN_GREEN_COMMITS}`; + + const { data: metricsData } = useSWR(metricsUrl, fetcher, { + refreshInterval: 5 * 60 * 1000, + }); + + const summary = metricsData?.summary; + + return ( + + + + Autorevert Metrics + + + + + + Tracks autorevert system performance using precision/recall metrics. + Precision = TP / (TP + FP) measures how often + autoreverts are correct. + Recall = TP / (TP + FN) measures how many reverts + autorevert catches. Signal recovery = job group transitions from 2+ red + commits to 2+ green commits. + + + + + + + + setSelectedWorkflows(newValue)} + renderInput={(params) => ( + + )} + sx={{ minWidth: 400, maxWidth: 600 }} + limitTags={3} + /> + + + + + {/* Summary Metrics */} + + + + + + + + + + + + + + + + + + + + + + {/* Metrics Legend */} + + + {/* Weekly Trend Chart */} + + + {/* Significant Reverts Table */} + + + {/* False Positives Table */} + + + ); +}