Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"params": {
"startTime": "DateTime64(3)",
"stopTime": "DateTime64(3)",
"workflowNames": "Array(String)"
},
"tests": []
}
139 changes: 139 additions & 0 deletions torchci/clickhouse_queries/autorevert_events_with_commits/query.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
-- Autorevert Events with Linked Revert Commits
-- Links autorevert_events_v2 to the actual revert commits via PR number and timestamp.
-- Used for false positive detection.
--
-- Output: one row per autoreverted commit SHA, newest autorevert first.
-- revert_sha / revert_time / revert_message_snippet are NULL when no revert
-- commit for the same PR landed within one hour after the autorevert event.
--
-- Parameters:
--   startTime, stopTime  half-open window [startTime, stopTime) applied to the
--                        revert commits; autorevert events are read with one
--                        extra day of padding on each side of that window
--   workflowNames        an event is kept when its workflows array shares at
--                        least one element with this list

WITH autorevert_events AS (
    SELECT
        toString(a.commit_sha) AS reverted_sha,
        a.ts AS autorevert_time,
        a.workflows,
        a.source_signal_keys
    FROM misc.autorevert_events_v2 a FINAL
    WHERE
        a.repo = 'pytorch/pytorch'
        -- Only real, successful revert actions
        AND a.action = 'revert'
        AND a.dry_run = 0
        AND a.failed = 0
        AND a.ts >= toDateTime({startTime: DateTime64(3)}) - INTERVAL 1 DAY
        AND a.ts < toDateTime({stopTime: DateTime64(3)}) + INTERVAL 1 DAY
        -- Filter by workflow intersection
        AND hasAny(a.workflows, {workflowNames: Array(String)})
),

-- Get PR number from the reverted commit's message.
-- pr_number is '' when the message has no "Pull Request resolved" trailer.
autorevert_with_pr AS (
    SELECT
        a.reverted_sha,
        a.autorevert_time,
        a.workflows,
        a.source_signal_keys,
        p.head_commit.'message' AS reverted_message,
        -- Extract PR number from "Pull Request resolved: https://github.com/pytorch/pytorch/pull/XXXXX"
        arrayElement(
            extractAll(
                p.head_commit.'message',
                'Pull Request resolved: https://github.com/pytorch/pytorch/pull/(\\d+)'
            ),
            1
        ) AS pr_number
    FROM autorevert_events a
    INNER JOIN push p ON p.head_commit.'id' = a.reverted_sha
    WHERE p.repository.'full_name' = 'pytorch/pytorch'
),

-- Find revert commits in the time range
revert_commits AS (
    SELECT
        push.head_commit.'id' AS revert_sha,
        push.head_commit.'timestamp' AS revert_time,
        push.head_commit.'message' AS revert_message,
        -- Extract mentioned PR numbers from revert message.
        -- Preferred source: the first "Reverted https://.../pull/N" line.
        -- For nested reverts like 'Reapply "Back out "..." (#164939)" (#165910)" (#166812)',
        -- the actual PR is the LAST one mentioned in the title line (before newline)
        if(
            arrayElement(
                extractAll(
                    push.head_commit.'message',
                    'Reverted https://github.com/pytorch/pytorch/pull/(\\d+)'
                ),
                1
            ) != '',
            arrayElement(
                extractAll(
                    push.head_commit.'message',
                    'Reverted https://github.com/pytorch/pytorch/pull/(\\d+)'
                ),
                1
            ),
            -- Get the LAST PR number from title (use -1 for last element)
            arrayElement(
                extractAll(
                    -- Extract just the first line (title) to get the correct PR
                    arrayElement(
                        splitByChar('\n', push.head_commit.'message'),
                        1
                    ),
                    '#(\\d+)'
                ),
                -1
            )
        ) AS pr_reference
    FROM push
    WHERE
        push.ref IN ('refs/heads/master', 'refs/heads/main')
        AND push.repository.'full_name' = 'pytorch/pytorch'
        AND push.head_commit.'timestamp' >= {startTime: DateTime64(3)}
        AND push.head_commit.'timestamp' < {stopTime: DateTime64(3)}
        AND (
            push.head_commit.'message' LIKE 'Revert %'
            OR push.head_commit.'message' LIKE 'Reapply %'
            OR push.head_commit.'message' LIKE 'Back out%'
        )
),

-- Join on PR number only (this avoids ClickHouse JOIN ON restrictions) and
-- record the time-window test as a flag instead of a WHERE filter.
-- Filtering the joined rows in WHERE would discard an autorevert whose PR has
-- revert commits only outside the one-hour window, so it would never be
-- reported as "no linked revert found".
candidate_reverts AS (
    SELECT
        a.reverted_sha,
        a.autorevert_time,
        a.workflows,
        a.source_signal_keys,
        a.pr_number,
        substring(a.reverted_message, 1, 100) AS reverted_message_snippet,
        r.revert_sha AS candidate_sha,
        r.revert_time AS candidate_time,
        substring(r.revert_message, 1, 100) AS candidate_message_snippet,
        -- 1 when the joined commit is a real match landing within one hour
        -- after the autorevert event, else 0.
        -- NOTE(review): an unmatched LEFT JOIN row carries '' / epoch defaults
        -- when join_use_nulls = 0 and NULLs when it is 1; the '' check plus
        -- ifNull make the flag 0 in both cases - confirm the deployed setting.
        ifNull(
            r.revert_sha != ''
            AND r.revert_time > a.autorevert_time
            AND r.revert_time < a.autorevert_time + INTERVAL 1 HOUR,
            0
        ) AS is_linked
    FROM autorevert_with_pr a
    LEFT JOIN revert_commits r ON r.pr_reference = a.pr_number
    WHERE
        a.pr_number != ''
),

-- Take the first linked revert commit after the autorevert event for each
-- reverted SHA; when none is linked, keep a single row with NULL revert columns
linked_autoreverts AS (
    SELECT
        reverted_sha,
        autorevert_time,
        workflows,
        source_signal_keys,
        pr_number,
        reverted_message_snippet,
        if(is_linked, candidate_sha, NULL) AS revert_sha,
        if(is_linked, candidate_time, NULL) AS revert_time,
        if(is_linked, candidate_message_snippet, NULL) AS revert_message_snippet,
        row_number() OVER (
            PARTITION BY reverted_sha
            -- Linked candidates first, earliest revert first among them
            ORDER BY is_linked DESC, candidate_time ASC
        ) AS rn
    FROM candidate_reverts
)

SELECT
    reverted_sha,
    autorevert_time,
    workflows,
    source_signal_keys,
    pr_number,
    reverted_message_snippet,
    revert_sha,
    revert_time,
    revert_message_snippet
FROM linked_autoreverts
WHERE rn = 1
ORDER BY autorevert_time DESC
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"params": {
"startTime": "DateTime64(3)",
"stopTime": "DateTime64(3)"
},
"tests": []
}
107 changes: 107 additions & 0 deletions torchci/clickhouse_queries/autorevert_false_positives/query.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
-- Autorevert False Positive Candidates
-- Finds autoreverted commits that were later re-landed
-- NOTE: This is a preliminary list. True false positives require GitHub API verification:
-- - If PR is still OPEN (not relanded) → revert was LEGIT
-- - If PR had commits after revert before reland → revert was LEGIT (author fixed something)
-- - If PR was relanded with NO changes → revert was FALSE POSITIVE
--
-- Output: one row per autoreverted PR (its first reland inside the window),
-- newest autorevert first; every row carries status = 'needs_verification'.
--
-- Parameters:
--   startTime, stopTime  half-open window [startTime, stopTime) applied to the
--                        reland commits; autorevert events are read with seven
--                        extra days of padding on each side of that window

WITH autorevert_events AS (
    SELECT
        toString(a.commit_sha) AS reverted_sha,
        -- Earliest successful autorevert event recorded for the commit
        min(a.ts) AS revert_time
    FROM misc.autorevert_events_v2 a FINAL
    WHERE
        a.repo = 'pytorch/pytorch'
        AND a.action = 'revert'
        AND a.dry_run = 0
        AND a.failed = 0
        AND a.ts >= toDateTime({startTime: DateTime64(3)}) - INTERVAL 7 DAY
        AND a.ts < toDateTime({stopTime: DateTime64(3)}) + INTERVAL 7 DAY
    GROUP BY reverted_sha
),

-- Get original commit details and extract PR number.
-- pr_number is '' when the message has no "Pull Request resolved" trailer.
autorevert_with_pr AS (
    SELECT
        a.reverted_sha,
        a.revert_time,
        p.head_commit.'message' AS original_message,
        arrayElement(
            extractAll(
                p.head_commit.'message',
                'Pull Request resolved: https://github.com/pytorch/pytorch/pull/(\\d+)'
            ),
            1
        ) AS pr_number
    FROM autorevert_events a
    INNER JOIN push p ON p.head_commit.'id' = a.reverted_sha
    WHERE p.repository.'full_name' = 'pytorch/pytorch'
),

-- Find all commits in the time range
all_commits AS (
    SELECT
        push.head_commit.'id' AS sha,
        push.head_commit.'timestamp' AS time,
        push.head_commit.'message' AS message
    FROM push
    WHERE
        push.ref IN ('refs/heads/master', 'refs/heads/main')
        AND push.repository.'full_name' = 'pytorch/pytorch'
        AND push.head_commit.'timestamp' >= {startTime: DateTime64(3)}
        AND push.head_commit.'timestamp' < {stopTime: DateTime64(3)}
),

-- Find Reland commits (recognised by the prefix of the commit message)
reland_commits AS (
    SELECT
        sha AS reland_sha,
        time AS reland_time,
        message AS reland_message,
        -- Extract the original PR being relanded (first PR number mentioned)
        arrayElement(extractAll(message, '#(\\d+)'), 1) AS primary_mentioned_pr
    FROM all_commits
    WHERE
        message LIKE 'Reland%'
        OR message LIKE '[Reland]%'
        OR message LIKE 'Re-land%'
),

-- Match autoreverts to relands, deduplicated by original PR
-- Take the first reland for each autoreverted PR
matched_relands AS (
    SELECT
        a.reverted_sha,
        a.revert_time,
        a.pr_number AS original_pr,
        substring(a.original_message, 1, 100) AS original_message_snippet,
        r.reland_sha,
        r.reland_time,
        substring(r.reland_message, 1, 100) AS reland_message_snippet,
        dateDiff('hour', a.revert_time, r.reland_time) AS hours_to_reland,
        row_number() OVER (
            PARTITION BY a.pr_number
            -- Earliest reland wins. The remaining keys only break ties so the
            -- surviving row is stable across runs: a PR autoreverted more than
            -- once before that reland yields several rows with the same
            -- reland_time, and the most recent autorevert is preferred.
            ORDER BY
                r.reland_time ASC,
                r.reland_sha ASC,
                a.revert_time DESC,
                a.reverted_sha ASC
        ) AS rn
    FROM autorevert_with_pr a
    INNER JOIN reland_commits r ON r.primary_mentioned_pr = a.pr_number
    WHERE
        a.pr_number != ''
        -- The reland must follow the autorevert by less than 30 days
        AND r.reland_time > a.revert_time
        AND r.reland_time < a.revert_time + INTERVAL 30 DAY
)

SELECT
    original_pr,
    reverted_sha,
    revert_time,
    original_message_snippet,
    reland_sha,
    reland_time,
    reland_message_snippet,
    hours_to_reland,
    'needs_verification' AS status
FROM matched_relands
WHERE rn = 1 -- Only first reland per PR
ORDER BY revert_time DESC
29 changes: 29 additions & 0 deletions torchci/clickhouse_queries/autorevert_signal_recovery/params.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"params": {
"startTime": "DateTime64(3)",
"stopTime": "DateTime64(3)",
"workflowNames": "Array(String)",
"minRedCommits": "UInt8",
"minGreenCommits": "UInt8"
},
"tests": [
{
"startTime": "2024-12-01 00:00:00.000",
"stopTime": "2024-12-29 00:00:00.000",
"workflowNames": ["Lint", "pull", "trunk", "linux-aarch64"],
"minRedCommits": 2,
"minGreenCommits": 2
},
{
"startTime": {
"from_now": -14
},
"stopTime": {
"from_now": 0
},
"workflowNames": ["Lint", "pull", "trunk", "linux-aarch64"],
"minRedCommits": 2,
"minGreenCommits": 2
}
]
}
Loading