analyze: script for automatically fixing some compile errors #1174
Merged
Changes from all commits (5 commits):
d411d44  analyze: add script to automatically fix compile errors (spernsteiner)
39b22cf  analyze: auto_fix_errors.py: handle suggested lifetime bounds (spernsteiner)
2a24483  auto_fix_errors.py: remove `Copy` from types that don't support it (spernsteiner)
ba9bad9  auto_fix_errors.py: make compatible with python < 3.7 (spernsteiner)
7098d9e  auto_fix_errors.py: simplify code using defaultdict (spernsteiner)
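For orientation: the new script reads the newline-delimited JSON diagnostics produced by `rustc --error-format json` and applies every suggestion that rustc marks as `MachineApplicable`, plus two special cases (suggested lifetime bounds and removal of `Copy` from `#[derive(...)]`). The snippet below is a hand-written, heavily abridged sketch of one such diagnostic, showing only the fields the script actually reads; real rustc output contains many more fields, and the values here are invented for illustration.

# Illustrative only: an abridged rustc JSON diagnostic (values are made up).
# The script walks `spans` and `children`, keeping any span whose
# `suggestion_applicability` is 'MachineApplicable'.
diagnostic = {
    'level': 'error',
    'message': 'mismatched types',
    'spans': [],
    'children': [{
        'level': 'help',
        'message': 'convert the value explicitly',
        'spans': [{
            'file_name': 'src/lib.rs',
            'line_start': 10,
            'byte_start': 120,
            'byte_end': 125,
            'suggestion_applicability': 'MachineApplicable',
            'suggested_replacement': 'x as u32',
        }],
        'children': [],
    }],
}
# From this record the script would queue a single fix: replace bytes
# 120..125 of src/lib.rs with "x as u32".
print(diagnostic['children'][0]['spans'][0]['suggested_replacement'])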
@@ -49,5 +49,6 @@ marks.*.json
 # Outputs of `c2rust-analyze`
 inspect/
 *.analysis.txt
+*.rs.fixed
 analysis.txt
 polonius_cache/
@@ -1,2 +1,3 @@
 /inspect/
 *.rlib
+/tests/auto_fix_errors/*.json
@@ -0,0 +1,283 @@
import argparse
from collections import defaultdict, namedtuple
import json
import re
import sys


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(
        description='automatically apply rustc-suggested fixes for compile errors')
    parser.add_argument('-n', '--dry-run', action='store_true',
        help="print fixes that would be applied, but don't modify any files")
    parser.add_argument('path', metavar='ERRORS.JSON',
        help='output of `rustc --error-format json`')
    return parser.parse_args()


Fix = namedtuple('Fix', (
    # Path to the file to modify.
    'file_path',
    # Line number where the fix will be applied. This is used for debug output
    # only.
    'line_number',
    # Start of the byte range to replace.
    'start_byte',
    # End (exclusive) of the byte range to replace.
    'end_byte',
    # Replacement text (as a `str`, not `bytes`).
    'new_text',
    # The original error message. This is used for debug output only.
    'message',
))


LifetimeBound = namedtuple('LifetimeBound', (
    'file_path',
    'line_number',
    # Byte offset of the start of the lifetime parameter declaration.
    'start_byte',
    # Byte offset of the end of the lifetime parameter declaration.
    'end_byte',
    # The lifetime to use in the new bound. If `'a: 'b` is the suggested
    # bound, then `start/end_byte` points to the declaration of `'a`, and
    # `bound_lifetime` is the string `"'b"`.
    'bound_lifetime',
))


RemoveDeriveCopy = namedtuple('RemoveDeriveCopy', (
    'file_path',
    'line_number',
    # Byte offset of the start of the token `Copy`.
    'start_byte',
    # Byte offset of the end of the token `Copy`.
    'end_byte',
))


MSG_LIFETIME_BOUND = 'lifetime may not live long enough'

MSG_DERIVE_COPY = 'the trait `Copy` may not be implemented for this type'

LIFETIME_DEFINED_RE = re.compile(r'^lifetime `([^`]*)` defined here$')
CONSIDER_ADDING_BOUND_RE = re.compile(r'^consider adding the following bound: `([^`:]*): ([^`]*)`$')
SPACE_COLON_RE = re.compile(rb'\s*:')
SPACE_COMMA_RE = re.compile(rb'\s*,')


def main():
    args = parse_args()

    fixes = []
    def gather_fixes(j, message):
        for span in j['spans']:
            if span.get('suggestion_applicability') != 'MachineApplicable':
                continue
            fix = Fix(
                file_path=span['file_name'],
                line_number=span['line_start'],
                start_byte=span['byte_start'],
                end_byte=span['byte_end'],
                new_text=span['suggested_replacement'],
                message=message,
            )
            fixes.append(fix)

        for child in j['children']:
            gather_fixes(child, message)

    lifetime_bounds = []
    def gather_lifetime_bounds(j):
        # We look for a particular pattern seen in lifetime errors. First,
        # there should be a span with label "lifetime 'a defined here" pointing
        # at the declaration of `'a`. Second, there should be a child of type
        # `help` with the text "consider adding the following bound: `'a: 'b`".

        decl_spans = {}
        for span in j['spans']:
            m = LIFETIME_DEFINED_RE.match(span['label'])
            if m is not None:
                lifetime = m.group(1)
                if lifetime in decl_spans:
                    # Duplicate declaration for this lifetime. This shouldn't
                    # happen, but we can proceed as long as the lifetime isn't
                    # the target of the bound. We mark the duplicate lifetime
                    # so it can't be used as the target.
                    decl_spans[lifetime] = None
                    continue
                decl_spans[lifetime] = span

        for child in j['children']:
            if child['level'] != 'help':
                continue
            m = CONSIDER_ADDING_BOUND_RE.match(child['message'])
            if m is None:
                continue
            lifetime_a = m.group(1)
            lifetime_b = m.group(2)
            span = decl_spans.get(lifetime_a)
            if span is None:
                # We don't have anywhere to insert the new bound. This can
                # also happen if there were duplicate declaration spans for
                # this lifetime (we explicitly insert `None` into the map in
                # that case).
                continue
            lifetime_bounds.append(LifetimeBound(
                file_path=span['file_name'],
                line_number=span['line_start'],
                start_byte=span['byte_start'],
                end_byte=span['byte_end'],
                bound_lifetime=lifetime_b,
            ))

    remove_derive_copies = []
    def handle_derive_copy(j):
        # Find the "primary span", which covers the `Copy` token.
        primary_span = None
        for span in j['spans']:
            if span.get('is_primary'):
                primary_span = span
                break

        if primary_span is None:
            return

        remove_derive_copies.append(RemoveDeriveCopy(
            file_path=primary_span['file_name'],
            line_number=primary_span['line_start'],
            start_byte=primary_span['byte_start'],
            end_byte=primary_span['byte_end'],
        ))

    with open(args.path, 'r') as f:
        for line in f:
            j = json.loads(line)

            # Only process errors, not warnings.
            level = j['level']
            if level == 'error':
                pass
            elif level in ('warning', 'failure-note'):
                continue
            else:
                # `help`, `note`, etc should not appear at top level.
                assert False, 'unexpected `level` %r' % (level,)

            gather_fixes(j, j['message'])

            if j['message'] == MSG_LIFETIME_BOUND:
                gather_lifetime_bounds(j)
            elif j['message'] == MSG_DERIVE_COPY:
                handle_derive_copy(j)

    # Convert suggested lifetime bounds to fixes. We have to group the bounds
    # first because there may be multiple suggested bounds for a single
    # declaration, in which case we want to generate a single `Fix` that adds
    # all of them at once.

    # Maps the `(file_path, line_number, start_byte, end_byte)` of the
    # declaration site to the set of new lifetimes to apply at that site.
    grouped_lifetime_bounds = {}
    for lb in lifetime_bounds:
        key = (lb.file_path, lb.line_number, lb.start_byte, lb.end_byte)
        if key not in grouped_lifetime_bounds:
            grouped_lifetime_bounds[key] = set()
        grouped_lifetime_bounds[key].add(lb.bound_lifetime)

    file_content = {}
    def read_file(file_path):
        if file_path not in file_content:
            file_content[file_path] = open(file_path, 'rb').read()
        return file_content[file_path]

    for key, bound_lifetimes in sorted(grouped_lifetime_bounds.items()):
        (file_path, line_number, start_byte, end_byte) = key
        content = read_file(file_path)
        decl_lifetime = content[start_byte : end_byte].decode('utf-8')
        bound_lifetimes = ' + '.join(bound_lifetimes)
        m = SPACE_COLON_RE.match(content, end_byte)
        if m is None:
            fix_end_byte = end_byte
            fix_new_text = '%s: %s' % (decl_lifetime, bound_lifetimes)
        else:
            space_colon = m.group().decode('utf-8')
            fix_end_byte = m.end()
            fix_new_text = '%s%s %s +' % (decl_lifetime, space_colon, bound_lifetimes)
        fixes.append(Fix(
            file_path=file_path,
            line_number=line_number,
            start_byte=start_byte,
            end_byte=fix_end_byte,
            new_text=fix_new_text,
            message=MSG_LIFETIME_BOUND,
        ))

    # Convert errors about `#[derive(Copy)]` to fixes.
    for rm in remove_derive_copies:
        # The error span points to the `Copy` token, but we actually want to
        # remove both `Copy` and the following comma, if one is present.
        #
        #   #[derive(Foo, Copy, Bar)]  ->  #[derive(Foo, Bar)]
        #   #[derive(Foo, Copy)]       ->  #[derive(Foo,)]
        #   #[derive(Copy, Bar)]       ->  #[derive(Bar)]
        #   #[derive(Copy)]            ->  #[derive()]
        #
        # Note that `#[derive()]` is legal, though ideally it should be removed
        # by a later cleanup pass.
        content = read_file(rm.file_path)
        m = SPACE_COMMA_RE.match(content, rm.end_byte)
        fix_end_byte = m.end() if m is not None else rm.end_byte
        fixes.append(Fix(
            file_path=rm.file_path,
            line_number=rm.line_number,
            start_byte=rm.start_byte,
            end_byte=fix_end_byte,
            new_text='',
            message=MSG_DERIVE_COPY,
        ))

    # Group fixes by file
    fixes_by_file = defaultdict(list)
    for fix in fixes:
        fixes_by_file[fix.file_path].append(fix)
    fixes_by_file = dict(fixes_by_file)
    for file_fixes in fixes_by_file.values():
        file_fixes.sort(key=lambda fix: fix.start_byte)

    # Apply fixes
    for file_path, file_fixes in sorted(fixes_by_file.items()):
        content = read_file(file_path)
        chunks = []
        pos = 0
        prev_fix = None
        for fix in file_fixes:
            old_text = content[fix.start_byte : fix.end_byte].decode('utf-8')
            desc = '%s:%d: %r -> %r (%s)' % (
                file_path, fix.line_number, old_text, fix.new_text, fix.message)
            if prev_fix is not None:
                if fix.start_byte < prev_fix.end_byte:
                    if fix.start_byte == prev_fix.start_byte \
                            and fix.end_byte == prev_fix.end_byte \
                            and fix.new_text == prev_fix.new_text:
                        # `fix` and `prev_fix` suggest the same change, so we
                        # don't need to apply `fix`.
                        continue
                    # We want to apply fix, but can't because it overlaps with
                    # `prev_fix`.
                    print('skipping due to overlap: %s' % desc)
                    continue

            prev_fix = fix

            print(desc)
            if fix.start_byte > pos:
                chunks.append(content[pos : fix.start_byte])
            chunks.append(fix.new_text.encode('utf-8'))
            pos = fix.end_byte

        if pos < len(content):
            chunks.append(content[pos:])

        new_content = b''.join(chunks)
        if not args.dry_run:
            open(file_path, 'wb').write(new_content)
            print('wrote to %r' % file_path)
        else:
            print('would write to %r' % file_path)


if __name__ == '__main__':
    main()
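The integration test added below exercises this end to end with rustc and python3. As a rough, hypothetical illustration of the same workflow outside the test harness (file names and paths here are placeholders, not part of this PR):

# Hypothetical driver mirroring the integration test: compile, capture the
# JSON diagnostics, then let auto_fix_errors.py rewrite the file in place.
import subprocess

def run_auto_fix(source, script='scripts/auto_fix_errors.py'):
    errors_json = source + '.errors.json'
    with open(errors_json, 'wb') as out:
        # rustc exits nonzero when there are errors; we only need its stderr.
        subprocess.run(
            ['rustc', '-A', 'warnings', '--crate-type', 'rlib',
             '--error-format', 'json', source],
            stderr=out,
        )
    # Apply the machine-applicable fixes to `source` in place.
    subprocess.run(['python3', script, errors_json], check=True)

run_auto_fix('example.rs')  # placeholder input file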
@@ -0,0 +1,82 @@
pub mod common;

use crate::common::{check_for_missing_tests_for, test_dir_for};
use std::fs::{self, File};
use std::process::Command;

#[test]
fn check_for_missing_tests() {
    check_for_missing_tests_for(file!());
}

fn test(file_name: &str) {
    let test_dir = test_dir_for(file!(), true);
    let path = test_dir.join(file_name);
    let fixed_path = path.with_extension("rs.fixed");
    let json_path = path.with_extension("json");
    let script_path = test_dir.join("../../scripts/auto_fix_errors.py");

    fs::copy(&path, &fixed_path).unwrap();

    // Run with `--error-format json` to produce JSON input for the script.
    let mut cmd = Command::new("rustc");
    cmd.arg("-A")
        .arg("warnings")
        .arg("--crate-name")
        .arg(path.file_stem().unwrap())
        .arg("--crate-type")
        .arg("rlib")
        .arg("--error-format")
        .arg("json")
        .arg(&fixed_path)
        .stderr(File::create(&json_path).unwrap());
    let status = cmd.status().unwrap();
    assert_eq!(
        status.code(),
        Some(1),
        "command {cmd:?} exited with code {status:?}"
    );

    // Run the script to fix errors.
    let mut cmd = Command::new("python3");
    cmd.arg(&script_path).arg(&json_path);
    let status = cmd.status().unwrap();
    assert!(
        status.success(),
        "command {cmd:?} exited with code {status:?}"
    );

    // There should be no more compile errors now.
    let mut cmd = Command::new("rustc");
    cmd.arg("-A")
        .arg("warnings")
        .arg("--crate-name")
        .arg(path.file_stem().unwrap())
        .arg("--crate-type")
        .arg("rlib")
        .arg(&fixed_path);
    let status = cmd.status().unwrap();
    assert!(
        status.success(),
        "command {cmd:?} exited with code {status:?}"
    );
}

macro_rules! define_test {
    ($name:ident) => {
        #[test]
        fn $name() {
            test(concat!(stringify!($name), ".rs"));
        }
    };
}

macro_rules! define_tests {
    ($($name:ident,)*) => {
        $(define_test! { $name })*
    }
}

define_tests! {
    derive_copy,
}
Review discussion:

Reviewer: nit: I think it's a lot more idiomatic to use `@dataclass`es instead of `namedtuple`.

spernsteiner: I originally used `@dataclass` here, but switched to `namedtuple` because CI was failing: the Ubuntu 18 runner has Python 3.6, but dataclasses were added in 3.7.

Reviewer: Oh, that's really annoying. Python 3.6 was EOL 3 years ago.
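For readers following the exchange above: the records in the script are plain `namedtuple`s to stay compatible with Python 3.6. A minimal sketch of the `@dataclass` alternative being discussed might look like the following (requires Python 3.7+, which is exactly the constraint that ruled it out; this is not part of the PR):

# Sketch of a @dataclass equivalent of the script's `Fix` record; shown only
# to illustrate the trade-off discussed above.
from dataclasses import dataclass

@dataclass
class Fix:
    file_path: str    # path to the file to modify
    line_number: int  # used for debug output only
    start_byte: int   # start of the byte range to replace
    end_byte: int     # end (exclusive) of the byte range to replace
    new_text: str     # replacement text (str, not bytes)
    message: str      # original error message, for debug output only

# Construction looks the same either way:
fix = Fix('src/lib.rs', 10, 120, 125, 'x as u32', 'mismatched types')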