Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
2d8d501
Add Rust procedural generation support for operations
devin-ai-integration[bot] Oct 19, 2025
fe15f52
Add comprehensive Rust procedural modifiers (control flow and remove)
devin-ai-integration[bot] Oct 19, 2025
51d7738
Reduce operator list duplication in Rust operations
devin-ai-integration[bot] Oct 21, 2025
dbfdd05
Fix dereference/multiplication confusion in Rust flip operators
devin-ai-integration[bot] Oct 21, 2025
086c9b0
Add comprehensive unit tests for Rust procedural bug generation
devin-ai-integration[bot] Oct 21, 2025
0e8504d
Refactor Rust unit tests to use parametrized format with concrete exa…
devin-ai-integration[bot] Oct 21, 2025
66cc039
Update Rust tests to use modify() method instead of private methods
devin-ai-integration[bot] Oct 22, 2025
17b3f5f
Add scripts for generating and analyzing procedural modification bugs
AlienKevin Oct 27, 2025
cd02baa
Run procmod_bugs.py on all repos for a given language
AlienKevin Oct 27, 2025
77b70c0
Reweight the modifier likelihoods for Rust to boost bug candidates
AlienKevin Oct 27, 2025
843c874
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 27, 2025
5a17d67
Increase robustness of procmod_bugs.py and avoid sys.exit(1)
AlienKevin Oct 27, 2025
fce7a23
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 27, 2025
00e88ce
Analyze all validation results
AlienKevin Oct 27, 2025
98e62e1
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 27, 2025
f814224
Added plotting support to analyze_procmod_bugs.py
AlienKevin Oct 31, 2025
b16af43
Default to interleaving modifier types in procmod_bugs.py so timeout …
AlienKevin Oct 31, 2025
5ab4179
Set modifier likelihood to 0.25 for Rust for fairness
AlienKevin Oct 31, 2025
6c3a09e
Filter out modifiers with zero pass rate and always plot value labels…
AlienKevin Oct 31, 2025
b85965b
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 31, 2025
7f99d1d
Exclude timeout tasks in Validated; Support --show-generated-bugs option
AlienKevin Oct 31, 2025
f073954
Support --show-timeout-bugs
AlienKevin Oct 31, 2025
44e01ac
Identified an extreme corner case where bug_gen produces duplicate in…
AlienKevin Oct 31, 2025
5cfc839
Fix numeric label on top of timeout bar is enabled
AlienKevin Oct 31, 2025
e012d06
Support plotting per-repo bug distribution
AlienKevin Oct 31, 2025
7459b13
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 31, 2025
b25caf0
Use lighter grey for timeout bars in per-repo case as well
AlienKevin Oct 31, 2025
b40d57a
Plot correlations between repo size/star with number of tests
AlienKevin Oct 31, 2025
6ec9c75
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 83 additions & 1 deletion swesmith/bug_gen/adapters/rust.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,95 @@
import tree_sitter_rust as tsrs
import warnings

from swesmith.constants import TODO_REWRITE, CodeEntity
from swesmith.constants import TODO_REWRITE, CodeEntity, CodeProperty
from tree_sitter import Language, Parser, Query, QueryCursor

RUST_LANGUAGE = Language(tsrs.language())


class RustEntity(CodeEntity):
def _analyze_properties(self):
    """Populate ``self._tags`` with the properties found in this entity.

    The entity's own node is tagged as a function when it is a
    ``function_item``; the whole subtree is then scanned for the
    remaining properties.
    """
    root = self.node
    is_function = root.type == "function_item"
    if is_function:
        self._tags.add(CodeProperty.IS_FUNCTION)

    # Everything else is discovered by walking the subtree.
    self._walk_for_properties(root)

def _walk_for_properties(self, n):
    """Run every property check on ``n``, then recurse into its children."""
    checks = (
        self._check_control_flow,
        self._check_operations,
        self._check_expressions,
    )
    for check in checks:
        check(n)

    # Pre-order traversal: a node is inspected before its descendants.
    for descendant in n.children:
        self._walk_for_properties(descendant)

def _check_control_flow(self, n):
    """Tag loops, ``if`` / ``if``-``else`` and ``match`` found at node ``n``."""
    kind = n.type
    if kind in ("for_expression", "while_expression", "loop_expression"):
        self._tags.add(CodeProperty.HAS_LOOP)
    elif kind == "if_expression":
        self._tags.add(CodeProperty.HAS_IF)
        # An ``else_clause`` directly under the ``if`` marks an if/else pair.
        if any(part.type == "else_clause" for part in n.children):
            self._tags.add(CodeProperty.HAS_IF_ELSE)
    elif kind == "match_expression":
        self._tags.add(CodeProperty.HAS_SWITCH)

def _check_operations(self, n):
    """Tag indexing, calls, returns and declarations found at node ``n``."""
    kind = n.type
    if kind == "index_expression":
        self._tags.add(CodeProperty.HAS_LIST_INDEXING)
    elif kind == "call_expression":
        self._tags.add(CodeProperty.HAS_FUNCTION_CALL)
    elif kind == "return_expression":
        self._tags.add(CodeProperty.HAS_RETURN)
    elif kind in ("let_declaration", "const_item", "static_item"):
        # ``let``, ``const`` and ``static`` are all treated as assignments.
        self._tags.add(CodeProperty.HAS_ASSIGNMENT)

def _check_expressions(self, n):
    """Tag binary expressions, unary expressions and closures at node ``n``."""
    kind = n.type
    if kind == "binary_expression":
        self._tags.add(CodeProperty.HAS_BINARY_OP)
    elif kind == "unary_expression":
        self._tags.add(CodeProperty.HAS_UNARY_OP)
    elif kind == "closure_expression":
        # Rust closures are reported under the lambda property.
        self._tags.add(CodeProperty.HAS_LAMBDA)

@property
def complexity(self) -> int:
    """Estimate the cyclomatic complexity of this Rust entity.

    The score starts at 1 and grows by one for every decision point found
    in the subtree of ``self.node``: branching constructs (``if``,
    ``else``, loops, ``match`` arms) and comparison / short-circuit
    operator tokens.

    ``<`` and ``>`` are only counted when their parent is a
    ``binary_expression``. The same token types also delimit generic
    parameters and arguments (``Vec<T>``, ``fn f<T>()``), which are not
    decision points and must not inflate the score.

    Returns:
        The complexity score, always >= 1.
    """
    angle_tokens = ("<", ">")
    decision_types = (
        "!=",
        "&&",
        "<=",
        "==",
        ">=",
        "||",
        "match_arm",
        "else_clause",
        "for_expression",
        "while_expression",
        "loop_expression",
        "if_expression",
    )

    def walk(node, parent_type):
        if node.type in angle_tokens:
            # Comparison only inside a binary expression; elsewhere the
            # token is a generic bracket.
            score = 1 if parent_type == "binary_expression" else 0
        else:
            score = 1 if node.type in decision_types else 0

        for child in node.children:
            score += walk(child, node.type)

        return score

    return 1 + walk(self.node, None)

@property
def name(self) -> str:
func_query = Query(RUST_LANGUAGE, "(function_item name: (identifier) @name)")
Expand Down
2 changes: 2 additions & 0 deletions swesmith/bug_gen/procedural/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@
# For backward compatibility, expose Python-specific classes
from swesmith.bug_gen.procedural.golang import MODIFIERS_GOLANG
from swesmith.bug_gen.procedural.python import MODIFIERS_PYTHON
from swesmith.bug_gen.procedural.rust import MODIFIERS_RUST

# Maps a source-file extension (including the leading dot) to the list of
# procedural modifiers registered for that language.
MAP_EXT_TO_MODIFIERS = {
".go": MODIFIERS_GOLANG,
".py": MODIFIERS_PYTHON,
".rs": MODIFIERS_RUST,
}
30 changes: 30 additions & 0 deletions swesmith/bug_gen/procedural/rust/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from swesmith.bug_gen.procedural.base import ProceduralModifier
from swesmith.bug_gen.procedural.rust.control_flow import (
ControlIfElseInvertModifier,
ControlShuffleLinesModifier,
)
from swesmith.bug_gen.procedural.rust.operations import (
OperationBreakChainsModifier,
OperationChangeConstantsModifier,
OperationChangeModifier,
OperationFlipOperatorModifier,
OperationSwapOperandsModifier,
)
from swesmith.bug_gen.procedural.rust.remove import (
RemoveAssignModifier,
RemoveConditionalModifier,
RemoveLoopModifier,
)

# All procedural modifiers available for Rust, grouped by family
# (control flow, removal, operations). Each instance is created with its own
# `likelihood`.
# NOTE(review): `likelihood` is presumably the probability behind
# ProceduralModifier.flip() — confirm against the base class.
MODIFIERS_RUST: list[ProceduralModifier] = [
ControlIfElseInvertModifier(likelihood=0.75),
ControlShuffleLinesModifier(likelihood=0.75),
RemoveAssignModifier(likelihood=0.25),
RemoveConditionalModifier(likelihood=0.25),
RemoveLoopModifier(likelihood=0.25),
OperationBreakChainsModifier(likelihood=0.4),
OperationChangeConstantsModifier(likelihood=0.4),
OperationChangeModifier(likelihood=0.4),
OperationFlipOperatorModifier(likelihood=0.4),
OperationSwapOperandsModifier(likelihood=0.4),
]
6 changes: 6 additions & 0 deletions swesmith/bug_gen/procedural/rust/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from abc import ABC
from swesmith.bug_gen.procedural.base import ProceduralModifier


class RustProceduralModifier(ProceduralModifier, ABC):
"""Base class for Rust-specific procedural modifications.

Adds no behavior of its own: it is an abstract marker layered on top of
ProceduralModifier that gives the Rust modifiers a common parent.
"""
202 changes: 202 additions & 0 deletions swesmith/bug_gen/procedural/rust/control_flow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
import tree_sitter_rust as tsrs

from swesmith.bug_gen.procedural.base import CommonPMs
from swesmith.bug_gen.procedural.rust.base import RustProceduralModifier
from swesmith.constants import BugRewrite, CodeEntity
from tree_sitter import Language, Parser

RUST_LANGUAGE = Language(tsrs.language())


class ControlIfElseInvertModifier(RustProceduralModifier):
    """Swap the ``if`` and ``else`` bodies of Rust ``if``/``else`` expressions.

    The condition is left untouched, so the rewritten code runs the former
    ``else`` block when the condition holds and vice versa.
    """

    explanation: str = CommonPMs.CONTROL_IF_ELSE_INVERT.explanation
    name: str = CommonPMs.CONTROL_IF_ELSE_INVERT.name
    conditions: list = CommonPMs.CONTROL_IF_ELSE_INVERT.conditions
    min_complexity: int = 5

    def modify(self, code_entity: CodeEntity) -> BugRewrite:
        """Apply if-else inversion to the Rust code.

        Args:
            code_entity: Entity whose ``src_code`` is parsed and rewritten.

        Returns:
            A ``BugRewrite`` holding the modified source, or ``None`` when
            the initial ``flip()`` declines or no attempt changed the source.
        """
        if not self.flip():
            return None

        parser = Parser(RUST_LANGUAGE)
        tree = parser.parse(bytes(code_entity.src_code, "utf8"))

        # Candidate selection is randomized, so retry until something changes.
        for _ in range(self.max_attempts):
            modified_code = self._invert_if_else_statements(
                code_entity.src_code, tree.root_node
            )
            if modified_code != code_entity.src_code:
                return BugRewrite(
                    rewrite=modified_code,
                    explanation=self.explanation,
                    strategy=self.name,
                )

        return None

    def _invert_if_else_statements(self, source_code: str, node) -> str:
        """Find if/else expressions under ``node`` and swap their bodies.

        Args:
            source_code: The text that ``node`` was parsed from.
            node: Root tree-sitter node to search.

        Returns:
            The rewritten source, or ``source_code`` unchanged when no
            if/else expression was selected.
        """
        # tree-sitter reports UTF-8 byte offsets, so all slicing is done on
        # the encoded source; slicing the str would drift on non-ASCII text.
        source_bytes = source_code.encode("utf8")
        condition_types = (
            "binary_expression",
            "identifier",
            "call_expression",
            "field_expression",
            "unary_expression",
        )
        modifications = []

        def collect_if_statements(n):
            if n.type == "if_expression":
                if_condition = None
                if_body = None
                else_body = None

                for child in n.children:
                    if child.type == "if":
                        continue
                    if if_condition is None and child.type in condition_types:
                        if_condition = child
                    elif child.type == "block" and if_body is None:
                        if_body = child
                    elif child.type == "else_clause":
                        # Only a plain ``else { ... }`` qualifies; an
                        # ``else if`` has no direct block child.
                        else_body = next(
                            (c for c in child.children if c.type == "block"),
                            None,
                        )
                        break

                if (
                    if_condition is not None
                    and if_body is not None
                    and else_body is not None
                    and self.flip()
                ):
                    modifications.append((n, if_body, else_body))
                    # Do not descend: an edit nested inside this span would
                    # overlap it and be spliced with stale offsets.
                    return

            for child in n.children:
                collect_if_statements(child)

        collect_if_statements(node)

        if not modifications:
            return source_code

        modified = source_bytes
        # Apply back to front so earlier offsets stay valid after each splice.
        for if_node, if_body, else_body in reversed(modifications):
            prefix = source_bytes[if_node.start_byte : if_body.start_byte].strip()
            if_body_text = source_bytes[if_body.start_byte : if_body.end_byte]
            else_body_text = source_bytes[else_body.start_byte : else_body.end_byte]

            new_if_else = prefix + b" " + else_body_text + b" else " + if_body_text

            modified = (
                modified[: if_node.start_byte]
                + new_if_else
                + modified[if_node.end_byte :]
            )

        return modified.decode("utf8")


class ControlShuffleLinesModifier(RustProceduralModifier):
    """Reorder the top-level statements inside Rust function bodies."""

    explanation: str = CommonPMs.CONTROL_SHUFFLE_LINES.explanation
    name: str = CommonPMs.CONTROL_SHUFFLE_LINES.name
    conditions: list = CommonPMs.CONTROL_SHUFFLE_LINES.conditions
    max_complexity: int = 10

    def modify(self, code_entity: CodeEntity) -> BugRewrite:
        """Apply line shuffling to the Rust function body.

        Args:
            code_entity: Entity whose ``src_code`` is parsed and rewritten.

        Returns:
            A ``BugRewrite`` holding the shuffled source, or ``None`` when
            the source is unchanged.
        """
        parser = Parser(RUST_LANGUAGE)
        tree = parser.parse(bytes(code_entity.src_code, "utf8"))

        modified_code = self._shuffle_function_statements(
            code_entity.src_code, tree.root_node
        )

        if modified_code == code_entity.src_code:
            return None

        return BugRewrite(
            rewrite=modified_code,
            explanation=self.explanation,
            strategy=self.name,
        )

    def _shuffle_function_statements(self, source_code: str, node) -> str:
        """Shuffle the statements of each function body found under ``node``.

        Args:
            source_code: The text that ``node`` was parsed from.
            node: Root tree-sitter node to search.

        Returns:
            The rewritten source, or ``source_code`` unchanged when no
            function body has at least two children to reorder.
        """
        # tree-sitter reports UTF-8 byte offsets, so all slicing is done on
        # the encoded source; slicing the str would drift on non-ASCII text.
        source_bytes = source_code.encode("utf8")
        modifications = []

        def collect_function_declarations(n):
            if n.type == "function_item":
                body_block = next(
                    (c for c in n.children if c.type == "block"), None
                )
                if body_block is not None:
                    statements = [
                        c for c in body_block.children if c.type not in ("{", "}")
                    ]
                    if len(statements) >= 2:
                        modifications.append(statements)
                        # Do not descend: a nested function would yield an
                        # overlapping edit spliced with stale offsets.
                        return

            for child in n.children:
                collect_function_declarations(child)

        collect_function_declarations(node)

        if not modifications:
            return source_code

        modified = source_bytes
        # Apply back to front so earlier offsets stay valid after each splice.
        for statements in reversed(modifications):
            order = list(range(len(statements)))
            self.rand.shuffle(order)

            # A shuffle can return the identity; force a visible change.
            if order == list(range(len(statements))):
                order[0], order[1] = order[1], order[0]

            texts = [source_bytes[s.start_byte : s.end_byte] for s in statements]

            first_stmt_start = statements[0].start_byte
            last_stmt_end = statements[-1].end_byte

            # Reuse the first statement's indentation. Keep only the leading
            # whitespace of its line, so code sharing that line
            # (``fn f() { a; b }``) is not duplicated between statements.
            line_start = source_bytes.rfind(b"\n", 0, first_stmt_start) + 1
            line_prefix = source_bytes[line_start:first_stmt_start]
            indent = line_prefix[: len(line_prefix) - len(line_prefix.lstrip())]

            new_content = (b"\n" + indent).join(texts[i] for i in order)

            modified = (
                modified[:first_stmt_start] + new_content + modified[last_stmt_end:]
            )

        return modified.decode("utf8")
Loading