Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions benchmarks/utils/output_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""Utilities for processing and enriching output files."""

from __future__ import annotations

import json
from pathlib import Path


def add_resolve_rate_to_predictions(
    predictions_path: str | Path,
    report_path: str | Path,
) -> None:
    """
    Add resolution status from a report to each prediction in a JSONL file.

    For each prediction in the predictions file, if the instance_id is found in
    the report's resolved_ids or unresolved_ids, a "report" field is added with
    {"resolved": true/false}. Predictions not found in either list are left unchanged.

    The predictions file is rewritten in place; blank lines are dropped.

    Args:
        predictions_path: Path to the predictions JSONL file. Each line should be
            a JSON object with an "instance_id" field.
        report_path: Path to the report JSON file containing "resolved_ids" and
            "unresolved_ids" lists.
    """
    predictions_path = Path(predictions_path)
    report_path = Path(report_path)

    # Load the report; missing keys are treated as empty lists so a partial
    # report never raises. Sets give O(1) membership tests per prediction.
    report = json.loads(report_path.read_text(encoding="utf-8"))
    resolved_ids = set(report.get("resolved_ids", []))
    unresolved_ids = set(report.get("unresolved_ids", []))

    # Single pass: parse each non-blank line and annotate it immediately
    # (no need for a separate "updated" list — annotation is in place).
    predictions = []
    with open(predictions_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            prediction = json.loads(line)
            instance_id = prediction.get("instance_id")
            if instance_id in resolved_ids:
                prediction["report"] = {"resolved": True}
            elif instance_id in unresolved_ids:
                prediction["report"] = {"resolved": False}
            # If not in either list, leave unchanged (no report field added).
            predictions.append(prediction)

    # Write back to the same file, one JSON object per line.
    with open(predictions_path, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(p) + "\n" for p in predictions)
244 changes: 244 additions & 0 deletions tests/test_output_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
"""Tests for output_utils module."""

import json
import tempfile
from pathlib import Path

import pytest

from benchmarks.utils.output_utils import add_resolve_rate_to_predictions


@pytest.fixture
def sample_predictions():
    """Sample predictions data."""
    rows = [
        ("fasterxml/jackson-databind:pr-4469", "patch1"),
        ("elastic/logstash:pr-15241", "patch2"),
        ("fasterxml/jackson-databind:pr-1234", "patch3"),
        ("fasterxml/jackson-databind:pr-2036", "patch4"),
    ]
    return [
        {"instance_id": instance_id, "model_patch": patch}
        for instance_id, patch in rows
    ]


@pytest.fixture
def sample_report():
    """Sample report data."""
    resolved = [
        "fasterxml/jackson-databind:pr-2036",
        "fasterxml/jackson-core:pr-1016",
        "fasterxml/jackson-databind:pr-4228",
    ]
    unresolved = [
        "fasterxml/jackson-core:pr-964",
        "fasterxml/jackson-databind:pr-4469",
        "elastic/logstash:pr-13997",
        "elastic/logstash:pr-16079",
        "elastic/logstash:pr-15241",
        "elastic/logstash:pr-14970",
    ]
    return {
        "total_instances": 9,
        "submitted_instances": 9,
        "completed_instances": 9,
        "resolved_instances": 3,
        "unresolved_instances": 6,
        "resolved_ids": resolved,
        "unresolved_ids": unresolved,
    }


def test_add_resolve_rate_to_predictions(sample_predictions, sample_report):
    """Test that resolution status is correctly added to predictions."""
    with tempfile.TemporaryDirectory() as tmpdir:
        predictions_path = Path(tmpdir) / "predictions.jsonl"
        report_path = Path(tmpdir) / "report.json"

        # Seed the input files.
        predictions_path.write_text(
            "".join(json.dumps(pred) + "\n" for pred in sample_predictions)
        )
        report_path.write_text(json.dumps(sample_report))

        # Run the function
        add_resolve_rate_to_predictions(predictions_path, report_path)

        # Index the rewritten predictions by instance_id for direct lookup.
        by_id = {}
        with open(predictions_path, "r") as f:
            for line in f:
                record = json.loads(line)
                by_id[record["instance_id"]] = record

        # Verify results
        assert len(by_id) == 4

        # pr-4469 should be unresolved
        assert by_id["fasterxml/jackson-databind:pr-4469"]["report"] == {
            "resolved": False
        }

        # pr-15241 should be unresolved
        assert by_id["elastic/logstash:pr-15241"]["report"] == {"resolved": False}

        # pr-1234 should have no report (not in either list)
        assert "report" not in by_id["fasterxml/jackson-databind:pr-1234"]

        # pr-2036 should be resolved
        assert by_id["fasterxml/jackson-databind:pr-2036"]["report"] == {
            "resolved": True
        }


def test_add_resolve_rate_preserves_other_fields(sample_report):
    """Test that other fields in predictions are preserved."""
    with tempfile.TemporaryDirectory() as tmpdir:
        predictions_path = Path(tmpdir) / "predictions.jsonl"
        report_path = Path(tmpdir) / "report.json"

        # A prediction carrying extra (and nested) payload beyond instance_id.
        original = {
            "instance_id": "fasterxml/jackson-databind:pr-2036",
            "model_patch": "some patch",
            "extra_field": "extra_value",
            "nested": {"key": "value"},
        }
        predictions_path.write_text(json.dumps(original) + "\n")
        report_path.write_text(json.dumps(sample_report))

        # Run the function
        add_resolve_rate_to_predictions(predictions_path, report_path)

        # Read the first (only) rewritten line back.
        with open(predictions_path, "r") as f:
            updated = json.loads(f.readline())

        # Every original field survives and the report field is attached.
        assert updated["instance_id"] == "fasterxml/jackson-databind:pr-2036"
        assert updated["model_patch"] == "some patch"
        assert updated["extra_field"] == "extra_value"
        assert updated["nested"] == {"key": "value"}
        assert updated["report"] == {"resolved": True}


def test_add_resolve_rate_empty_predictions(sample_report):
    """Test handling of empty predictions file."""
    with tempfile.TemporaryDirectory() as tmpdir:
        predictions_path = Path(tmpdir) / "predictions.jsonl"
        report_path = Path(tmpdir) / "report.json"

        # Empty predictions file alongside a normal report.
        predictions_path.write_text("")
        report_path.write_text(json.dumps(sample_report))

        # Run the function
        add_resolve_rate_to_predictions(predictions_path, report_path)

        # The file should remain empty after processing.
        assert predictions_path.read_text() == ""


def test_add_resolve_rate_empty_report_lists():
    """Test handling of report with empty resolved/unresolved lists."""
    with tempfile.TemporaryDirectory() as tmpdir:
        predictions_path = Path(tmpdir) / "predictions.jsonl"
        report_path = Path(tmpdir) / "report.json"

        # One prediction, and a report whose id lists are both empty.
        predictions_path.write_text(
            json.dumps({"instance_id": "test-instance", "data": "test"}) + "\n"
        )
        report_path.write_text(json.dumps({"resolved_ids": [], "unresolved_ids": []}))

        # Run the function
        add_resolve_rate_to_predictions(predictions_path, report_path)

        # Read the rewritten prediction back.
        with open(predictions_path, "r") as f:
            updated = json.loads(f.readline())

        # No report should be added since instance is not in either list
        assert "report" not in updated


def test_add_resolve_rate_with_string_paths(sample_predictions, sample_report):
    """Test that function works with string paths."""
    with tempfile.TemporaryDirectory() as tmpdir:
        # Deliberately pass plain strings rather than Path objects.
        predictions_path = str(Path(tmpdir) / "predictions.jsonl")
        report_path = str(Path(tmpdir) / "report.json")

        # Write predictions
        with open(predictions_path, "w") as f:
            f.writelines(json.dumps(pred) + "\n" for pred in sample_predictions)

        # Write report
        with open(report_path, "w") as f:
            f.write(json.dumps(sample_report))

        # Run the function with string paths
        add_resolve_rate_to_predictions(predictions_path, report_path)

        # Read the first rewritten prediction back.
        with open(predictions_path, "r") as f:
            first = json.loads(f.readline())

        # The first fixture entry is in unresolved_ids, so it gets a report.
        assert first["report"] == {"resolved": False}


def test_add_resolve_rate_missing_keys_in_report():
    """Test handling of report missing resolved_ids or unresolved_ids keys."""
    with tempfile.TemporaryDirectory() as tmpdir:
        predictions_path = Path(tmpdir) / "predictions.jsonl"
        report_path = Path(tmpdir) / "report.json"

        # One prediction, and a report with neither id list present.
        predictions_path.write_text(
            json.dumps({"instance_id": "test-instance", "data": "test"}) + "\n"
        )
        report_path.write_text(json.dumps({"total_instances": 1}))

        # Run the function - should not raise
        add_resolve_rate_to_predictions(predictions_path, report_path)

        # Read the rewritten prediction back.
        with open(predictions_path, "r") as f:
            updated = json.loads(f.readline())

        # No report should be added
        assert "report" not in updated