Skip to content

Commit 5d38414

Browse files
committed
feat: add choice_info field to CSV export with ICL truncation
- Add choice_info and label fields to the InputOutputItem model
- Add a choice_info column to the CSV export containing the full JSON data
- Truncate icl_example_responses to the first example plus a "truncated" indicator
- Reduce CSV size from 130MB to 14MB for large datasets while preserving sample data
1 parent b9991b8 commit 5d38414

File tree

3 files changed

+72
-27
lines changed

3 files changed

+72
-27
lines changed

align_browser/csv_exporter.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,35 @@ def extract_justification(item: InputOutputItem) -> str:
6767
return action.get("justification", "")
6868

6969

70+
def extract_choice_info(item: InputOutputItem) -> str:
    """Serialize ``item.choice_info`` to compact JSON, truncating ICL examples.

    Under the ``"icl_example_responses"`` key (only when its value is a dict),
    every non-empty list is reduced to its first element, followed by the
    literal string ``"truncated"`` when further elements were dropped. Every
    other key and value is passed through unchanged.

    Args:
        item: Object whose ``choice_info`` attribute is a dict or ``None``.

    Returns:
        The JSON text with ``","`` / ``":"`` separators (no padding), or
        ``""`` when ``choice_info`` is ``None`` or empty.
    """
    if not item.choice_info:
        return ""

    import json

    def _truncate(examples):
        # Anything that is not a non-empty list is passed through untouched.
        if not isinstance(examples, list) or not examples:
            return examples
        kept = examples[:1]
        if len(examples) > 1:
            # Marker telling CSV readers that more examples existed.
            kept.append("truncated")
        return kept

    # Build fresh containers so the caller's choice_info is never mutated.
    filtered_choice_info = {}
    for key, value in item.choice_info.items():
        if key == "icl_example_responses" and isinstance(value, dict):
            filtered_choice_info[key] = {
                kdma: _truncate(examples) for kdma, examples in value.items()
            }
        else:
            filtered_choice_info[key] = value

    return json.dumps(filtered_choice_info, separators=(",", ":"))
97+
98+
7099
def get_decision_time(
71100
timing_data: Optional[Dict[str, Any]], item_index: int
72101
) -> Optional[float]:
@@ -156,6 +185,7 @@ def experiment_to_csv_rows(
156185
else "",
157186
"choice_text": extract_choice_text(item),
158187
"choice_kdma_association": extract_choice_kdma(item),
188+
"choice_info": extract_choice_info(item),
159189
"justification": extract_justification(item),
160190
"decision_time_s": get_decision_time(timing_data, idx),
161191
"score": get_score(scores_data, idx),
@@ -188,6 +218,7 @@ def write_experiments_to_csv(
188218
"state_description",
189219
"choice_text",
190220
"choice_kdma_association",
221+
"choice_info",
191222
"justification",
192223
"decision_time_s",
193224
"score",

align_browser/experiment_models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,8 @@ class InputOutputItem(BaseModel):
184184

185185
input: InputData
186186
output: Optional[Dict[str, Any]] = None
187+
choice_info: Optional[Dict[str, Any]] = None
188+
label: Optional[List[Dict[str, Any]]] = None
187189
original_index: int # Index in the original file
188190

189191

align_browser/test_link_cascade.py

Lines changed: 39 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,60 @@
11
"""Test that linked parameter cascading works properly"""
2+
23
from playwright.sync_api import Page
34

45

56
def test_linked_scenario_scene_cascade(page: Page, real_data_test_server: str):
67
"""Test that when scenario and scene are linked, changing scenario cascades scene properly."""
78
page.goto(real_data_test_server)
8-
9+
910
# Wait for page to load
1011
page.wait_for_selector(".comparison-table", timeout=10000)
1112
page.wait_for_function(
1213
"document.querySelectorAll('.table-scenario-select').length > 0", timeout=10000
1314
)
14-
15+
1516
# Add a second column for testing
1617
add_column_btn = page.locator("#add-column-btn")
1718
if add_column_btn.is_visible():
1819
add_column_btn.click()
1920
page.wait_for_timeout(1000)
20-
21+
2122
# Get initial scenario values from both columns
2223
scenario_selects = page.locator(".table-scenario-select")
2324
assert scenario_selects.count() >= 2, "Need at least 2 columns to test"
24-
25+
2526
initial_scenario_col1 = scenario_selects.nth(0).input_value()
2627
initial_scenario_col2 = scenario_selects.nth(1).input_value()
27-
28+
2829
# Get initial scene values - scene dropdowns use table-scenario-select class but in scene row
2930
scene_row = page.locator("tr.parameter-row[data-parameter='scene']")
3031
scene_selects = scene_row.locator(".table-scenario-select")
3132
initial_scene_col1 = scene_selects.nth(0).input_value()
3233
initial_scene_col2 = scene_selects.nth(1).input_value()
33-
34+
3435
print("Initial states:")
3536
print(f" Column 1: scenario={initial_scenario_col1}, scene={initial_scene_col1}")
3637
print(f" Column 2: scenario={initial_scenario_col2}, scene={initial_scene_col2}")
37-
38+
3839
# Link both scenario and scene parameters
3940
scenario_row = page.locator("tr.parameter-row[data-parameter='scenario']")
4041
scenario_link = scenario_row.locator(".link-toggle")
4142
scenario_link.click()
4243
page.wait_for_timeout(500)
43-
44-
scene_row = page.locator("tr.parameter-row[data-parameter='scene']")
44+
45+
scene_row = page.locator("tr.parameter-row[data-parameter='scene']")
4546
scene_link = scene_row.locator(".link-toggle")
4647
scene_link.click()
4748
page.wait_for_timeout(500)
48-
49+
4950
# Verify both are linked - the row itself gets the linked class
50-
assert "linked" in scenario_row.get_attribute("class"), "Scenario row should have 'linked' class"
51-
assert "linked" in scene_row.get_attribute("class"), "Scene row should have 'linked' class"
52-
51+
assert "linked" in scenario_row.get_attribute("class"), (
52+
"Scenario row should have 'linked' class"
53+
)
54+
assert "linked" in scene_row.get_attribute("class"), (
55+
"Scene row should have 'linked' class"
56+
)
57+
5358
# Change scenario in first column to a different value
5459
# Find a different scenario option
5560
scenario_select_col1 = scenario_selects.nth(0)
@@ -60,39 +65,46 @@ def test_linked_scenario_scene_cascade(page: Page, real_data_test_server: str):
6065
if value and value != initial_scenario_col1:
6166
new_scenario = value
6267
break
63-
68+
6469
assert new_scenario is not None, "Could not find a different scenario to switch to"
65-
70+
6671
print(f"\nChanging scenario from {initial_scenario_col1} to {new_scenario}")
6772
scenario_select_col1.select_option(new_scenario)
6873
page.wait_for_timeout(2000) # Wait for cascading and data reload
69-
74+
7075
# Check the results after changing scenario
7176
final_scenario_col1 = scenario_selects.nth(0).input_value()
7277
final_scenario_col2 = scenario_selects.nth(1).input_value()
7378
final_scene_col1 = scene_selects.nth(0).input_value()
7479
final_scene_col2 = scene_selects.nth(1).input_value()
75-
80+
7681
print("\nFinal states:")
7782
print(f" Column 1: scenario={final_scenario_col1}, scene={final_scene_col1}")
7883
print(f" Column 2: scenario={final_scenario_col2}, scene={final_scene_col2}")
79-
84+
8085
# Verify scenarios are synced (both should have the new scenario)
8186
assert final_scenario_col1 == new_scenario, "Column 1 should have the new scenario"
82-
assert final_scenario_col2 == new_scenario, "Column 2 should also have the new scenario (linked)"
83-
87+
assert final_scenario_col2 == new_scenario, (
88+
"Column 2 should also have the new scenario (linked)"
89+
)
90+
8491
# Verify scenes are synced AND valid for the new scenario
85-
assert final_scene_col1 == final_scene_col2, "Scenes should be synced across columns"
86-
92+
assert final_scene_col1 == final_scene_col2, (
93+
"Scenes should be synced across columns"
94+
)
95+
8796
# Check that we don't have "No data available" messages
8897
no_data_messages = page.locator(".no-data-message").all()
8998
for msg in no_data_messages:
9099
if msg.is_visible():
91100
parent_cell = msg.locator("..").first
92-
print(f"WARNING: Found 'No data available' message in cell: {parent_cell.inner_text()}")
93-
101+
print(
102+
f"WARNING: Found 'No data available' message in cell: {parent_cell.inner_text()}"
103+
)
104+
94105
# The key test: scenes should have cascaded to valid values, not preserved invalid ones
95-
assert len([msg for msg in no_data_messages if msg.is_visible()]) == 0, \
106+
assert len([msg for msg in no_data_messages if msg.is_visible()]) == 0, (
96107
"Should not have any 'No data available' messages after linked cascade"
97-
98-
print("\nTest passed! Linked parameters cascade properly.")
108+
)
109+
110+
print("\nTest passed! Linked parameters cascade properly.")

0 commit comments

Comments
 (0)