Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 31 additions & 3 deletions src/minisweagent/agents/interactive_textual.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from textual.screen import Screen
from textual.widgets import Footer, Header, Input, Static, TextArea

from minisweagent.agents.default import AgentConfig, DefaultAgent, NonTerminatingException, Submitted
from minisweagent.agents.default import AgentConfig, DefaultAgent, LimitsExceeded, NonTerminatingException, Submitted


@dataclass
Expand Down Expand Up @@ -57,7 +57,33 @@ def query(self) -> dict:
self.add_message("assistant", msg["content"])
return msg
self._current_action_from_human = False
return super().query()
try:
return super().query()
except LimitsExceeded:
# Show current limits and prompt for new ones (matching interactive.py behavior)
# Show notification with the limits info
self.app.notify(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we move that into the prompt, i.e., the first prompt is

"Some limit was exceeded. Current number of steps xx, current limit yy. Enter new limit (0 for unconstrained)"

I also expect that the step limit is probably always 0, so it might be good to test for "step limit == 0" and, in that case, skip asking for a new step limit, because that is not the limit that was exceeded.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, great, let's move that to the prompt, leave the placeholder as it was, and remove the notification for simplicity. Yes, it makes sense that the step limit probably is always 0, so let's add an if statement for that.

f"Limits exceeded. Limit: ${self.config.cost_limit:.2f}. "
f"Current: {self.model.n_calls} steps, ${self.model.cost:.2f}.",
severity="warning",
timeout=10,
)

# First prompt: new step limit (short message near input)
new_step_limit = self.app.input_container.request_input("New step limit:", placeholder="0 for unlimited")
try:
self.config.step_limit = int(new_step_limit) if new_step_limit.strip() else 0
except ValueError:
self.config.step_limit = 0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is dangerous; we can't do that. Users would think they have set a limit when they haven't. Honestly, I'd rather have the application crash.

Alternatively, we could put it in a small while loop that only exits once the input has been parsed successfully.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right, let's put a while loop.


# Second prompt: new cost limit (short message near input)
new_cost_limit = self.app.input_container.request_input("New cost limit:", placeholder="0.0 for unlimited")
try:
self.config.cost_limit = float(new_cost_limit) if new_cost_limit.strip() else 0.0
except ValueError:
self.config.cost_limit = 0.0

return super().query()

def run(self, task: str, **kwargs) -> tuple[str, str]:
try:
Expand Down Expand Up @@ -162,12 +188,14 @@ def on_focus(self) -> None:
else:
self._single_input.focus()

def request_input(self, prompt: str) -> str:
def request_input(self, prompt: str, placeholder: str | None = None) -> str:
"""Request input from user. Returns input text (empty string if confirmed without reason)."""
self._input_event.clear()
self._input_result = None
self.pending_prompt = prompt
self._header_display.update(prompt)
if placeholder:
self._single_input.placeholder = placeholder
self._update_mode_display()
self._app.call_from_thread(self._app.update_content)
self._input_event.wait()
Expand Down
94 changes: 80 additions & 14 deletions tests/agents/test_interactive_textual.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import asyncio
import logging
import threading
from unittest.mock import Mock
from unittest.mock import ANY, Mock

import pytest

Expand Down Expand Up @@ -331,9 +331,9 @@ async def test_confirmation_rejection_with_message():


async def test_agent_with_cost_limit():
"""Test agent behavior when cost limit is exceeded."""
"""Test agent behavior when cost limit is exceeded - now prompts for new limits."""
app = TextualAgent(
model=DeterministicModel(outputs=["Response 1", "Response 2"]),
model=DeterministicModel(outputs=["Response 1", "Response 2", "Response 3"]),
env=LocalEnvironment(),
mode="yolo",
cost_limit=0.01, # Very low limit
Expand All @@ -343,39 +343,105 @@ async def test_agent_with_cost_limit():

async with app.run_test() as pilot:
threading.Thread(target=lambda: app.agent.run("Cost limit test"), daemon=True).start()

for _ in range(50):
await pilot.pause(0.1)
if app.agent_state == "STOPPED":
if app.agent_state == "AWAITING_INPUT" and app.input_container.pending_prompt == "New step limit:":
break
else:
raise AssertionError("Agent did not stop within 5 seconds")
raise AssertionError("Agent did not request new step limit within 5 seconds")

# Should eventually stop due to cost limit and notify with the exit status
assert app.agent_state == "STOPPED"
app.notify.assert_called_with("Agent finished with status: LimitsExceeded")
app.notify.assert_any_call(ANY, severity="warning", timeout=10)

await pilot.press("0")
await pilot.press("enter")
await pilot.pause(0.2)

async def test_agent_with_step_limit():
"""Test agent behavior when step limit is exceeded."""
assert app.input_container.pending_prompt == "New cost limit:"

await pilot.press("1", "0")
await pilot.press("enter")
await pilot.pause(0.2)

assert app.agent_state == "RUNNING" or app.agent_state == "AWAITING_INPUT"


async def test_agent_with_cost_limit_invalid_input():
"""Test ValueError handling when invalid input is provided for limits.

Non-numeric text is typed at both the "New step limit:" and the
"New cost limit:" prompts; the test then expects ``step_limit == 0`` and
``cost_limit == 0.0`` on the agent config.
"""
# NOTE(review): indentation is not preserved in this view, so the nesting of
# the statements below has to be inferred — confirm against the real file.
# The app is configured with a very small cost limit (0.01); presumably the
# DeterministicModel exceeds it on an early query — TODO confirm.
app = TextualAgent(
model=DeterministicModel(outputs=["Response 1", "Response 2", "Response 3"]),
env=LocalEnvironment(),
mode="yolo",
cost_limit=0.01,
)

async with app.run_test() as pilot:
# The agent runs on a daemon thread while this coroutine drives the app via `pilot`.
threading.Thread(target=lambda: app.agent.run("Invalid input test"), daemon=True).start()

# Poll up to 50 times, pausing 0.1 s each, until the step-limit prompt is pending.
for _ in range(50):
await pilot.pause(0.1)
if app.agent_state == "AWAITING_INPUT" and app.input_container.pending_prompt == "New step limit:":
break
# Presumably the `else` of the `for` loop (reached only when no `break` happened) — confirm.
else:
raise AssertionError("Agent did not request new step limit within 5 seconds")

# Answer the step-limit prompt with text that is not a number.
await type_text(pilot, "invalid_text")
await pilot.press("enter")
await pilot.pause(0.2)

# The cost-limit prompt is expected to follow.
assert app.input_container.pending_prompt == "New cost limit:"

# Answer the cost-limit prompt with text that is not a number as well.
await type_text(pilot, "also_invalid")
await pilot.press("enter")
await pilot.pause(0.2)

# After two non-numeric answers, both limits are expected to be zero.
assert app.agent.config.step_limit == 0
assert app.agent.config.cost_limit == 0.0


async def test_agent_with_step_limit():
"""Test agent behavior when step limit is exceeded - now prompts for new limits."""
app = TextualAgent(
model=DeterministicModel(outputs=["Response 1", "Response 2", "Response 3", "Response 4"]),
env=LocalEnvironment(),
mode="yolo",
step_limit=2,
)

app.notify = Mock()
async with app.run_test() as pilot:
# Start the agent with the task
threading.Thread(target=lambda: app.agent.run("Step limit test"), daemon=True).start()

# Wait for agent to hit step limit and request input for new limits
for _ in range(50):
await pilot.pause(0.1)
if app.agent_state == "STOPPED":
if app.agent_state == "AWAITING_INPUT" and app.input_container.pending_prompt == "New step limit:":
break
else:
raise AssertionError("Agent did not stop within 5 seconds")
assert app.agent_state == "STOPPED"
app.notify.assert_called_with("Agent finished with status: LimitsExceeded")
raise AssertionError("Agent did not request new step limit within 5 seconds")

# Verify notification was shown for limits exceeded
app.notify.assert_any_call(
ANY, # The message contains dynamic values
severity="warning",
timeout=10,
)

# Simulate user entering new limits
await pilot.press("1", "0") # step limit = 10
await pilot.press("enter")
await pilot.pause(0.2)

# Now it should ask for cost limit
assert app.input_container.pending_prompt == "New cost limit:"

await pilot.press("1", "0") # cost limit = 10
await pilot.press("enter")
await pilot.pause(0.2)

# Agent should continue running with new limits
assert app.agent_state == "RUNNING" or app.agent_state == "AWAITING_INPUT"


async def test_whitelist_actions_bypass_confirmation():
Expand Down