diff --git a/sidecar/src/agentic/tool/code_edit/types.rs b/sidecar/src/agentic/tool/code_edit/types.rs index 300b9c9f0..20bbda49b 100644 --- a/sidecar/src/agentic/tool/code_edit/types.rs +++ b/sidecar/src/agentic/tool/code_edit/types.rs @@ -63,6 +63,27 @@ impl CodeEditingPartialRequest { self.fs_file_path, self.instruction ) } + + pub fn to_json() -> serde_json::Value { + serde_json::json!({ + "name": "code_edit_input", + "description": r#"Edit a file. The tool is able to edit the file precisely based on instruction. If the file doesn't exist, it will be CREATED. The tool will automatically CREATE any directories needed to write the file. BE CONCISE AND DIRECT, DO NOT BE VERBOSE IN YOUR CODEBLOCKS and only give an overview of the changes."#, + "input_schema": { + "type": "object", + "properties": { + "fs_file_path": { + "type": "string", + "description": "(required) The ABSOLUTE path of the file to write to, will be created if not already present." + }, + "instruction": { + "type": "string", + "description": "(required) The edit instruction, if you are going to output code blocks make sure they are properly placed in ```{{language}} blocks and extensively use `rest of the code` and `...` placeholders, the goal is to be concise.\nOnly given instructions here which are concise and contain the relevant changes, DO NOT BE VERBOSE, BE CONCISE AND DIRECT.", + } + }, + "required": ["fs_file_path", "instruction"], + }, + }) + } } #[derive(Clone, Debug)] @@ -494,7 +515,7 @@ impl Tool for CodeEditingTool { fn tool_description(&self) -> String { "### code_edit_input -Edit a file. The tool is able to edit the file precisely based on instruction. If the file doesn't exist, it will be CREATED. The tool will automatically CREATE any directories needed to write the file. BE CONCISE AND DIRECT, DO NOT BE VERBOSE IN YOUR CODEBLOCKS and only give an overview of the chagnes.".to_owned() +Edit a file. The tool is able to edit the file precisely based on instruction. 
If the file doesn't exist, it will be CREATED. The tool will automatically CREATE any directories needed to write the file. BE CONCISE AND DIRECT, DO NOT BE VERBOSE IN YOUR CODEBLOCKS and only give an overview of the changes.".to_owned() } fn tool_input_format(&self) -> String { diff --git a/sidecar/src/agentic/tool/input.rs b/sidecar/src/agentic/tool/input.rs index 9d0ee9254..638a0be83 100644 --- a/sidecar/src/agentic/tool/input.rs +++ b/sidecar/src/agentic/tool/input.rs @@ -207,7 +207,7 @@ impl ToolInputPartial { pub fn to_json(tool_type: ToolType) -> Option { match tool_type { - ToolType::CodeEditing => None, + ToolType::CodeEditing => Some(CodeEditingPartialRequest::to_json()), ToolType::ListFiles => Some(ListFilesInputPartial::to_json()), ToolType::SearchFileContentWithRegex => Some(SearchFileContentInputPartial::to_json()), ToolType::OpenFile => Some(OpenFileRequestPartial::to_json()), diff --git a/sidecar/src/agentic/tool/session/service.rs b/sidecar/src/agentic/tool/session/service.rs index d978b8f72..6dfce5078 100644 --- a/sidecar/src/agentic/tool/session/service.rs +++ b/sidecar/src/agentic/tool/session/service.rs @@ -434,16 +434,21 @@ impl SessionService { ) .set_context_crunching_llm(context_crunching_llm.clone()); - session = session - .human_message_tool_use( - exchange_id.to_owned(), - user_message.to_owned(), - all_files, - open_files, - shell.to_owned(), - user_context.clone(), - ) - .await; + // only when it is json mode that we switch the human message + if tool_agent.is_json_mode() { + session = session.pr_description(exchange_id.to_owned(), user_message.to_owned()); + } else { + session = session + .human_message_tool_use( + exchange_id.to_owned(), + user_message.to_owned(), + all_files, + open_files, + shell.to_owned(), + user_context.clone(), + ) + .await; + } let _ = self .save_to_storage(&session, mcts_log_directory.clone()) .await; diff --git a/sidecar/src/agentic/tool/session/session.rs b/sidecar/src/agentic/tool/session/session.rs index 
c32839348..7bc58f530 100644 --- a/sidecar/src/agentic/tool/session/session.rs +++ b/sidecar/src/agentic/tool/session/session.rs @@ -1006,6 +1006,33 @@ impl Session { self } + // creates a message which tells the agent that this is a PR description + pub fn pr_description(mut self, exchange_id: String, human_message: String) -> Session { + let user_message = format!( + r#" +{human_message} +"# + ); + + // add the action node + let mut action_node = ActionNode::default_with_index(self.exchanges()); + action_node = action_node + .set_message(human_message) + .update_user_context(UserContext::default()); + self.action_nodes.push(action_node); + + // push the exchange + let exchange = Exchange::human_chat( + exchange_id, + user_message, + UserContext::default(), + self.project_labels.to_vec(), + self.repo_ref.clone(), + ); + self.exchanges.push(exchange); + self + } + pub async fn human_message_tool_use( mut self, exchange_id: String, @@ -1332,8 +1359,9 @@ impl Session { message_properties: SymbolEventMessageProperties, ) -> Result { let mut converted_messages = vec![]; + let is_json_mode = tool_use_agent.is_json_mode(); for previous_message in self.exchanges.iter() { - let converted_message = previous_message.to_conversation_message(false).await; + let converted_message = previous_message.to_conversation_message(is_json_mode).await; if let Some(converted_message) = converted_message { converted_messages.push(converted_message); } diff --git a/sidecar/src/agentic/tool/session/tool_use_agent.rs b/sidecar/src/agentic/tool/session/tool_use_agent.rs index 1d921e19d..60b5de97a 100644 --- a/sidecar/src/agentic/tool/session/tool_use_agent.rs +++ b/sidecar/src/agentic/tool/session/tool_use_agent.rs @@ -40,6 +40,7 @@ use crate::{ session::chat::SessionChatRole, terminal::terminal::TerminalInputPartial, test_runner::runner::TestRunnerRequestPartial, + thinking::thinking::ThinkingPartialInput, }, }, mcts::action_node::ActionNode, @@ -348,7 +349,7 @@ pub struct 
ToolUseAgentProperties { shell: String, // keeping this disabled for now while we write out the prompts and run a few // evals on this to measure how the performance is - _thinking: AgentThinkingMode, + thinking: AgentThinkingMode, // if the current agent is running under a eval harness, this helps tune the system // prompt for the agent appropriately is_eval_run: bool, @@ -369,7 +370,7 @@ impl ToolUseAgentProperties { in_editor, shell, is_eval_run, - _thinking: thinking, + thinking, repo_name, aide_rules, } @@ -404,6 +405,14 @@ impl ToolUseAgent { } } + // should use json mode for tool calling + pub fn is_json_mode(&self) -> bool { + // right now gate it behind an eval run and only when we are doing + // tool based thinking: we provide think as a tool to the agent + self.properties.is_eval_run + && matches!(&self.properties.thinking, AgentThinkingMode::ToolBased) + } + /// Update the temperature for the tool use agent pub fn set_temperature(mut self, temperature: f32) -> Self { self.temperature = temperature; @@ -1398,6 +1407,20 @@ You accomplish a given task iteratively, breaking it down into clear steps and w SymbolError::ToolError(ToolError::SerdeConversionFailed) })?, ), + "code_edit_input" => ToolInputPartial::CodeEditing( + serde_json::from_str::(&tool_input).map_err( + |e| { + println!("code_edit_input::error::{:?}", e); + SymbolError::ToolError(ToolError::SerdeConversionFailed) + }, + )?, + ), + "Think" => ToolInputPartial::Thinking( + serde_json::from_str::(&tool_input).map_err(|e| { + println!("think::error::{:?}", e); + SymbolError::ToolError(ToolError::SerdeConversionFailed) + })?, + ), _ => { println!("unknow tool found: {}", tool_type); return Err(SymbolError::WrongToolOutput); diff --git a/sidecar/src/bin/swe_bench_agent_bin_tool_based.rs b/sidecar/src/bin/swe_bench_agent_bin_tool_based.rs new file mode 100644 index 000000000..da34bbd5a --- /dev/null +++ b/sidecar/src/bin/swe_bench_agent_bin_tool_based.rs @@ -0,0 +1,247 @@ +use std::{path::PathBuf, 
sync::Arc}; + +/// This contains the binary responsible for running the agents as a farm +/// Dead simple where the inputs are the input to the git repository containing the input +/// and the problem statement, keeping it super simple and limited +use clap::Parser; +use llm_client::{ + clients::types::LLMType, + provider::{AnthropicAPIKey, LLMProvider, LLMProviderAPIKeys}, +}; +use sidecar::{ + agentic::{ + symbol::{ + events::{input::SymbolEventRequestId, message_event::SymbolEventMessageProperties}, + identifier::LLMProperties, + }, + tool::{ + r#type::ToolType, + session::tool_use_agent::{AgentThinkingMode, ToolUseAgentProperties}, + }, + }, + application::{application::Application, config::configuration::Configuration}, + repo::types::RepoRef, + user_context::types::UserContext, +}; + +pub async fn check_session_storage_path(config: Arc, session_id: String) -> String { + let mut session_path = config.index_dir.clone(); + session_path = session_path.join("session"); + // check if the plan_storage_path_exists + if tokio::fs::metadata(&session_path).await.is_err() { + tokio::fs::create_dir(&session_path) + .await + .expect("directory creation to not fail"); + } + session_path = session_path.join(session_id); + session_path + .to_str() + .expect("path conversion to work on all platforms") + .to_owned() +} + +/// Define the command-line arguments +#[derive(Parser, Debug)] +#[command( + author = "skcd", + version = "1.0", + about = "Agent binary sidecar runner" +)] +struct CliArgs { + /// Timeout in seconds + #[arg(long)] + timeout: usize, + + /// Endpoint URL + #[arg(long)] + editor_url: String, + + /// Path to the input JSON file (git directory name and SWE-bench instance) + #[arg(long)] + input: PathBuf, + + /// Anthropic api key + #[arg(long, default_value = None)] + anthropic_api_key: String, + + /// OpenRouter api key + #[arg(long, default_value = None)] + openrouter_api_key: Option, + + /// The run id for the current run + #[arg(long)] + run_id: String, + + #[arg(long)] + repo_name: String, + + /// Directory 
to dump all the logs into + #[arg(long)] + log_directory: String, + + /// Use json mode strictly + #[arg(long, default_value = "true")] + json_mode: bool, + + /// Use midwit mode (aka sonnet3.5 with tool) + #[arg(long, default_value = "true")] + midwit_mode: bool, + + /// Run in single trajectory but a lot of them + #[arg(long, default_value = None)] + single_traj_search: Option, + + /// Maximum depth for the search tree + #[arg(long, default_value = "30")] + max_depth: u32, + + /// Model name override + #[arg(long)] + model_name: Option, +} + +/// Define the SWEbenchInstance struct for serialization +#[derive(Debug, serde::Serialize, serde::Deserialize)] +struct SWEbenchInstance { + repo: String, + instance_id: String, + base_commit: String, + patch: String, + test_patch: String, + problem_statement: String, + hints_text: String, + created_at: String, + version: String, + #[serde(rename = "FAIL_TO_PASS")] + fail_to_pass: String, + #[serde(rename = "PASS_TO_PASS")] + pass_to_pass: String, + environment_setup_commit: String, +} + +#[derive(Debug, serde::Serialize, serde::Deserialize)] +struct InputParts { + git_drname: String, + instance: SWEbenchInstance, +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + println!("agent::start"); + let args = CliArgs::parse(); + eprintln!("run_id::{}", &args.run_id); + + let mut configuration = Configuration::default(); + // we apply the edits directly over here + configuration.apply_directly = true; + + // setup the application + Application::install_logging(&configuration); + Application::setup_scratch_pad(&configuration).await; + + let application = Application::initialize(configuration) + .await + .expect("application setup should work"); + let exchange_id = "0".to_owned(); + + let llm_model = if let Some(model_name) = args.model_name { + LLMType::Custom(model_name) + } else { + LLMType::ClaudeSonnet3_7 + }; + + let llm_provider = LLMProperties::new( + llm_model, + LLMProvider::Anthropic, + 
LLMProviderAPIKeys::Anthropic(AnthropicAPIKey::new(args.anthropic_api_key.to_owned())), + ); + // Define context crunching LLM properties - using the same model as the main agent for now + let _context_crunching_llm = Some(llm_provider.clone()); + let cancellation_token = tokio_util::sync::CancellationToken::new(); + let (sender, _receiver) = tokio::sync::mpsc::unbounded_channel(); + let message_properties = SymbolEventMessageProperties::new( + SymbolEventRequestId::new("0".to_owned(), args.run_id.to_owned()), + sender.clone(), + args.editor_url.clone(), + cancellation_token.clone(), + llm_provider, + ); + + let session_storage_path = + check_session_storage_path(application.config.clone(), args.run_id.clone()).await; + + let session_service = application.session_service.clone(); + + let input_path = args.input; + let input_content = tokio::fs::read(input_path).await.expect("path content"); + let input_parts: InputParts = + serde_json::from_slice(&input_content).expect("Parse the serde json"); + + let cloned_session_id = args.run_id.to_string(); + let user_message = input_parts.instance.problem_statement.clone(); + let cloned_working_directory = input_parts.git_drname.to_owned(); + let tool_box = application.tool_box.clone(); + let llm_broker = application.llm_broker.clone(); + + let aide_rules = Some(format!( + r#"You are helping the user in the repository present in {} +FOLLOW these steps to resolve the issue: +1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure. +2. Create a script to reproduce the error and execute it with `python reproduce_error.py` using the execute_command (which uses bash internally), to confirm the error. You should always use `python reproduce_error.py` command exactly to run the reproduction error script. +3. Edit the sourcecode of the repo to resolve the issue +4. Rerun your reproduce script and confirm that the error is fixed! 
+ +Your thinking should be thorough and so it's fine if it's very long."#, + args.repo_name, + )); + + // the default tools which are present to the agent + let tools = vec![ + ToolType::ListFiles, + ToolType::SearchFileContentWithRegex, + ToolType::OpenFile, + ToolType::CodeEditing, + ToolType::AttemptCompletion, + ToolType::TerminalCommand, + ToolType::FindFiles, + ToolType::Think, + ]; + + let tool_use_agent_properties = ToolUseAgentProperties::new( + false, + "bash".to_owned(), + AgentThinkingMode::MiniCOTBeforeTool, + true, // is running under eval harness + args.repo_name.to_owned(), + aide_rules.clone(), + ); + + // wait for the agent to finish over here while busy looping + println!("agent::tool_use::start"); + let _ = session_service + .tool_use_agentic( + cloned_session_id, + session_storage_path, + user_message, + exchange_id, + vec![], + vec![], + "bash".to_owned(), + vec![], + RepoRef::local(&cloned_working_directory).expect("repo_ref to work"), + cloned_working_directory, + tools, + tool_box, + llm_broker, + UserContext::default(), + false, + false, + Some(args.log_directory.clone()), + tool_use_agent_properties, + message_properties, + None, // No context crunching LLM for agent_bin + ) + .await; + println!("agent::tool_use::end"); + Ok(()) +}