diff --git a/agent/prototyper.py b/agent/prototyper.py
index 71d9b5f359..75750d1295 100644
--- a/agent/prototyper.py
+++ b/agent/prototyper.py
@@ -17,6 +17,7 @@
import copy
import os
import subprocess as sp
+import sys
import time
from datetime import timedelta
from typing import Optional
@@ -237,8 +238,55 @@ def _generate_prompt_from_build_result(
cur_round,
trial=build_result.trial)
return build_result_ori, None
-
- # Case 2: Binary exits, meaning not referencing function-under-test.
+
+ # Case 2: Manual prompt entered by the user (interactive debug mode) after a failed build.
+ manual_prompt = getattr(self.args, 'interactive_debug', False)
+ compile_log = self.llm.truncate_prompt(build_result.compile_log,
+ extra_text=prompt.get()).strip()
+ if manual_prompt:
+ # We trigger interactive debug mode when:
+ # (1) --interactive_debug is enabled
+ # (2) The LLM's latest response fails to produce a valid fuzz target
+ # (3) After the initial prompt from Prototyper
+ manual_text = ''
+ if (not (build_result.compiles and build_result.binary_exists and build_result.is_function_referenced)
+ and cur_round >= 1):
+
+ logger.info("INTERACTIVE DEBUG MODE: Prompting user after failed build",
+ trial=build_result.trial)
+
+ print("\n=== Interactive Debug Prompt Mode ===", flush=True)
+ print(f"Project: {build_result.benchmark.project}", flush=True)
+ print(f"Function: {build_result.benchmark.function_signature}", flush=True)
+ print("--- STDERR from last compilation ---", flush=True)
+ print(build_result.compile_error or "(no stderr)", flush=True)
+ print("------------------------------------", flush=True)
+ print("You may now enter a custom prompt for the LLM.", flush=True)
+ print("When finished:", flush=True)
+ print(" - Press Enter, then Ctrl+D (Linux/macOS)", flush=True)
+ print(" - Or press Ctrl+Z then Enter (Windows)", flush=True)
+ print("======================================\n", flush=True)
+
+ manual_lines = []
+ try:
+ manual_lines = sys.stdin.read().splitlines()
+ except EOFError:
+ manual_lines = []
+
+ manual_text = "\n".join(manual_lines).strip()
+ builder = prompt_builder.PrototyperManualFixerTemplateBuilder(
+ model=self.llm,
+ benchmark=build_result.benchmark,
+ build_result=build_result,
+ compile_log=compile_log,
+ initial=prompt.get(),
+ manual_text = manual_text)
+ prompt = builder.build(example_pair=[],
+ project_dir=self.inspect_tool.project_dir)
+ logger.info("====== Custom Prompt Entered by User ======", trial=build_result.trial)
+ return build_result, prompt
+
+ # Case 3: Binary exists, meaning not referencing function-under-test.
function_signature = build_result.benchmark.function_signature
fuzz_target_source = build_result.fuzz_target_source
build_script_source = build_result.build_script_source
@@ -303,7 +351,7 @@ def _generate_prompt_from_build_result(
prompt.append(prompt_text)
return build_result_ori, prompt
- # Case 3: Compiles, meaning the binary is not saved.
+ # Case 4: Compiles, meaning the binary is not saved.
binary_path = os.path.join('/out', build_result.benchmark.target_name)
if (build_result_ori and build_result_ori.compiles and
build_result_ori.build_script_source):
diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py
index 73c2ca6302..f0ac82b812 100644
--- a/llm_toolkit/prompt_builder.py
+++ b/llm_toolkit/prompt_builder.py
@@ -667,6 +667,81 @@ def build(self,
return self._prompt
+class PrototyperManualFixerTemplateBuilder(PrototyperTemplateBuilder):
+ """Builds a fixing prompt that combines the last build result with manually entered user text."""
+
+ def __init__(self,
+ model: models.LLM,
+ benchmark: Benchmark,
+ build_result: BuildResult,
+ compile_log: str,
+ template_dir: str = DEFAULT_TEMPLATE_DIR,
+ initial: Any = None,
+ manual_text: str = ''):
+ super().__init__(model, benchmark, template_dir, initial)
+ # Locate the manual-fixing template; the state stored below is substituted into it by build().
+ self.priming_template_file = self._find_template(self.agent_templare_dir,  # NOTE(review): "templare" spelling presumably matches the base-class attribute; confirm.
+ 'prototyper-manual-fixing.txt')
+ self.build_result = build_result
+ self.compile_log = compile_log
+ self.manual_text = manual_text
+
+
+ def build(self,
+ example_pair: list[list[str]],
+ project_example_content: Optional[list[list[str]]] = None,
+ project_context_content: Optional[dict] = None,
+ tool_guides: str = '',
+ project_dir: str = '') -> prompts.Prompt:
+ """Fills the manual-fixing template from the build result and user text, appends it to the prompt, and returns the prompt."""
+ del (example_pair, project_example_content, project_context_content, tool_guides)  # Unused by this builder.
+
+ if not self.benchmark:  # Without a benchmark, return the prompt unmodified.
+ return self._prompt
+
+ function_signature = self.benchmark.function_signature
+ binary_path = os.path.join('/out', self.benchmark.target_name)
+
+ # Choose the intro line by the first failing stage: compile, binary saved, function referenced.
+ if not self.build_result.compiles:
+ intro_line = (
+ "Build failed. Below is the fuzz target, build script, "
+ "compilation command, and error log."
+ )
+ elif self.build_result.compiles and not self.build_result.binary_exists:
+ intro_line = (
+ f"The fuzz target compiled successfully, but the expected binary "
+ f"was not saved at `{binary_path}`. Please ensure the output binary is "
+ "correctly placed in the `/out` directory."
+ )
+ elif not self.build_result.is_function_referenced:
+ intro_line = (
+ f"The fuzz target compiled and was saved to `{binary_path}`, but the "
+ f"function-under-test `{function_signature}` was not invoked in "
+ "`LLVMFuzzerTestOneInput`. Please modify the fuzz target to call it."
+ )
+ else:
+ intro_line = (
+ f"Build and compilation completed, but an issue still exists. "
+ f"Please review the target, output binary at `{binary_path}`, and logs."
+ )
+
+ if self.build_result.build_script_source:
+ build_text = (f'\n{self.build_result.build_script_source}\n'
+ '')
+ else:
+ build_text = 'Build script reuses `/src/build.bk.sh`.'
+
+ prompt = self._get_template(self.priming_template_file)
+ prompt = prompt.replace('{INTRO_LINE}', intro_line)
+ prompt = prompt.replace('{FUZZ_TARGET_SOURCE}', self.build_result.fuzz_target_source)
+ prompt = prompt.replace('{BUILD_TEXT}', build_text)
+ prompt = prompt.replace('{COMPILE_LOG}', self.compile_log)
+ prompt = prompt.replace('{FUNCTION_SIGNATURE}', function_signature)
+ prompt = prompt.replace('{PROJECT_DIR}', project_dir)
+ prompt = prompt.replace('{MANUAL_PROMPT}', self.manual_text)
+ self._prompt.append(prompt)
+ return self._prompt
class CoverageAnalyzerTemplateBuilder(PrototyperTemplateBuilder):
"""Builder specifically targeted C (and excluding C++)."""
diff --git a/prompts/agent/prototyper-manual-fixing.txt b/prompts/agent/prototyper-manual-fixing.txt
new file mode 100644
index 0000000000..51baa02fae
--- /dev/null
+++ b/prompts/agent/prototyper-manual-fixing.txt
@@ -0,0 +1,19 @@
+{INTRO_LINE}
+\n{FUZZ_TARGET_SOURCE}\n
+{BUILD_TEXT}
+\n{COMPILE_LOG}\n
+USER INSTRUCTIONS:
+{MANUAL_PROMPT}
+SYSTEM INSTRUCTIONS:
+YOU ALSO MUST analyze the error messages with the fuzz target and the build script carefully to identify the root cause.
+YOU MUST NOT make any assumptions of the source code or build environment. Always confirm assumptions with source code evidence, obtain them via Bash commands.
+Once you are absolutely certain of the error root cause, output the FULL SOURCE CODE of the fuzz target (and FULL SOURCE CODE of build script, if /src/build.bk.sh is insufficient).
+TIPS:
+1. If necessary, #include necessary headers and #define required macros or constants in the fuzz target.
+2. Adjust compiler flags to link required libraries in the build script.
+3. After collecting information and analyzing the error root cause, YOU MUST take at least one step to validate your theory with source code evidence.
+4. Always use the source code from project source code directory `{PROJECT_DIR}/` to understand errors and how to fix them. For example, search for the key words (e.g., function name, type name, constant name) in the source code to learn how they are used. Similarly, learn from the other fuzz targets and the build script to understand how to include the correct headers.
+5. Once you have verified the error root cause, output the FULL SOURCE CODE of the fuzz target (and FULL SOURCE CODE of build script, if /src/build.bk.sh is insufficient).
+6. Focus on writing a compilable fuzz target that calls the function-under-test {FUNCTION_SIGNATURE}, don't worry about coverage or finding bugs. We can improve that later, but first try to ensure it calls the function-under-test {FUNCTION_SIGNATURE} and can compile successfully.
+7. If an error happens repeatedly and cannot be fixed, try to mitigate it. For example, replace or remove the line.
+
diff --git a/run_all_experiments.py b/run_all_experiments.py
index 5568d4f55f..cb75d47df7 100755
--- a/run_all_experiments.py
+++ b/run_all_experiments.py
@@ -257,6 +257,10 @@ def parse_args() -> argparse.Namespace:
type=int,
default=100,
help='Max trial round for agents.')
+ parser.add_argument(
+ '--interactive_debug',
+ action='store_true',
+ help='Pause after build failure for manual prompt input (works best with LLM_NUM_EXP=1).')
args = parser.parse_args()
if args.num_samples: