Skip to content

Add an interactive debug mode to prototyper that accepts user input as a prompt #1129

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 51 additions & 3 deletions agent/prototyper.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import copy
import os
import subprocess as sp
import sys
import time
from datetime import timedelta
from typing import Optional
Expand Down Expand Up @@ -237,8 +238,55 @@ def _generate_prompt_from_build_result(
cur_round,
trial=build_result.trial)
return build_result_ori, None

# Case 2: Binary exits, meaning not referencing function-under-test.

# case 2: Arbitrary manual prompt, disregarding any build result
manual_prompt = getattr(self.args, 'interactive_debug', False)
compile_log = self.llm.truncate_prompt(build_result.compile_log,
extra_text=prompt.get()).strip()
if manual_prompt:
# We trigger interactive debug mode when:
# (1) --interactive_debug is enabled
# (2) The LLM's latest response fails to produce a valid fuzz target
# (3) After the initial prompt from Prototyper
manual_text = ''
if (not (build_result.compiles and build_result.binary_exists and build_result.is_function_referenced)
and cur_round >= 1):

logger.info("INTERACTIVE DEBUG MODE: Prompting user after failed build",
trial=build_result.trial)

print("\n=== Interactive Debug Prompt Mode ===", flush=True)
print(f"Project: {build_result.benchmark.project}", flush=True)
print(f"Function: {build_result.benchmark.function_signature}", flush=True)
print("--- STDERR from last compilation ---", flush=True)
print(build_result.compile_error or "(no stderr)", flush=True)
print("------------------------------------", flush=True)
print("You may now enter a custom prompt for the LLM.", flush=True)
print("When finished:", flush=True)
print(" - Press Enter, then Ctrl+D (Linux/macOS)", flush=True)
print(" - Or press Ctrl+Z then Enter (Windows)", flush=True)
print("======================================\n", flush=True)

manual_lines = []
try:
manual_lines = sys.stdin.read().splitlines()
except EOFError:
manual_lines = []

manual_text = "\n".join(manual_lines).strip()
builder = prompt_builder.PrototyperManualFixerTemplateBuilder(
model=self.llm,
benchmark=build_result.benchmark,
build_result=build_result,
compile_log=compile_log,
initial=prompt.get(),
manual_text = manual_text)
prompt = builder.build(example_pair=[],
project_dir=self.inspect_tool.project_dir)
logger.info("====== Custom Prompt Entered by User ======", trial=build_result.trial)
return build_result, prompt

# Case 3: Binary exists, meaning not referencing function-under-test.
function_signature = build_result.benchmark.function_signature
fuzz_target_source = build_result.fuzz_target_source
build_script_source = build_result.build_script_source
Expand Down Expand Up @@ -303,7 +351,7 @@ def _generate_prompt_from_build_result(
prompt.append(prompt_text)
return build_result_ori, prompt

# Case 3: Compiles, meaning the binary is not saved.
# Case 4: Compiles, meaning the binary is not saved.
binary_path = os.path.join('/out', build_result.benchmark.target_name)
if (build_result_ori and build_result_ori.compiles and
build_result_ori.build_script_source):
Expand Down
75 changes: 75 additions & 0 deletions llm_toolkit/prompt_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -667,6 +667,81 @@ def build(self,

return self._prompt

class PrototyperManualFixerTemplateBuilder(PrototyperTemplateBuilder):
  """Builds a fix-up prompt that carries manually entered user instructions.

  The prompt is rendered from the `prototyper-manual-fixing.txt` template and
  combines the state of the latest build (fuzz target source, build script,
  compilation log) with the free-form text stored in `manual_text`.
  """

  def __init__(self,
               model: models.LLM,
               benchmark: Benchmark,
               build_result: BuildResult,
               compile_log: str,
               template_dir: str = DEFAULT_TEMPLATE_DIR,
               initial: Any = None,
               manual_text: str = ''):
    """Stores the build state and user text needed to render the prompt.

    Args:
      model: Forwarded to the parent builder.
      benchmark: Forwarded to the parent builder.
      build_result: Result of the latest build; its `compiles`,
        `binary_exists`, `is_function_referenced`, `fuzz_target_source` and
        `build_script_source` attributes are read by `build()`.
      compile_log: Compilation log text inserted into the prompt as-is.
      template_dir: Forwarded to the parent builder.
      initial: Forwarded to the parent builder.
      manual_text: User-written instructions inserted into the prompt.
    """
    super().__init__(model, benchmark, template_dir, initial)
    # Load templates.
    # NOTE(review): `agent_templare_dir` (sic) is not defined in this class;
    # presumably the parent class sets it under this exact spelling -- confirm
    # before renaming.
    self.priming_template_file = self._find_template(
        self.agent_templare_dir, 'prototyper-manual-fixing.txt')
    self.build_result = build_result
    self.compile_log = compile_log
    self.manual_text = manual_text

  def _manual_fix_intro_line(self, binary_path: str,
                             function_signature: str) -> str:
    """Returns the opening sentence that matches the current build state."""
    result = self.build_result
    if not result.compiles:
      return ("Build failed. Below is the fuzz target, build script, "
              "compilation command, and error log.")
    # From here on the build is known to have compiled.
    if not result.binary_exists:
      return (
          "The fuzz target compiled successfully, but the expected binary "
          f"was not saved at `{binary_path}`. Please ensure the output binary "
          "is correctly placed in the `/out` directory.")
    if not result.is_function_referenced:
      return (
          f"The fuzz target compiled and was saved to `{binary_path}`, but the "
          f"function-under-test `{function_signature}` was not invoked in "
          "`LLVMFuzzerTestOneInput`. Please modify the fuzz target to call it.")
    return ("Build and compilation completed, but an issue still exists. "
            f"Please review the target, output binary at `{binary_path}`, "
            "and logs.")

  def build(self,
            example_pair: list[list[str]],
            project_example_content: Optional[list[list[str]]] = None,
            project_context_content: Optional[dict] = None,
            tool_guides: str = '',
            project_dir: str = '') -> prompts.Prompt:
    """Renders the manual-fixing template and appends it to the prompt.

    Args:
      example_pair: Not used by this builder.
      project_example_content: Not used by this builder.
      project_context_content: Not used by this builder.
      tool_guides: Not used by this builder.
      project_dir: Substituted for `{PROJECT_DIR}` in the template.

    Returns:
      `self._prompt`. It is returned unchanged when `self.benchmark` is
      falsy; otherwise the rendered template text has been appended to it.
    """
    del (example_pair, project_example_content, project_context_content,
         tool_guides)

    if not self.benchmark:
      return self._prompt

    function_signature = self.benchmark.function_signature
    binary_path = os.path.join('/out', self.benchmark.target_name)

    # Dynamically decide the first line of the prompt.
    intro_line = self._manual_fix_intro_line(binary_path, function_signature)

    # Without a custom build script, point at the fallback script instead of
    # emitting an empty <build script> section.
    script_source = self.build_result.build_script_source
    if script_source:
      build_text = f'<build script>\n{script_source}\n</build script>'
    else:
      build_text = 'Build script reuses `/src/build.bk.sh`.'

    # Substitutions are applied in this fixed order. The user text goes last,
    # so placeholders typed by the user are never expanded.
    substitutions = (
        ('{INTRO_LINE}', intro_line),
        ('{FUZZ_TARGET_SOURCE}', self.build_result.fuzz_target_source),
        ('{BUILD_TEXT}', build_text),
        ('{COMPILE_LOG}', self.compile_log),
        ('{FUNCTION_SIGNATURE}', function_signature),
        ('{PROJECT_DIR}', project_dir),
        ('{MANUAL_PROMPT}', self.manual_text),
    )
    prompt = self._get_template(self.priming_template_file)
    for placeholder, value in substitutions:
      prompt = prompt.replace(placeholder, value)

    self._prompt.append(prompt)
    return self._prompt

class CoverageAnalyzerTemplateBuilder(PrototyperTemplateBuilder):
"""Builder specifically targeted C (and excluding C++)."""
Expand Down
19 changes: 19 additions & 0 deletions prompts/agent/prototyper-manual-fixing.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{INTRO_LINE}
<fuzz target>
{FUZZ_TARGET_SOURCE}
</fuzz target>
{BUILD_TEXT}
<compilation log>
{COMPILE_LOG}
</compilation log>
USER INSTRUCTIONS:
{MANUAL_PROMPT}
SYSTEM INSTRUCTIONS:
YOU ALSO MUST analyze the error messages with the fuzz target and the build script carefully to identify the root cause.
YOU MUST NOT make any assumptions about the source code or build environment. Always confirm assumptions with source code evidence, and obtain that evidence via Bash commands.
Once you are absolutely certain of the error root cause, output the FULL SOURCE CODE of the fuzz target (and FULL SOURCE CODE of build script, if /src/build.bk.sh is insufficient).
TIPS:
1. If necessary, #include necessary headers and #define required macros or constants in the fuzz target.
2. Adjust compiler flags to link required libraries in the build script.
3. After collecting information and analyzing and understanding the error root cause, YOU MUST take at least one step to validate your theory with source code evidence.
4. Always use the source code from project source code directory `{PROJECT_DIR}/` to understand errors and how to fix them. For example, search for the key words (e.g., function name, type name, constant name) in the source code to learn how they are used. Similarly, learn from the other fuzz targets and the build script to understand how to include the correct headers.
5. Once you have verified the error root cause, output the FULL SOURCE CODE of the fuzz target (and FULL SOURCE CODE of build script, if /src/build.bk.sh is insufficient).
6. Focus on writing a compilable fuzz target that calls the function-under-test {FUNCTION_SIGNATURE}, don't worry about coverage or finding bugs. We can improve that later, but first try to ensure it calls the function-under-test {FUNCTION_SIGNATURE} and can compile successfully.
7. If an error happens repeatedly and cannot be fixed, try to mitigate it. For example, replace or remove the line.

4 changes: 4 additions & 0 deletions run_all_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,10 @@ def parse_args() -> argparse.Namespace:
type=int,
default=100,
help='Max trial round for agents.')
parser.add_argument(
'--interactive_debug',
action='store_true',
help='Pause after build failure for manual prompt input (works best with LLM_NUM_EXP=1).')

args = parser.parse_args()
if args.num_samples:
Expand Down