Fix/index error (#4045)
kevinlu1248 authored Jun 19, 2024
2 parents 0ddb7c3 + 90aeeb1 commit 3ed7a92
Showing 8 changed files with 283 additions and 259 deletions.
7 changes: 4 additions & 3 deletions sweepai/agents/modify.py
@@ -19,7 +19,10 @@ def generate_code_suggestions(
fcrs: list[FileChangeRequest],
error_messages_dict: dict[int, str],
) -> list[StatefulCodeSuggestion]:
modify_order = [fcr.filename for fcr in fcrs]
modify_order = []
for fcr in fcrs:
if fcr.filename not in modify_order:
modify_order.append(fcr.filename)

code_suggestions = []
for file_path in modify_order:
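
The rewritten loop above keeps only the first occurrence of each filename, so a file that appears in several change requests contributes a single entry to modify_order instead of being visited (and indexed) repeatedly. A minimal, self-contained sketch of that deduplication behaviour, with plain strings standing in for the FileChangeRequest objects (illustrative only, not part of the diff):

# Hypothetical illustration: deduplicate while preserving first-seen order.
filenames = ["sweepai/agents/modify.py", "sweepai/chat/api.py", "sweepai/agents/modify.py"]
modify_order = []
for name in filenames:
    if name not in modify_order:
        modify_order.append(name)
assert modify_order == ["sweepai/agents/modify.py", "sweepai/chat/api.py"]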
@@ -59,7 +62,6 @@ def modify(
use_openai: bool = False,
previous_modify_files_dict: dict[str, dict[str, str]] = {},
renames_dict: dict[str, str] = {},
fast: bool = False,
raise_on_max_iterations: bool = False,
) -> dict[str, dict[str, str]]:
# join fcr in case of duplicates
@@ -158,7 +160,6 @@ def modify(
llm_state,
chat_logger_messages=detailed_chat_logger_messages,
use_openai=use_openai,
fast=fast
)
print(function_output)
fcrs = llm_state["fcrs"]
12 changes: 4 additions & 8 deletions sweepai/agents/modify_utils.py
@@ -989,7 +989,6 @@ def handle_function_call(
llm_state: dict,
chat_logger_messages: list[dict[str, str]] | None = None,
use_openai: bool = False,
fast: bool = False,
):
llm_response = ""
tool_name = function_call.function_name
@@ -1153,13 +1152,9 @@ def handle_function_call(
# Check if the changes are valid
if not error_message:
is_last_fcr_for_file = False # TODO: check if this is the last fcr for this file
if fast:
check_results_message = ""
failing_parse = ""
else:
check_results = get_check_results(file_name, new_file_contents, last_fcr_for_file=is_last_fcr_for_file)
check_results_message = check_results.is_worse_than_message(llm_state['initial_check_results'][file_name])
failing_parse = check_results.parse_error_message if not llm_state['initial_check_results'][file_name].parse_error_message else ""
check_results = get_check_results(file_name, new_file_contents, last_fcr_for_file=is_last_fcr_for_file)
check_results_message = check_results.is_worse_than_message(llm_state['initial_check_results'][file_name])
failing_parse = check_results.parse_error_message if not llm_state['initial_check_results'][file_name].parse_error_message else ""
current_diff = generate_diff(
file_contents, new_file_contents, n=10
)
@@ -1212,6 +1207,7 @@ def handle_function_call(
"contents": file_contents,
"original_contents": file_contents,
}
llm_state["fcrs"][current_fcr_index].is_completed = True
if warning_message:
original_code_indents = len(original_code) - len(original_code.lstrip())
new_code_indents = len(new_code) - len(new_code.lstrip())
128 changes: 77 additions & 51 deletions sweepai/chat/api.py
@@ -17,10 +17,10 @@

from sweepai.agents.modify_utils import validate_and_parse_function_call
from sweepai.agents.search_agent import extract_xml_tag
from sweepai.chat.search_prompts import relevant_snippets_message, relevant_snippet_template, anthropic_system_message, function_response, anthropic_format_message, pr_format, relevant_snippets_message_for_pr, openai_format_message, openai_system_message
from sweepai.chat.search_prompts import relevant_snippets_message, relevant_snippet_template, anthropic_system_message, function_response, anthropic_format_message, pr_format, relevant_snippets_message_for_pr, openai_format_message, openai_system_message, query_optimizer_system_prompt, query_optimizer_user_prompt
from sweepai.config.client import SweepConfig
from sweepai.config.server import CACHE_DIRECTORY, GITHUB_APP_ID, GITHUB_APP_PEM
from sweepai.core.chat import ChatGPT
from sweepai.core.chat import ChatGPT, call_llm
from sweepai.core.entities import FileChangeRequest, Message, Snippet
from sweepai.core.pull_request_bot import get_pr_summary_for_chat
from sweepai.core.review_utils import split_diff_into_patches
@@ -353,7 +353,8 @@ def wrapped_search_codebase(
for message, snippets in search_codebase.stream(
repo_name,
query,
access_token
access_token,
use_optimized_query=not bool(annotations),
):
yield message, snippets

@@ -362,6 +363,7 @@ def search_codebase(
repo_name: str,
query: str,
access_token: str,
use_optimized_query: bool = True,
):
with Timer() as timer:
org_name, repo = repo_name.split("/")
@@ -371,13 +373,27 @@
print(f"Cloned {repo_name} to {repo_cache}/{repo}")
cloned_repo = MockClonedRepo(f"{repo_cache}/{repo}", repo_name, token=access_token)
cloned_repo.pull()

if use_optimized_query:
yield "Optimizing query...", []
query = call_llm(
system_prompt=query_optimizer_system_prompt,
user_prompt=query_optimizer_user_prompt,
params={"query": query},
use_openai=True
).strip().removeprefix("Search query:").strip()
yield f"Optimized query: {query}", []

for message, snippets in prep_snippets.stream(
cloned_repo, query,
use_multi_query=False,
NUM_SNIPPETS_TO_KEEP=0,
skip_analyze_agent=True
):
yield message, snippets
if use_optimized_query:
yield f"{message} (optimized query: {query})", snippets
else:
yield message, snippets
logger.debug(f"Preparing snippets took {timer.time_elapsed} seconds")
return snippets
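
When use_optimized_query is set, the raw question is first condensed by call_llm with the query-optimizer prompts (added in sweepai/chat/search_prompts.py below) and the reply is normalized before it reaches prep_snippets. A small sketch of that normalization step in isolation (the optimizer reply is made up; assumes Python 3.9+ for str.removeprefix):

# Hypothetical optimizer reply run through the same post-processing chain as above.
raw_reply = "  Search query: signup form validation error message\n"
optimized = raw_reply.strip().removeprefix("Search query:").strip()
assert optimized == "signup form validation error message"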

@@ -525,7 +541,7 @@ def chat_codebase_stream(
content=snippets_message,
role="user"
),
*messages
*messages[:-1]
]

def stream_state(
@@ -573,49 +589,63 @@ def stream_state(
if not token:
continue
result_string += token
if len(result_string) < 30:
continue
current_string, *_ = result_string.split("<function_call>")
analysis = extract_xml_tag(current_string, "analysis", include_closing_tag=False) or ""
user_response = extract_xml_tag(current_string, "user_response", include_closing_tag=False) or ""
self_critique = extract_xml_tag(current_string, "self_critique", include_closing_tag=False)

current_messages = []

if analysis:
current_messages.append(
Message(
content=analysis,
role="function",
function_call={
"function_name": "analysis",
"function_parameters": {},
"is_complete": bool(user_response),
}
if "<analysis>" in current_string:
analysis = extract_xml_tag(current_string, "analysis", include_closing_tag=False) or ""
user_response = extract_xml_tag(current_string, "user_response", include_closing_tag=False) or ""
self_critique = extract_xml_tag(current_string, "self_critique", include_closing_tag=False)

current_messages = []

if analysis:
current_messages.append(
Message(
content=analysis,
role="function",
function_call={
"function_name": "analysis",
"function_parameters": {},
"is_complete": bool(user_response),
}
)
)
)

if user_response:
current_messages.append(
Message(
content=user_response,
role="assistant",

if user_response:
current_messages.append(
Message(
content=user_response,
role="assistant",
)
)
)

if self_critique:
current_messages.append(

if self_critique:
current_messages.append(
Message(
content=self_critique,
role="function",
function_call={
"function_name": "self_critique",
"function_parameters": {},
}
)
)
yield [
*new_messages,
*current_messages
]
else:
current_messages = [
Message(
content=self_critique,
role="function",
function_call={
"function_name": "self_critique",
"function_parameters": {},
}
content=result_string,
role="assistant",
)
)
yield [
*new_messages,
*current_messages
]
]
yield [
*new_messages,
*current_messages
]

if current_messages[-1].role == "function":
current_messages[-1].function_call["is_complete"] = True
@@ -716,10 +746,6 @@ def stream_state(
# breakpoint()

# last_assistant_message = [message.content for message in new_messages if message.role == "assistant"][-1]
try:
save_messages_for_visualization(messages=new_messages, use_openai=use_openai)
except Exception as e:
logger.exception(f"Failed to save messages for visualization due to {e}")

posthog.capture(metadata["username"], "chat_codebase complete", properties={
**metadata,
@@ -752,10 +778,9 @@ def postprocessed_stream(*args, use_patch=False, **kwargs):
}
])

format_message = anthropic_format_message if not model.startswith("gpt") else openai_format_message
return StreamingResponse(
postprocessed_stream(
format_message,
messages[-1].content,
snippets,
messages,
access_token,
@@ -832,6 +857,7 @@ def handle_function_call(function_call: AnthropicFunctionCall, repo_name: str, s
async def autofix(
repo_name: str = Body(...),
code_suggestions: list[CodeSuggestion] = Body(...),
branch: str = Body(None),
access_token: str = Depends(get_token_header)
):
with Timer() as timer:
@@ -845,7 +871,8 @@ async def autofix(
cloned_repo = ClonedRepo(
repo_name,
installation_id=installation_id,
token=access_token
token=access_token,
branch=branch
)

file_change_requests = []
@@ -872,7 +899,6 @@ def stream():
request="",
cloned_repo=cloned_repo,
relevant_filepaths=[code_suggestion.file_path for code_suggestion in code_suggestions],
fast=True,
):
yield json.dumps([stateful_code_suggestion.__dict__ for stateful_code_suggestion in stateful_code_suggestions])
except Exception as e:
69 changes: 26 additions & 43 deletions sweepai/chat/search_prompts.py
@@ -145,15 +145,16 @@ def area(self):
### Format
Use GitHub-styled markdown for your responses, using lists where applicable to improve clarity. You must respond with the following three distinct sections:
You must respond with the following two distinct sections:
# 1. Summary and analysis
<analysis>
First, list and summarize each file from the codebase provided that is relevant to the user's question. You may not need to summarize all provided files.
1. List and summarize each file from the codebase provided that is relevant to the user's question. You may not need to summarize all provided files.
Then, if the user's request is long, list all the requests made by the user.
2. List all the requests made by the user.
Then, determine if you have sufficient information to answer the user's question. If not, determine the information you need to answer the question completely by making `search_codebase` tool calls.
3. Organize your response to the user into sections. Plan out reasonable headers so that your response is more digestible.
</analysis>
# 2. User Response
@@ -164,22 +165,7 @@ def area(self):
When showing relevant examples of code, only show MINIMAL excerpts of code that address the user's question. Do NOT copy the whole file, but only the lines that are relevant to the user's question.
When suggesting code changes, you add <code_change> blocks inside the <user_response></user_response> tags.
</user_response>
# 3. Self-Critique
<self_critique>
Then, self-critique your answer and validate that you have completely answered the user's question and addressed all their points. If the user's answer is extremely broad, you are done.
Otherwise, if the user's question is specific, and asks to implement a feature or fix a bug, determine what additional information you need to answer the user's question. Specifically, validate that all interfaces are being used correctly based on the contents of the retrieved files -- if you cannot verify this, then you must find the relevant information such as the correct interface or schema to validate the usage. If you need to search the codebase for more information, such as for how a particular feature in the codebase works, use the `search_codebase` tool in the next section.
</self_critique>
# 4. Function Call (Optional)
Then, make each a function call like so:
<function_call>
[the function call goes here, using the valid XML format for function calls]
</function_call>"""
</user_response>"""

# improve these prompts
anthropic_system_message = """You are a helpful assistant that will answer a user's questions about a codebase to resolve their issue. You are provided with a list of relevant code snippets from the codebase that you can refer to. You can use this information to help the user solve their issue. You may also make function calls to retrieve additional information from the codebase.
@@ -284,12 +270,13 @@ def area(self):
[additional sub tasks as needed]
</subtasks>"""

openai_system_message = """You are a helpful assistant that will answer a user's questions about a codebase to resolve their issue. You are provided with a list of relevant code snippets from the codebase that you can refer to. You can use this information to help the user solve their issue. You may also make function calls to retrieve additional information from the codebase.
openai_system_message = """You are a helpful assistant that will answer a user's questions about a codebase to resolve their issue. You are provided with a list of relevant code snippets from the codebase that you can refer to. You can use this information to help the user solve their issue.
# Guidelines
- When you are uncertain about details such as a type definition in the codebase, search the codebase to find the required information.
- When showing relevant examples of code, only show MINIMAL excerpts of code that address the user's question. Do NOT copy the whole file, but only the lines that are relevant to the user's question.
- Focus on providing high-quality explanations. Start with a high-level overview.
- Only show code as supplementary evidence or to enhance the explanations. When doing so, only show MINIMAL excerpts of code that address the user's question. Do NOT copy the whole file, but only the lines that are relevant to the user's question.
- Use markdown for your responses, using headers where applicable to improve clarity and lists to enumerate examples.
- Wherever possible, you should suggest code changes. To do so, you must add <code_change> blocks to the <user_response> block. First, indicate whether you want to modify an existing file or create a new file, then write in the following format:
<code_change>
@@ -330,26 +317,7 @@ def area(self):
</new_code>
</code_change>
Remember that these <code_change> blocks must be contained within the <user_response></user_response> tags.
In this environment, you have access to a code search tool to assist in fulfilling the user request:
You MUST invoke the tool like this:
<function_call>
<search_codebase>
<query>
The search query.
</query>
</search_codebase>
</function_call>
<search_codebase>
<query>
Single, detailed, specific natural language search question to search the codebase for relevant snippets. This should be in the form of a natural language question, like "What is the structure of the User model in the authentication module?"
</query>
</search_codebase>
""" + example_tool_calls + "\n\n" + openai_format_message
""" + openai_format_message

relevant_snippets_message = """# Codebase
Repo: {repo_name}
@@ -412,3 +380,18 @@ def area(self):
Here's the user's message:
{user_message}"""

query_optimizer_system_prompt = """Generate a search query for a hybrid search database to find relevant files in a codebase. The search query should match a relevant part of the code. Keep all file paths and entities exactly.
Examples:
Question: How can we optimize the database queries in the user profile page to improve load times? Are there any caching mechanisms we can leverage?
Search query: user profile page database query
Question: Are there any accessibility issues with the current form validation error messages in the signup flow? How can we ensure they are screen-reader friendly?
Search query: signup form validation error message
Question: What's the best way to implement real-time updates for the chat feature in our mobile app? Should we use WebSockets or long-polling?
Search query: mobile app chat real-time updates WebSockets long-polling
Just respond with the search query, nothing else."""

query_optimizer_user_prompt = """Question: {query}"""
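
These two prompts are consumed by the call_llm call added to sweepai/chat/api.py earlier in this commit, which passes the user's question under the query key. A hedged sketch of how the user prompt might be filled (assuming the params dict is substituted with str.format, which this diff does not show; the example question is made up):

# Hypothetical illustration of filling the user prompt template.
query_optimizer_user_prompt = """Question: {query}"""
filled = query_optimizer_user_prompt.format(query="How do we paginate results in the search endpoint?")
assert filled == "Question: How do we paginate results in the search endpoint?"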

