diff --git a/python/samples/concepts/README.md b/python/samples/concepts/README.md
index 3c62b4156cf7..22f0496e43e6 100644
--- a/python/samples/concepts/README.md
+++ b/python/samples/concepts/README.md
@@ -10,6 +10,7 @@
 - [Assistant Agent Retrieval](./agents/assistant_agent_retrieval.py)
 - [Assistant Agent Streaming](./agents/assistant_agent_streaming.py)
 - [Chat Completion Function Termination](./agents/chat_completion_function_termination.py)
+- [Chat Completion History Reducer](./agents/chat_completion_history_reducer.py)
 - [Mixed Chat Agents](./agents/mixed_chat_agents.py)
 - [Mixed Chat Agents Plugins](./agents/mixed_chat_agents_plugins.py)
 - [Mixed Chat Files](./agents/mixed_chat_files.py)
@@ -45,6 +46,9 @@
 - [Simple Chatbot Store Metadata](./chat_completion/simple_chatbot_store_metadata.py)
 - [Simple Chatbot Streaming](./chat_completion/simple_chatbot_streaming.py)
 - [Simple Chatbot with Image](./chat_completion/simple_chatbot_with_image.py)
+- [Simple Chatbot with Summary History Reducer Keeping Function Content](./chat_completion/simple_chatbot_with_summary_history_reducer_keep_func_content.py)
+- [Simple Chatbot with Summary History Reducer](./chat_completion/simple_chatbot_with_summary_history_reducer.py)
+- [Simple Chatbot with Truncation History Reducer](./chat_completion/simple_chatbot_with_truncation_history_reducer.py)

 ### ChatHistory

 - Using and serializing the [`ChatHistory`](https://github.com/microsoft/semantic-kernel/blob/main/python/semantic_kernel/contents/chat_history.py)
diff --git a/python/samples/concepts/agents/chat_completion_history_reducer.py b/python/samples/concepts/agents/chat_completion_history_reducer.py
new file mode 100644
index 000000000000..1cdffefe7b78
--- /dev/null
+++ b/python/samples/concepts/agents/chat_completion_history_reducer.py
@@ -0,0 +1,298 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+import logging
+from typing import TYPE_CHECKING
+
+from semantic_kernel.agents import (
+    AgentGroupChat,
+    ChatCompletionAgent,
+)
+from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion, OpenAIChatCompletion
+from semantic_kernel.contents import AuthorRole, ChatHistory, ChatMessageContent
+from semantic_kernel.contents.history_reducer.chat_history_summarization_reducer import ChatHistorySummarizationReducer
+from semantic_kernel.contents.history_reducer.chat_history_truncation_reducer import ChatHistoryTruncationReducer
+from semantic_kernel.kernel import Kernel
+
+if TYPE_CHECKING:
+    from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer
+
+#####################################################################
+# The following sample demonstrates how to implement a chat history #
+# reducer as part of the Semantic Kernel Agent Framework. It        #
+# covers two types of reducers: summarization and truncation. For   #
+# this sample, the ChatCompletionAgent is used.                     #
+#####################################################################
+
+
+# Initialize the logger for debugging and information messages
+logger = logging.getLogger(__name__)
+
+# Flag to determine whether to use Azure OpenAI services or OpenAI
+# Set this to True if using Azure OpenAI (requires appropriate configuration)
+use_azure_openai = True
+
+
+# Helper function to create and configure a Kernel with the desired chat completion service
+def _create_kernel_with_chat_completion(service_id: str) -> Kernel:
+    """A helper function to create a kernel with a chat completion service."""
+    kernel = Kernel()
+    if use_azure_openai:
+        # Add Azure OpenAI service to the kernel
+        kernel.add_service(AzureChatCompletion(service_id=service_id))
+    else:
+        # Add OpenAI service to the kernel
+        kernel.add_service(OpenAIChatCompletion(service_id=service_id))
+    return kernel
+
+
+class HistoryReducerExample:
+    """
+    Demonstrates how to create a ChatCompletionAgent with a ChatHistoryReducer
+    (either truncation or summarization) and how to invoke that agent
+    multiple times while applying the history reduction.
+    """
+
+    # Agent-specific settings
+    TRANSLATOR_NAME = "NumeroTranslator"  # Name of the agent
+    TRANSLATOR_INSTRUCTIONS = "Add one to the latest user number and spell it in Spanish without explanation."
+
+    def create_truncating_agent(
+        self, reducer_msg_count: int, reducer_threshold: int
+    ) -> tuple[ChatCompletionAgent, "ChatHistoryReducer"]:
+        """
+        Creates a ChatCompletionAgent with a truncation-based history reducer.
+
+        Parameters:
+        - reducer_msg_count: Target number of messages to retain after truncation.
+        - reducer_threshold: Threshold number of messages to trigger truncation.
+
+        Returns:
+        - A tuple of the configured ChatCompletionAgent and its truncation reducer.
+        """
+        truncation_reducer = ChatHistoryTruncationReducer(
+            target_count=reducer_msg_count, threshold_count=reducer_threshold
+        )
+
+        return ChatCompletionAgent(
+            name=self.TRANSLATOR_NAME,
+            instructions=self.TRANSLATOR_INSTRUCTIONS,
+            kernel=_create_kernel_with_chat_completion("truncate_agent"),
+            history_reducer=truncation_reducer,
+        ), truncation_reducer
+
+    def create_summarizing_agent(
+        self, reducer_msg_count: int, reducer_threshold: int
+    ) -> tuple[ChatCompletionAgent, "ChatHistoryReducer"]:
+        """
+        Creates a ChatCompletionAgent with a summarization-based history reducer.
+
+        Parameters:
+        - reducer_msg_count: Target number of messages to retain after summarization.
+        - reducer_threshold: Threshold number of messages to trigger summarization.
+
+        Returns:
+        - A tuple of the configured ChatCompletionAgent and its summarization reducer.
+        """
+        kernel = _create_kernel_with_chat_completion("summarize_agent")
+
+        summarization_reducer = ChatHistorySummarizationReducer(
+            service=kernel.get_service(service_id="summarize_agent"),
+            target_count=reducer_msg_count,
+            threshold_count=reducer_threshold,
+        )
+
+        return ChatCompletionAgent(
+            name=self.TRANSLATOR_NAME,
+            instructions=self.TRANSLATOR_INSTRUCTIONS,
+            kernel=kernel,
+            history_reducer=summarization_reducer,
+        ), summarization_reducer
+
+    async def invoke_agent(self, agent: ChatCompletionAgent, chat_history: ChatHistory, message_count: int):
+        """
+        Demonstrates agent invocation with direct history management and reduction.
+
+        Parameters:
+        - agent: The ChatCompletionAgent to invoke.
+        - chat_history: The chat history (here, the agent's reducer) that accumulates the conversation.
+        - message_count: The number of messages to simulate in the conversation.
+ """ + + index = 1 + while index <= message_count: + # Provide user input + user_message = ChatMessageContent(role=AuthorRole.USER, content=str(index)) + chat_history.messages.append(user_message) + print(f"# User: '{index}'") + + # Attempt history reduction if a reducer is present + is_reduced = False + if agent.history_reducer is not None: + reduced = await agent.history_reducer.reduce() + if reduced is not None: + chat_history.messages.clear() + chat_history.messages.extend(reduced) + is_reduced = True + print("@ (History was reduced!)") + + # Invoke the agent and display its response + async for response in agent.invoke(chat_history): + chat_history.messages.append(response) + print(f"# {response.role} - {response.name}: '{response.content}'") + + # The index is incremented by 2 because the agent is told to: + # "Add one to the latest user number and spell it in Spanish without explanation." + # The user sends 1, 3, 5, etc., and the agent responds with 2, 4, 6, etc. (in Spanish) + index += 2 + print(f"@ Message Count: {len(chat_history.messages)}\n") + + # If history was reduced, and the chat history is of type `ChatHistorySummarizationReducer`, + # print summaries as it will contain the __summary__ metadata key. + if is_reduced and isinstance(chat_history, ChatHistorySummarizationReducer): + self._print_summaries_from_front(chat_history.messages) + + async def invoke_chat(self, agent: ChatCompletionAgent, message_count: int): + """ + Demonstrates agent invocation within a group chat. + + Parameters: + - agent: The ChatCompletionAgent to invoke. + - message_count: The number of messages to simulate in the conversation. + """ + chat = AgentGroupChat() # Initialize a new group chat + last_history_count = 0 + + index = 1 + while index <= message_count: + # Add user message to the chat + user_msg = ChatMessageContent(role=AuthorRole.USER, content=str(index)) + await chat.add_chat_message(user_msg) + print(f"# User: '{index}'") + + # Invoke the agent and display its response + async for message in chat.invoke(agent): + print(f"# {message.role} - {message.name or '*'}: '{message.content}'") + + # The index is incremented by 2 because the agent is told to: + # "Add one to the latest user number and spell it in Spanish without explanation." + # The user sends 1, 3, 5, etc., and the agent responds with 2, 4, 6, etc. (in Spanish) + index += 2 + + # Retrieve chat messages in descending order (newest first) + msgs = [] + async for m in chat.get_chat_messages(agent): + msgs.append(m) + + print(f"@ Message Count: {len(msgs)}\n") + + # Check for reduction in message count and print summaries + if len(msgs) < last_history_count: + self._print_summaries_from_back(msgs) + + last_history_count = len(msgs) + + def _print_summaries_from_front(self, messages: list[ChatMessageContent]): + """ + Prints summaries from the front of the message list. + + Parameters: + - messages: List of chat messages to process. + """ + summary_index = 0 + while summary_index < len(messages): + msg = messages[summary_index] + if msg.metadata and msg.metadata.get("__summary__"): + print(f"\tSummary: {msg.content}") + summary_index += 1 + else: + break + + def _print_summaries_from_back(self, messages: list[ChatMessageContent]): + """ + Prints summaries from the back of the message list. + + Parameters: + - messages: List of chat messages to process. 
+ """ + summary_index = len(messages) - 1 + while summary_index >= 0: + msg = messages[summary_index] + if msg.metadata and msg.metadata.get("__summary__"): + print(f"\tSummary: {msg.content}") + summary_index -= 1 + else: + break + + +# Main entry point for the script +async def main(): + # Initialize the example class + example = HistoryReducerExample() + + # Demonstrate truncation-based reduction + trunc_agent, history_reducer = example.create_truncating_agent( + # reducer_msg_count: + # Purpose: Defines the target number of messages to retain after applying truncation or summarization. + # What it controls: This parameter determines how much of the most recent conversation history + # is preserved while discarding or summarizing older messages. + # Why change it?: + # - Smaller values: Use when memory constraints are tight, or the assistant only needs a brief history + # to maintain context. + # - Larger values: Use when retaining more conversational context is critical for accurate responses + # or maintaining a richer dialogue. + reducer_msg_count=10, + # reducer_threshold: + # Purpose: Acts as a buffer to avoid reducing history prematurely when the current message count exceeds + # reducer_msg_count by a small margin. + # What it controls: Helps ensure that essential paired messages (like a user query and the assistant’s response) + # are not "orphaned" or lost during truncation or summarization. + # Why change it?: + # - Smaller values: Use when you want stricter reduction criteria and are okay with possibly cutting older + # pairs of messages sooner. + # - Larger values: Use when you want to minimize the risk of cutting a critical part of the conversation, + # especially for sensitive interactions like API function calls or complex responses. + reducer_threshold=10, + ) + # print("===TruncatedAgentReduction Demo===") + # await example.invoke_agent(trunc_agent, chat_history=history_reducer, message_count=50) + + # Demonstrate summarization-based reduction + sum_agent, history_reducer = example.create_summarizing_agent( + # Same configuration for summarization-based reduction + reducer_msg_count=10, # Target number of messages to retain + reducer_threshold=10, # Buffer to avoid premature reduction + ) + print("\n===SummarizedAgentReduction Demo===") + await example.invoke_agent(sum_agent, chat_history=history_reducer, message_count=50) + + # Demonstrate group chat with truncation + print("\n===TruncatedChatReduction Demo===") + trunc_agent.history_reducer.messages.clear() + await example.invoke_chat(trunc_agent, message_count=50) + + # Demonstrate group chat with summarization + print("\n===SummarizedChatReduction Demo===") + sum_agent.history_reducer.messages.clear() + await example.invoke_chat(sum_agent, message_count=50) + + +# Interaction between reducer_msg_count and reducer_threshold: +# The combination of these values determines when reduction occurs and how much history is kept. +# Example: +# If reducer_msg_count = 10 and reducer_threshold = 5, history will not be truncated until the total message count +# exceeds 15. This approach ensures flexibility in retaining conversational context while still adhering to memory +# constraints. + +# Recommendations: +# - Adjust for performance: Use a lower reducer_msg_count in environments with limited memory or when the assistant +# needs faster processing times. 
+# - Context sensitivity: Increase reducer_msg_count and reducer_threshold in use cases where maintaining continuity +# across multiple interactions is essential (e.g., multi-turn conversations or complex workflows). +# - Experiment: Start with the default values (10 and 10) and refine based on your application's behavior and the +# assistant's response quality. + + +# Execute the main function if the script is run directly +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer.py b/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer.py new file mode 100644 index 000000000000..338c76519b0e --- /dev/null +++ b/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer.py @@ -0,0 +1,156 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio + +from samples.concepts.setup.chat_completion_services import ( + Services, + get_chat_completion_service_and_request_settings, +) +from semantic_kernel import Kernel +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior +from semantic_kernel.contents import ChatHistorySummarizationReducer +from semantic_kernel.core_plugins.time_plugin import TimePlugin +from semantic_kernel.functions import KernelArguments + +# This sample shows how to create a chatbot using a kernel function and leverage a chat history +# summarization reducer. +# This sample uses the following main components: +# - a ChatCompletionService: This component is responsible for generating responses to user messages. +# - a Chat History Reducer: This component is responsible for keeping track and reducing the chat history. +# A Chat History Reducer is a subclass of ChatHistory that provides additional +# functionality to reduce the history. +# - a KernelFunction: This function will be a prompt function, meaning the function is composed of +# a prompt and will be invoked by Semantic Kernel. +# The chatbot in this sample is called Mosscap, who responds to user messages with long flowery prose. + +# [NOTE] +# The purpose of this sample is to demonstrate how to use a kernel function and use a chat history reducer. +# To build a basic chatbot, it is sufficient to use a ChatCompletionService with a chat history directly. + +# You can select from the following chat completion services: +# - Services.OPENAI +# - Services.AZURE_OPENAI +# - Services.AZURE_AI_INFERENCE +# - Services.ANTHROPIC +# - Services.BEDROCK +# - Services.GOOGLE_AI +# - Services.MISTRAL_AI +# - Services.OLLAMA +# - Services.ONNX +# - Services.VERTEX_AI +# Please make sure you have configured your environment correctly for the selected chat completion service. +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) + +# This is the system message that gives the chatbot its personality. +system_message = """ +You are a chat bot. Your name is Mosscap and +you have one goal: figure out what people need. +Your full name, should you need to know it, is +Splendid Speckled Mosscap. You communicate +effectively, but you tend to answer with long +flowery prose. +""" + +# Create a kernel and register a prompt function. +# The prompt here contains two variables: chat_history and user_input. +# They will be replaced by the kernel with the actual values when the function is invoked. 
+# [NOTE] +# The chat_history, which is a ChatHistory object, will be serialized to a string internally +# to create/render the final prompt. +# Since this sample uses a chat completion service, the prompt will be deserialized back to +# a ChatHistory object that gets passed to the chat completion service. This new chat history +# object will contain the original messages and the user input. +kernel = Kernel() +chat_function = kernel.add_function( + plugin_name="ChatBot", + function_name="Chat", + prompt="{{$chat_history}}{{$user_input}}", + template_format="semantic-kernel", + # You can attach the request settings to the function or + # pass the settings to the kernel.invoke method via the kernel arguments. + # If you specify the settings in both places, the settings in the kernel arguments will + # take precedence given the same service id. + # prompt_execution_settings=request_settings, +) + +# Invoking a kernel function requires a service, so we add the chat completion service to the kernel. +kernel.add_service(chat_completion_service) + +# The chat history reducer is responsible for summarizing the chat history. +# It's a subclass of ChatHistory that provides additional functionality to reduce the history. +# You may use it just like a regular ChatHistory object. +summarization_reducer = ChatHistorySummarizationReducer( + service=kernel.get_service(), + # target_count: + # Purpose: Defines the target number of messages to retain after applying summarization. + # What it controls: This parameter determines how much of the most recent conversation history + # is preserved while discarding or summarizing older messages. + # Why change it?: + # - Smaller values: Use when memory constraints are tight, or the assistant only needs a brief history + # to maintain context. + # - Larger values: Use when retaining more conversational context is critical for accurate responses + # or maintaining a richer dialogue. + target_count=3, + # threshold_count: + # Purpose: Acts as a buffer to avoid reducing history prematurely when the current message count exceeds + # target_count by a small margin. + # What it controls: Helps ensure that essential paired messages (like a user query and the assistant’s response) + # are not "orphaned" or lost during truncation or summarization. + # Why change it?: + # - Smaller values: Use when you want stricter reduction criteria and are okay with possibly cutting older + # pairs of messages sooner. + # - Larger values: Use when you want to minimize the risk of cutting a critical part of the conversation, + # especially for sensitive interactions like API function calls or complex responses. 
+ threshold_count=2, +) + +summarization_reducer.add_system_message(system_message) + +kernel.add_plugin(plugin=TimePlugin(), plugin_name="TimePlugin") + +request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto() + + +async def chat() -> bool: + try: + user_input = input("User:> ") + except (KeyboardInterrupt, EOFError): + print("\n\nExiting chat...") + return False + + if user_input == "exit": + print("\n\nExiting chat...") + return False + + await summarization_reducer.reduce() + + kernel_arguments = KernelArguments( + settings=request_settings, + chat_history=summarization_reducer, + user_input=user_input, + ) + answer = await kernel.invoke(plugin_name="ChatBot", function_name="Chat", arguments=kernel_arguments) + + if answer: + print(f"Mosscap:> {answer}") + summarization_reducer.add_user_message(user_input) + summarization_reducer.add_message(answer.value[0]) + + return True + + +async def main() -> None: + # Start the chat loop. The chat loop will continue until the user types "exit". + chatting = True + while chatting: + chatting = await chat() + + # Sample output: + # User:> Why is the sky blue in one sentence? + # Mosscap:> The sky is blue due to the scattering of sunlight by the molecules in the Earth's atmosphere, + # a phenomenon known as Rayleigh scattering, which causes shorter blue wavelengths to become more + # prominent in our visual perception. + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer_keep_func_content.py b/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer_keep_func_content.py new file mode 100644 index 000000000000..b5d0eae75d24 --- /dev/null +++ b/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer_keep_func_content.py @@ -0,0 +1,200 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio + +from samples.concepts.setup.chat_completion_services import ( + Services, + get_chat_completion_service_and_request_settings, +) +from semantic_kernel import Kernel +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior +from semantic_kernel.contents import ChatHistorySummarizationReducer +from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.contents.function_call_content import FunctionCallContent +from semantic_kernel.contents.function_result_content import FunctionResultContent +from semantic_kernel.core_plugins.time_plugin import TimePlugin +from semantic_kernel.functions import KernelArguments + +# This sample shows how to create a chatbot using a kernel function and leverage a chat history +# summarization reducer. +# This sample uses the following main components: +# - a ChatCompletionService: This component is responsible for generating responses to user messages. +# - a Chat History Reducer: This component is responsible for keeping track and reducing the chat history. +# A Chat History Reducer is a subclass of ChatHistory that provides additional +# functionality to reduce the history. +# - The Chat History Reducer configuration includes a flag `include_function_content_in_summary` that +# allows the reducer to include function call and result content in the summary. +# - a KernelFunction: This function will be a prompt function, meaning the function is composed of +# a prompt and will be invoked by Semantic Kernel. 
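The `include_function_content_in_summary` flag called out in the list above is the one thing that separates this sample from the previous one. For orientation, a condensed sketch of the configuration this file builds toward (the parameter values are illustrative, and the kernel is assumed to already have a chat completion service registered; the full, commented construction appears further down in this file):

```python
# Condensed, hedged sketch of the reducer used in this sample.
from semantic_kernel import Kernel
from semantic_kernel.contents import ChatHistorySummarizationReducer

kernel = Kernel()
# kernel.add_service(...)  # any configured chat completion service

summarization_reducer = ChatHistorySummarizationReducer(
    service=kernel.get_service(),
    target_count=3,  # illustrative value
    threshold_count=2,  # illustrative value
    # Let the generated summary also cover function call/result content:
    include_function_content_in_summary=True,
)
```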
+# The chatbot in this sample is called Mosscap, who responds to user messages with long flowery prose. + +# [NOTE] +# The purpose of this sample is to demonstrate how to use a kernel function and use a chat history reducer. +# To build a basic chatbot, it is sufficient to use a ChatCompletionService with a chat history directly. + +# You can select from the following chat completion services: +# - Services.OPENAI +# - Services.AZURE_OPENAI +# - Services.AZURE_AI_INFERENCE +# - Services.ANTHROPIC +# - Services.BEDROCK +# - Services.GOOGLE_AI +# - Services.MISTRAL_AI +# - Services.OLLAMA +# - Services.ONNX +# - Services.VERTEX_AI +# Please make sure you have configured your environment correctly for the selected chat completion service. +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) + +# This is the system message that gives the chatbot its personality. +system_message = """ +You are a chat bot. Your name is Mosscap and +you have one goal: figure out what people need. +Your full name, should you need to know it, is +Splendid Speckled Mosscap. You communicate +effectively, but you tend to answer with long +flowery prose. +""" + +# Create a kernel and register a prompt function. +# The prompt here contains two variables: chat_history and user_input. +# They will be replaced by the kernel with the actual values when the function is invoked. +# [NOTE] +# The chat_history, which is a ChatHistory object, will be serialized to a string internally +# to create/render the final prompt. +# Since this sample uses a chat completion service, the prompt will be deserialized back to +# a ChatHistory object that gets passed to the chat completion service. This new chat history +# object will contain the original messages and the user input. +kernel = Kernel() +chat_function = kernel.add_function( + plugin_name="ChatBot", + function_name="Chat", + prompt="{{$chat_history}}{{$user_input}}", + template_format="semantic-kernel", + # You can attach the request settings to the function or + # pass the settings to the kernel.invoke method via the kernel arguments. + # If you specify the settings in both places, the settings in the kernel arguments will + # take precedence given the same service id. + # prompt_execution_settings=request_settings, +) + +# Invoking a kernel function requires a service, so we add the chat completion service to the kernel. +kernel.add_service(chat_completion_service) + +# The chat history reducer is responsible for summarizing the chat history. +# It's a subclass of ChatHistory that provides additional functionality to reduce the history. +# You may use it just like a regular ChatHistory object. +summarization_reducer = ChatHistorySummarizationReducer( + service=kernel.get_service(), + # target_count: + # Purpose: Defines the target number of messages to retain after applying summarization. + # What it controls: This parameter determines how much of the most recent conversation history + # is preserved while discarding or summarizing older messages. + # Why change it?: + # - Smaller values: Use when memory constraints are tight, or the assistant only needs a brief history + # to maintain context. + # - Larger values: Use when retaining more conversational context is critical for accurate responses + # or maintaining a richer dialogue. + target_count=3, + # threshold_count: + # Purpose: Acts as a buffer to avoid reducing history prematurely when the current message count exceeds + # target_count by a small margin. 
+    #   What it controls: Helps ensure that essential paired messages (like a user query and the assistant's response)
+    #   are not "orphaned" or lost during truncation or summarization.
+    #   Why change it?:
+    #   - Smaller values: Use when you want stricter reduction criteria and are okay with possibly cutting older
+    #     pairs of messages sooner.
+    #   - Larger values: Use when you want to minimize the risk of cutting a critical part of the conversation,
+    #     especially for sensitive interactions like API function calls or complex responses.
+    threshold_count=2,
+    include_function_content_in_summary=True,
+)
+
+summarization_reducer.add_system_message(system_message)
+
+kernel.add_plugin(plugin=TimePlugin(), plugin_name="TimePlugin")
+
+request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto()
+
+
+# The following sets hold the ids of FunctionCallContent and FunctionResultContent items
+# that have previously been added to the chat history, so that each item is only added once.
+processed_fccs: set[str] = set()
+processed_frcs: set[str] = set()
+
+
+async def chat() -> bool:
+    global processed_fccs, processed_frcs
+
+    try:
+        user_input = input("User:> ")
+    except (KeyboardInterrupt, EOFError):
+        print("\n\nExiting chat...")
+        return False
+
+    if user_input == "exit":
+        print("\n\nExiting chat...")
+        return False
+
+    await summarization_reducer.reduce()
+
+    kernel_arguments = KernelArguments(
+        settings=request_settings,
+        chat_history=summarization_reducer,
+        user_input=user_input,
+    )
+    answer = await kernel.invoke(plugin_name="ChatBot", function_name="Chat", arguments=kernel_arguments)
+
+    if answer:
+        print(f"Mosscap:> {answer}")
+        summarization_reducer.add_user_message(user_input)
+        summarization_reducer.add_message(answer.value[0])
+
+        # Get the chat history from the FunctionResult's metadata
+        chat_history: ChatHistory = answer.metadata.get("messages")
+        if chat_history:
+            # Process the chat history to extract FunctionCallContent and FunctionResultContent items
+            # that we haven't previously added to the chat history
+            fcc: list[FunctionCallContent] = []
+            frc: list[FunctionResultContent] = []
+            for msg in chat_history.messages:
+                if msg.items:
+                    for item in msg.items:
+                        match item:
+                            case FunctionCallContent():
+                                if item.id not in processed_fccs:
+                                    fcc.append(item)
+                            case FunctionResultContent():
+                                if item.id not in processed_frcs:
+                                    frc.append(item)
+
+            # Since this example shows how to include FunctionCallContent and FunctionResultContent
+            # in the summary, re-add the new pairs to the chat history and record their ids in the
+            # processed sets so they are not added again on later turns.
+            for i, item in enumerate(fcc):
+                summarization_reducer.add_assistant_message_list([item])
+                processed_fccs.add(item.id)
+                # Safely check if there's a matching FunctionResultContent
+                if i < len(frc):
+                    assert fcc[i].id == frc[i].id  # nosec
+                    summarization_reducer.add_tool_message_list([frc[i]])
+                    processed_frcs.add(frc[i].id)
+
+    return True
+
+
+async def main() -> None:
+    # Start the chat loop. The chat loop will continue until the user types "exit".
+    chatting = True
+    while chatting:
+        chatting = await chat()
+
+    # Sample output:
+    # User:> Why is the sky blue in one sentence?
+    # Mosscap:> The sky is blue due to the scattering of sunlight by the molecules in the Earth's atmosphere,
+    #    a phenomenon known as Rayleigh scattering, which causes shorter blue wavelengths to become more
+    #    prominent in our visual perception.
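The pairing loop in `chat()` above is the core of this sample: new tool calls are re-added as assistant messages and their results as tool messages, with ids recorded so nothing is added twice. A minimal restatement of that pattern (the helper name and the pre-filtered `calls`/`results` lists are illustrative, not part of the SK API; `zip` stops at the shorter list, mirroring the sample's bounds check):

```python
from semantic_kernel.contents import ChatHistorySummarizationReducer
from semantic_kernel.contents.function_call_content import FunctionCallContent
from semantic_kernel.contents.function_result_content import FunctionResultContent


def readd_tool_content(
    history: ChatHistorySummarizationReducer,
    calls: list[FunctionCallContent],
    results: list[FunctionResultContent],
    seen_ids: set[str],
) -> None:
    """Re-add new call/result pairs so the summarizer can see them."""
    for call, result in zip(calls, results):
        assert call.id == result.id  # pairs must line up, as the sample asserts
        history.add_assistant_message_list([call])  # the model's tool call
        history.add_tool_message_list([result])  # the matching tool output
        seen_ids.add(call.id)  # record the id so the pair is added only once
```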
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/python/samples/concepts/chat_completion/simple_chatbot_with_truncation_history_reducer.py b/python/samples/concepts/chat_completion/simple_chatbot_with_truncation_history_reducer.py
new file mode 100644
index 000000000000..075cbb8620c0
--- /dev/null
+++ b/python/samples/concepts/chat_completion/simple_chatbot_with_truncation_history_reducer.py
@@ -0,0 +1,160 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+
+from samples.concepts.setup.chat_completion_services import (
+    Services,
+    get_chat_completion_service_and_request_settings,
+)
+from semantic_kernel import Kernel
+from semantic_kernel.contents import ChatHistoryTruncationReducer
+from semantic_kernel.functions import KernelArguments
+
+# This sample shows how to create a chatbot using a kernel function and leverage a chat history
+# truncation reducer.
+# This sample uses the following main components:
+# - a ChatCompletionService: This component is responsible for generating responses to user messages.
+# - a Chat History Reducer: This component is responsible for keeping track and reducing the chat history.
+#       A Chat History Reducer is a subclass of ChatHistory that provides additional
+#       functionality to reduce the history.
+# - a KernelFunction: This function will be a prompt function, meaning the function is composed of
+#       a prompt and will be invoked by Semantic Kernel.
+# The chatbot in this sample is called Mosscap, who responds to user messages with long flowery prose.
+
+# [NOTE]
+# The purpose of this sample is to demonstrate how to use a kernel function and use a chat history reducer.
+# To build a basic chatbot, it is sufficient to use a ChatCompletionService with a chat history directly.
+
+# You can select from the following chat completion services:
+# - Services.OPENAI
+# - Services.AZURE_OPENAI
+# - Services.AZURE_AI_INFERENCE
+# - Services.ANTHROPIC
+# - Services.BEDROCK
+# - Services.GOOGLE_AI
+# - Services.MISTRAL_AI
+# - Services.OLLAMA
+# - Services.ONNX
+# - Services.VERTEX_AI
+# Please make sure you have configured your environment correctly for the selected chat completion service.
+chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI)
+
+# This is the system message that gives the chatbot its personality.
+system_message = """
+You are a chat bot. Your name is Mosscap and
+you have one goal: figure out what people need.
+Your full name, should you need to know it, is
+Splendid Speckled Mosscap. You communicate
+effectively, but you tend to answer with long
+flowery prose.
+"""
+
+# Create a kernel and register a prompt function.
+# The prompt here contains two variables: chat_history and user_input.
+# They will be replaced by the kernel with the actual values when the function is invoked.
+# [NOTE]
+# The chat_history, which is a ChatHistory object, will be serialized to a string internally
+# to create/render the final prompt.
+# Since this sample uses a chat completion service, the prompt will be deserialized back to
+# a ChatHistory object that gets passed to the chat completion service. This new chat history
+# object will contain the original messages and the user input.
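Both reducers in this PR defer work until the history outgrows the configured budget, as the inline comments below spell out: reduction triggers only once the message count exceeds `target_count + threshold_count`. A hypothetical helper restating that rule (the function is ours, not part of the SK API):

```python
def should_reduce(message_count: int, target_count: int, threshold_count: int) -> bool:
    """Reduction triggers only once the history exceeds target + threshold."""
    return message_count > target_count + threshold_count


# With the values used below (target_count=3, threshold_count=2),
# reduction first fires once the history holds more than 5 messages.
assert should_reduce(6, 3, 2)
assert not should_reduce(5, 3, 2)
```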
+kernel = Kernel()
+chat_function = kernel.add_function(
+    plugin_name="ChatBot",
+    function_name="Chat",
+    prompt="{{$chat_history}}{{$user_input}}",
+    template_format="semantic-kernel",
+    # You can attach the request settings to the function or
+    # pass the settings to the kernel.invoke method via the kernel arguments.
+    # If you specify the settings in both places, the settings in the kernel arguments will
+    # take precedence given the same service id.
+    # prompt_execution_settings=request_settings,
+)
+
+# Invoking a kernel function requires a service, so we add the chat completion service to the kernel.
+kernel.add_service(chat_completion_service)
+
+# The chat history reducer is responsible for truncating the chat history.
+# It's a subclass of ChatHistory that provides additional functionality to reduce the history.
+# You may use it just like a regular ChatHistory object.
+truncation_reducer = ChatHistoryTruncationReducer(
+    service=kernel.get_service(),
+    # target_count:
+    # Purpose: Defines the target number of messages to retain after applying truncation.
+    # What it controls: This parameter determines how much of the most recent conversation history
+    #   is preserved while discarding older messages.
+    # Why change it?:
+    # - Smaller values: Use when memory constraints are tight, or the assistant only needs a brief history
+    #   to maintain context.
+    # - Larger values: Use when retaining more conversational context is critical for accurate responses
+    #   or maintaining a richer dialogue.
+    target_count=3,
+    # threshold_count:
+    # Purpose: Acts as a buffer to avoid reducing history prematurely when the current message count exceeds
+    #   target_count by a small margin.
+    # What it controls: Helps ensure that essential paired messages (like a user query and the assistant's response)
+    #   are not "orphaned" or lost during truncation.
+    # Why change it?:
+    # - Smaller values: Use when you want stricter reduction criteria and are okay with possibly cutting older
+    #   pairs of messages sooner.
+    # - Larger values: Use when you want to minimize the risk of cutting a critical part of the conversation,
+    #   especially for sensitive interactions like API function calls or complex responses.
+    threshold_count=2,
+)
+
+truncation_reducer.add_system_message(system_message)
+
+
+async def chat() -> bool:
+    try:
+        user_input = input("User:> ")
+    except (KeyboardInterrupt, EOFError):
+        print("\n\nExiting chat...")
+        return False
+
+    if user_input == "exit":
+        print("\n\nExiting chat...")
+        return False
+
+    # Attempt to reduce before adding the user message to the chat history.
+    await truncation_reducer.reduce()
+
+    # Get the chat message content from the chat completion service.
+    kernel_arguments = KernelArguments(
+        settings=request_settings,
+        # Use keyword arguments to pass the chat history and user input to the kernel function.
+        chat_history=truncation_reducer,
+        user_input=user_input,
+    )
+
+    answer = await kernel.invoke(plugin_name="ChatBot", function_name="Chat", arguments=kernel_arguments)
+    # Alternatively, you can invoke the function directly with the kernel as an argument:
+    # answer = await chat_function.invoke(kernel, kernel_arguments)
+    if answer:
+        print(f"Mosscap:> {answer}")
+        # Since the user_input is rendered by the template, it is not yet part of the chat history, so we add it here.
+ truncation_reducer.add_user_message(user_input) + # Add the chat message to the chat history to keep track of the conversation. + truncation_reducer.add_message(answer.value[0]) + + return True + + +async def main() -> None: + # Start the chat loop. The chat loop will continue until the user types "exit". + chatting = True + while chatting: + chatting = await chat() + + # Sample output: + # User:> Why is the sky blue in one sentence? + # Mosscap:> The sky is blue due to the scattering of sunlight by the molecules in the Earth's atmosphere, + # a phenomenon known as Rayleigh scattering, which causes shorter blue wavelengths to become more + # prominent in our visual perception. + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/plugins/openai_function_calling_with_custom_plugin.py b/python/samples/concepts/plugins/openai_function_calling_with_custom_plugin.py index 6adde925a390..c556b7e9820c 100644 --- a/python/samples/concepts/plugins/openai_function_calling_with_custom_plugin.py +++ b/python/samples/concepts/plugins/openai_function_calling_with_custom_plugin.py @@ -120,14 +120,13 @@ async def main(): chat_history.add_message(result) for item in result.items: - await chat._process_function_call( + await kernel.invoke_function_call( function_call=item, - kernel=kernel, chat_history=chat_history, arguments=KernelArguments(), function_call_count=1, request_index=0, - function_call_behavior=settings.function_choice_behavior, + function_behavior=settings.function_choice_behavior, ) diff --git a/python/samples/concepts/reasoning/simple_reasoning_function_calling.py b/python/samples/concepts/reasoning/simple_reasoning_function_calling.py index 238d69753f88..0da02adacefe 100644 --- a/python/samples/concepts/reasoning/simple_reasoning_function_calling.py +++ b/python/samples/concepts/reasoning/simple_reasoning_function_calling.py @@ -70,7 +70,9 @@ Note: Unsupported features may be added in future updates. """ -chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.OPENAI) +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings( + Services.OPENAI, instruction_role="developer" +) # This is the system message that gives the chatbot its personality. developer_message = """ diff --git a/python/samples/concepts/setup/chat_completion_services.py b/python/samples/concepts/setup/chat_completion_services.py index 40dd127eda47..ee4d6d2dfa67 100644 --- a/python/samples/concepts/setup/chat_completion_services.py +++ b/python/samples/concepts/setup/chat_completion_services.py @@ -32,28 +32,50 @@ class Services(str, Enum): def get_chat_completion_service_and_request_settings( service_name: Services, + instruction_role: str | None = None, ) -> tuple["ChatCompletionClientBase", "PromptExecutionSettings"]: - """Return service and request settings.""" + """Return service and request settings. + + Args: + service_name (Services): The service name. + instruction_role (str | None): The role to use for 'instruction' messages, for example, + 'system' or 'developer'. Defaults to 'system'. Currently only supported for OpenAI reasoning models. 
+ """ + # Use lambdas or functions to delay instantiation chat_services = { - Services.OPENAI: get_openai_chat_completion_service_and_request_settings, - Services.AZURE_OPENAI: get_azure_openai_chat_completion_service_and_request_settings, - Services.AZURE_AI_INFERENCE: get_azure_ai_inference_chat_completion_service_and_request_settings, - Services.ANTHROPIC: get_anthropic_chat_completion_service_and_request_settings, - Services.BEDROCK: get_bedrock_chat_completion_service_and_request_settings, - Services.GOOGLE_AI: get_google_ai_chat_completion_service_and_request_settings, - Services.MISTRAL_AI: get_mistral_ai_chat_completion_service_and_request_settings, - Services.OLLAMA: get_ollama_chat_completion_service_and_request_settings, - Services.ONNX: get_onnx_chat_completion_service_and_request_settings, - Services.VERTEX_AI: get_vertex_ai_chat_completion_service_and_request_settings, + Services.OPENAI: lambda: get_openai_chat_completion_service_and_request_settings( + instruction_role=instruction_role + ), + Services.AZURE_OPENAI: lambda: get_azure_openai_chat_completion_service_and_request_settings( + instruction_role=instruction_role + ), + Services.AZURE_AI_INFERENCE: lambda: get_azure_ai_inference_chat_completion_service_and_request_settings( + instruction_role=instruction_role + ), + Services.ANTHROPIC: lambda: get_anthropic_chat_completion_service_and_request_settings(), + Services.BEDROCK: lambda: get_bedrock_chat_completion_service_and_request_settings(), + Services.GOOGLE_AI: lambda: get_google_ai_chat_completion_service_and_request_settings(), + Services.MISTRAL_AI: lambda: get_mistral_ai_chat_completion_service_and_request_settings(), + Services.OLLAMA: lambda: get_ollama_chat_completion_service_and_request_settings(), + Services.ONNX: lambda: get_onnx_chat_completion_service_and_request_settings(), + Services.VERTEX_AI: lambda: get_vertex_ai_chat_completion_service_and_request_settings(), } + + # Call the appropriate lambda or function based on the service name + if service_name not in chat_services: + raise ValueError(f"Unsupported service name: {service_name}") return chat_services[service_name]() -def get_openai_chat_completion_service_and_request_settings() -> tuple[ - "ChatCompletionClientBase", "PromptExecutionSettings" -]: +def get_openai_chat_completion_service_and_request_settings( + instruction_role: str | None = None, +) -> tuple["ChatCompletionClientBase", "PromptExecutionSettings"]: """Return OpenAI chat completion service and request settings. + Args: + instruction_role (str | None): The role to use for 'instruction' messages, for example, + 'developer' or 'system'. (Optional) + The service credentials can be read by 3 ways: 1. Via the constructor 2. 
Via the environment variables @@ -70,7 +92,7 @@ def get_openai_chat_completion_service_and_request_settings() -> tuple[ OpenAIChatPromptExecutionSettings, ) - chat_service = OpenAIChatCompletion(service_id=service_id) + chat_service = OpenAIChatCompletion(service_id=service_id, instruction_role=instruction_role) request_settings = OpenAIChatPromptExecutionSettings( service_id=service_id, max_tokens=2000, temperature=0.7, top_p=0.8 ) @@ -78,11 +100,15 @@ def get_openai_chat_completion_service_and_request_settings() -> tuple[ return chat_service, request_settings -def get_azure_openai_chat_completion_service_and_request_settings() -> tuple[ - "ChatCompletionClientBase", "PromptExecutionSettings" -]: +def get_azure_openai_chat_completion_service_and_request_settings( + instruction_role: str | None = None, +) -> tuple["ChatCompletionClientBase", "PromptExecutionSettings"]: """Return Azure OpenAI chat completion service and request settings. + Args: + instruction_role (str | None): The role to use for 'instruction' messages, for example, + 'developer' or 'system'. (Optional) + The service credentials can be read by 3 ways: 1. Via the constructor 2. Via the environment variables @@ -99,15 +125,15 @@ def get_azure_openai_chat_completion_service_and_request_settings() -> tuple[ AzureChatPromptExecutionSettings, ) - chat_service = AzureChatCompletion(service_id=service_id) + chat_service = AzureChatCompletion(service_id=service_id, instruction_role=instruction_role) request_settings = AzureChatPromptExecutionSettings(service_id=service_id) return chat_service, request_settings -def get_azure_ai_inference_chat_completion_service_and_request_settings() -> tuple[ - "ChatCompletionClientBase", "PromptExecutionSettings" -]: +def get_azure_ai_inference_chat_completion_service_and_request_settings( + instruction_role: str | None = None, +) -> tuple["ChatCompletionClientBase", "PromptExecutionSettings"]: """Return Azure AI Inference chat completion service and request settings. The service credentials can be read by 3 ways: @@ -129,6 +155,7 @@ def get_azure_ai_inference_chat_completion_service_and_request_settings() -> tup chat_service = AzureAIInferenceChatCompletion( service_id=service_id, ai_model_id="id", # The model ID is simply an identifier as the model id cannot be obtained programmatically. 
+ instruction_role=instruction_role, ) request_settings = AzureAIInferenceChatPromptExecutionSettings(service_id=service_id) diff --git a/python/semantic_kernel/agents/agent.py b/python/semantic_kernel/agents/agent.py index 71728feb8362..56cd115a7751 100644 --- a/python/semantic_kernel/agents/agent.py +++ b/python/semantic_kernel/agents/agent.py @@ -2,17 +2,21 @@ import uuid from collections.abc import Iterable -from typing import ClassVar +from typing import TYPE_CHECKING, ClassVar from pydantic import Field from semantic_kernel.agents.channels.agent_channel import AgentChannel +from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer from semantic_kernel.kernel import Kernel from semantic_kernel.kernel_pydantic import KernelBaseModel from semantic_kernel.utils.experimental_decorator import experimental_class from semantic_kernel.utils.naming import generate_random_ascii_name from semantic_kernel.utils.validation import AGENT_NAME_REGEX +if TYPE_CHECKING: + from semantic_kernel.contents.chat_history import ChatHistory + @experimental_class class Agent(KernelBaseModel): @@ -37,6 +41,22 @@ class Agent(KernelBaseModel): instructions: str | None = None kernel: Kernel = Field(default_factory=Kernel) channel_type: ClassVar[type[AgentChannel] | None] = None + history_reducer: ChatHistoryReducer | None = None + + async def reduce_history(self, history: "ChatHistory") -> bool: + """Perform the reduction on the provided history, returning True if reduction occurred.""" + if self.history_reducer is None: + return False + + self.history_reducer.messages = history.messages + + new_messages = await self.history_reducer.reduce() + if new_messages is not None: + history.messages.clear() + history.messages.extend(new_messages) + return True + + return False def get_channel_keys(self) -> Iterable[str]: """Get the channel keys. @@ -46,7 +66,11 @@ def get_channel_keys(self) -> Iterable[str]: """ if not self.channel_type: raise NotImplementedError("Unable to get channel keys. Channel type not configured.") - return [self.channel_type.__name__] + yield self.channel_type.__name__ + + if self.history_reducer is not None: + yield self.history_reducer.__class__.__name__ + yield str(self.history_reducer.__hash__) async def create_channel(self) -> AgentChannel: """Create a channel. 
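The new `Agent.reduce_history` hook added above mutates a `ChatHistory` in place and returns whether a reduction occurred. A minimal usage sketch (agent construction elided; assumes an agent configured with a `history_reducer`, as in the samples):

```python
from semantic_kernel.agents import ChatCompletionAgent
from semantic_kernel.contents.chat_history import ChatHistory


async def maybe_reduce(agent: ChatCompletionAgent, history: ChatHistory) -> None:
    # reduce_history returns True only when the reducer rewrote the history.
    if await agent.reduce_history(history):
        print(f"History reduced to {len(history.messages)} messages")
```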
diff --git a/python/semantic_kernel/agents/channels/chat_history_channel.py b/python/semantic_kernel/agents/channels/chat_history_channel.py index 563efeaef610..057c005b3d3d 100644 --- a/python/semantic_kernel/agents/channels/chat_history_channel.py +++ b/python/semantic_kernel/agents/channels/chat_history_channel.py @@ -64,6 +64,9 @@ async def invoke( f"Invalid channel binding for agent with id: `{id}` with name: ({type(agent).__name__})" ) + # pre-process history reduction + await agent.reduce_history(self) + message_count = len(self.messages) mutated_history = set() message_queue: Deque[ChatMessageContent] = deque() @@ -119,6 +122,9 @@ async def invoke_stream( f"Invalid channel binding for agent with id: `{id}` with name: ({type(agent).__name__})" ) + # pre-process history reduction + await agent.reduce_history(self) + message_count = len(self.messages) async for response_message in agent.invoke_stream(self): diff --git a/python/semantic_kernel/agents/chat_completion/chat_completion_agent.py b/python/semantic_kernel/agents/chat_completion/chat_completion_agent.py index 352787e81d8c..cbdb218ad616 100644 --- a/python/semantic_kernel/agents/chat_completion/chat_completion_agent.py +++ b/python/semantic_kernel/agents/chat_completion/chat_completion_agent.py @@ -12,6 +12,7 @@ from semantic_kernel.const import DEFAULT_SERVICE_NAME from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.exceptions import KernelServiceNotFoundError @@ -46,6 +47,7 @@ def __init__( description: str | None = None, instructions: str | None = None, execution_settings: PromptExecutionSettings | None = None, + history_reducer: ChatHistoryReducer | None = None, ) -> None: """Initialize a new instance of ChatCompletionAgent. @@ -59,6 +61,7 @@ def __init__( description: The description of the agent. (optional) instructions: The instructions for the agent. (optional) execution_settings: The execution settings for the agent. (optional) + history_reducer: The history reducer for the agent. (optional) """ if not service_id: service_id = DEFAULT_SERVICE_NAME @@ -75,6 +78,8 @@ def __init__( args["id"] = id if kernel is not None: args["kernel"] = kernel + if history_reducer is not None: + args["history_reducer"] = history_reducer super().__init__(**args) @trace_agent_invocation diff --git a/python/semantic_kernel/agents/strategies/selection/kernel_function_selection_strategy.py b/python/semantic_kernel/agents/strategies/selection/kernel_function_selection_strategy.py index 3879fab95aca..65f7dfb2ae0b 100644 --- a/python/semantic_kernel/agents/strategies/selection/kernel_function_selection_strategy.py +++ b/python/semantic_kernel/agents/strategies/selection/kernel_function_selection_strategy.py @@ -1,6 +1,13 @@ # Copyright (c) Microsoft. All rights reserved. 
import logging +import sys + +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover + from collections.abc import Callable from inspect import isawaitable from typing import TYPE_CHECKING, ClassVar @@ -9,6 +16,7 @@ from semantic_kernel.agents.strategies.selection.selection_strategy import SelectionStrategy from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer from semantic_kernel.exceptions.agent_exceptions import AgentExecutionException from semantic_kernel.functions.kernel_arguments import KernelArguments from semantic_kernel.functions.kernel_function import KernelFunction @@ -34,9 +42,11 @@ class KernelFunctionSelectionStrategy(SelectionStrategy): function: KernelFunction kernel: Kernel result_parser: Callable[..., str] = Field(default_factory=lambda: (lambda: "")) + history_reducer: ChatHistoryReducer | None = None - async def next(self, agents: list["Agent"], history: list[ChatMessageContent]) -> "Agent": - """Check if the agent should terminate. + @override + async def select_agent(self, agents: list["Agent"], history: list[ChatMessageContent]) -> "Agent": + """Select the next agent to interact with. Args: agents: The list of agents to select from. @@ -48,6 +58,12 @@ async def next(self, agents: list["Agent"], history: list[ChatMessageContent]) - Raises: AgentExecutionException: If the strategy fails to execute the function or select the next agent """ + if self.history_reducer is not None: + self.history_reducer.messages = history + reduced_history = await self.history_reducer.reduce() + if reduced_history is not None: + history = reduced_history.messages + original_arguments = self.arguments or KernelArguments() execution_settings = original_arguments.execution_settings or {} diff --git a/python/semantic_kernel/agents/strategies/selection/selection_strategy.py b/python/semantic_kernel/agents/strategies/selection/selection_strategy.py index cef5625432c9..6f453a50a876 100644 --- a/python/semantic_kernel/agents/strategies/selection/selection_strategy.py +++ b/python/semantic_kernel/agents/strategies/selection/selection_strategy.py @@ -1,22 +1,29 @@ # Copyright (c) Microsoft. All rights reserved. -from abc import ABC, abstractmethod +from abc import ABC from typing import TYPE_CHECKING +from semantic_kernel.agents import Agent +from semantic_kernel.exceptions.agent_exceptions import AgentExecutionException from semantic_kernel.kernel_pydantic import KernelBaseModel from semantic_kernel.utils.experimental_decorator import experimental_class if TYPE_CHECKING: - from semantic_kernel.agents import Agent from semantic_kernel.contents.chat_message_content import ChatMessageContent @experimental_class class SelectionStrategy(KernelBaseModel, ABC): - """Contract for an agent selection strategy.""" + """Base strategy class for selecting the next agent in a chat.""" - @abstractmethod - async def next(self, agents: list["Agent"], history: list["ChatMessageContent"]) -> "Agent": + has_selected: bool = False + initial_agent: Agent | None = None + + async def next( + self, + agents: list[Agent], + history: list["ChatMessageContent"], + ) -> Agent: """Select the next agent to interact with. Args: @@ -24,6 +31,27 @@ async def next(self, agents: list["Agent"], history: list["ChatMessageContent"]) history: The history of messages in the conversation. Returns: - The next agent to interact with. 
+ The agent who takes the next turn. + """ + if not agents and self.initial_agent is None: + raise AgentExecutionException("Agent Failure - No agents present to select.") + + # If it's the first selection and we have an initial agent, use it + if not self.has_selected and self.initial_agent is not None: + agent = self.initial_agent + else: + agent = await self.select_agent(agents, history) + + self.has_selected = True + return agent + + async def select_agent( + self, + agents: list[Agent], + history: list["ChatMessageContent"], + ) -> Agent: + """Determines which agent goes next. Override for custom logic. + + By default, this fallback returns the first agent in the list. """ - ... + return agents[0] diff --git a/python/semantic_kernel/agents/strategies/selection/sequential_selection_strategy.py b/python/semantic_kernel/agents/strategies/selection/sequential_selection_strategy.py index 8304f405df7e..b60fc5f0f21f 100644 --- a/python/semantic_kernel/agents/strategies/selection/sequential_selection_strategy.py +++ b/python/semantic_kernel/agents/strategies/selection/sequential_selection_strategy.py @@ -1,5 +1,13 @@ # Copyright (c) Microsoft. All rights reserved. +import logging +import sys + +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover + from typing import TYPE_CHECKING from pydantic import PrivateAttr @@ -12,34 +20,61 @@ from semantic_kernel.contents.chat_message_content import ChatMessageContent +logger: logging.Logger = logging.getLogger(__name__) + + @experimental_class class SequentialSelectionStrategy(SelectionStrategy): - """A selection strategy that selects agents in a sequential order.""" + """Round-robin turn-taking strategy. Agent order is based on the order in which they joined.""" + + _index: int = PrivateAttr(default=-1) - _index: int = PrivateAttr(default=0) + def reset(self) -> None: + """Reset selection to the initial/first agent.""" + self._index = -1 - def reset(self): - """Reset the index.""" - self._index = 0 + def _increment_index(self, agent_count: int) -> None: + """Increment the index in a circular manner.""" + self._index = (self._index + 1) % agent_count - async def next(self, agents: list["Agent"], history: list["ChatMessageContent"]) -> "Agent": - """Select the next agent to interact with. + @override + async def select_agent( + self, + agents: list["Agent"], + history: list["ChatMessageContent"], + ) -> "Agent": + """Select the next agent in a round-robin fashion. Args: agents: The list of agents to select from. history: The history of messages in the conversation. Returns: - The next agent to interact with. + The agent who takes the next turn. 
""" - if len(agents) == 0: - raise ValueError("No agents to select from") - if self._index >= len(agents): - self.reset() + self._index = -1 - agent = agents[self._index] + if ( + self.has_selected + and self.initial_agent is not None + and len(agents) > 0 + and agents[0] == self.initial_agent + and self._index < 0 + ): + # Avoid selecting the same agent twice in a row + self._increment_index(len(agents)) - self._index = (self._index + 1) % len(agents) + # Main index increment + self._increment_index(len(agents)) + + # Pick the agent + agent = agents[self._index] + logger.info( + "Selected agent at index %d (ID: %s, name: %s)", + self._index, + agent.id, + agent.name, + ) return agent diff --git a/python/semantic_kernel/agents/strategies/termination/kernel_function_termination_strategy.py b/python/semantic_kernel/agents/strategies/termination/kernel_function_termination_strategy.py index f46cd79704ef..93c59e10ed84 100644 --- a/python/semantic_kernel/agents/strategies/termination/kernel_function_termination_strategy.py +++ b/python/semantic_kernel/agents/strategies/termination/kernel_function_termination_strategy.py @@ -9,6 +9,7 @@ from semantic_kernel.agents.strategies.termination.termination_strategy import TerminationStrategy from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer from semantic_kernel.functions.kernel_arguments import KernelArguments from semantic_kernel.functions.kernel_function import KernelFunction from semantic_kernel.kernel import Kernel @@ -33,6 +34,7 @@ class KernelFunctionTerminationStrategy(TerminationStrategy): function: KernelFunction kernel: Kernel result_parser: Callable[..., bool] = Field(default_factory=lambda: (lambda: True)) + history_reducer: ChatHistoryReducer | None = None async def should_agent_terminate( self, @@ -48,6 +50,12 @@ async def should_agent_terminate( Returns: True if the agent should terminate, False otherwise """ + if self.history_reducer is not None: + self.history_reducer.messages = history + reduced_history = await self.history_reducer.reduce() + if reduced_history is not None: + history = reduced_history.messages + original_arguments = self.arguments or KernelArguments() execution_settings = original_arguments.execution_settings or {} diff --git a/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py index 8541fd0dc651..c18fcb30c732 100644 --- a/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py @@ -5,7 +5,7 @@ from pydantic import Field, model_validator -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType +from semantic_kernel.connectors.ai.function_choice_type import FunctionChoiceType from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.exceptions import ServiceInvalidExecutionSettingsError diff --git a/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py b/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py index 87e967184234..f5baec134528 100644 --- 
a/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py @@ -4,7 +4,7 @@ import logging import sys from collections.abc import AsyncGenerator, Callable -from typing import Any, ClassVar +from typing import TYPE_CHECKING, Any, ClassVar if sys.version_info >= (3, 12): from typing import override # pragma: no cover @@ -32,7 +32,6 @@ ) from semantic_kernel.connectors.ai.anthropic.settings.anthropic_settings import AnthropicSettings from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents.chat_history import ChatHistory @@ -56,6 +55,9 @@ trace_streaming_chat_completion, ) +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration + # map finish reasons from Anthropic to Semantic Kernel ANTHROPIC_TO_SEMANTIC_KERNEL_FINISH_REASON_MAP = { "end_turn": SemanticKernelFinishReason.STOP, @@ -136,7 +138,7 @@ def service_url(self) -> str | None: @override def _update_function_choice_settings_callback( self, - ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]: + ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]: return update_settings_from_function_call_configuration @override diff --git a/python/semantic_kernel/connectors/ai/anthropic/services/utils.py b/python/semantic_kernel/connectors/ai/anthropic/services/utils.py index 31acecb0468f..e41905e1cc91 100644 --- a/python/semantic_kernel/connectors/ai/anthropic/services/utils.py +++ b/python/semantic_kernel/connectors/ai/anthropic/services/utils.py @@ -3,11 +3,9 @@ import json import logging from collections.abc import Callable, Mapping -from typing import Any +from typing import TYPE_CHECKING, Any -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.function_result_content import FunctionResultContent @@ -18,6 +16,11 @@ logger: logging.Logger = logging.getLogger(__name__) +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration + from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings + + def _format_user_message(message: ChatMessageContent) -> dict[str, Any]: """Format a user message to the expected object for the Anthropic client. 
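A recurring pattern in these hunks: imports needed only for annotations move under `typing.TYPE_CHECKING`, and the annotations that use them become strings. A generic sketch of the pattern; the `configure` function here is illustrative, not part of the PR:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Only evaluated by static type checkers; at runtime this import never
    # executes, which is what breaks the circular-import chain.
    from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings


def configure(settings: "PromptExecutionSettings") -> None:
    # The quoted annotation defers name resolution, so no runtime import is needed.
    ...
```

The trade-off is that the name must stay quoted (or `from __future__ import annotations` must be in effect) everywhere it appears in signatures, which is why the callback return types below gain quotes.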
@@ -118,8 +121,8 @@ def _format_tool_message(message: ChatMessageContent) -> dict[str, Any]: def update_settings_from_function_call_configuration( - function_choice_configuration: FunctionCallChoiceConfiguration, - settings: PromptExecutionSettings, + function_choice_configuration: "FunctionCallChoiceConfiguration", + settings: "PromptExecutionSettings", type: FunctionChoiceType, ) -> None: """Update the settings from a FunctionChoiceConfiguration.""" diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_base.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_base.py index 772ddb28e6c7..64e0806804e1 100644 --- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_base.py +++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_base.py @@ -52,6 +52,7 @@ def __init__( env_file_path: str | None = None, env_file_encoding: str | None = None, client: ChatCompletionsClient | EmbeddingsClient | None = None, + instruction_role: str | None = None, **kwargs: Any, ) -> None: """Initialize the Azure AI Inference Chat Completion service. @@ -68,6 +69,7 @@ def __init__( env_file_path (str | None): The path to the environment file. (Optional) env_file_encoding (str | None): The encoding of the environment file. (Optional) client (ChatCompletionsClient | None): The Azure AI Inference client to use. (Optional) + instruction_role (str | None): The role to use for 'instruction' messages. (Optional) **kwargs: Additional keyword arguments. Raises: @@ -100,11 +102,16 @@ def __init__( user_agent=SEMANTIC_KERNEL_USER_AGENT, ) - super().__init__( - client=client, - managed_client=managed_client, + args: dict[str, Any] = { + "client": client, + "managed_client": managed_client, **kwargs, - ) + } + + if instruction_role: + args["instruction_role"] = instruction_role + + super().__init__(**args) def __del__(self) -> None: """Close the client when the object is deleted.""" diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py index 8ac10561f142..9a43591938e6 100644 --- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py @@ -30,7 +30,6 @@ from semantic_kernel.connectors.ai.azure_ai_inference.services.utils import MESSAGE_CONVERTERS from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase from semantic_kernel.connectors.ai.completion_usage import CompletionUsage -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_calling_utils import update_settings_from_function_call_configuration from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType from semantic_kernel.contents.chat_history import ChatHistory @@ -46,6 +45,7 @@ from semantic_kernel.utils.experimental_decorator import experimental_class if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings logger: logging.Logger = logging.getLogger(__name__) @@ -66,6 +66,7 @@ def 
__init__( env_file_path: str | None = None, env_file_encoding: str | None = None, client: ChatCompletionsClient | None = None, + instruction_role: str | None = None, ) -> None: """Initialize the Azure AI Inference Chat Completion service. @@ -82,20 +83,29 @@ def __init__( env_file_path (str | None): The path to the environment file. (Optional) env_file_encoding (str | None): The encoding of the environment file. (Optional) client (ChatCompletionsClient | None): The Azure AI Inference client to use. (Optional) + instruction_role (str | None): The role to use for 'instruction' messages, for example, summarization + prompts could use `developer` or `system`. (Optional) Raises: ServiceInitializationError: If an error occurs during initialization. """ - super().__init__( - ai_model_id=ai_model_id, - service_id=service_id or ai_model_id, - client_type=AzureAIInferenceClientType.ChatCompletions, - api_key=api_key, - endpoint=endpoint, - env_file_path=env_file_path, - env_file_encoding=env_file_encoding, - client=client, - ) + args: dict[str, Any] = { + "ai_model_id": ai_model_id, + "api_key": api_key, + "client_type": AzureAIInferenceClientType.ChatCompletions, + "client": client, + "endpoint": endpoint, + "env_file_path": env_file_path, + "env_file_encoding": env_file_encoding, + } + + if service_id: + args["service_id"] = service_id + + if instruction_role: + args["instruction_role"] = instruction_role + + super().__init__(**args) # region Overriding base class methods @@ -179,7 +189,7 @@ def _verify_function_choice_settings(self, settings: "PromptExecutionSettings") @override def _update_function_choice_settings_callback( self, - ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]: + ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]: return update_settings_from_function_call_configuration @override @@ -199,7 +209,13 @@ def _prepare_chat_history_for_request( chat_request_messages: list[ChatRequestMessage] = [] for message in chat_history.messages: - chat_request_messages.append(MESSAGE_CONVERTERS[message.role](message)) + # If instruction_role is 'developer' and the message role is 'system', change it to 'developer' + role = ( + AuthorRole.DEVELOPER + if self.instruction_role == "developer" and message.role == AuthorRole.SYSTEM + else message.role + ) + chat_request_messages.append(MESSAGE_CONVERTERS[role](message)) return chat_request_messages diff --git a/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py b/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py index c163b6ffda74..5c4f3e6cd192 100644 --- a/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py @@ -30,7 +30,6 @@ ) from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase from semantic_kernel.connectors.ai.completion_usage import CompletionUsage -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType from semantic_kernel.contents.chat_message_content import ITEM_TYPES, ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent @@ -52,6 +51,7 @@ ) if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import 
FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents.chat_history import ChatHistory @@ -160,7 +160,7 @@ async def _inner_get_streaming_chat_message_contents( @override def _update_function_choice_settings_callback( self, - ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]: + ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]: return update_settings_from_function_choice_configuration @override diff --git a/python/semantic_kernel/connectors/ai/bedrock/services/model_provider/utils.py b/python/semantic_kernel/connectors/ai/bedrock/services/model_provider/utils.py index 6274bdb01ffe..7607696559c5 100644 --- a/python/semantic_kernel/connectors/ai/bedrock/services/model_provider/utils.py +++ b/python/semantic_kernel/connectors/ai/bedrock/services/model_provider/utils.py @@ -4,12 +4,10 @@ import json from collections.abc import Callable, Mapping from functools import partial -from typing import Any +from typing import TYPE_CHECKING, Any from semantic_kernel.connectors.ai.bedrock.bedrock_prompt_execution_settings import BedrockChatPromptExecutionSettings -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.const import DEFAULT_FULLY_QUALIFIED_NAME_SEPARATOR from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent @@ -20,6 +18,10 @@ from semantic_kernel.contents.utils.finish_reason import FinishReason from semantic_kernel.exceptions.service_exceptions import ServiceInvalidRequestError +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration + from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings + async def run_in_executor(executor, func, *args, **kwargs): """Run a function in an executor.""" @@ -177,8 +179,8 @@ def format_bedrock_function_name_to_kernel_function_fully_qualified_name(bedrock def update_settings_from_function_choice_configuration( - function_choice_configuration: FunctionCallChoiceConfiguration, - settings: PromptExecutionSettings, + function_choice_configuration: "FunctionCallChoiceConfiguration", + settings: "PromptExecutionSettings", type: FunctionChoiceType, ) -> None: """Update the settings from a FunctionChoiceConfiguration.""" diff --git a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py index de9edf36c268..5c527e994564 100644 --- a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py +++ b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py @@ -9,14 +9,9 @@ from typing import TYPE_CHECKING, Any, ClassVar from opentelemetry.trace import Span, Tracer, get_tracer, use_span +from pydantic import Field -from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration -from semantic_kernel.connectors.ai.function_calling_utils import ( - merge_function_results, - 
merge_streaming_function_results, -) -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior, FunctionChoiceType +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType from semantic_kernel.const import AUTO_FUNCTION_INVOCATION_SPAN_NAME from semantic_kernel.contents.annotation_content import AnnotationContent from semantic_kernel.contents.file_reference_content import FileReferenceContent @@ -26,6 +21,7 @@ from semantic_kernel.utils.telemetry.model_diagnostics.gen_ai_attributes import AVAILABLE_FUNCTIONS if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.chat_message_content import ChatMessageContent @@ -41,6 +37,7 @@ class ChatCompletionClientBase(AIServiceClientBase, ABC): # Connectors that support function calling should set this to True SUPPORTS_FUNCTION_CALLING: ClassVar[bool] = False + instruction_role: str = Field(default_factory=lambda: "system", description="The role for instructions.") # region Internal methods to be implemented by the derived classes @@ -102,6 +99,10 @@ async def get_chat_message_contents( Returns: A list of chat message contents representing the response(s) from the LLM. """ + from semantic_kernel.connectors.ai.function_calling_utils import ( + merge_function_results, + ) + # Create a copy of the settings to avoid modifying the original settings settings = copy.deepcopy(settings) # Later on, we already use the tools or equivalent settings, we cast here. @@ -111,15 +112,6 @@ async def get_chat_message_contents( if not self.SUPPORTS_FUNCTION_CALLING: return await self._inner_get_chat_message_contents(chat_history, settings) - # For backwards compatibility we need to convert the `FunctionCallBehavior` to `FunctionChoiceBehavior` - # if this method is called with a `FunctionCallBehavior` object as part of the settings - if hasattr(settings, "function_call_behavior") and isinstance( - settings.function_call_behavior, FunctionCallBehavior - ): - settings.function_choice_behavior = FunctionChoiceBehavior.from_function_call_behavior( - settings.function_call_behavior - ) - kernel: "Kernel" = kwargs.get("kernel") # type: ignore if settings.function_choice_behavior is not None: if kernel is None: @@ -217,6 +209,10 @@ async def get_streaming_chat_message_contents( Yields: A stream representing the response(s) from the LLM. """ + from semantic_kernel.connectors.ai.function_calling_utils import ( + merge_streaming_function_results, + ) + # Create a copy of the settings to avoid modifying the original settings settings = copy.deepcopy(settings) # Later on, we already use the tools or equivalent settings, we cast here. 
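With the `FunctionCallBehavior` to `FunctionChoiceBehavior` conversion removed from both the regular and streaming paths, callers that still set the deprecated attribute no longer get an automatic mapping. A hedged migration sketch, using the settings import path common in the samples:

```python
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
from semantic_kernel.connectors.ai.open_ai import OpenAIChatPromptExecutionSettings

# Before (deprecated, no longer converted after this PR):
#   settings.function_call_behavior = FunctionCallBehavior.AutoInvokeKernelFunctions()

# After: state the choice behavior directly on the execution settings.
settings = OpenAIChatPromptExecutionSettings(
    function_choice_behavior=FunctionChoiceBehavior.Auto(),  # auto-invoke kernel functions
)
```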
@@ -230,15 +226,6 @@ async def get_streaming_chat_message_contents( yield streaming_chat_message_contents return - # For backwards compatibility we need to convert the `FunctionCallBehavior` to `FunctionChoiceBehavior` - # if this method is called with a `FunctionCallBehavior` object as part of the settings - if hasattr(settings, "function_call_behavior") and isinstance( - settings.function_call_behavior, FunctionCallBehavior - ): - settings.function_choice_behavior = FunctionChoiceBehavior.from_function_call_behavior( - settings.function_call_behavior - ) - kernel: "Kernel" = kwargs.get("kernel") # type: ignore if settings.function_choice_behavior is not None: if kernel is None: @@ -397,7 +384,7 @@ def _verify_function_choice_settings(self, settings: "PromptExecutionSettings") def _update_function_choice_settings_callback( self, - ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]: + ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]: """Return the callback function to update the settings from a function call configuration. Override this method to provide a custom callback function to diff --git a/python/semantic_kernel/connectors/ai/function_call_behavior.py b/python/semantic_kernel/connectors/ai/function_call_behavior.py deleted file mode 100644 index 913df72f7853..000000000000 --- a/python/semantic_kernel/connectors/ai/function_call_behavior.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -from collections.abc import Callable -from typing import TYPE_CHECKING, Literal - -from pydantic.dataclasses import dataclass -from typing_extensions import deprecated - -from semantic_kernel.functions.kernel_function_metadata import KernelFunctionMetadata -from semantic_kernel.kernel_pydantic import KernelBaseModel - -if TYPE_CHECKING: - from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings - from semantic_kernel.kernel import Kernel - -DEFAULT_MAX_AUTO_INVOKE_ATTEMPTS = 5 - - -@dataclass -class FunctionCallConfiguration: - """Class that holds the configured functions for function calling.""" - - available_functions: list["KernelFunctionMetadata"] | None = None - required_functions: list["KernelFunctionMetadata"] | None = None - - -@deprecated("The `FunctionCallBehavior` class is deprecated; use `FunctionChoiceBehavior` instead.", category=None) -class FunctionCallBehavior(KernelBaseModel): - """Class that controls function calling behavior. - - DEPRECATED: This class has been replaced by FunctionChoiceBehavior. - - Args: - enable_kernel_functions (bool): Enable kernel functions. - max_auto_invoke_attempts (int): The maximum number of auto invoke attempts. - - Attributes: - enable_kernel_functions (bool): Enable kernel functions. - max_auto_invoke_attempts (int): The maximum number of auto invoke attempts. - - Properties: - auto_invoke_kernel_functions: Check if the kernel functions should be auto-invoked. - Determined as max_auto_invoke_attempts > 0. - - Methods: - configure: Configures the settings for the function call behavior, - the default version in this class, does nothing, use subclasses for different behaviors. - - Class methods: - AutoInvokeKernelFunctions: Returns KernelFunctions class with auto_invoke enabled, all functions. - EnableKernelFunctions: Returns KernelFunctions class with auto_invoke disabled, all functions. 
- EnableFunctions: Set the enable kernel functions flag, filtered functions, auto_invoke optional. - RequiredFunction: Set the required function flag, auto_invoke optional. - - """ - - enable_kernel_functions: bool = True - max_auto_invoke_attempts: int = DEFAULT_MAX_AUTO_INVOKE_ATTEMPTS - - @property - def auto_invoke_kernel_functions(self): - """Check if the kernel functions should be auto-invoked.""" - return self.max_auto_invoke_attempts > 0 - - @auto_invoke_kernel_functions.setter - def auto_invoke_kernel_functions(self, value: bool): - """Set the auto_invoke_kernel_functions flag.""" - if not value: - self.max_auto_invoke_attempts = 0 - else: - if self.max_auto_invoke_attempts == 0: - self.max_auto_invoke_attempts = DEFAULT_MAX_AUTO_INVOKE_ATTEMPTS - - def configure( - self, - kernel: "Kernel", - update_settings_callback: Callable[..., None], - settings: "PromptExecutionSettings", - ) -> None: - """Configures the settings for the function call behavior. - - Using the base ToolCallBehavior means that you manually have to set tool_choice and tools. - - For different behaviors, use the subclasses of ToolCallBehavior: - KernelFunctions (all functions in the Kernel) - EnabledFunctions (filtered set of functions from the Kernel) - RequiredFunction (a single function) - - By default, the update_settings_callback is called with FunctionCallConfiguration, - which contains a list of available functions or a list of required functions, it also - takes the PromptExecutionSettings object. - - It should update the prompt execution settings with the available functions or required functions. - - Alternatively you can override this class and add your own logic in the configure method. - """ - return - - @classmethod - @deprecated("Use the `FunctionChoiceBehavior` `Auto` class instead.") - def AutoInvokeKernelFunctions(cls) -> "KernelFunctions": - """Returns KernelFunctions class with auto_invoke enabled.""" - return KernelFunctions(max_auto_invoke_attempts=DEFAULT_MAX_AUTO_INVOKE_ATTEMPTS) - - @classmethod - @deprecated("Use the `FunctionChoiceBehavior` `Auto` class method instead.") - def EnableKernelFunctions(cls) -> "KernelFunctions": - """Returns KernelFunctions class with auto_invoke disabled. - - Function calls are enabled in this case, just not invoked. 
- """ - return KernelFunctions(max_auto_invoke_attempts=0) - - @classmethod - @deprecated("Use the `FunctionChoiceBehavior` `Auto` class method instead.") - def EnableFunctions( - cls, - auto_invoke: bool = False, - *, - filters: dict[ - Literal["excluded_plugins", "included_plugins", "excluded_functions", "included_functions"], list[str] - ] - | None = {}, - ) -> "EnabledFunctions": - """Set the enable kernel functions flag.""" - return EnabledFunctions( - filters=filters, max_auto_invoke_attempts=DEFAULT_MAX_AUTO_INVOKE_ATTEMPTS if auto_invoke else 0 - ) - - @classmethod - @deprecated("Use the `FunctionChoiceBehavior` `Required` class method instead.") - def RequiredFunction( - cls, - auto_invoke: bool = False, - *, - function_fully_qualified_name: str, - ) -> "RequiredFunction": - """Set the required function flag.""" - return RequiredFunction( - function_fully_qualified_name=function_fully_qualified_name, - max_auto_invoke_attempts=1 if auto_invoke else 0, - ) - - -@deprecated("Use the `FunctionChoiceBehavior` `Auto` class instead.") -class KernelFunctions(FunctionCallBehavior): - """Function call behavior for making all kernel functions available for tool calls.""" - - def configure( - self, - kernel: "Kernel", - update_settings_callback: Callable[..., None], - settings: "PromptExecutionSettings", - ) -> None: - """Set the options for the tool call behavior in the settings.""" - if self.enable_kernel_functions: - update_settings_callback( - FunctionCallConfiguration(available_functions=kernel.get_full_list_of_function_metadata()), settings - ) - - -@deprecated("Use the `FunctionChoiceBehavior` `Auto` class instead.") -class EnabledFunctions(FunctionCallBehavior): - """Function call behavior for making a filtered set of functions available for tool calls.""" - - filters: dict[ - Literal["excluded_plugins", "included_plugins", "excluded_functions", "included_functions"], list[str] - ] - - def configure( - self, - kernel: "Kernel", - update_settings_callback: Callable[..., None], - settings: "PromptExecutionSettings", - ) -> None: - """Set the options for the tool call behavior in the settings.""" - if self.enable_kernel_functions: - update_settings_callback( - FunctionCallConfiguration(available_functions=kernel.get_list_of_function_metadata(self.filters)), - settings, - ) - - -@deprecated("Use the `FunctionChoiceBehavior` `Required` class instead.") -class RequiredFunction(FunctionCallBehavior): - """Function call behavior for making a single function available for tool calls.""" - - function_fully_qualified_name: str - - def configure( - self, - kernel: "Kernel", - update_settings_callback: Callable[..., None], - settings: "PromptExecutionSettings", - ) -> None: - """Set the options for the tool call behavior in the settings.""" - if not self.enable_kernel_functions: - return - # since using this always calls this single function, we do not want to allow repeated calls - if self.max_auto_invoke_attempts > 1: - self.max_auto_invoke_attempts = 1 - update_settings_callback( - FunctionCallConfiguration( - required_functions=kernel.get_list_of_function_metadata({ - "included_functions": [self.function_fully_qualified_name] - }) - ), - settings, - ) diff --git a/python/semantic_kernel/connectors/ai/function_calling_utils.py b/python/semantic_kernel/connectors/ai/function_calling_utils.py index c7ab3dba6b39..7a5c2950c4e0 100644 --- a/python/semantic_kernel/connectors/ai/function_calling_utils.py +++ b/python/semantic_kernel/connectors/ai/function_calling_utils.py @@ -3,9 +3,6 @@ from 
collections import OrderedDict from typing import TYPE_CHECKING, Any -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.function_result_content import FunctionResultContent -from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError @@ -15,6 +12,8 @@ FunctionChoiceType, ) from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings + from semantic_kernel.contents.chat_message_content import ChatMessageContent + from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent from semantic_kernel.functions.kernel_function_metadata import KernelFunctionMetadata @@ -80,13 +79,16 @@ def _combine_filter_dicts(*dicts: dict[str, list[str]]) -> dict: def merge_function_results( - messages: list[ChatMessageContent], -) -> list[ChatMessageContent]: + messages: list["ChatMessageContent"], +) -> list["ChatMessageContent"]: """Combine multiple function result content types to one chat message content type. This method combines the FunctionResultContent items from separate ChatMessageContent messages, and is used in the event that the `context.terminate = True` condition is met. """ + from semantic_kernel.contents.chat_message_content import ChatMessageContent + from semantic_kernel.contents.function_result_content import FunctionResultContent + items: list[Any] = [] for message in messages: items.extend([item for item in message.items if isinstance(item, FunctionResultContent)]) @@ -99,10 +101,10 @@ def merge_function_results( def merge_streaming_function_results( - messages: list[ChatMessageContent | StreamingChatMessageContent], + messages: list["ChatMessageContent | StreamingChatMessageContent"], ai_model_id: str, function_invoke_attempt: int, -) -> list[StreamingChatMessageContent]: +) -> list["StreamingChatMessageContent"]: """Combine multiple streaming function result content types to one streaming chat message content type. This method combines the FunctionResultContent items from separate StreamingChatMessageContent messages, @@ -116,6 +118,9 @@ def merge_streaming_function_results( Returns: The combined streaming chat message content type. 
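For the merge helpers above, a sketch of the intended effect: several TOOL messages each carrying one `FunctionResultContent` collapse into a single TOOL-role message. The call ids and results below are made up:

```python
from semantic_kernel.connectors.ai.function_calling_utils import merge_function_results
from semantic_kernel.contents.chat_message_content import ChatMessageContent
from semantic_kernel.contents.function_result_content import FunctionResultContent
from semantic_kernel.contents.utils.author_role import AuthorRole

messages = [
    ChatMessageContent(role=AuthorRole.TOOL, items=[FunctionResultContent(id="call_1", result="42")]),
    ChatMessageContent(role=AuthorRole.TOOL, items=[FunctionResultContent(id="call_2", result="ok")]),
]

merged = merge_function_results(messages)
# One TOOL message whose items hold both function results.
assert len(merged) == 1 and len(merged[0].items) == 2
```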
""" + from semantic_kernel.contents.function_result_content import FunctionResultContent + from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent + items: list[Any] = [] for message in messages: items.extend([item for item in message.items if isinstance(item, FunctionResultContent)]) diff --git a/python/semantic_kernel/connectors/ai/function_choice_behavior.py b/python/semantic_kernel/connectors/ai/function_choice_behavior.py index 759274d632f2..f32a57e26952 100644 --- a/python/semantic_kernel/connectors/ai/function_choice_behavior.py +++ b/python/semantic_kernel/connectors/ai/function_choice_behavior.py @@ -2,18 +2,14 @@ import logging from collections.abc import Callable -from enum import Enum from typing import TYPE_CHECKING, Literal, TypeVar -from typing_extensions import deprecated - -from semantic_kernel.connectors.ai.function_calling_utils import _combine_filter_dicts +from semantic_kernel.connectors.ai.function_choice_type import FunctionChoiceType from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError from semantic_kernel.kernel_pydantic import KernelBaseModel from semantic_kernel.utils.experimental_decorator import experimental_class if TYPE_CHECKING: - from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.kernel import Kernel @@ -27,15 +23,6 @@ _T = TypeVar("_T", bound="FunctionChoiceBehavior") -@experimental_class -class FunctionChoiceType(Enum): - """The type of function choice behavior.""" - - AUTO = "auto" - NONE = "none" - REQUIRED = "required" - - @experimental_class class FunctionChoiceBehavior(KernelBaseModel): """Class that controls function choice behavior. 
@@ -75,31 +62,6 @@ class FunctionChoiceBehavior(KernelBaseModel): ) = None type_: FunctionChoiceType | None = None - @classmethod - @deprecated("The `FunctionCallBehavior` class is deprecated; use `FunctionChoiceBehavior` instead.") - def from_function_call_behavior(cls: type[_T], behavior: "FunctionCallBehavior") -> _T: - """Create a FunctionChoiceBehavior from a FunctionCallBehavior.""" - from semantic_kernel.connectors.ai.function_call_behavior import ( - EnabledFunctions, - KernelFunctions, - RequiredFunction, - ) - - if isinstance(behavior, (EnabledFunctions, KernelFunctions)): - return cls.Auto( - auto_invoke=behavior.auto_invoke_kernel_functions, - filters=behavior.filters if hasattr(behavior, "filters") else None, - ) - if isinstance(behavior, (RequiredFunction)): - return cls.Required( - auto_invoke=behavior.auto_invoke_kernel_functions, - filters={"included_functions": [behavior.function_fully_qualified_name]}, - ) - return cls( - enable_kernel_functions=behavior.enable_kernel_functions, - maximum_auto_invoke_attempts=behavior.max_auto_invoke_attempts, - ) - @property def auto_invoke_kernel_functions(self): """Return True if auto_invoke_kernel_functions is enabled.""" @@ -218,6 +180,8 @@ def Required( @classmethod def from_dict(cls: type[_T], data: dict) -> _T: """Create a FunctionChoiceBehavior from a dictionary.""" + from semantic_kernel.connectors.ai.function_calling_utils import _combine_filter_dicts + type_map = { "auto": cls.Auto, "none": cls.NoneInvoke, diff --git a/python/semantic_kernel/connectors/ai/function_choice_type.py b/python/semantic_kernel/connectors/ai/function_choice_type.py new file mode 100644 index 000000000000..d4bc2b3a598f --- /dev/null +++ b/python/semantic_kernel/connectors/ai/function_choice_type.py @@ -0,0 +1,14 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +from enum import Enum + +from semantic_kernel.utils.experimental_decorator import experimental_class + + +@experimental_class +class FunctionChoiceType(Enum): + """The type of function choice behavior.""" + + AUTO = "auto" + NONE = "none" + REQUIRED = "required" diff --git a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py index df8f64cf4c6c..b7005c3c1f5d 100644 --- a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py @@ -5,6 +5,11 @@ from collections.abc import AsyncGenerator, Callable from typing import TYPE_CHECKING, Any, ClassVar +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover + import google.generativeai as genai from google.generativeai import GenerativeModel from google.generativeai.protos import Candidate, Content @@ -13,7 +18,6 @@ from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase from semantic_kernel.connectors.ai.completion_usage import CompletionUsage -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType from semantic_kernel.connectors.ai.google.google_ai.google_ai_prompt_execution_settings import ( GoogleAIChatPromptExecutionSettings, @@ -50,13 +54,9 @@ ) if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings -if sys.version_info >= (3, 12): - from typing import override # pragma: no cover -else: - from typing_extensions import override # pragma: no cover - logger: logging.Logger = logging.getLogger(__name__) @@ -186,7 +186,7 @@ def _verify_function_choice_settings(self, settings: "PromptExecutionSettings") @override def _update_function_choice_settings_callback( self, - ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]: + ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]: return update_settings_from_function_choice_configuration @override diff --git a/python/semantic_kernel/connectors/ai/google/google_ai/services/utils.py b/python/semantic_kernel/connectors/ai/google/google_ai/services/utils.py index 6086d0167694..77c88526eedb 100644 --- a/python/semantic_kernel/connectors/ai/google/google_ai/services/utils.py +++ b/python/semantic_kernel/connectors/ai/google/google_ai/services/utils.py @@ -2,11 +2,10 @@ import json import logging -from typing import Any +from typing import TYPE_CHECKING, Any from google.generativeai.protos import Blob, Candidate, FunctionCall, FunctionResponse, Part -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType from semantic_kernel.connectors.ai.google.google_ai.google_ai_prompt_execution_settings import ( GoogleAIChatPromptExecutionSettings, @@ -15,7 +14,6 @@ FUNCTION_CHOICE_TYPE_TO_GOOGLE_FUNCTION_CALLING_MODE, GEMINI_FUNCTION_NAME_SEPARATOR, ) -from 
semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.function_result_content import FunctionResultContent @@ -25,6 +23,10 @@ from semantic_kernel.exceptions.service_exceptions import ServiceInvalidRequestError from semantic_kernel.functions.kernel_function_metadata import KernelFunctionMetadata +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration + from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings + logger: logging.Logger = logging.getLogger(__name__) @@ -148,8 +150,8 @@ def kernel_function_metadata_to_google_ai_function_call_format(metadata: KernelF def update_settings_from_function_choice_configuration( - function_choice_configuration: FunctionCallChoiceConfiguration, - settings: PromptExecutionSettings, + function_choice_configuration: "FunctionCallChoiceConfiguration", + settings: "PromptExecutionSettings", type: FunctionChoiceType, ) -> None: """Update the settings from a FunctionChoiceConfiguration.""" diff --git a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/utils.py b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/utils.py index 18f5b2feb6ca..f3211066d466 100644 --- a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/utils.py +++ b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/utils.py @@ -2,12 +2,12 @@ import json import logging +from typing import TYPE_CHECKING from google.cloud.aiplatform_v1beta1.types.content import Blob, Candidate, Part from google.cloud.aiplatform_v1beta1.types.tool import FunctionCall, FunctionResponse from vertexai.generative_models import FunctionDeclaration, Tool, ToolConfig -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType from semantic_kernel.connectors.ai.google.shared_utils import ( FUNCTION_CHOICE_TYPE_TO_GOOGLE_FUNCTION_CALLING_MODE, @@ -16,7 +16,6 @@ from semantic_kernel.connectors.ai.google.vertex_ai.vertex_ai_prompt_execution_settings import ( VertexAIChatPromptExecutionSettings, ) -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.function_result_content import FunctionResultContent @@ -26,6 +25,10 @@ from semantic_kernel.exceptions.service_exceptions import ServiceInvalidRequestError from semantic_kernel.functions.kernel_function_metadata import KernelFunctionMetadata +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration + from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings + logger: logging.Logger = logging.getLogger(__name__) @@ -149,8 +152,8 @@ def kernel_function_metadata_to_vertex_ai_function_call_format(metadata: KernelF def update_settings_from_function_choice_configuration( - function_choice_configuration: FunctionCallChoiceConfiguration, - settings: PromptExecutionSettings, + function_choice_configuration: "FunctionCallChoiceConfiguration", + settings: 
"PromptExecutionSettings", type: FunctionChoiceType, ) -> None: """Update the settings from a FunctionChoiceConfiguration.""" diff --git a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py index 6372c71c5b1c..bd7c1346accf 100644 --- a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py @@ -2,7 +2,12 @@ import sys from collections.abc import AsyncGenerator, AsyncIterable, Callable -from typing import Any, ClassVar +from typing import TYPE_CHECKING, Any, ClassVar + +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover import vertexai from google.cloud.aiplatform_v1beta1.types.content import Content @@ -11,7 +16,6 @@ from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase from semantic_kernel.connectors.ai.completion_usage import CompletionUsage -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType from semantic_kernel.connectors.ai.google.shared_utils import ( filter_system_message, @@ -29,7 +33,6 @@ VertexAIChatPromptExecutionSettings, ) from semantic_kernel.connectors.ai.google.vertex_ai.vertex_ai_settings import VertexAISettings -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.chat_message_content import ITEM_TYPES, ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent @@ -48,10 +51,9 @@ trace_streaming_chat_completion, ) -if sys.version_info >= (3, 12): - from typing import override # pragma: no cover -else: - from typing_extensions import override # pragma: no cover +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration + from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings class VertexAIChatCompletion(VertexAIBase, ChatCompletionClientBase): @@ -181,7 +183,7 @@ def _verify_function_choice_settings(self, settings: "PromptExecutionSettings") @override def _update_function_choice_settings_callback( self, - ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]: + ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]: return update_settings_from_function_choice_configuration @override diff --git a/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py index b374235225a4..2405897a6c39 100644 --- a/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py @@ -3,7 +3,7 @@ import logging import sys from collections.abc import AsyncGenerator, Callable -from typing import Any, ClassVar +from typing import TYPE_CHECKING, Any, ClassVar if sys.version_info >= (3, 12): from typing import override # pragma: no cover @@ -24,7 +24,6 @@ from 
semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase from semantic_kernel.connectors.ai.completion_usage import CompletionUsage -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_calling_utils import kernel_function_metadata_to_function_call_format from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType from semantic_kernel.connectors.ai.mistral_ai.prompt_execution_settings.mistral_ai_prompt_execution_settings import ( @@ -32,7 +31,6 @@ ) from semantic_kernel.connectors.ai.mistral_ai.services.mistral_ai_base import MistralAIBase from semantic_kernel.connectors.ai.mistral_ai.settings.mistral_ai_settings import MistralAISettings -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents import ( ChatMessageContent, FunctionCallContent, @@ -50,6 +48,10 @@ trace_streaming_chat_completion, ) +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration + from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings + logger: logging.Logger = logging.getLogger(__name__) @@ -315,7 +317,7 @@ def update_settings_from_function_call_configuration_mistral( @override def _update_function_choice_settings_callback( self, - ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]: + ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]: return self.update_settings_from_function_call_configuration_mistral @override diff --git a/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py b/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py index baf2d04f2914..103133af2c9f 100644 --- a/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py @@ -17,7 +17,6 @@ from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase from semantic_kernel.connectors.ai.completion_usage import CompletionUsage -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType from semantic_kernel.connectors.ai.ollama.ollama_prompt_execution_settings import OllamaChatPromptExecutionSettings from semantic_kernel.connectors.ai.ollama.ollama_settings import OllamaSettings @@ -45,6 +44,7 @@ ) if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings CMC_TYPE = TypeVar("CMC_TYPE", bound=ChatMessageContent) @@ -137,7 +137,7 @@ def _verify_function_choice_settings(self, settings: "PromptExecutionSettings") @override def _update_function_choice_settings_callback( self, - ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]: + ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]: return update_settings_from_function_choice_configuration @override diff --git a/python/semantic_kernel/connectors/ai/ollama/services/utils.py 
b/python/semantic_kernel/connectors/ai/ollama/services/utils.py index 7cf0e6e225cb..9745a4063484 100644 --- a/python/semantic_kernel/connectors/ai/ollama/services/utils.py +++ b/python/semantic_kernel/connectors/ai/ollama/services/utils.py @@ -2,19 +2,22 @@ import json from collections.abc import Callable, Mapping +from typing import TYPE_CHECKING from ollama._types import Message -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_calling_utils import kernel_function_metadata_to_function_call_format from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.function_result_content import FunctionResultContent from semantic_kernel.contents.image_content import ImageContent from semantic_kernel.contents.utils.author_role import AuthorRole +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration + from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings + def _format_system_message(message: ChatMessageContent) -> Message: """Format a system message to the expected object for the client. @@ -110,8 +113,8 @@ def _format_tool_message(message: ChatMessageContent) -> Message: def update_settings_from_function_choice_configuration( - function_choice_configuration: FunctionCallChoiceConfiguration, - settings: PromptExecutionSettings, + function_choice_configuration: "FunctionCallChoiceConfiguration", + settings: "PromptExecutionSettings", type: FunctionChoiceType, ) -> None: """Update the settings from a FunctionChoiceConfiguration. diff --git a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py index 425639d6a291..d2b37d44bb40 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py @@ -1,17 +1,10 @@ # Copyright (c) Microsoft. All rights reserved. import logging -import sys from typing import Annotated, Any, Literal -if sys.version_info >= (3, 11): - from typing import Self # pragma: no cover -else: - from typing_extensions import Self # pragma: no cover - from pydantic import BaseModel, Field, field_validator, model_validator -from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.exceptions import ServiceInvalidExecutionSettingsError @@ -73,7 +66,6 @@ class OpenAIChatPromptExecutionSettings(OpenAIPromptExecutionSettings): messages: Annotated[ list[dict[str, Any]] | None, Field(description="Do not set this manually. 
It is set by the service.") ] = None - function_call_behavior: Annotated[FunctionCallBehavior | None, Field(exclude=True)] = None parallel_tool_calls: bool | None = True tools: Annotated[ list[dict[str, Any]] | None, @@ -153,32 +145,6 @@ def validate_response_format_and_set_flag(cls, values: Any) -> Any: return values - @model_validator(mode="before") - @classmethod - def validate_function_calling_behaviors(cls, data: Any) -> Any: - """Check if function_call_behavior is set and if so, move to use function_choice_behavior instead.""" - # In an attempt to phase out the use of `function_call_behavior` in favor of `function_choice_behavior`, - # we are syncing the `function_call_behavior` with `function_choice_behavior` if the former is set. - # This allows us to make decisions off of `function_choice_behavior`. Anytime the `function_call_behavior` - # is updated, this validation will run to ensure the `function_choice_behavior` stays in sync. - from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior - - if isinstance(data, dict) and "function_call_behavior" in data.get("extension_data", {}): - data["function_choice_behavior"] = FunctionChoiceBehavior.from_function_call_behavior( - data.get("extension_data", {}).get("function_call_behavior") - ) - return data - - @field_validator("function_call_behavior", mode="after") - @classmethod - def check_for_function_call_behavior(cls, v) -> Self: - """Check if function_choice_behavior is set, if not, set it to default.""" - if v is not None: - logger.warning( - "The `function_call_behavior` parameter is deprecated. Please use the `function_choice_behavior` parameter instead." # noqa: E501 - ) - return v - class OpenAIEmbeddingPromptExecutionSettings(PromptExecutionSettings): """Specific settings for the text embedding endpoint.""" diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py b/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py index 03289fd45d58..2549f9027961 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py @@ -54,6 +54,7 @@ def __init__( async_client: AsyncAzureOpenAI | None = None, env_file_path: str | None = None, env_file_encoding: str | None = None, + instruction_role: str | None = None, ) -> None: """Initialize an AzureChatCompletion service. @@ -77,6 +78,8 @@ def __init__( async_client (AsyncAzureOpenAI | None): An existing client to use. (Optional) env_file_path (str | None): Use the environment settings file as a fallback to using env vars. env_file_encoding (str | None): The encoding of the environment settings file, defaults to 'utf-8'. + instruction_role (str | None): The role to use for 'instruction' messages, for example, summarization + prompts could use `developer` or `system`. 
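A usage sketch for the new parameter; endpoint, deployment, and key are assumed to come from environment settings, as in the samples:

```python
from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion

# With instruction_role="developer", instruction (system) messages are sent
# under the `developer` role, which newer reasoning-model APIs expect.
service = AzureChatCompletion(
    service_id="chat",
    instruction_role="developer",
)
```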
(Optional) """ try: azure_openai_settings = AzureOpenAISettings.create( @@ -108,6 +111,7 @@ def __init__( default_headers=default_headers, ai_model_type=OpenAIModelTypes.CHAT, client=async_client, + instruction_role=instruction_role, ) @classmethod diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/azure_config_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/azure_config_base.py index 93662af62579..da50e4ee56b6 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/azure_config_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/azure_config_base.py @@ -36,6 +36,7 @@ def __init__( token_endpoint: str | None = None, default_headers: Mapping[str, str] | None = None, client: AsyncAzureOpenAI | None = None, + instruction_role: str | None = None, ) -> None: """Internal class for configuring a connection to an Azure OpenAI service. @@ -56,6 +57,8 @@ def __init__( token_endpoint (str): Azure AD token endpoint use to get the token. (Optional) default_headers (Union[Mapping[str, str], None]): Default headers for HTTP requests. (Optional) client (AsyncAzureOpenAI): An existing client to use. (Optional) + instruction_role (str | None): The role to use for 'instruction' messages, for example, summarization + prompts could use `developer` or `system`. (Optional) """ # Merge APP_INFO into the headers if it exists @@ -95,6 +98,8 @@ def __init__( } if service_id: args["service_id"] = service_id + if instruction_role: + args["instruction_role"] = instruction_role super().__init__(**args) def to_dict(self) -> dict[str, str]: diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion.py index c643f11859a7..6d59561377ba 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion.py @@ -30,6 +30,7 @@ def __init__( async_client: AsyncOpenAI | None = None, env_file_path: str | None = None, env_file_encoding: str | None = None, + instruction_role: str | None = None, ) -> None: """Initialize an OpenAIChatCompletion service. @@ -47,6 +48,7 @@ def __init__( env_file_path (str | None): Use the environment settings file as a fallback to environment variables. (Optional) env_file_encoding (str | None): The encoding of the environment settings file. 
(Optional) + instruction_role (str | None): The role to use for 'instruction' messages, for example, summarization + prompts could use `developer` or `system`. (Optional) """ try: openai_settings = OpenAISettings.create( @@ -72,6 +74,7 @@ def __init__( ai_model_type=OpenAIModelTypes.CHAT, default_headers=default_headers, client=async_client, + instruction_role=instruction_role, ) @classmethod diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py index 0c1e843c5d47..605b78812ae5 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py @@ -19,17 +19,16 @@ from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase from semantic_kernel.connectors.ai.completion_usage import CompletionUsage -from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_calling_utils import update_settings_from_function_call_configuration from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior, FunctionChoiceType from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import ( OpenAIChatPromptExecutionSettings, ) from semantic_kernel.connectors.ai.open_ai.services.open_ai_handler import OpenAIHandler -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings +from semantic_kernel.contents.annotation_content import AnnotationContent from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.file_reference_content import FileReferenceContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent from semantic_kernel.contents.streaming_text_content import StreamingTextContent @@ -46,6 +45,7 @@ ) if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.functions.kernel_arguments import KernelArguments from semantic_kernel.kernel import Kernel @@ -150,7 +150,7 @@ def _verify_function_choice_settings(self, settings: "PromptExecutionSettings") @override def _update_function_choice_settings_callback( self, - ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]: + ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]: return update_settings_from_function_call_configuration @override @@ -267,6 +267,41 @@ def _get_function_call_from_chat_choice(self, choice: Choice | ChunkChoice) -> l # When you enable asynchronous content filtering in Azure OpenAI, you may receive empty deltas + return [] + def _prepare_chat_history_for_request( + self, + chat_history: "ChatHistory", + role_key: str = "role", + content_key: str = "content", + ) -> Any: + """Prepare the chat history for a request. + + Allows customization of the key names for role/author, and optionally overrides the role.
+ + ChatRole.TOOL messages need to be formatted differently than system/user/assistant messages: + They require a "tool_call_id" and (function) "name" key, and the "metadata" key should + be removed. The "encoding" key should also be removed. + + Override this method to customize the formatting of the chat history for a request. + + Args: + chat_history (ChatHistory): The chat history to prepare. + role_key (str): The key name for the role/author. + content_key (str): The key name for the content/message. + + Returns: + prepared_chat_history (Any): The prepared chat history for a request. + """ + return [ + { + **message.to_dict(role_key=role_key, content_key=content_key), + role_key: "developer" + if self.instruction_role == "developer" and message.to_dict(role_key=role_key)[role_key] == "system" + else message.to_dict(role_key=role_key)[role_key], + } + for message in chat_history.messages + if not isinstance(message, (AnnotationContent, FileReferenceContent)) + ] + # endregion # region function calling @@ -279,15 +314,9 @@ async def _process_function_call( arguments: "KernelArguments | None", function_call_count: int, request_index: int, - function_call_behavior: FunctionChoiceBehavior | FunctionCallBehavior, + function_call_behavior: FunctionChoiceBehavior, ) -> "AutoFunctionInvocationContext | None": """Processes the tool calls in the result and update the chat history.""" - # deprecated and might not even be used anymore, hard to trigger directly - if isinstance(function_call_behavior, FunctionCallBehavior): # pragma: no cover - # We need to still support a `FunctionCallBehavior` input so it doesn't break current - # customers. Map from `FunctionCallBehavior` -> `FunctionChoiceBehavior` - function_call_behavior = FunctionChoiceBehavior.from_function_call_behavior(function_call_behavior) - return await kernel.invoke_function_call( function_call=function_call, chat_history=chat_history, diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_config_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_config_base.py index 7ead64865445..d3d72795665b 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_config_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_config_base.py @@ -29,6 +29,7 @@ def __init__( service_id: str | None = None, default_headers: Mapping[str, str] | None = None, client: AsyncOpenAI | None = None, + instruction_role: str | None = None, ) -> None: """Initialize a client for OpenAI services. @@ -48,6 +49,8 @@ def __init__( default_headers (Mapping[str, str]): Default headers for HTTP requests. (Optional) client (AsyncOpenAI): An existing OpenAI client, optional. + instruction_role (str): The role to use for 'instruction' + messages, for example, summarization prompts could use `developer` or `system`.
(Optional) """ # Merge APP_INFO into the headers if it exists @@ -71,6 +74,8 @@ def __init__( } if service_id: args["service_id"] = service_id + if instruction_role: + args["instruction_role"] = instruction_role super().__init__(**args) def to_dict(self) -> dict[str, str]: diff --git a/python/semantic_kernel/contents/__init__.py b/python/semantic_kernel/contents/__init__.py index 352a5915cc68..c326115ccd86 100644 --- a/python/semantic_kernel/contents/__init__.py +++ b/python/semantic_kernel/contents/__init__.py @@ -6,6 +6,8 @@ from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.function_result_content import FunctionResultContent +from semantic_kernel.contents.history_reducer.chat_history_summarization_reducer import ChatHistorySummarizationReducer +from semantic_kernel.contents.history_reducer.chat_history_truncation_reducer import ChatHistoryTruncationReducer from semantic_kernel.contents.image_content import ImageContent from semantic_kernel.contents.streaming_annotation_content import StreamingAnnotationContent from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent @@ -20,6 +22,8 @@ "AudioContent", "AuthorRole", "ChatHistory", + "ChatHistorySummarizationReducer", + "ChatHistoryTruncationReducer", "ChatMessageContent", "FinishReason", "FunctionCallContent", diff --git a/python/semantic_kernel/contents/function_call_content.py b/python/semantic_kernel/contents/function_call_content.py index 08b9c9e19757..7067311f4c8a 100644 --- a/python/semantic_kernel/contents/function_call_content.py +++ b/python/semantic_kernel/contents/function_call_content.py @@ -221,4 +221,13 @@ def to_dict(self) -> dict[str, str | Any]: def __hash__(self) -> int: """Return the hash of the function call content.""" - return hash((self.tag, self.id, self.index, self.name, self.function_name, self.plugin_name, self.arguments)) + args_hashable = frozenset(self.arguments.items()) if isinstance(self.arguments, Mapping) else None + return hash(( + self.tag, + self.id, + self.index, + self.name, + self.function_name, + self.plugin_name, + args_hashable, + )) diff --git a/python/semantic_kernel/contents/history_reducer/__init__.py b/python/semantic_kernel/contents/history_reducer/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/semantic_kernel/contents/history_reducer/chat_history_reducer.py b/python/semantic_kernel/contents/history_reducer/chat_history_reducer.py new file mode 100644 index 000000000000..bc05c705ceda --- /dev/null +++ b/python/semantic_kernel/contents/history_reducer/chat_history_reducer.py @@ -0,0 +1,31 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +import sys +from abc import ABC, abstractmethod + +if sys.version_info < (3, 11): + from typing_extensions import Self # pragma: no cover +else: + from typing import Self # type: ignore # pragma: no cover + +from pydantic import Field + +from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.utils.experimental_decorator import experimental_class + + +@experimental_class +class ChatHistoryReducer(ChatHistory, ABC): + """Defines a contract for reducing chat history.""" + + target_count: int = Field(..., gt=0, description="Target message count.") + threshold_count: int = Field(0, ge=0, description="Threshold count to avoid orphaning messages.") + + @abstractmethod + async def reduce(self) -> Self | None: + """Reduce the chat history in some way (e.g., truncate, summarize). + + Returns: + The possibly shorter chat history (self), or None if no change is needed. + """ + ... diff --git a/python/semantic_kernel/contents/history_reducer/chat_history_reducer_utils.py b/python/semantic_kernel/contents/history_reducer/chat_history_reducer_utils.py new file mode 100644 index 000000000000..6742c0b56816 --- /dev/null +++ b/python/semantic_kernel/contents/history_reducer/chat_history_reducer_utils.py @@ -0,0 +1,211 @@ +# Copyright (c) Microsoft. All rights reserved. + +import logging +from collections.abc import Callable + +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.function_call_content import FunctionCallContent +from semantic_kernel.contents.function_result_content import FunctionResultContent +from semantic_kernel.contents.utils.author_role import AuthorRole +from semantic_kernel.utils.experimental_decorator import experimental_function + +logger = logging.getLogger(__name__) + + +SUMMARY_METADATA_KEY = "__summary__" + + +@experimental_function +def get_call_result_pairs(history: list[ChatMessageContent]) -> list[tuple[int, int]]: + """Identify all (FunctionCallContent, FunctionResultContent) pairs in the history. + + Return a list of (call_index, result_index) pairs for safe referencing. + """ + pairs: list[tuple[int, int]] = [] # (call_index, result_index) pairs + call_ids_seen: dict[str, int] = {} # Map call IDs (str) to their indices (int) + + # Gather all function-call IDs and their indices. + for i, msg in enumerate(history): + for item in msg.items: + if isinstance(item, FunctionCallContent) and item.id is not None: + call_ids_seen[item.id] = i + + # Now, match each FunctionResultContent to the earliest unmatched call with the same ID. + for j, msg in enumerate(history): + for item in msg.items: + if isinstance(item, FunctionResultContent) and item.id is not None: + call_id = item.id + if call_id in call_ids_seen: + call_index = call_ids_seen[call_id] + pairs.append((call_index, j)) + # Remove the call ID so we don't match it a second time + del call_ids_seen[call_id] + break + + return pairs + + +@experimental_function +def locate_summarization_boundary(history: list[ChatMessageContent]) -> int: + """Identify the index of the first message that is not a summary message. + + This is indicated by the presence of the SUMMARY_METADATA_KEY in the message metadata. + + Returns: + The insertion point index for normal history messages (i.e., after all summary messages).
+ """ + for idx, msg in enumerate(history): + if not msg.metadata or SUMMARY_METADATA_KEY not in msg.metadata: + return idx + return len(history) + + +@experimental_function +def locate_safe_reduction_index( + history: list[ChatMessageContent], + target_count: int, + threshold_count: int = 0, + offset_count: int = 0, +) -> int | None: + """Identify the index of the first message at or beyond the specified target_count. + + This index does not orphan sensitive content (function calls/results). + + This method ensures that the presence of a function-call always follows with its result, + so the function-call and its function-result are never separated. + + In addition, it attempts to locate a user message within the threshold window so that + context with the subsequent assistant response is preserved. + + Args: + history: The entire chat history. + target_count: The desired message count after reduction. + threshold_count: The threshold beyond target_count required to trigger reduction. + If total messages <= (target_count + threshold_count), no reduction occurs. + offset_count: Optional number of messages to skip at the start (e.g. existing summary messages). + + Returns: + The index that identifies the starting point for a reduced history that does not orphan + sensitive content. Returns None if reduction is not needed. + """ + total_count = len(history) + threshold_index = total_count - (threshold_count or 0) - target_count + if threshold_index <= offset_count: + return None + + message_index = total_count - target_count + + # Move backward to avoid cutting function calls / results + while message_index >= offset_count: + if not any( + isinstance(item, (FunctionCallContent, FunctionResultContent)) for item in history[message_index].items + ): + break + message_index -= 1 + + # This is our initial target truncation index + target_index = message_index + + # Attempt to see if there's a user message in the threshold window + while message_index >= threshold_index: + if history[message_index].role == AuthorRole.USER: + return message_index + message_index -= 1 + + return target_index + + +@experimental_function +def extract_range( + history: list[ChatMessageContent], + start: int, + end: int | None = None, + filter_func: Callable[[ChatMessageContent], bool] | None = None, + preserve_pairs: bool = False, +) -> list[ChatMessageContent]: + """Extract a range of messages from the source history, skipping any message for which we do not want to keep. + + For example, function calls/results, if desired. + + Args: + history: The source history. + start: The index of the first message to extract (inclusive). + end: The index of the last message to extract (exclusive). If None, extracts through end. + filter_func: A function that takes a ChatMessageContent and returns True if the message should + be skipped, False otherwise. + preserve_pairs: If True, ensures that function call and result pairs are either both kept or both skipped. + + Returns: + A list of extracted messages. 
+ """ + if end is None: + end = len(history) + + sliced = list(range(start, end)) + + # If we need to preserve call->result pairs, gather them + pair_map = {} + if preserve_pairs: + pairs = get_call_result_pairs(history) + # store in a dict for quick membership checking + # call_idx -> result_idx, and also result_idx -> call_idx + for cidx, ridx in pairs: + pair_map[cidx] = ridx + pair_map[ridx] = cidx + + extracted: list[ChatMessageContent] = [] + i = 0 + while i < len(sliced): + idx = sliced[i] + msg = history[idx] + + # If filter_func excludes it, skip it + if filter_func and filter_func(msg): + i += 1 + continue + + # If preserve_pairs is on, and there's a paired index, skip or include them both + if preserve_pairs and idx in pair_map: + paired_idx = pair_map[idx] + # If the pair is within [start, end), we must keep or skip them together + if start <= paired_idx < end: + # Check if the pair or itself fails filter_func + if filter_func and (filter_func(history[paired_idx]) or filter_func(msg)): + # skip both + i += 1 + # Also skip the paired index if it's in our current slice + if paired_idx in sliced: + # remove it from the slice so we don't process it again + sliced.remove(paired_idx) + continue + # keep both + extracted.append(msg) + if paired_idx > idx: + # We'll skip the pair in the normal iteration by removing from slice + # but add it to extracted right now + extracted.append(history[paired_idx]) + if paired_idx in sliced: + sliced.remove(paired_idx) + else: + # if paired_idx < idx, it might appear later, so skip for now + # but we may have already processed it if i was the 2nd item + # either way, do not add duplicates + pass + i += 1 + continue + # If the paired_idx is outside [start, end), there's no conflict + # so we can just do normal logic + extracted.append(msg) + i += 1 + else: + # keep it if filter_func not triggered + extracted.append(msg) + i += 1 + + return extracted + + +@experimental_function +def contains_function_call_or_result(msg: ChatMessageContent) -> bool: + """Return True if the message has any function call or function result.""" + return any(isinstance(item, (FunctionCallContent, FunctionResultContent)) for item in msg.items) diff --git a/python/semantic_kernel/contents/history_reducer/chat_history_summarization_reducer.py b/python/semantic_kernel/contents/history_reducer/chat_history_summarization_reducer.py new file mode 100644 index 000000000000..1feaf1a839ad --- /dev/null +++ b/python/semantic_kernel/contents/history_reducer/chat_history_summarization_reducer.py @@ -0,0 +1,226 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +import logging +import sys +from typing import Any + +from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings +from semantic_kernel.utils.experimental_decorator import experimental_class + +if sys.version_info < (3, 11): + from typing_extensions import Self # pragma: no cover +else: + from typing import Self # type: ignore # pragma: no cover + +from pydantic import Field + +from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase +from semantic_kernel.const import DEFAULT_SERVICE_NAME +from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer +from semantic_kernel.contents.history_reducer.chat_history_reducer_utils import ( + SUMMARY_METADATA_KEY, + contains_function_call_or_result, + extract_range, + locate_safe_reduction_index, + locate_summarization_boundary, +) +from semantic_kernel.exceptions.content_exceptions import ChatHistoryReducerException + +logger = logging.getLogger(__name__) + +DEFAULT_SUMMARIZATION_PROMPT = """ +Provide a concise and complete summarization of the entire dialog that does not exceed 5 sentences. + +This summary must always: +- Consider both user and assistant interactions +- Maintain continuity for the purpose of further dialog +- Include details from any existing summary +- Focus on the most significant aspects of the dialog + +This summary must never: +- Critique, correct, interpret, presume, or assume +- Identify faults, mistakes, misunderstanding, or correctness +- Analyze what has not occurred +- Exclude details from any existing summary +""" + + +@experimental_class +class ChatHistorySummarizationReducer(ChatHistoryReducer): + """A ChatHistory with logic to summarize older messages past a target count.""" + + service: ChatCompletionClientBase + summarization_instructions: str = Field( + default_factory=lambda: DEFAULT_SUMMARIZATION_PROMPT, + description="The summarization instructions.", + ) + use_single_summary: bool = Field(True, description="Whether to use a single summary message.") + fail_on_error: bool = Field(True, description="Raise error if summarization fails.") + service_id: str = Field( + default_factory=lambda: DEFAULT_SERVICE_NAME, description="The ID of the chat completion service." + ) + include_function_content_in_summary: bool = Field( + False, description="Whether to include function calls/results in the summary." + ) + execution_settings: PromptExecutionSettings | None = None + + def __init__( + self, + service: ChatCompletionClientBase, + target_count: int, + service_id: str | None = None, + threshold_count: int | None = None, + summarization_instructions: str | None = None, + use_single_summary: bool | None = None, + fail_on_error: bool | None = None, + include_function_content_in_summary: bool | None = None, + execution_settings: PromptExecutionSettings | None = None, + **kwargs: Any, + ): + """Initialize the ChatHistorySummarizationReducer. + + Args: + service (ChatCompletionClientBase): The chat completion service. + target_count (int): The target number of messages to retain after applying summarization. + service_id (str | None): The ID of the chat completion service. + threshold_count (int | None): The threshold beyond target_count required to trigger reduction. + summarization_instructions (str | None): The summarization instructions.
+ use_single_summary (bool | None): Whether to use a single summary message. + fail_on_error (bool | None): Raise error if summarization fails. + include_function_content_in_summary (bool | None): Whether to include function calls/results in the summary. + execution_settings (PromptExecutionSettings | None): The prompt execution settings. + **kwargs (Any): Additional keyword arguments. + """ + args: dict[str, Any] = { + "service": service, + "target_count": target_count, + } + if service_id is not None: + args["service_id"] = service_id + if threshold_count is not None: + args["threshold_count"] = threshold_count + if summarization_instructions is not None: + args["summarization_instructions"] = summarization_instructions + if use_single_summary is not None: + args["use_single_summary"] = use_single_summary + if fail_on_error is not None: + args["fail_on_error"] = fail_on_error + if include_function_content_in_summary is not None: + args["include_function_content_in_summary"] = include_function_content_in_summary + if execution_settings is not None: + args["execution_settings"] = execution_settings + + super().__init__(**args, **kwargs) + + async def reduce(self) -> Self | None: + """Summarize the older messages past the target message count.""" + history = self.messages + if len(history) <= self.target_count + (self.threshold_count or 0): + return None # No summarization needed + + logger.info("Performing chat history summarization check...") + + # 1. Identify where existing summary messages end + insertion_point = locate_summarization_boundary(history) + if insertion_point == len(history): + # fallback fix: force boundary to something reasonable + logger.warning("All messages are summaries, forcing boundary to 0.") + insertion_point = 0 + + # 2. Locate the safe reduction index + truncation_index = locate_safe_reduction_index( + history, + self.target_count, + self.threshold_count, + offset_count=insertion_point, + ) + if truncation_index is None: + logger.info("No valid truncation index found.") + return None + + # 3. Extract only the chunk of messages that need summarizing + # If include_function_content_in_summary=False, skip function calls/results + # Otherwise, keep them but never split pairs. + messages_to_summarize = extract_range( + history, + start=0 if self.use_single_summary else insertion_point, + end=truncation_index, + filter_func=(contains_function_call_or_result if not self.include_function_content_in_summary else None), + preserve_pairs=self.include_function_content_in_summary, + ) + + if not messages_to_summarize: + logger.info("No messages to summarize.") + return None + + try: + # 4. Summarize the extracted messages + summary_msg = await self._summarize(messages_to_summarize) + logger.info("Chat History Summarization completed.") + if not summary_msg: + return None + + # Mark the newly-created summary with metadata + summary_msg.metadata[SUMMARY_METADATA_KEY] = True + + # 5. 
Reassemble the new history + keep_existing_summaries = [] + if insertion_point > 0 and not self.use_single_summary: + keep_existing_summaries = history[:insertion_point] + + remainder = history[truncation_index:] + new_history = [*keep_existing_summaries, summary_msg, *remainder] + self.messages = new_history + + return self + + except Exception as ex: + if self.fail_on_error: + raise ChatHistoryReducerException("Chat History Summarization failed.") from ex + logger.warning("Summarization failed, continuing without summary.") + return None + + async def _summarize(self, messages: list[ChatMessageContent]) -> ChatMessageContent | None: + """Use the ChatCompletion service to generate a single summary message.""" + from semantic_kernel.contents.utils.author_role import AuthorRole + + chat_history = ChatHistory(messages=messages) + + role = ( + getattr(self.execution_settings, "instruction_role", AuthorRole.SYSTEM) + if self.execution_settings + else AuthorRole.SYSTEM + ) + + chat_history.add_message(ChatMessageContent(role=role, content=self.summarization_instructions)) + + execution_settings = self.execution_settings or self.service.get_prompt_execution_settings_class()( + service_id=self.service_id + ) + + return await self.service.get_chat_message_content(chat_history=chat_history, settings=execution_settings) + + def __eq__(self, other: object) -> bool: + """Check if two ChatHistorySummarizationReducer objects are equal.""" + if not isinstance(other, ChatHistorySummarizationReducer): + return False + return ( + self.threshold_count == other.threshold_count + and self.target_count == other.target_count + and self.use_single_summary == other.use_single_summary + and self.summarization_instructions == other.summarization_instructions + ) + + def __hash__(self) -> int: + """Hash the object based on the same properties used for equality.""" + return hash(( + self.__class__.__name__, + self.threshold_count, + self.target_count, + self.summarization_instructions, + self.use_single_summary, + )) diff --git a/python/semantic_kernel/contents/history_reducer/chat_history_truncation_reducer.py b/python/semantic_kernel/contents/history_reducer/chat_history_truncation_reducer.py new file mode 100644 index 000000000000..4faf28876748 --- /dev/null +++ b/python/semantic_kernel/contents/history_reducer/chat_history_truncation_reducer.py @@ -0,0 +1,83 @@ +# Copyright (c) Microsoft. All rights reserved. + +import logging +import sys +from typing import Any + +from semantic_kernel.utils.experimental_decorator import experimental_class + +if sys.version_info < (3, 11): + from typing_extensions import Self # pragma: no cover +else: + from typing import Self # type: ignore # pragma: no cover + +from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer +from semantic_kernel.contents.history_reducer.chat_history_reducer_utils import ( + extract_range, + locate_safe_reduction_index, +) + +logger = logging.getLogger(__name__) + + +@experimental_class
class ChatHistoryTruncationReducer(ChatHistoryReducer): + """A ChatHistory that supports truncation logic. + + Because this class inherits from ChatHistoryReducer (which in turn inherits from ChatHistory), + it can also be used anywhere a ChatHistory is expected, while adding truncation capability.
+ """ + + def __init__(self, target_count: int, threshold_count: int | None = None, **kwargs: Any): + """Initialize the truncation reducer.""" + args: dict[str, Any] = { + "target_count": target_count, + } + if threshold_count is not None: + args["threshold_count"] = threshold_count + super().__init__(**args, **kwargs) + + async def reduce(self) -> Self | None: + """Truncate the chat history to the target message count, avoiding orphaned calls. + + Returns: + The truncated list of messages if truncation occurred, or None otherwise. + """ + history = self.messages + if len(history) <= self.target_count + (self.threshold_count or 0): + # No need to reduce + return None + + logger.info("Performing chat history truncation check...") + + truncation_index = locate_safe_reduction_index(history, self.target_count, self.threshold_count) + if truncation_index is None: + logger.info( + f"No truncation index found. Target count: {self.target_count}, Threshold: {self.threshold_count}" + ) + return None + + logger.info(f"Truncating history to {truncation_index} messages.") + truncated_list = extract_range(history, start=truncation_index) + self.messages = truncated_list + return self + + def __eq__(self, other: object) -> bool: + """Compare equality based on truncation settings. + + (We don't factor in the actual ChatHistory messages themselves.) + + Returns: + True if the other object is a ChatHistoryTruncationReducer with the same truncation settings. + """ + if not isinstance(other, ChatHistoryTruncationReducer): + return False + return self.threshold_count == other.threshold_count and self.target_count == other.target_count + + def __hash__(self) -> int: + """Return a hash code based on truncation settings. + + Returns: + A hash code based on the truncation settings. 
+ """ + return hash((self.__class__.__name__, self.threshold_count, self.target_count)) diff --git a/python/semantic_kernel/exceptions/agent_exceptions.py b/python/semantic_kernel/exceptions/agent_exceptions.py index 1c6b5bb897cf..0f13d6ddd368 100644 --- a/python/semantic_kernel/exceptions/agent_exceptions.py +++ b/python/semantic_kernel/exceptions/agent_exceptions.py @@ -38,3 +38,9 @@ class AgentChatException(AgentException): """An error occurred while invoking the agent chat.""" pass + + +class AgentChatHistoryReducerException(AgentException): + """An error occurred while reducing the chat history.""" + + pass diff --git a/python/semantic_kernel/exceptions/content_exceptions.py b/python/semantic_kernel/exceptions/content_exceptions.py index d9c3f5aa10c5..4ad619951f4f 100644 --- a/python/semantic_kernel/exceptions/content_exceptions.py +++ b/python/semantic_kernel/exceptions/content_exceptions.py @@ -39,7 +39,14 @@ class FunctionCallInvalidArgumentsException(ContentException): pass +class ChatHistoryReducerException(ContentException): + """An error occurred while reducing chat history.""" + + pass + + __all__ = [ + "ChatHistoryReducerException", "ContentAdditionException", "ContentException", "ContentInitializationError", diff --git a/python/tests/unit/agents/test_chat_completion_agent.py b/python/tests/unit/agents/test_chat_completion_agent.py index 191826aa23a9..01f9813acf83 100644 --- a/python/tests/unit/agents/test_chat_completion_agent.py +++ b/python/tests/unit/agents/test_chat_completion_agent.py @@ -206,7 +206,8 @@ def test_get_channel_keys(): agent = ChatCompletionAgent() keys = agent.get_channel_keys() - assert keys == [ChatHistoryChannel.__name__] + for key in keys: + assert isinstance(key, str) async def test_create_channel(): diff --git a/python/tests/unit/agents/test_chat_history_channel.py b/python/tests/unit/agents/test_chat_history_channel.py index acb563b9ca7c..4ba15f01a062 100644 --- a/python/tests/unit/agents/test_chat_history_channel.py +++ b/python/tests/unit/agents/test_chat_history_channel.py @@ -23,6 +23,9 @@ async def invoke_stream(self, history: list[ChatMessageContent]) -> AsyncIterabl for message in history: yield ChatMessageContent(role=AuthorRole.SYSTEM, content=f"Processed: {message.content}") + async def reduce_history(self, history: list[ChatMessageContent]) -> list[ChatMessageContent]: + return history + class MockNonChatHistoryHandler: """Mock agent to test incorrect instance handling.""" diff --git a/python/tests/unit/agents/test_sequential_strategy_selection.py b/python/tests/unit/agents/test_sequential_strategy_selection.py index 1a2db9d7409d..17754bd389fd 100644 --- a/python/tests/unit/agents/test_sequential_strategy_selection.py +++ b/python/tests/unit/agents/test_sequential_strategy_selection.py @@ -1,12 +1,13 @@ # Copyright (c) Microsoft. All rights reserved. 
-from unittest.mock import AsyncMock +from unittest.mock import AsyncMock, MagicMock import pytest from semantic_kernel.agents.agent import Agent from semantic_kernel.agents.channels.agent_channel import AgentChannel from semantic_kernel.agents.strategies.selection.sequential_selection_strategy import SequentialSelectionStrategy +from semantic_kernel.exceptions.agent_exceptions import AgentExecutionException class MockAgent(Agent): @@ -78,13 +79,47 @@ async def test_sequential_selection_exceeds_length(agents): selected_agent = await strategy.next(agents, []) assert selected_agent.id == "agent-0" + assert strategy._index == 0 + + selected_agent = await strategy.next(agents, []) + + assert selected_agent.id == "agent-1" assert strategy._index == 1 async def test_sequential_selection_empty_agents(): strategy = SequentialSelectionStrategy() - with pytest.raises(ValueError) as excinfo: + with pytest.raises(AgentExecutionException) as excinfo: await strategy.next([], []) - assert "No agents to select from" in str(excinfo.value) + assert "Agent Failure - No agents present to select." in str(excinfo.value) + + +async def test_sequential_selection_avoid_selecting_same_agent_twice(): + # Arrange + agent_0 = MagicMock(spec=Agent) + agent_0.id = "agent-0" + agent_0.name = "Agent0" + + agent_1 = MagicMock(spec=Agent) + agent_1.id = "agent-1" + agent_1.name = "Agent1" + + agents = [agent_0, agent_1] + + strategy = SequentialSelectionStrategy() + # Simulate that we've already selected an agent once: + strategy.has_selected = True + # Set the initial agent to the first agent + strategy.initial_agent = agent_0 + # Ensure the internal index is set to -1 + strategy._index = -1 + + # Act + selected_agent = await strategy.next(agents, []) + + # Assert + # According to the condition, we should skip selecting agent_0 again + assert selected_agent.id == "agent-1" + assert strategy._index == 1 diff --git a/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py b/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py index 942322bf5153..05fa5773729a 100644 --- a/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py +++ b/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py @@ -27,11 +27,12 @@ # region init def test_azure_ai_inference_chat_completion_init(azure_ai_inference_unit_test_env, model_id) -> None: """Test initialization of AzureAIInferenceChatCompletion""" - azure_ai_inference = AzureAIInferenceChatCompletion(model_id) + azure_ai_inference = AzureAIInferenceChatCompletion(model_id, instruction_role="developer") assert azure_ai_inference.ai_model_id == model_id assert azure_ai_inference.service_id == model_id assert isinstance(azure_ai_inference.client, ChatCompletionsClient) + assert azure_ai_inference.instruction_role == "developer" @patch("azure.ai.inference.aio.ChatCompletionsClient.__init__", return_value=None) diff --git a/python/tests/unit/connectors/ai/open_ai/services/test_azure_chat_completion.py b/python/tests/unit/connectors/ai/open_ai/services/test_azure_chat_completion.py index 731f0b04d2d3..c512a38f1b10 100644 --- a/python/tests/unit/connectors/ai/open_ai/services/test_azure_chat_completion.py +++ b/python/tests/unit/connectors/ai/open_ai/services/test_azure_chat_completion.py @@ -17,7 +17,6 @@ from openai.types.chat.chat_completion_message import ChatCompletionMessage from 
semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase -from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion from semantic_kernel.connectors.ai.open_ai.exceptions.content_filter_ai_exception import ( @@ -193,6 +192,30 @@ async def test_cmc( ) +@patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) +async def test_cmc_with_developer_instruction_role_propagates( + mock_create, + kernel: Kernel, + azure_openai_unit_test_env, + chat_history: ChatHistory, + mock_chat_completion_response: ChatCompletion, +) -> None: + mock_create.return_value = mock_chat_completion_response + chat_history.add_user_message("hello world") + complete_prompt_execution_settings = AzureChatPromptExecutionSettings(service_id="test_service_id") + + azure_chat_completion = AzureChatCompletion(instruction_role="developer") + await azure_chat_completion.get_chat_message_contents( + chat_history=chat_history, settings=complete_prompt_execution_settings, kernel=kernel + ) + mock_create.assert_awaited_once_with( + model=azure_openai_unit_test_env["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"], + stream=False, + messages=azure_chat_completion._prepare_chat_history_for_request(chat_history), + ) + assert azure_chat_completion.instruction_role == "developer" + + @patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) async def test_cmc_with_logit_bias( mock_create, @@ -865,7 +888,7 @@ async def test_no_kernel_provided_throws_error( prompt = "some prompt that would trigger the content filtering" chat_history.add_user_message(prompt) complete_prompt_execution_settings = AzureChatPromptExecutionSettings( - function_call_behavior=FunctionCallBehavior.AutoInvokeKernelFunctions() + function_choice_behavior=FunctionChoiceBehavior.Auto() ) test_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") @@ -891,7 +914,7 @@ async def test_auto_invoke_false_no_kernel_provided_throws_error( prompt = "some prompt that would trigger the content filtering" chat_history.add_user_message(prompt) complete_prompt_execution_settings = AzureChatPromptExecutionSettings( - function_call_behavior=FunctionCallBehavior.EnableFunctions(auto_invoke=False, filters={}) + function_choice_behavior=FunctionChoiceBehavior.Auto(auto_invoke=False) ) test_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") diff --git a/python/tests/unit/connectors/ai/open_ai/services/test_openai_chat_completion_base.py b/python/tests/unit/connectors/ai/open_ai/services/test_openai_chat_completion_base.py index db432e4db8eb..c0b1000ae159 100644 --- a/python/tests/unit/connectors/ai/open_ai/services/test_openai_chat_completion_base.py +++ b/python/tests/unit/connectors/ai/open_ai/services/test_openai_chat_completion_base.py @@ -12,7 +12,6 @@ from openai.types.chat.chat_completion_chunk import ChoiceDelta as ChunkChoiceDelta from openai.types.chat.chat_completion_message import ChatCompletionMessage -from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import ( OpenAIChatPromptExecutionSettings, @@ -115,6 +114,30 @@ async def test_cmc_singular( ) +@patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) +async def 
test_cmc_singular_with_developer_instruction_propagates( + mock_create, + kernel: Kernel, + chat_history: ChatHistory, + mock_chat_completion_response: ChatCompletion, + openai_unit_test_env, +): + mock_create.return_value = mock_chat_completion_response + chat_history.add_user_message("hello world") + complete_prompt_execution_settings = OpenAIChatPromptExecutionSettings(service_id="test_service_id") + + openai_chat_completion = OpenAIChatCompletion(instruction_role="developer") + await openai_chat_completion.get_chat_message_content( + chat_history=chat_history, settings=complete_prompt_execution_settings, kernel=kernel + ) + mock_create.assert_awaited_once_with( + model=openai_unit_test_env["OPENAI_CHAT_MODEL_ID"], + stream=False, + messages=openai_chat_completion._prepare_chat_history_for_request(chat_history), + ) + assert openai_chat_completion.instruction_role == "developer" + + @patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) async def test_cmc_prompt_execution_settings( mock_create, @@ -167,7 +190,7 @@ async def test_cmc_function_call_behavior( chat_history.add_user_message("hello world") orig_chat_history = deepcopy(chat_history) complete_prompt_execution_settings = OpenAIChatPromptExecutionSettings( - service_id="test_service_id", function_call_behavior=FunctionCallBehavior.AutoInvokeKernelFunctions() + service_id="test_service_id", function_choice_behavior=FunctionChoiceBehavior.Auto() ) with patch( "semantic_kernel.kernel.Kernel.invoke_function_call", @@ -673,7 +696,7 @@ async def test_scmc_function_call_behavior( chat_history.add_user_message("hello world") orig_chat_history = deepcopy(chat_history) complete_prompt_execution_settings = OpenAIChatPromptExecutionSettings( - service_id="test_service_id", function_call_behavior=FunctionCallBehavior.AutoInvokeKernelFunctions() + service_id="test_service_id", function_choice_behavior=FunctionChoiceBehavior.Auto() ) with patch( "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._process_function_call", diff --git a/python/tests/unit/connectors/ai/test_function_call_behavior.py b/python/tests/unit/connectors/ai/test_function_call_behavior.py deleted file mode 100644 index f9e27d6ad85c..000000000000 --- a/python/tests/unit/connectors/ai/test_function_call_behavior.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. 
- -from typing import TYPE_CHECKING -from unittest.mock import Mock - -import pytest - -from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior - -if TYPE_CHECKING: - from semantic_kernel.kernel import Kernel - - -@pytest.fixture -def function_call_behavior(): - return FunctionCallBehavior() - - -@pytest.fixture -def update_settings_callback(): - mock = Mock() - mock.return_value = None - return mock - - -def test_function_call_behavior(): - fcb = FunctionCallBehavior() - assert fcb is not None - assert fcb.enable_kernel_functions is True - assert fcb.max_auto_invoke_attempts == 5 - assert fcb.auto_invoke_kernel_functions is True - - -def test_function_call_behavior_get_set(function_call_behavior: FunctionCallBehavior): - function_call_behavior.enable_kernel_functions = False - assert function_call_behavior.enable_kernel_functions is False - function_call_behavior.max_auto_invoke_attempts = 10 - assert function_call_behavior.max_auto_invoke_attempts == 10 - assert function_call_behavior.auto_invoke_kernel_functions is True - function_call_behavior.auto_invoke_kernel_functions = False - assert function_call_behavior.auto_invoke_kernel_functions is False - assert function_call_behavior.max_auto_invoke_attempts == 0 - function_call_behavior.auto_invoke_kernel_functions = True - assert function_call_behavior.auto_invoke_kernel_functions is True - assert function_call_behavior.max_auto_invoke_attempts == 5 - - -def test_auto_invoke_kernel_functions(): - fcb = FunctionCallBehavior.AutoInvokeKernelFunctions() - assert fcb is not None - assert fcb.enable_kernel_functions is True - assert fcb.max_auto_invoke_attempts == 5 - assert fcb.auto_invoke_kernel_functions is True - - -def test_enable_kernel_functions(): - fcb = FunctionCallBehavior.EnableKernelFunctions() - assert fcb is not None - assert fcb.enable_kernel_functions is True - assert fcb.max_auto_invoke_attempts == 0 - assert fcb.auto_invoke_kernel_functions is False - - -def test_enable_functions(): - fcb = FunctionCallBehavior.EnableFunctions(auto_invoke=True, filters={"excluded_plugins": ["test"]}) - assert fcb is not None - assert fcb.enable_kernel_functions is True - assert fcb.max_auto_invoke_attempts == 5 - assert fcb.auto_invoke_kernel_functions is True - assert fcb.filters == {"excluded_plugins": ["test"]} - - -def test_required_function(): - fcb = FunctionCallBehavior.RequiredFunction(auto_invoke=True, function_fully_qualified_name="test") - assert fcb is not None - assert fcb.enable_kernel_functions is True - assert fcb.max_auto_invoke_attempts == 1 - assert fcb.auto_invoke_kernel_functions is True - assert fcb.function_fully_qualified_name == "test" - - -def test_configure_default(function_call_behavior: FunctionCallBehavior, update_settings_callback, kernel: "Kernel"): - function_call_behavior.configure(kernel, update_settings_callback, None) - assert not update_settings_callback.called - - -def test_configure_kernel_functions(update_settings_callback, kernel: "Kernel"): - fcb = FunctionCallBehavior.AutoInvokeKernelFunctions() - fcb.configure(kernel, update_settings_callback, None) - assert update_settings_callback.called - - -def test_configure_kernel_functions_skip(update_settings_callback, kernel: "Kernel"): - fcb = FunctionCallBehavior.AutoInvokeKernelFunctions() - fcb.enable_kernel_functions = False - fcb.configure(kernel, update_settings_callback, None) - assert not update_settings_callback.called - - -def test_configure_enable_kernel_functions(update_settings_callback, kernel: "Kernel"): - 
fcb = FunctionCallBehavior.EnableKernelFunctions() - fcb.configure(kernel, update_settings_callback, None) - assert update_settings_callback.called - - -def test_configure_enable_kernel_functions_skip(update_settings_callback, kernel: "Kernel"): - fcb = FunctionCallBehavior.EnableKernelFunctions() - fcb.enable_kernel_functions = False - fcb.configure(kernel, update_settings_callback, None) - assert not update_settings_callback.called - - -def test_configure_enable_functions(update_settings_callback, kernel: "Kernel"): - fcb = FunctionCallBehavior.EnableFunctions(auto_invoke=True, filters={"excluded_plugins": ["test"]}) - fcb.configure(kernel, update_settings_callback, None) - assert update_settings_callback.called - - -def test_configure_enable_functions_skip(update_settings_callback, kernel: "Kernel"): - fcb = FunctionCallBehavior.EnableFunctions(auto_invoke=True, filters={"excluded_plugins": ["test"]}) - fcb.enable_kernel_functions = False - fcb.configure(kernel, update_settings_callback, None) - assert not update_settings_callback.called - - -def test_configure_required_function(update_settings_callback, kernel: "Kernel"): - fcb = FunctionCallBehavior.RequiredFunction(auto_invoke=True, function_fully_qualified_name="test") - fcb.configure(kernel, update_settings_callback, None) - assert update_settings_callback.called - - -def test_configure_required_function_max_invoke_updated(update_settings_callback, kernel: "Kernel"): - fcb = FunctionCallBehavior.RequiredFunction(auto_invoke=True, function_fully_qualified_name="test") - fcb.max_auto_invoke_attempts = 10 - fcb.configure(kernel, update_settings_callback, None) - assert update_settings_callback.called - assert fcb.max_auto_invoke_attempts == 1 - - -def test_configure_required_function_skip(update_settings_callback, kernel: "Kernel"): - fcb = FunctionCallBehavior.RequiredFunction(auto_invoke=True, function_fully_qualified_name="test") - fcb.enable_kernel_functions = False - fcb.configure(kernel, update_settings_callback, None) - assert not update_settings_callback.called diff --git a/python/tests/unit/connectors/ai/test_function_choice_behavior.py b/python/tests/unit/connectors/ai/test_function_choice_behavior.py index 89e211881c08..58787be12988 100644 --- a/python/tests/unit/connectors/ai/test_function_choice_behavior.py +++ b/python/tests/unit/connectors/ai/test_function_choice_behavior.py @@ -8,12 +8,11 @@ if TYPE_CHECKING: from semantic_kernel.kernel import Kernel -from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior +from semantic_kernel.connectors.ai.function_calling_utils import _combine_filter_dicts from semantic_kernel.connectors.ai.function_choice_behavior import ( DEFAULT_MAX_AUTO_INVOKE_ATTEMPTS, FunctionChoiceBehavior, FunctionChoiceType, - _combine_filter_dicts, ) from semantic_kernel.exceptions import ServiceInitializationError @@ -50,38 +49,6 @@ def test_function_choice_behavior_required(): assert behavior.filters == expected_filters -def test_from_function_call_behavior_kernel_functions(): - behavior = FunctionCallBehavior.AutoInvokeKernelFunctions() - new_behavior = FunctionChoiceBehavior.from_function_call_behavior(behavior) - assert new_behavior.type_ == FunctionChoiceType.AUTO - assert new_behavior.auto_invoke_kernel_functions is True - - -def test_from_function_call_behavior_required(): - behavior = FunctionCallBehavior.RequiredFunction(auto_invoke=True, function_fully_qualified_name="plugin1-func1") - new_behavior = FunctionChoiceBehavior.from_function_call_behavior(behavior) - 
assert new_behavior.type_ == FunctionChoiceType.REQUIRED - assert new_behavior.auto_invoke_kernel_functions is True - assert new_behavior.filters == {"included_functions": ["plugin1-func1"]} - - -def test_from_function_call_behavior_enabled_functions(): - expected_filters = {"included_functions": ["plugin1-func1"]} - behavior = FunctionCallBehavior.EnableFunctions(auto_invoke=True, filters=expected_filters) - new_behavior = FunctionChoiceBehavior.from_function_call_behavior(behavior) - assert new_behavior.type_ == FunctionChoiceType.AUTO - assert new_behavior.auto_invoke_kernel_functions is True - assert new_behavior.filters == expected_filters - - -def test_from_function_call_behavior(): - behavior = FunctionCallBehavior() - new_behavior = FunctionChoiceBehavior.from_function_call_behavior(behavior) - assert new_behavior is not None - assert new_behavior.enable_kernel_functions == behavior.enable_kernel_functions - assert new_behavior.maximum_auto_invoke_attempts == behavior.max_auto_invoke_attempts - - @pytest.mark.parametrize(("type", "max_auto_invoke_attempts"), [("auto", 5), ("none", 0), ("required", 1)]) def test_auto_function_choice_behavior_from_dict(type: str, max_auto_invoke_attempts: int): data = { diff --git a/python/tests/unit/contents/test_chat_history_reducer_utils.py b/python/tests/unit/contents/test_chat_history_reducer_utils.py new file mode 100644 index 000000000000..b2f6ac2e282f --- /dev/null +++ b/python/tests/unit/contents/test_chat_history_reducer_utils.py @@ -0,0 +1,196 @@ +# Copyright (c) Microsoft. All rights reserved. + +import pytest + +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.function_call_content import FunctionCallContent +from semantic_kernel.contents.function_result_content import FunctionResultContent +from semantic_kernel.contents.history_reducer.chat_history_reducer_utils import ( + SUMMARY_METADATA_KEY, + contains_function_call_or_result, + extract_range, + get_call_result_pairs, + locate_safe_reduction_index, + locate_summarization_boundary, +) +from semantic_kernel.contents.utils.author_role import AuthorRole + + +@pytest.fixture +def chat_messages_with_pairs(): + msgs = [] + + # 1) Summary message at index 0 (system) + msg_summary = ChatMessageContent(role=AuthorRole.SYSTEM, content="Summary so far.") + msg_summary.metadata[SUMMARY_METADATA_KEY] = True + msgs.append(msg_summary) + + # 2) Normal user message + msgs.append(ChatMessageContent(role=AuthorRole.USER, content="User says hello.")) + + # 3) Function call (call ID = "call1") + msg_func_call_1 = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Function call #1") + func_call_content_1 = FunctionCallContent(id="call1", function_name="funcA", arguments={"param": "valA"}) + msg_func_call_1.items.append(func_call_content_1) + msgs.append(msg_func_call_1) + + # 4) Function result for call1 + msg_func_result_1 = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Result for call #1") + func_result_content_1 = FunctionResultContent(id="call1", content="Function #1 result text") + msg_func_result_1.items.append(func_result_content_1) + msgs.append(msg_func_result_1) + + # 5) Another user message + msgs.append(ChatMessageContent(role=AuthorRole.USER, content="Another user message.")) + + # 6) Another function call (call ID = "call2") + msg_func_call_2 = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Function call #2") + func_call_content_2 = FunctionCallContent(id="call2", function_name="funcB", arguments={"param": 
"valB"}) + msg_func_call_2.items.append(func_call_content_2) + msgs.append(msg_func_call_2) + + # 7) Another user message (no result yet for "call2") + msgs.append(ChatMessageContent(role=AuthorRole.USER, content="Wait, function result not yet?")) + + # 8) Unrelated function result (call ID = "callX" doesn't match any prior call) + msg_func_result_x = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Result for unknown call") + func_result_content_x = FunctionResultContent(id="callX", content="No matching call.") + msg_func_result_x.items.append(func_result_content_x) + msgs.append(msg_func_result_x) + + # 9) Function result for call2 + msg_func_result_2 = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Result for call #2") + func_result_content_2 = FunctionResultContent(id="call2", content="Function #2 result text") + msg_func_result_2.items.append(func_result_content_2) + msgs.append(msg_func_result_2) + + return msgs + + +def test_get_call_result_pairs_fixture_has_pairs(chat_messages_with_pairs): + """ + Since 'chat_messages_with_pairs' includes function calls with IDs, + we expect pairs. Specifically: + - (2,3) for call1 + - (5,8) for call2 + """ + pairs = get_call_result_pairs(chat_messages_with_pairs) + assert (2, 3) in pairs, "Expected pair for (call1) in indexes (2,3)." + assert (5, 8) in pairs, "Expected pair for (call2) in indexes (5,8)." + assert len(pairs) == 2, "Fixture should produce exactly two matched call->result pairs." + + +@pytest.mark.parametrize( + "message_items,expected", + [ + ([], False), + ([FunctionCallContent(function_name="funcA", arguments={})], True), + ([FunctionResultContent(id="test", content="Result")], True), + ], +) +def test_contains_function_call_or_result(message_items, expected): + msg = ChatMessageContent(role=AuthorRole.USER, content="Test") + msg.items.extend(message_items) + assert contains_function_call_or_result(msg) == expected + + +def test_extract_range_preserve_pairs(chat_messages_with_pairs): + """ + Tests that extract_range with preserve_pairs=True keeps or skips + call/result pairs together. We'll slice from index=2 to index=9 + in the updated fixture. + """ + extracted = extract_range( + chat_messages_with_pairs, + start=2, + end=9, # exclusive of index=9 + preserve_pairs=True, + ) + + # Indices in range(2..9) => 2,3,4,5,6,7,8 + # The code should preserve both pairs if they're fully in the slice. + # Pairs are (2,3) and (5,8). They are indeed fully inside [2..9). + # So we expect to keep them plus indices 4,6,7. That totals 7 messages. + assert len(extracted) == 7 + + # Instead of asserting exact positional equality, just check we + # have the same set of messages from 2..9 (no duplicates or omissions). + expected_slice = chat_messages_with_pairs[2:9] # indexes 2..8 + assert set(extracted) == set(expected_slice), "Expected messages 2..8 to be returned." + + +def test_extract_range_preserve_pairs_call_outside_slice(chat_messages_with_pairs): + """ + If a function call is outside the start/end range but the result is inside, + we do NOT have to preserve that pair since it's partially out of range. + We'll pick start=4, end=9 => indices 4..8. + """ + extracted = extract_range(chat_messages_with_pairs, start=4, end=9, preserve_pairs=True) + + # Indices in range(4..9) => 4,5,6,7,8 + # Pairs: (2,3) is outside, (5,8) is fully inside. So (5,8) is kept together. + # The final set of messages is [4,5,6,7,8] => 5 total. 
+ assert len(extracted) == 5 + + expected_slice = chat_messages_with_pairs[4:9] # indexes 4..8 + assert set(extracted) == set(expected_slice), "Expected messages 4..8 to be returned." + + # (2,3) do not appear, and that's correct since they're outside this slice. + + +def test_locate_summarization_boundary_empty(): + # Edge case: empty history => boundary = 0 + empty_history = [] + assert locate_summarization_boundary(empty_history) == 0 + + +def test_locate_safe_reduction_index_multiple_calls(chat_messages_with_pairs): + """ + If we set a small target_count, the code will attempt to find a safe + reduction index that doesn't orphan a function call/result pair. + """ + total_count = len(chat_messages_with_pairs) # 9 + target_count = 4 + idx = locate_safe_reduction_index( + chat_messages_with_pairs, + target_count=target_count, + threshold_count=0, + offset_count=0, + ) + # We expect a valid index because total_count (9) > target_count (4). + assert idx is not None and 0 < idx < total_count + + # Verify that from idx onward, we haven't split a matched call->result pair. + pairs = get_call_result_pairs(chat_messages_with_pairs) + for call_i, result_i in pairs: + if call_i >= idx: + # If the call is in the reduced set, the result must be in the reduced set: + assert result_i >= idx + if result_i >= idx: + # If the result is in the reduced set, the call must be in the reduced set: + assert call_i >= idx + + +def test_locate_safe_reduction_index_high_offset(chat_messages_with_pairs): + """ + If offset_count is large, we might not be able to reduce. Then the function + can return None if no valid reduction can be found after skipping the offset. + """ + target_count = 3 + threshold_count = 0 + offset_count = 5 + + idx = locate_safe_reduction_index( + chat_messages_with_pairs, + target_count=target_count, + threshold_count=threshold_count, + offset_count=offset_count, + ) + + # Possibly None if we cannot reduce after skipping the first 5 messages. + if idx is not None: + # Then it must be >= offset_count + assert idx >= offset_count + else: + # It's fine if it returns None, meaning no valid safe reduction was found. + pass diff --git a/python/tests/unit/contents/test_chat_history_summarization_reducer.py b/python/tests/unit/contents/test_chat_history_summarization_reducer.py new file mode 100644 index 000000000000..35e13c969522 --- /dev/null +++ b/python/tests/unit/contents/test_chat_history_summarization_reducer.py @@ -0,0 +1,202 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase +from semantic_kernel.const import DEFAULT_SERVICE_NAME +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.history_reducer.chat_history_reducer_utils import SUMMARY_METADATA_KEY +from semantic_kernel.contents.history_reducer.chat_history_summarization_reducer import ( + DEFAULT_SUMMARIZATION_PROMPT, + ChatHistorySummarizationReducer, +) +from semantic_kernel.contents.utils.author_role import AuthorRole +from semantic_kernel.exceptions.content_exceptions import ChatHistoryReducerException + + +@pytest.fixture +def mock_service(): + """Returns a mock ChatCompletionClientBase with required methods.""" + service = MagicMock(spec=ChatCompletionClientBase) + # Mock the get_prompt_execution_settings_class to return a placeholder + service.get_prompt_execution_settings_class.return_value = MagicMock(return_value=MagicMock(service_id="foo")) + # Mock the async call get_chat_message_content + service.get_chat_message_content = AsyncMock() + return service + + +@pytest.fixture +def chat_messages(): + """Returns a list of ChatMessageContent objects with default roles.""" + msgs = [] + + # Existing summary + summary_msg = ChatMessageContent(role=AuthorRole.SYSTEM, content="Prior summary.") + summary_msg.metadata[SUMMARY_METADATA_KEY] = True + msgs.append(summary_msg) + + # Normal user messages + msgs.append(ChatMessageContent(role=AuthorRole.USER, content="Hello!")) + msgs.append(ChatMessageContent(role=AuthorRole.ASSISTANT, content="Hi there.")) + msgs.append(ChatMessageContent(role=AuthorRole.USER, content="What can you do?")) + msgs.append(ChatMessageContent(role=AuthorRole.ASSISTANT, content="I can help with tasks.")) + msgs.append(ChatMessageContent(role=AuthorRole.USER, content="Ok, let's do something.")) + return msgs + + +def test_summarization_reducer_init(mock_service): + reducer = ChatHistorySummarizationReducer( + service=mock_service, + target_count=10, + service_id="my_service", + threshold_count=5, + summarization_instructions="Custom instructions", + use_single_summary=False, + fail_on_error=False, + ) + + assert reducer.service == mock_service + assert reducer.target_count == 10 + assert reducer.service_id == "my_service" + assert reducer.threshold_count == 5 + assert reducer.summarization_instructions == "Custom instructions" + assert reducer.use_single_summary is False + assert reducer.fail_on_error is False + + +def test_summarization_reducer_defaults(mock_service): + reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=5) + # Check default property values + assert reducer.threshold_count == 0 + assert reducer.summarization_instructions == DEFAULT_SUMMARIZATION_PROMPT + assert reducer.use_single_summary is True + assert reducer.fail_on_error is True + assert reducer.service_id == DEFAULT_SERVICE_NAME + + +def test_summarization_reducer_eq_and_hash(mock_service): + r1 = ChatHistorySummarizationReducer(service=mock_service, target_count=5, threshold_count=2) + r2 = ChatHistorySummarizationReducer(service=mock_service, target_count=5, threshold_count=2) + r3 = ChatHistorySummarizationReducer(service=mock_service, target_count=6, threshold_count=2) + assert r1 == r2 + assert r1 != r3 + + # Test hash + assert hash(r1) == hash(r2) + assert hash(r1) != hash(r3) + + +async def test_summarization_reducer_reduce_no_need(chat_messages, mock_service): + reducer =
ChatHistorySummarizationReducer(service=mock_service, target_count=10, threshold_count=0) + + # If len(history) <= target_count => None + result = await reducer.reduce() + assert result is None + mock_service.get_chat_message_content.assert_not_awaited() + + +async def test_summarization_reducer_reduce_needed(mock_service): + messages = [ + # A summary message (as in the original test) + ChatMessageContent(role=AuthorRole.SYSTEM, content="Existing summary", metadata={SUMMARY_METADATA_KEY: True}), + # Enough additional messages so total is > 4 + ChatMessageContent(role=AuthorRole.USER, content="User says hello"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds"), + ChatMessageContent(role=AuthorRole.USER, content="User says more"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds again"), + ChatMessageContent(role=AuthorRole.USER, content="User says more"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds again"), + ] + + reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3, threshold_count=1) + reducer.messages = messages # Set the chat history + + # Mock that the service will return a single summary message + summary_content = ChatMessageContent(role=AuthorRole.ASSISTANT, content="This is a summary.") + mock_service.get_chat_message_content.return_value = summary_content + + result = await reducer.reduce() + assert result is not None, "We expect a shortened list with a new summary inserted." + assert len(result) <= 5, "The resulting list should be shortened to around target_count + threshold_count." + assert any(msg.metadata.get(SUMMARY_METADATA_KEY) for msg in result), ( + "We expect to see a newly inserted summary message." + ) + + +async def test_summarization_reducer_reduce_no_messages_to_summarize(mock_service): + # If we do use_single_summary=False, the older_range_start is insertion_point + # In that scenario, if insertion_point == older_range_end => no messages to summarize => return None + reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=1, use_single_summary=False) + + # Provide just one message flagged as summary => insertion_point=0, so older_range_start=0, older_range_end=0 + only_summary = [ + ChatMessageContent(role=AuthorRole.SYSTEM, content="Only summary.", metadata={SUMMARY_METADATA_KEY: True}) + ] + + reducer.add_message(only_summary[0]) + result = await reducer.reduce() + assert result is None + mock_service.get_chat_message_content.assert_not_awaited() + + +async def test_summarization_reducer_reduce_summarizer_returns_none(mock_service): + # If the summarizer yields no messages, we return None + reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3) + # Provide enough messages that summarization would normally occur + messages = [ + ChatMessageContent(role=AuthorRole.SYSTEM, content="Existing summary", metadata={SUMMARY_METADATA_KEY: True}), + ChatMessageContent(role=AuthorRole.USER, content="User asks something"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant replies"), + ChatMessageContent(role=AuthorRole.USER, content="Another user query"), + ] + reducer.messages = messages + + # Summarizer returns None + mock_service.get_chat_message_content.return_value = None + + result = await reducer.reduce() + assert result is None, "If the summarizer yields no message, we return None." 
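+
+
+async def test_summarization_reducer_threshold_arithmetic_sketch(mock_service):
+    """A worked sketch of the trigger arithmetic (assumption, consistent with the
+    tests above: reduce() is a no-op while len(messages) <= target_count + threshold_count)."""
+    reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3, threshold_count=1)
+    # Exactly at the limit: 4 messages, and 4 <= 3 + 1 => no reduction, no service call
+    reducer.messages = [ChatMessageContent(role=AuthorRole.USER, content=f"m{i}") for i in range(4)]
+    assert await reducer.reduce() is None
+    mock_service.get_chat_message_content.assert_not_awaited()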
+
+
+async def test_summarization_reducer_reduce_summarization_fails(mock_service):
+    # If summarization fails and fail_on_error=True, we raise
+    reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3, fail_on_error=True)
+    # Enough messages to trigger summarization
+    messages = [
+        ChatMessageContent(role=AuthorRole.USER, content="Msg1"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Msg2"),
+        ChatMessageContent(role=AuthorRole.USER, content="Msg3"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Msg4"),
+    ]
+    reducer.messages = messages
+
+    mock_service.get_chat_message_content.side_effect = Exception("Summarizer error")
+
+    with pytest.raises(ChatHistoryReducerException, match="failed"):
+        await reducer.reduce()
+
+
+async def test_summarization_reducer_reduce_summarization_fails_no_raise(chat_messages, mock_service):
+    # If summarization fails but fail_on_error=False, we just log and return None
+    reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3, fail_on_error=False)
+    mock_service.get_chat_message_content.side_effect = Exception("Summarizer error")
+    reducer.messages = chat_messages
+    result = await reducer.reduce()
+    assert result is None
+
+
+async def test_summarization_reducer_private_summarize(mock_service):
+    """Directly test the _summarize method for coverage."""
+    reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=5)
+    chat_messages = [
+        ChatMessageContent(role=AuthorRole.USER, content="Message1"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Message2"),
+    ]
+
+    summary_content = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Mock Summary")
+    mock_service.get_chat_message_content.return_value = summary_content
+
+    actual_summary = await reducer._summarize(chat_messages)
+    assert actual_summary is not None, "We should get a summary message back."
+    assert actual_summary.content == "Mock Summary", "We expect the mock summary content."
diff --git a/python/tests/unit/contents/test_chat_history_truncation_reducer.py b/python/tests/unit/contents/test_chat_history_truncation_reducer.py
new file mode 100644
index 000000000000..7f94eccf8518
--- /dev/null
+++ b/python/tests/unit/contents/test_chat_history_truncation_reducer.py
@@ -0,0 +1,71 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import pytest
+
+from semantic_kernel.contents.chat_message_content import ChatMessageContent
+from semantic_kernel.contents.history_reducer.chat_history_truncation_reducer import ChatHistoryTruncationReducer
+from semantic_kernel.contents.utils.author_role import AuthorRole
+
+
+@pytest.fixture
+def chat_messages():
+    msgs = []
+    msgs.append(ChatMessageContent(role=AuthorRole.SYSTEM, content="System message."))
+    msgs.append(ChatMessageContent(role=AuthorRole.USER, content="User message 1"))
+    msgs.append(ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant message 1"))
+    msgs.append(ChatMessageContent(role=AuthorRole.USER, content="User message 2"))
+    msgs.append(ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant message 2"))
+    return msgs
+
+
+def test_truncation_reducer_init():
+    reducer = ChatHistoryTruncationReducer(target_count=5, threshold_count=2)
+    assert reducer.target_count == 5
+    assert reducer.threshold_count == 2
+
+
+def test_truncation_reducer_defaults():
+    reducer = ChatHistoryTruncationReducer(target_count=5)
+    assert reducer.threshold_count == 0
+
+
+def test_truncation_reducer_eq_and_hash():
+    r1 = ChatHistoryTruncationReducer(target_count=5, threshold_count=2)
+    r2 = ChatHistoryTruncationReducer(target_count=5, threshold_count=2)
+    r3 = ChatHistoryTruncationReducer(target_count=5, threshold_count=1)
+    assert r1 == r2
+    assert r1 != r3
+    assert hash(r1) == hash(r2)
+    assert hash(r1) != hash(r3)
+
+
+async def test_truncation_reducer_no_need(chat_messages):
+    # If total <= target + threshold => returns None
+    reducer = ChatHistoryTruncationReducer(target_count=5, threshold_count=0)
+    reducer.messages = chat_messages  # 5 messages <= 5 + 0 => no reduction
+    result = await reducer.reduce()
+    assert result is None
+
+
+async def test_truncation_reducer_no_truncation_index_found():
+    # With a huge threshold_count, total (10) <= target_count + threshold_count (13),
+    # so no reduction takes place and reduce() returns None.
+    msgs = [ChatMessageContent(role=AuthorRole.USER, content="Msg")] * 10
+    reducer = ChatHistoryTruncationReducer(target_count=3, threshold_count=10)
+    reducer.messages = msgs
+    result = await reducer.reduce()
+    assert result is None
+
+
+async def test_truncation_reducer_truncation(chat_messages):
+    # Force a smaller target so reduction is required
+    reducer = ChatHistoryTruncationReducer(target_count=2)
+    reducer.messages = chat_messages
+    result = await reducer.reduce()
+    # We expect only 2 messages to remain after truncation
+    assert result is not None
+    assert len(result) == 2
+    # They should be the last 2 messages
+    assert result[0] == chat_messages[-2]
+    assert result[1] == chat_messages[-1]
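+
+
+async def test_truncation_reducer_keeps_newest_messages_sketch():
+    """An end-to-end sketch (it assumes only behavior asserted above: truncation
+    keeps the newest target_count messages and returns the shortened history)."""
+    history = [ChatMessageContent(role=AuthorRole.USER, content=f"turn {i}") for i in range(10)]
+    reducer = ChatHistoryTruncationReducer(target_count=2)
+    reducer.messages = history
+    reduced = await reducer.reduce()
+    assert reduced is not None
+    assert len(reduced) == 2
+    # Only the two most recent turns survive
+    assert reduced[0] == history[-2]
+    assert reduced[1] == history[-1]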