diff --git a/python/samples/concepts/README.md b/python/samples/concepts/README.md
index 3c62b4156cf7..22f0496e43e6 100644
--- a/python/samples/concepts/README.md
+++ b/python/samples/concepts/README.md
@@ -10,6 +10,7 @@
 - [Assistant Agent Retrieval](./agents/assistant_agent_retrieval.py)
 - [Assistant Agent Streaming](./agents/assistant_agent_streaming.py)
 - [Chat Completion Function Termination](./agents/chat_completion_function_termination.py)
+- [Chat Completion History Reducer](./agents/chat_completion_history_reducer.py)
 - [Mixed Chat Agents](./agents/mixed_chat_agents.py)
 - [Mixed Chat Agents Plugins](./agents/mixed_chat_agents_plugins.py)
 - [Mixed Chat Files](./agents/mixed_chat_files.py)
@@ -45,6 +46,9 @@
 - [Simple Chatbot Store Metadata](./chat_completion/simple_chatbot_store_metadata.py)
 - [Simple Chatbot Streaming](./chat_completion/simple_chatbot_streaming.py)
 - [Simple Chatbot with Image](./chat_completion/simple_chatbot_with_image.py)
+- [Simple Chatbot with Summary History Reducer Keeping Function Content](./chat_completion/simple_chatbot_with_summary_history_reducer_keep_func_content.py)
+- [Simple Chatbot with Summary History Reducer](./chat_completion/simple_chatbot_with_summary_history_reducer.py)
+- [Simple Chatbot with Truncation History Reducer](./chat_completion/simple_chatbot_with_truncation_history_reducer.py)

 ### ChatHistory

 - Using and serializing the [`ChatHistory`](https://github.com/microsoft/semantic-kernel/blob/main/python/semantic_kernel/contents/chat_history.py)
diff --git a/python/samples/concepts/agents/chat_completion_history_reducer.py b/python/samples/concepts/agents/chat_completion_history_reducer.py
new file mode 100644
index 000000000000..1cdffefe7b78
--- /dev/null
+++ b/python/samples/concepts/agents/chat_completion_history_reducer.py
@@ -0,0 +1,298 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+import logging
+from typing import TYPE_CHECKING
+
+from semantic_kernel.agents import (
+    AgentGroupChat,
+    ChatCompletionAgent,
+)
+from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion, OpenAIChatCompletion
+from semantic_kernel.contents import AuthorRole, ChatHistory, ChatMessageContent
+from semantic_kernel.contents.history_reducer.chat_history_summarization_reducer import ChatHistorySummarizationReducer
+from semantic_kernel.contents.history_reducer.chat_history_truncation_reducer import ChatHistoryTruncationReducer
+from semantic_kernel.kernel import Kernel
+
+if TYPE_CHECKING:
+    from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer
+
+#####################################################################
+# The following sample demonstrates how to implement a chat history #
+# reducer as part of the Semantic Kernel Agent Framework. It        #
+# covers two types of reducers: summarization and truncation. For   #
+# this sample, the ChatCompletionAgent is used.                     #
+#####################################################################
+
+
+# Initialize the logger for debugging and information messages
+logger = logging.getLogger(__name__)
+
+# Flag to determine whether to use Azure OpenAI services or OpenAI
+# Set this to True if using Azure OpenAI (requires appropriate configuration)
+use_azure_openai = True
+
+
+# Helper function to create and configure a Kernel with the desired chat completion service
+def _create_kernel_with_chat_completion(service_id: str) -> Kernel:
+    """A helper function to create a kernel with a chat completion service."""
+    kernel = Kernel()
+    if use_azure_openai:
+        # Add Azure OpenAI service to the kernel
+        kernel.add_service(AzureChatCompletion(service_id=service_id))
+    else:
+        # Add OpenAI service to the kernel
+        kernel.add_service(OpenAIChatCompletion(service_id=service_id))
+    return kernel
+
+
+class HistoryReducerExample:
+    """
+    Demonstrates how to create a ChatCompletionAgent with a ChatHistoryReducer
+    (either truncation or summarization) and how to invoke that agent
+    multiple times while applying the history reduction.
+    """
+
+    # Agent-specific settings
+    TRANSLATOR_NAME = "NumeroTranslator"  # Name of the agent
+    TRANSLATOR_INSTRUCTIONS = "Add one to the latest user number and spell it in Spanish without explanation."
+
+    def create_truncating_agent(
+        self, reducer_msg_count: int, reducer_threshold: int
+    ) -> tuple[ChatCompletionAgent, "ChatHistoryReducer"]:
+        """
+        Creates a ChatCompletionAgent with a truncation-based history reducer.
+
+        Parameters:
+        - reducer_msg_count: Target number of messages to retain after truncation.
+        - reducer_threshold: Threshold number of messages to trigger truncation.
+
+        Returns:
+        - A tuple of the configured ChatCompletionAgent and its truncation reducer.
+        """
+        truncation_reducer = ChatHistoryTruncationReducer(
+            target_count=reducer_msg_count, threshold_count=reducer_threshold
+        )
+
+        return ChatCompletionAgent(
+            name=self.TRANSLATOR_NAME,
+            instructions=self.TRANSLATOR_INSTRUCTIONS,
+            kernel=_create_kernel_with_chat_completion("truncate_agent"),
+            history_reducer=truncation_reducer,
+        ), truncation_reducer
+
+    def create_summarizing_agent(
+        self, reducer_msg_count: int, reducer_threshold: int
+    ) -> tuple[ChatCompletionAgent, "ChatHistoryReducer"]:
+        """
+        Creates a ChatCompletionAgent with a summarization-based history reducer.
+
+        Parameters:
+        - reducer_msg_count: Target number of messages to retain after summarization.
+        - reducer_threshold: Threshold number of messages to trigger summarization.
+
+        Returns:
+        - A tuple of the configured ChatCompletionAgent and its summarization reducer.
+        """
+        kernel = _create_kernel_with_chat_completion("summarize_agent")
+
+        summarization_reducer = ChatHistorySummarizationReducer(
+            service=kernel.get_service(service_id="summarize_agent"),
+            target_count=reducer_msg_count,
+            threshold_count=reducer_threshold,
+        )
+
+        return ChatCompletionAgent(
+            name=self.TRANSLATOR_NAME,
+            instructions=self.TRANSLATOR_INSTRUCTIONS,
+            kernel=kernel,
+            history_reducer=summarization_reducer,
+        ), summarization_reducer
+
+    async def invoke_agent(self, agent: ChatCompletionAgent, chat_history: ChatHistory, message_count: int):
+        """
+        Demonstrates agent invocation with direct history management and reduction.
+
+        Parameters:
+        - agent: The ChatCompletionAgent to invoke.
+        - chat_history: The chat history (here, the agent's reducer) that accumulates the conversation.
+        - message_count: The number of messages to simulate in the conversation.
+ """ + + index = 1 + while index <= message_count: + # Provide user input + user_message = ChatMessageContent(role=AuthorRole.USER, content=str(index)) + chat_history.messages.append(user_message) + print(f"# User: '{index}'") + + # Attempt history reduction if a reducer is present + is_reduced = False + if agent.history_reducer is not None: + reduced = await agent.history_reducer.reduce() + if reduced is not None: + chat_history.messages.clear() + chat_history.messages.extend(reduced) + is_reduced = True + print("@ (History was reduced!)") + + # Invoke the agent and display its response + async for response in agent.invoke(chat_history): + chat_history.messages.append(response) + print(f"# {response.role} - {response.name}: '{response.content}'") + + # The index is incremented by 2 because the agent is told to: + # "Add one to the latest user number and spell it in Spanish without explanation." + # The user sends 1, 3, 5, etc., and the agent responds with 2, 4, 6, etc. (in Spanish) + index += 2 + print(f"@ Message Count: {len(chat_history.messages)}\n") + + # If history was reduced, and the chat history is of type `ChatHistorySummarizationReducer`, + # print summaries as it will contain the __summary__ metadata key. + if is_reduced and isinstance(chat_history, ChatHistorySummarizationReducer): + self._print_summaries_from_front(chat_history.messages) + + async def invoke_chat(self, agent: ChatCompletionAgent, message_count: int): + """ + Demonstrates agent invocation within a group chat. + + Parameters: + - agent: The ChatCompletionAgent to invoke. + - message_count: The number of messages to simulate in the conversation. + """ + chat = AgentGroupChat() # Initialize a new group chat + last_history_count = 0 + + index = 1 + while index <= message_count: + # Add user message to the chat + user_msg = ChatMessageContent(role=AuthorRole.USER, content=str(index)) + await chat.add_chat_message(user_msg) + print(f"# User: '{index}'") + + # Invoke the agent and display its response + async for message in chat.invoke(agent): + print(f"# {message.role} - {message.name or '*'}: '{message.content}'") + + # The index is incremented by 2 because the agent is told to: + # "Add one to the latest user number and spell it in Spanish without explanation." + # The user sends 1, 3, 5, etc., and the agent responds with 2, 4, 6, etc. (in Spanish) + index += 2 + + # Retrieve chat messages in descending order (newest first) + msgs = [] + async for m in chat.get_chat_messages(agent): + msgs.append(m) + + print(f"@ Message Count: {len(msgs)}\n") + + # Check for reduction in message count and print summaries + if len(msgs) < last_history_count: + self._print_summaries_from_back(msgs) + + last_history_count = len(msgs) + + def _print_summaries_from_front(self, messages: list[ChatMessageContent]): + """ + Prints summaries from the front of the message list. + + Parameters: + - messages: List of chat messages to process. + """ + summary_index = 0 + while summary_index < len(messages): + msg = messages[summary_index] + if msg.metadata and msg.metadata.get("__summary__"): + print(f"\tSummary: {msg.content}") + summary_index += 1 + else: + break + + def _print_summaries_from_back(self, messages: list[ChatMessageContent]): + """ + Prints summaries from the back of the message list. + + Parameters: + - messages: List of chat messages to process. 
+ """ + summary_index = len(messages) - 1 + while summary_index >= 0: + msg = messages[summary_index] + if msg.metadata and msg.metadata.get("__summary__"): + print(f"\tSummary: {msg.content}") + summary_index -= 1 + else: + break + + +# Main entry point for the script +async def main(): + # Initialize the example class + example = HistoryReducerExample() + + # Demonstrate truncation-based reduction + trunc_agent, history_reducer = example.create_truncating_agent( + # reducer_msg_count: + # Purpose: Defines the target number of messages to retain after applying truncation or summarization. + # What it controls: This parameter determines how much of the most recent conversation history + # is preserved while discarding or summarizing older messages. + # Why change it?: + # - Smaller values: Use when memory constraints are tight, or the assistant only needs a brief history + # to maintain context. + # - Larger values: Use when retaining more conversational context is critical for accurate responses + # or maintaining a richer dialogue. + reducer_msg_count=10, + # reducer_threshold: + # Purpose: Acts as a buffer to avoid reducing history prematurely when the current message count exceeds + # reducer_msg_count by a small margin. + # What it controls: Helps ensure that essential paired messages (like a user query and the assistant’s response) + # are not "orphaned" or lost during truncation or summarization. + # Why change it?: + # - Smaller values: Use when you want stricter reduction criteria and are okay with possibly cutting older + # pairs of messages sooner. + # - Larger values: Use when you want to minimize the risk of cutting a critical part of the conversation, + # especially for sensitive interactions like API function calls or complex responses. + reducer_threshold=10, + ) + # print("===TruncatedAgentReduction Demo===") + # await example.invoke_agent(trunc_agent, chat_history=history_reducer, message_count=50) + + # Demonstrate summarization-based reduction + sum_agent, history_reducer = example.create_summarizing_agent( + # Same configuration for summarization-based reduction + reducer_msg_count=10, # Target number of messages to retain + reducer_threshold=10, # Buffer to avoid premature reduction + ) + print("\n===SummarizedAgentReduction Demo===") + await example.invoke_agent(sum_agent, chat_history=history_reducer, message_count=50) + + # Demonstrate group chat with truncation + print("\n===TruncatedChatReduction Demo===") + trunc_agent.history_reducer.messages.clear() + await example.invoke_chat(trunc_agent, message_count=50) + + # Demonstrate group chat with summarization + print("\n===SummarizedChatReduction Demo===") + sum_agent.history_reducer.messages.clear() + await example.invoke_chat(sum_agent, message_count=50) + + +# Interaction between reducer_msg_count and reducer_threshold: +# The combination of these values determines when reduction occurs and how much history is kept. +# Example: +# If reducer_msg_count = 10 and reducer_threshold = 5, history will not be truncated until the total message count +# exceeds 15. This approach ensures flexibility in retaining conversational context while still adhering to memory +# constraints. + +# Recommendations: +# - Adjust for performance: Use a lower reducer_msg_count in environments with limited memory or when the assistant +# needs faster processing times. 
+# - Context sensitivity: Increase reducer_msg_count and reducer_threshold in use cases where maintaining continuity +# across multiple interactions is essential (e.g., multi-turn conversations or complex workflows). +# - Experiment: Start with the default values (10 and 10) and refine based on your application's behavior and the +# assistant's response quality. + + +# Execute the main function if the script is run directly +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer.py b/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer.py new file mode 100644 index 000000000000..338c76519b0e --- /dev/null +++ b/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer.py @@ -0,0 +1,156 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio + +from samples.concepts.setup.chat_completion_services import ( + Services, + get_chat_completion_service_and_request_settings, +) +from semantic_kernel import Kernel +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior +from semantic_kernel.contents import ChatHistorySummarizationReducer +from semantic_kernel.core_plugins.time_plugin import TimePlugin +from semantic_kernel.functions import KernelArguments + +# This sample shows how to create a chatbot using a kernel function and leverage a chat history +# summarization reducer. +# This sample uses the following main components: +# - a ChatCompletionService: This component is responsible for generating responses to user messages. +# - a Chat History Reducer: This component is responsible for keeping track and reducing the chat history. +# A Chat History Reducer is a subclass of ChatHistory that provides additional +# functionality to reduce the history. +# - a KernelFunction: This function will be a prompt function, meaning the function is composed of +# a prompt and will be invoked by Semantic Kernel. +# The chatbot in this sample is called Mosscap, who responds to user messages with long flowery prose. + +# [NOTE] +# The purpose of this sample is to demonstrate how to use a kernel function and use a chat history reducer. +# To build a basic chatbot, it is sufficient to use a ChatCompletionService with a chat history directly. + +# You can select from the following chat completion services: +# - Services.OPENAI +# - Services.AZURE_OPENAI +# - Services.AZURE_AI_INFERENCE +# - Services.ANTHROPIC +# - Services.BEDROCK +# - Services.GOOGLE_AI +# - Services.MISTRAL_AI +# - Services.OLLAMA +# - Services.ONNX +# - Services.VERTEX_AI +# Please make sure you have configured your environment correctly for the selected chat completion service. +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) + +# This is the system message that gives the chatbot its personality. +system_message = """ +You are a chat bot. Your name is Mosscap and +you have one goal: figure out what people need. +Your full name, should you need to know it, is +Splendid Speckled Mosscap. You communicate +effectively, but you tend to answer with long +flowery prose. +""" + +# Create a kernel and register a prompt function. +# The prompt here contains two variables: chat_history and user_input. +# They will be replaced by the kernel with the actual values when the function is invoked. 
+# [NOTE] +# The chat_history, which is a ChatHistory object, will be serialized to a string internally +# to create/render the final prompt. +# Since this sample uses a chat completion service, the prompt will be deserialized back to +# a ChatHistory object that gets passed to the chat completion service. This new chat history +# object will contain the original messages and the user input. +kernel = Kernel() +chat_function = kernel.add_function( + plugin_name="ChatBot", + function_name="Chat", + prompt="{{$chat_history}}{{$user_input}}", + template_format="semantic-kernel", + # You can attach the request settings to the function or + # pass the settings to the kernel.invoke method via the kernel arguments. + # If you specify the settings in both places, the settings in the kernel arguments will + # take precedence given the same service id. + # prompt_execution_settings=request_settings, +) + +# Invoking a kernel function requires a service, so we add the chat completion service to the kernel. +kernel.add_service(chat_completion_service) + +# The chat history reducer is responsible for summarizing the chat history. +# It's a subclass of ChatHistory that provides additional functionality to reduce the history. +# You may use it just like a regular ChatHistory object. +summarization_reducer = ChatHistorySummarizationReducer( + service=kernel.get_service(), + # target_count: + # Purpose: Defines the target number of messages to retain after applying summarization. + # What it controls: This parameter determines how much of the most recent conversation history + # is preserved while discarding or summarizing older messages. + # Why change it?: + # - Smaller values: Use when memory constraints are tight, or the assistant only needs a brief history + # to maintain context. + # - Larger values: Use when retaining more conversational context is critical for accurate responses + # or maintaining a richer dialogue. + target_count=3, + # threshold_count: + # Purpose: Acts as a buffer to avoid reducing history prematurely when the current message count exceeds + # target_count by a small margin. + # What it controls: Helps ensure that essential paired messages (like a user query and the assistant’s response) + # are not "orphaned" or lost during truncation or summarization. + # Why change it?: + # - Smaller values: Use when you want stricter reduction criteria and are okay with possibly cutting older + # pairs of messages sooner. + # - Larger values: Use when you want to minimize the risk of cutting a critical part of the conversation, + # especially for sensitive interactions like API function calls or complex responses. 
+ threshold_count=2, +) + +summarization_reducer.add_system_message(system_message) + +kernel.add_plugin(plugin=TimePlugin(), plugin_name="TimePlugin") + +request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto() + + +async def chat() -> bool: + try: + user_input = input("User:> ") + except (KeyboardInterrupt, EOFError): + print("\n\nExiting chat...") + return False + + if user_input == "exit": + print("\n\nExiting chat...") + return False + + await summarization_reducer.reduce() + + kernel_arguments = KernelArguments( + settings=request_settings, + chat_history=summarization_reducer, + user_input=user_input, + ) + answer = await kernel.invoke(plugin_name="ChatBot", function_name="Chat", arguments=kernel_arguments) + + if answer: + print(f"Mosscap:> {answer}") + summarization_reducer.add_user_message(user_input) + summarization_reducer.add_message(answer.value[0]) + + return True + + +async def main() -> None: + # Start the chat loop. The chat loop will continue until the user types "exit". + chatting = True + while chatting: + chatting = await chat() + + # Sample output: + # User:> Why is the sky blue in one sentence? + # Mosscap:> The sky is blue due to the scattering of sunlight by the molecules in the Earth's atmosphere, + # a phenomenon known as Rayleigh scattering, which causes shorter blue wavelengths to become more + # prominent in our visual perception. + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer_keep_func_content.py b/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer_keep_func_content.py new file mode 100644 index 000000000000..b5d0eae75d24 --- /dev/null +++ b/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer_keep_func_content.py @@ -0,0 +1,200 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio + +from samples.concepts.setup.chat_completion_services import ( + Services, + get_chat_completion_service_and_request_settings, +) +from semantic_kernel import Kernel +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior +from semantic_kernel.contents import ChatHistorySummarizationReducer +from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.contents.function_call_content import FunctionCallContent +from semantic_kernel.contents.function_result_content import FunctionResultContent +from semantic_kernel.core_plugins.time_plugin import TimePlugin +from semantic_kernel.functions import KernelArguments + +# This sample shows how to create a chatbot using a kernel function and leverage a chat history +# summarization reducer. +# This sample uses the following main components: +# - a ChatCompletionService: This component is responsible for generating responses to user messages. +# - a Chat History Reducer: This component is responsible for keeping track and reducing the chat history. +# A Chat History Reducer is a subclass of ChatHistory that provides additional +# functionality to reduce the history. +# - The Chat History Reducer configuration includes a flag `include_function_content_in_summary` that +# allows the reducer to include function call and result content in the summary. +# - a KernelFunction: This function will be a prompt function, meaning the function is composed of +# a prompt and will be invoked by Semantic Kernel. 
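The `include_function_content_in_summary` flag called out in the list above is the one thing that separates this sample from the previous one. For orientation, a condensed sketch of the configuration this file builds toward (the parameter values are illustrative, and the kernel is assumed to already have a chat completion service registered; the full, commented construction appears further down in this file):

```python
# Condensed, hedged sketch of the reducer used in this sample.
from semantic_kernel import Kernel
from semantic_kernel.contents import ChatHistorySummarizationReducer

kernel = Kernel()
# kernel.add_service(...)  # any configured chat completion service

summarization_reducer = ChatHistorySummarizationReducer(
    service=kernel.get_service(),
    target_count=3,  # illustrative value
    threshold_count=2,  # illustrative value
    # Let the generated summary also cover function call/result content:
    include_function_content_in_summary=True,
)
```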
+# The chatbot in this sample is called Mosscap, who responds to user messages with long flowery prose. + +# [NOTE] +# The purpose of this sample is to demonstrate how to use a kernel function and use a chat history reducer. +# To build a basic chatbot, it is sufficient to use a ChatCompletionService with a chat history directly. + +# You can select from the following chat completion services: +# - Services.OPENAI +# - Services.AZURE_OPENAI +# - Services.AZURE_AI_INFERENCE +# - Services.ANTHROPIC +# - Services.BEDROCK +# - Services.GOOGLE_AI +# - Services.MISTRAL_AI +# - Services.OLLAMA +# - Services.ONNX +# - Services.VERTEX_AI +# Please make sure you have configured your environment correctly for the selected chat completion service. +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) + +# This is the system message that gives the chatbot its personality. +system_message = """ +You are a chat bot. Your name is Mosscap and +you have one goal: figure out what people need. +Your full name, should you need to know it, is +Splendid Speckled Mosscap. You communicate +effectively, but you tend to answer with long +flowery prose. +""" + +# Create a kernel and register a prompt function. +# The prompt here contains two variables: chat_history and user_input. +# They will be replaced by the kernel with the actual values when the function is invoked. +# [NOTE] +# The chat_history, which is a ChatHistory object, will be serialized to a string internally +# to create/render the final prompt. +# Since this sample uses a chat completion service, the prompt will be deserialized back to +# a ChatHistory object that gets passed to the chat completion service. This new chat history +# object will contain the original messages and the user input. +kernel = Kernel() +chat_function = kernel.add_function( + plugin_name="ChatBot", + function_name="Chat", + prompt="{{$chat_history}}{{$user_input}}", + template_format="semantic-kernel", + # You can attach the request settings to the function or + # pass the settings to the kernel.invoke method via the kernel arguments. + # If you specify the settings in both places, the settings in the kernel arguments will + # take precedence given the same service id. + # prompt_execution_settings=request_settings, +) + +# Invoking a kernel function requires a service, so we add the chat completion service to the kernel. +kernel.add_service(chat_completion_service) + +# The chat history reducer is responsible for summarizing the chat history. +# It's a subclass of ChatHistory that provides additional functionality to reduce the history. +# You may use it just like a regular ChatHistory object. +summarization_reducer = ChatHistorySummarizationReducer( + service=kernel.get_service(), + # target_count: + # Purpose: Defines the target number of messages to retain after applying summarization. + # What it controls: This parameter determines how much of the most recent conversation history + # is preserved while discarding or summarizing older messages. + # Why change it?: + # - Smaller values: Use when memory constraints are tight, or the assistant only needs a brief history + # to maintain context. + # - Larger values: Use when retaining more conversational context is critical for accurate responses + # or maintaining a richer dialogue. + target_count=3, + # threshold_count: + # Purpose: Acts as a buffer to avoid reducing history prematurely when the current message count exceeds + # target_count by a small margin. 
+    #   What it controls: Helps ensure that essential paired messages (like a user query and the assistant's response)
+    #   are not "orphaned" or lost during truncation or summarization.
+    #   Why change it?:
+    #   - Smaller values: Use when you want stricter reduction criteria and are okay with possibly cutting older
+    #     pairs of messages sooner.
+    #   - Larger values: Use when you want to minimize the risk of cutting a critical part of the conversation,
+    #     especially for sensitive interactions like API function calls or complex responses.
+    threshold_count=2,
+    include_function_content_in_summary=True,
+)
+
+summarization_reducer.add_system_message(system_message)
+
+kernel.add_plugin(plugin=TimePlugin(), plugin_name="TimePlugin")
+
+request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto()
+
+
+# The following sets hold the ids of FunctionCallContent and FunctionResultContent items
+# that have previously been added to the chat history, so that each item is only added once.
+processed_fccs: set[str] = set()
+processed_frcs: set[str] = set()
+
+
+async def chat() -> bool:
+    global processed_fccs, processed_frcs
+
+    try:
+        user_input = input("User:> ")
+    except (KeyboardInterrupt, EOFError):
+        print("\n\nExiting chat...")
+        return False
+
+    if user_input == "exit":
+        print("\n\nExiting chat...")
+        return False
+
+    await summarization_reducer.reduce()
+
+    kernel_arguments = KernelArguments(
+        settings=request_settings,
+        chat_history=summarization_reducer,
+        user_input=user_input,
+    )
+    answer = await kernel.invoke(plugin_name="ChatBot", function_name="Chat", arguments=kernel_arguments)
+
+    if answer:
+        print(f"Mosscap:> {answer}")
+        summarization_reducer.add_user_message(user_input)
+        summarization_reducer.add_message(answer.value[0])
+
+        # Get the chat history from the FunctionResult's metadata
+        chat_history: ChatHistory = answer.metadata.get("messages")
+        if chat_history:
+            # Process the chat history to extract FunctionCallContent and FunctionResultContent items
+            # that we haven't previously added to the chat history
+            fcc: list[FunctionCallContent] = []
+            frc: list[FunctionResultContent] = []
+            for msg in chat_history.messages:
+                if msg.items:
+                    for item in msg.items:
+                        match item:
+                            case FunctionCallContent():
+                                if item.id not in processed_fccs:
+                                    fcc.append(item)
+                            case FunctionResultContent():
+                                if item.id not in processed_frcs:
+                                    frc.append(item)
+
+            # Since this example shows how to include FunctionCallContent and FunctionResultContent
+            # in the summary, re-add the new pairs to the chat history and record their ids in the
+            # processed sets so they are not added again on later turns.
+            for i, item in enumerate(fcc):
+                summarization_reducer.add_assistant_message_list([item])
+                processed_fccs.add(item.id)
+                # Safely check if there's a matching FunctionResultContent
+                if i < len(frc):
+                    assert fcc[i].id == frc[i].id  # nosec
+                    summarization_reducer.add_tool_message_list([frc[i]])
+                    processed_frcs.add(frc[i].id)
+
+    return True
+
+
+async def main() -> None:
+    # Start the chat loop. The chat loop will continue until the user types "exit".
+    chatting = True
+    while chatting:
+        chatting = await chat()
+
+    # Sample output:
+    # User:> Why is the sky blue in one sentence?
+    # Mosscap:> The sky is blue due to the scattering of sunlight by the molecules in the Earth's atmosphere,
+    #    a phenomenon known as Rayleigh scattering, which causes shorter blue wavelengths to become more
+    #    prominent in our visual perception.
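The pairing loop in `chat()` above is the core of this sample: new tool calls are re-added as assistant messages and their results as tool messages, with ids recorded so nothing is added twice. A minimal restatement of that pattern (the helper name and the pre-filtered `calls`/`results` lists are illustrative, not part of the SK API; `zip` stops at the shorter list, mirroring the sample's bounds check):

```python
from semantic_kernel.contents import ChatHistorySummarizationReducer
from semantic_kernel.contents.function_call_content import FunctionCallContent
from semantic_kernel.contents.function_result_content import FunctionResultContent


def readd_tool_content(
    history: ChatHistorySummarizationReducer,
    calls: list[FunctionCallContent],
    results: list[FunctionResultContent],
    seen_ids: set[str],
) -> None:
    """Re-add new call/result pairs so the summarizer can see them."""
    for call, result in zip(calls, results):
        assert call.id == result.id  # pairs must line up, as the sample asserts
        history.add_assistant_message_list([call])  # the model's tool call
        history.add_tool_message_list([result])  # the matching tool output
        seen_ids.add(call.id)  # record the id so the pair is added only once
```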
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/python/samples/concepts/chat_completion/simple_chatbot_with_truncation_history_reducer.py b/python/samples/concepts/chat_completion/simple_chatbot_with_truncation_history_reducer.py
new file mode 100644
index 000000000000..075cbb8620c0
--- /dev/null
+++ b/python/samples/concepts/chat_completion/simple_chatbot_with_truncation_history_reducer.py
@@ -0,0 +1,160 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+
+from samples.concepts.setup.chat_completion_services import (
+    Services,
+    get_chat_completion_service_and_request_settings,
+)
+from semantic_kernel import Kernel
+from semantic_kernel.contents import ChatHistoryTruncationReducer
+from semantic_kernel.functions import KernelArguments
+
+# This sample shows how to create a chatbot using a kernel function and leverage a chat history
+# truncation reducer.
+# This sample uses the following main components:
+# - a ChatCompletionService: This component is responsible for generating responses to user messages.
+# - a Chat History Reducer: This component is responsible for keeping track and reducing the chat history.
+#       A Chat History Reducer is a subclass of ChatHistory that provides additional
+#       functionality to reduce the history.
+# - a KernelFunction: This function will be a prompt function, meaning the function is composed of
+#       a prompt and will be invoked by Semantic Kernel.
+# The chatbot in this sample is called Mosscap, who responds to user messages with long flowery prose.
+
+# [NOTE]
+# The purpose of this sample is to demonstrate how to use a kernel function and use a chat history reducer.
+# To build a basic chatbot, it is sufficient to use a ChatCompletionService with a chat history directly.
+
+# You can select from the following chat completion services:
+# - Services.OPENAI
+# - Services.AZURE_OPENAI
+# - Services.AZURE_AI_INFERENCE
+# - Services.ANTHROPIC
+# - Services.BEDROCK
+# - Services.GOOGLE_AI
+# - Services.MISTRAL_AI
+# - Services.OLLAMA
+# - Services.ONNX
+# - Services.VERTEX_AI
+# Please make sure you have configured your environment correctly for the selected chat completion service.
+chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI)
+
+# This is the system message that gives the chatbot its personality.
+system_message = """
+You are a chat bot. Your name is Mosscap and
+you have one goal: figure out what people need.
+Your full name, should you need to know it, is
+Splendid Speckled Mosscap. You communicate
+effectively, but you tend to answer with long
+flowery prose.
+"""
+
+# Create a kernel and register a prompt function.
+# The prompt here contains two variables: chat_history and user_input.
+# They will be replaced by the kernel with the actual values when the function is invoked.
+# [NOTE]
+# The chat_history, which is a ChatHistory object, will be serialized to a string internally
+# to create/render the final prompt.
+# Since this sample uses a chat completion service, the prompt will be deserialized back to
+# a ChatHistory object that gets passed to the chat completion service. This new chat history
+# object will contain the original messages and the user input.
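Both reducers in this PR defer work until the history outgrows the configured budget, as the inline comments below spell out: reduction triggers only once the message count exceeds `target_count + threshold_count`. A hypothetical helper restating that rule (the function is ours, not part of the SK API):

```python
def should_reduce(message_count: int, target_count: int, threshold_count: int) -> bool:
    """Reduction triggers only once the history exceeds target + threshold."""
    return message_count > target_count + threshold_count


# With the values used below (target_count=3, threshold_count=2),
# reduction first fires once the history holds more than 5 messages.
assert should_reduce(6, 3, 2)
assert not should_reduce(5, 3, 2)
```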
+kernel = Kernel()
+chat_function = kernel.add_function(
+    plugin_name="ChatBot",
+    function_name="Chat",
+    prompt="{{$chat_history}}{{$user_input}}",
+    template_format="semantic-kernel",
+    # You can attach the request settings to the function or
+    # pass the settings to the kernel.invoke method via the kernel arguments.
+    # If you specify the settings in both places, the settings in the kernel arguments will
+    # take precedence given the same service id.
+    # prompt_execution_settings=request_settings,
+)
+
+# Invoking a kernel function requires a service, so we add the chat completion service to the kernel.
+kernel.add_service(chat_completion_service)
+
+# The chat history reducer is responsible for truncating the chat history.
+# It's a subclass of ChatHistory that provides additional functionality to reduce the history.
+# You may use it just like a regular ChatHistory object.
+truncation_reducer = ChatHistoryTruncationReducer(
+    service=kernel.get_service(),
+    # target_count:
+    # Purpose: Defines the target number of messages to retain after applying truncation.
+    # What it controls: This parameter determines how much of the most recent conversation history
+    #   is preserved while discarding older messages.
+    # Why change it?:
+    # - Smaller values: Use when memory constraints are tight, or the assistant only needs a brief history
+    #   to maintain context.
+    # - Larger values: Use when retaining more conversational context is critical for accurate responses
+    #   or maintaining a richer dialogue.
+    target_count=3,
+    # threshold_count:
+    # Purpose: Acts as a buffer to avoid reducing history prematurely when the current message count exceeds
+    #   target_count by a small margin.
+    # What it controls: Helps ensure that essential paired messages (like a user query and the assistant's response)
+    #   are not "orphaned" or lost during truncation.
+    # Why change it?:
+    # - Smaller values: Use when you want stricter reduction criteria and are okay with possibly cutting older
+    #   pairs of messages sooner.
+    # - Larger values: Use when you want to minimize the risk of cutting a critical part of the conversation,
+    #   especially for sensitive interactions like API function calls or complex responses.
+    threshold_count=2,
+)
+
+truncation_reducer.add_system_message(system_message)
+
+
+async def chat() -> bool:
+    try:
+        user_input = input("User:> ")
+    except (KeyboardInterrupt, EOFError):
+        print("\n\nExiting chat...")
+        return False
+
+    if user_input == "exit":
+        print("\n\nExiting chat...")
+        return False
+
+    # Attempt to reduce before adding the user message to the chat history.
+    await truncation_reducer.reduce()
+
+    # Get the chat message content from the chat completion service.
+    kernel_arguments = KernelArguments(
+        settings=request_settings,
+        # Use keyword arguments to pass the chat history and user input to the kernel function.
+        chat_history=truncation_reducer,
+        user_input=user_input,
+    )
+
+    answer = await kernel.invoke(plugin_name="ChatBot", function_name="Chat", arguments=kernel_arguments)
+    # Alternatively, you can invoke the function directly with the kernel as an argument:
+    # answer = await chat_function.invoke(kernel, kernel_arguments)
+    if answer:
+        print(f"Mosscap:> {answer}")
+        # Since the user_input is rendered by the template, it is not yet part of the chat history, so we add it here.
+ truncation_reducer.add_user_message(user_input) + # Add the chat message to the chat history to keep track of the conversation. + truncation_reducer.add_message(answer.value[0]) + + return True + + +async def main() -> None: + # Start the chat loop. The chat loop will continue until the user types "exit". + chatting = True + while chatting: + chatting = await chat() + + # Sample output: + # User:> Why is the sky blue in one sentence? + # Mosscap:> The sky is blue due to the scattering of sunlight by the molecules in the Earth's atmosphere, + # a phenomenon known as Rayleigh scattering, which causes shorter blue wavelengths to become more + # prominent in our visual perception. + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/plugins/openai_function_calling_with_custom_plugin.py b/python/samples/concepts/plugins/openai_function_calling_with_custom_plugin.py index 6adde925a390..c556b7e9820c 100644 --- a/python/samples/concepts/plugins/openai_function_calling_with_custom_plugin.py +++ b/python/samples/concepts/plugins/openai_function_calling_with_custom_plugin.py @@ -120,14 +120,13 @@ async def main(): chat_history.add_message(result) for item in result.items: - await chat._process_function_call( + await kernel.invoke_function_call( function_call=item, - kernel=kernel, chat_history=chat_history, arguments=KernelArguments(), function_call_count=1, request_index=0, - function_call_behavior=settings.function_choice_behavior, + function_behavior=settings.function_choice_behavior, ) diff --git a/python/samples/concepts/reasoning/simple_reasoning_function_calling.py b/python/samples/concepts/reasoning/simple_reasoning_function_calling.py index 238d69753f88..0da02adacefe 100644 --- a/python/samples/concepts/reasoning/simple_reasoning_function_calling.py +++ b/python/samples/concepts/reasoning/simple_reasoning_function_calling.py @@ -70,7 +70,9 @@ Note: Unsupported features may be added in future updates. """ -chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.OPENAI) +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings( + Services.OPENAI, instruction_role="developer" +) # This is the system message that gives the chatbot its personality. developer_message = """ diff --git a/python/samples/concepts/setup/chat_completion_services.py b/python/samples/concepts/setup/chat_completion_services.py index 40dd127eda47..ee4d6d2dfa67 100644 --- a/python/samples/concepts/setup/chat_completion_services.py +++ b/python/samples/concepts/setup/chat_completion_services.py @@ -32,28 +32,50 @@ class Services(str, Enum): def get_chat_completion_service_and_request_settings( service_name: Services, + instruction_role: str | None = None, ) -> tuple["ChatCompletionClientBase", "PromptExecutionSettings"]: - """Return service and request settings.""" + """Return service and request settings. + + Args: + service_name (Services): The service name. + instruction_role (str | None): The role to use for 'instruction' messages, for example, + 'system' or 'developer'. Defaults to 'system'. Currently only supported for OpenAI reasoning models. 
+ """ + # Use lambdas or functions to delay instantiation chat_services = { - Services.OPENAI: get_openai_chat_completion_service_and_request_settings, - Services.AZURE_OPENAI: get_azure_openai_chat_completion_service_and_request_settings, - Services.AZURE_AI_INFERENCE: get_azure_ai_inference_chat_completion_service_and_request_settings, - Services.ANTHROPIC: get_anthropic_chat_completion_service_and_request_settings, - Services.BEDROCK: get_bedrock_chat_completion_service_and_request_settings, - Services.GOOGLE_AI: get_google_ai_chat_completion_service_and_request_settings, - Services.MISTRAL_AI: get_mistral_ai_chat_completion_service_and_request_settings, - Services.OLLAMA: get_ollama_chat_completion_service_and_request_settings, - Services.ONNX: get_onnx_chat_completion_service_and_request_settings, - Services.VERTEX_AI: get_vertex_ai_chat_completion_service_and_request_settings, + Services.OPENAI: lambda: get_openai_chat_completion_service_and_request_settings( + instruction_role=instruction_role + ), + Services.AZURE_OPENAI: lambda: get_azure_openai_chat_completion_service_and_request_settings( + instruction_role=instruction_role + ), + Services.AZURE_AI_INFERENCE: lambda: get_azure_ai_inference_chat_completion_service_and_request_settings( + instruction_role=instruction_role + ), + Services.ANTHROPIC: lambda: get_anthropic_chat_completion_service_and_request_settings(), + Services.BEDROCK: lambda: get_bedrock_chat_completion_service_and_request_settings(), + Services.GOOGLE_AI: lambda: get_google_ai_chat_completion_service_and_request_settings(), + Services.MISTRAL_AI: lambda: get_mistral_ai_chat_completion_service_and_request_settings(), + Services.OLLAMA: lambda: get_ollama_chat_completion_service_and_request_settings(), + Services.ONNX: lambda: get_onnx_chat_completion_service_and_request_settings(), + Services.VERTEX_AI: lambda: get_vertex_ai_chat_completion_service_and_request_settings(), } + + # Call the appropriate lambda or function based on the service name + if service_name not in chat_services: + raise ValueError(f"Unsupported service name: {service_name}") return chat_services[service_name]() -def get_openai_chat_completion_service_and_request_settings() -> tuple[ - "ChatCompletionClientBase", "PromptExecutionSettings" -]: +def get_openai_chat_completion_service_and_request_settings( + instruction_role: str | None = None, +) -> tuple["ChatCompletionClientBase", "PromptExecutionSettings"]: """Return OpenAI chat completion service and request settings. + Args: + instruction_role (str | None): The role to use for 'instruction' messages, for example, + 'developer' or 'system'. (Optional) + The service credentials can be read by 3 ways: 1. Via the constructor 2. 
Via the environment variables @@ -70,7 +92,7 @@ def get_openai_chat_completion_service_and_request_settings() -> tuple[ OpenAIChatPromptExecutionSettings, ) - chat_service = OpenAIChatCompletion(service_id=service_id) + chat_service = OpenAIChatCompletion(service_id=service_id, instruction_role=instruction_role) request_settings = OpenAIChatPromptExecutionSettings( service_id=service_id, max_tokens=2000, temperature=0.7, top_p=0.8 ) @@ -78,11 +100,15 @@ def get_openai_chat_completion_service_and_request_settings() -> tuple[ return chat_service, request_settings -def get_azure_openai_chat_completion_service_and_request_settings() -> tuple[ - "ChatCompletionClientBase", "PromptExecutionSettings" -]: +def get_azure_openai_chat_completion_service_and_request_settings( + instruction_role: str | None = None, +) -> tuple["ChatCompletionClientBase", "PromptExecutionSettings"]: """Return Azure OpenAI chat completion service and request settings. + Args: + instruction_role (str | None): The role to use for 'instruction' messages, for example, + 'developer' or 'system'. (Optional) + The service credentials can be read by 3 ways: 1. Via the constructor 2. Via the environment variables @@ -99,15 +125,15 @@ def get_azure_openai_chat_completion_service_and_request_settings() -> tuple[ AzureChatPromptExecutionSettings, ) - chat_service = AzureChatCompletion(service_id=service_id) + chat_service = AzureChatCompletion(service_id=service_id, instruction_role=instruction_role) request_settings = AzureChatPromptExecutionSettings(service_id=service_id) return chat_service, request_settings -def get_azure_ai_inference_chat_completion_service_and_request_settings() -> tuple[ - "ChatCompletionClientBase", "PromptExecutionSettings" -]: +def get_azure_ai_inference_chat_completion_service_and_request_settings( + instruction_role: str | None = None, +) -> tuple["ChatCompletionClientBase", "PromptExecutionSettings"]: """Return Azure AI Inference chat completion service and request settings. The service credentials can be read by 3 ways: @@ -129,6 +155,7 @@ def get_azure_ai_inference_chat_completion_service_and_request_settings() -> tup chat_service = AzureAIInferenceChatCompletion( service_id=service_id, ai_model_id="id", # The model ID is simply an identifier as the model id cannot be obtained programmatically. 
+ instruction_role=instruction_role, ) request_settings = AzureAIInferenceChatPromptExecutionSettings(service_id=service_id) diff --git a/python/semantic_kernel/agents/agent.py b/python/semantic_kernel/agents/agent.py index 71728feb8362..56cd115a7751 100644 --- a/python/semantic_kernel/agents/agent.py +++ b/python/semantic_kernel/agents/agent.py @@ -2,17 +2,21 @@ import uuid from collections.abc import Iterable -from typing import ClassVar +from typing import TYPE_CHECKING, ClassVar from pydantic import Field from semantic_kernel.agents.channels.agent_channel import AgentChannel +from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer from semantic_kernel.kernel import Kernel from semantic_kernel.kernel_pydantic import KernelBaseModel from semantic_kernel.utils.experimental_decorator import experimental_class from semantic_kernel.utils.naming import generate_random_ascii_name from semantic_kernel.utils.validation import AGENT_NAME_REGEX +if TYPE_CHECKING: + from semantic_kernel.contents.chat_history import ChatHistory + @experimental_class class Agent(KernelBaseModel): @@ -37,6 +41,22 @@ class Agent(KernelBaseModel): instructions: str | None = None kernel: Kernel = Field(default_factory=Kernel) channel_type: ClassVar[type[AgentChannel] | None] = None + history_reducer: ChatHistoryReducer | None = None + + async def reduce_history(self, history: "ChatHistory") -> bool: + """Perform the reduction on the provided history, returning True if reduction occurred.""" + if self.history_reducer is None: + return False + + self.history_reducer.messages = history.messages + + new_messages = await self.history_reducer.reduce() + if new_messages is not None: + history.messages.clear() + history.messages.extend(new_messages) + return True + + return False def get_channel_keys(self) -> Iterable[str]: """Get the channel keys. @@ -46,7 +66,11 @@ def get_channel_keys(self) -> Iterable[str]: """ if not self.channel_type: raise NotImplementedError("Unable to get channel keys. Channel type not configured.") - return [self.channel_type.__name__] + yield self.channel_type.__name__ + + if self.history_reducer is not None: + yield self.history_reducer.__class__.__name__ + yield str(self.history_reducer.__hash__) async def create_channel(self) -> AgentChannel: """Create a channel. 
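The new `Agent.reduce_history` hook added above mutates a `ChatHistory` in place and returns whether a reduction occurred. A minimal usage sketch (agent construction elided; assumes an agent configured with a `history_reducer`, as in the samples):

```python
from semantic_kernel.agents import ChatCompletionAgent
from semantic_kernel.contents.chat_history import ChatHistory


async def maybe_reduce(agent: ChatCompletionAgent, history: ChatHistory) -> None:
    # reduce_history returns True only when the reducer rewrote the history.
    if await agent.reduce_history(history):
        print(f"History reduced to {len(history.messages)} messages")
```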
diff --git a/python/semantic_kernel/agents/channels/chat_history_channel.py b/python/semantic_kernel/agents/channels/chat_history_channel.py index 563efeaef610..057c005b3d3d 100644 --- a/python/semantic_kernel/agents/channels/chat_history_channel.py +++ b/python/semantic_kernel/agents/channels/chat_history_channel.py @@ -64,6 +64,9 @@ async def invoke( f"Invalid channel binding for agent with id: `{id}` with name: ({type(agent).__name__})" ) + # pre-process history reduction + await agent.reduce_history(self) + message_count = len(self.messages) mutated_history = set() message_queue: Deque[ChatMessageContent] = deque() @@ -119,6 +122,9 @@ async def invoke_stream( f"Invalid channel binding for agent with id: `{id}` with name: ({type(agent).__name__})" ) + # pre-process history reduction + await agent.reduce_history(self) + message_count = len(self.messages) async for response_message in agent.invoke_stream(self): diff --git a/python/semantic_kernel/agents/chat_completion/chat_completion_agent.py b/python/semantic_kernel/agents/chat_completion/chat_completion_agent.py index 352787e81d8c..cbdb218ad616 100644 --- a/python/semantic_kernel/agents/chat_completion/chat_completion_agent.py +++ b/python/semantic_kernel/agents/chat_completion/chat_completion_agent.py @@ -12,6 +12,7 @@ from semantic_kernel.const import DEFAULT_SERVICE_NAME from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.exceptions import KernelServiceNotFoundError @@ -46,6 +47,7 @@ def __init__( description: str | None = None, instructions: str | None = None, execution_settings: PromptExecutionSettings | None = None, + history_reducer: ChatHistoryReducer | None = None, ) -> None: """Initialize a new instance of ChatCompletionAgent. @@ -59,6 +61,7 @@ def __init__( description: The description of the agent. (optional) instructions: The instructions for the agent. (optional) execution_settings: The execution settings for the agent. (optional) + history_reducer: The history reducer for the agent. (optional) """ if not service_id: service_id = DEFAULT_SERVICE_NAME @@ -75,6 +78,8 @@ def __init__( args["id"] = id if kernel is not None: args["kernel"] = kernel + if history_reducer is not None: + args["history_reducer"] = history_reducer super().__init__(**args) @trace_agent_invocation diff --git a/python/semantic_kernel/agents/strategies/selection/kernel_function_selection_strategy.py b/python/semantic_kernel/agents/strategies/selection/kernel_function_selection_strategy.py index 3879fab95aca..65f7dfb2ae0b 100644 --- a/python/semantic_kernel/agents/strategies/selection/kernel_function_selection_strategy.py +++ b/python/semantic_kernel/agents/strategies/selection/kernel_function_selection_strategy.py @@ -1,6 +1,13 @@ # Copyright (c) Microsoft. All rights reserved. 
import logging +import sys + +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover + from collections.abc import Callable from inspect import isawaitable from typing import TYPE_CHECKING, ClassVar @@ -9,6 +16,7 @@ from semantic_kernel.agents.strategies.selection.selection_strategy import SelectionStrategy from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer from semantic_kernel.exceptions.agent_exceptions import AgentExecutionException from semantic_kernel.functions.kernel_arguments import KernelArguments from semantic_kernel.functions.kernel_function import KernelFunction @@ -34,9 +42,11 @@ class KernelFunctionSelectionStrategy(SelectionStrategy): function: KernelFunction kernel: Kernel result_parser: Callable[..., str] = Field(default_factory=lambda: (lambda: "")) + history_reducer: ChatHistoryReducer | None = None - async def next(self, agents: list["Agent"], history: list[ChatMessageContent]) -> "Agent": - """Check if the agent should terminate. + @override + async def select_agent(self, agents: list["Agent"], history: list[ChatMessageContent]) -> "Agent": + """Select the next agent to interact with. Args: agents: The list of agents to select from. @@ -48,6 +58,12 @@ async def next(self, agents: list["Agent"], history: list[ChatMessageContent]) - Raises: AgentExecutionException: If the strategy fails to execute the function or select the next agent """ + if self.history_reducer is not None: + self.history_reducer.messages = history + reduced_history = await self.history_reducer.reduce() + if reduced_history is not None: + history = reduced_history.messages + original_arguments = self.arguments or KernelArguments() execution_settings = original_arguments.execution_settings or {} diff --git a/python/semantic_kernel/agents/strategies/selection/selection_strategy.py b/python/semantic_kernel/agents/strategies/selection/selection_strategy.py index cef5625432c9..6f453a50a876 100644 --- a/python/semantic_kernel/agents/strategies/selection/selection_strategy.py +++ b/python/semantic_kernel/agents/strategies/selection/selection_strategy.py @@ -1,22 +1,29 @@ # Copyright (c) Microsoft. All rights reserved. -from abc import ABC, abstractmethod +from abc import ABC from typing import TYPE_CHECKING +from semantic_kernel.agents import Agent +from semantic_kernel.exceptions.agent_exceptions import AgentExecutionException from semantic_kernel.kernel_pydantic import KernelBaseModel from semantic_kernel.utils.experimental_decorator import experimental_class if TYPE_CHECKING: - from semantic_kernel.agents import Agent from semantic_kernel.contents.chat_message_content import ChatMessageContent @experimental_class class SelectionStrategy(KernelBaseModel, ABC): - """Contract for an agent selection strategy.""" + """Base strategy class for selecting the next agent in a chat.""" - @abstractmethod - async def next(self, agents: list["Agent"], history: list["ChatMessageContent"]) -> "Agent": + has_selected: bool = False + initial_agent: Agent | None = None + + async def next( + self, + agents: list[Agent], + history: list["ChatMessageContent"], + ) -> Agent: """Select the next agent to interact with. Args: @@ -24,6 +31,27 @@ async def next(self, agents: list["Agent"], history: list["ChatMessageContent"]) history: The history of messages in the conversation. Returns: - The next agent to interact with. 
+ The agent who takes the next turn. + """ + if not agents and self.initial_agent is None: + raise AgentExecutionException("Agent Failure - No agents present to select.") + + # If it's the first selection and we have an initial agent, use it + if not self.has_selected and self.initial_agent is not None: + agent = self.initial_agent + else: + agent = await self.select_agent(agents, history) + + self.has_selected = True + return agent + + async def select_agent( + self, + agents: list[Agent], + history: list["ChatMessageContent"], + ) -> Agent: + """Determines which agent goes next. Override for custom logic. + + By default, this fallback returns the first agent in the list. """ - ... + return agents[0] diff --git a/python/semantic_kernel/agents/strategies/selection/sequential_selection_strategy.py b/python/semantic_kernel/agents/strategies/selection/sequential_selection_strategy.py index 8304f405df7e..b60fc5f0f21f 100644 --- a/python/semantic_kernel/agents/strategies/selection/sequential_selection_strategy.py +++ b/python/semantic_kernel/agents/strategies/selection/sequential_selection_strategy.py @@ -1,5 +1,13 @@ # Copyright (c) Microsoft. All rights reserved. +import logging +import sys + +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover + from typing import TYPE_CHECKING from pydantic import PrivateAttr @@ -12,34 +20,61 @@ from semantic_kernel.contents.chat_message_content import ChatMessageContent +logger: logging.Logger = logging.getLogger(__name__) + + @experimental_class class SequentialSelectionStrategy(SelectionStrategy): - """A selection strategy that selects agents in a sequential order.""" + """Round-robin turn-taking strategy. Agent order is based on the order in which they joined.""" + + _index: int = PrivateAttr(default=-1) - _index: int = PrivateAttr(default=0) + def reset(self) -> None: + """Reset selection to the initial/first agent.""" + self._index = -1 - def reset(self): - """Reset the index.""" - self._index = 0 + def _increment_index(self, agent_count: int) -> None: + """Increment the index in a circular manner.""" + self._index = (self._index + 1) % agent_count - async def next(self, agents: list["Agent"], history: list["ChatMessageContent"]) -> "Agent": - """Select the next agent to interact with. + @override + async def select_agent( + self, + agents: list["Agent"], + history: list["ChatMessageContent"], + ) -> "Agent": + """Select the next agent in a round-robin fashion. Args: agents: The list of agents to select from. history: The history of messages in the conversation. Returns: - The next agent to interact with. + The agent who takes the next turn. 
""" - if len(agents) == 0: - raise ValueError("No agents to select from") - if self._index >= len(agents): - self.reset() + self._index = -1 - agent = agents[self._index] + if ( + self.has_selected + and self.initial_agent is not None + and len(agents) > 0 + and agents[0] == self.initial_agent + and self._index < 0 + ): + # Avoid selecting the same agent twice in a row + self._increment_index(len(agents)) - self._index = (self._index + 1) % len(agents) + # Main index increment + self._increment_index(len(agents)) + + # Pick the agent + agent = agents[self._index] + logger.info( + "Selected agent at index %d (ID: %s, name: %s)", + self._index, + agent.id, + agent.name, + ) return agent diff --git a/python/semantic_kernel/agents/strategies/termination/kernel_function_termination_strategy.py b/python/semantic_kernel/agents/strategies/termination/kernel_function_termination_strategy.py index f46cd79704ef..93c59e10ed84 100644 --- a/python/semantic_kernel/agents/strategies/termination/kernel_function_termination_strategy.py +++ b/python/semantic_kernel/agents/strategies/termination/kernel_function_termination_strategy.py @@ -9,6 +9,7 @@ from semantic_kernel.agents.strategies.termination.termination_strategy import TerminationStrategy from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer from semantic_kernel.functions.kernel_arguments import KernelArguments from semantic_kernel.functions.kernel_function import KernelFunction from semantic_kernel.kernel import Kernel @@ -33,6 +34,7 @@ class KernelFunctionTerminationStrategy(TerminationStrategy): function: KernelFunction kernel: Kernel result_parser: Callable[..., bool] = Field(default_factory=lambda: (lambda: True)) + history_reducer: ChatHistoryReducer | None = None async def should_agent_terminate( self, @@ -48,6 +50,12 @@ async def should_agent_terminate( Returns: True if the agent should terminate, False otherwise """ + if self.history_reducer is not None: + self.history_reducer.messages = history + reduced_history = await self.history_reducer.reduce() + if reduced_history is not None: + history = reduced_history.messages + original_arguments = self.arguments or KernelArguments() execution_settings = original_arguments.execution_settings or {} diff --git a/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py index 8541fd0dc651..c18fcb30c732 100644 --- a/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py @@ -5,7 +5,7 @@ from pydantic import Field, model_validator -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType +from semantic_kernel.connectors.ai.function_choice_type import FunctionChoiceType from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.exceptions import ServiceInvalidExecutionSettingsError diff --git a/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py b/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py index 87e967184234..f5baec134528 100644 --- 
a/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py @@ -4,7 +4,7 @@ import logging import sys from collections.abc import AsyncGenerator, Callable -from typing import Any, ClassVar +from typing import TYPE_CHECKING, Any, ClassVar if sys.version_info >= (3, 12): from typing import override # pragma: no cover @@ -32,7 +32,6 @@ ) from semantic_kernel.connectors.ai.anthropic.settings.anthropic_settings import AnthropicSettings from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents.chat_history import ChatHistory @@ -56,6 +55,9 @@ trace_streaming_chat_completion, ) +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration + # map finish reasons from Anthropic to Semantic Kernel ANTHROPIC_TO_SEMANTIC_KERNEL_FINISH_REASON_MAP = { "end_turn": SemanticKernelFinishReason.STOP, @@ -136,7 +138,7 @@ def service_url(self) -> str | None: @override def _update_function_choice_settings_callback( self, - ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]: + ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]: return update_settings_from_function_call_configuration @override diff --git a/python/semantic_kernel/connectors/ai/anthropic/services/utils.py b/python/semantic_kernel/connectors/ai/anthropic/services/utils.py index 31acecb0468f..e41905e1cc91 100644 --- a/python/semantic_kernel/connectors/ai/anthropic/services/utils.py +++ b/python/semantic_kernel/connectors/ai/anthropic/services/utils.py @@ -3,11 +3,9 @@ import json import logging from collections.abc import Callable, Mapping -from typing import Any +from typing import TYPE_CHECKING, Any -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.function_result_content import FunctionResultContent @@ -18,6 +16,11 @@ logger: logging.Logger = logging.getLogger(__name__) +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration + from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings + + def _format_user_message(message: ChatMessageContent) -> dict[str, Any]: """Format a user message to the expected object for the Anthropic client. 
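A recurring pattern in these hunks: imports needed only for annotations move under `typing.TYPE_CHECKING`, and the annotations that use them become strings. A generic sketch of the pattern; the `configure` function here is illustrative, not part of the PR:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Only evaluated by static type checkers; at runtime this import never
    # executes, which is what breaks the circular-import chain.
    from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings


def configure(settings: "PromptExecutionSettings") -> None:
    # The quoted annotation defers name resolution, so no runtime import is needed.
    ...
```

The trade-off is that the name must stay quoted (or `from __future__ import annotations` must be in effect) everywhere it appears in signatures, which is why the callback return types below gain quotes.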
@@ -118,8 +121,8 @@ def _format_tool_message(message: ChatMessageContent) -> dict[str, Any]: def update_settings_from_function_call_configuration( - function_choice_configuration: FunctionCallChoiceConfiguration, - settings: PromptExecutionSettings, + function_choice_configuration: "FunctionCallChoiceConfiguration", + settings: "PromptExecutionSettings", type: FunctionChoiceType, ) -> None: """Update the settings from a FunctionChoiceConfiguration.""" diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_base.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_base.py index 772ddb28e6c7..64e0806804e1 100644 --- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_base.py +++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_base.py @@ -52,6 +52,7 @@ def __init__( env_file_path: str | None = None, env_file_encoding: str | None = None, client: ChatCompletionsClient | EmbeddingsClient | None = None, + instruction_role: str | None = None, **kwargs: Any, ) -> None: """Initialize the Azure AI Inference Chat Completion service. @@ -68,6 +69,7 @@ def __init__( env_file_path (str | None): The path to the environment file. (Optional) env_file_encoding (str | None): The encoding of the environment file. (Optional) client (ChatCompletionsClient | None): The Azure AI Inference client to use. (Optional) + instruction_role (str | None): The role to use for 'instruction' messages. (Optional) **kwargs: Additional keyword arguments. Raises: @@ -100,11 +102,16 @@ def __init__( user_agent=SEMANTIC_KERNEL_USER_AGENT, ) - super().__init__( - client=client, - managed_client=managed_client, + args: dict[str, Any] = { + "client": client, + "managed_client": managed_client, **kwargs, - ) + } + + if instruction_role: + args["instruction_role"] = instruction_role + + super().__init__(**args) def __del__(self) -> None: """Close the client when the object is deleted.""" diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py index 8ac10561f142..9a43591938e6 100644 --- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py @@ -30,7 +30,6 @@ from semantic_kernel.connectors.ai.azure_ai_inference.services.utils import MESSAGE_CONVERTERS from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase from semantic_kernel.connectors.ai.completion_usage import CompletionUsage -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_calling_utils import update_settings_from_function_call_configuration from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType from semantic_kernel.contents.chat_history import ChatHistory @@ -46,6 +45,7 @@ from semantic_kernel.utils.experimental_decorator import experimental_class if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings logger: logging.Logger = logging.getLogger(__name__) @@ -66,6 +66,7 @@ def 
__init__( env_file_path: str | None = None, env_file_encoding: str | None = None, client: ChatCompletionsClient | None = None, + instruction_role: str | None = None, ) -> None: """Initialize the Azure AI Inference Chat Completion service. @@ -82,20 +83,29 @@ def __init__( env_file_path (str | None): The path to the environment file. (Optional) env_file_encoding (str | None): The encoding of the environment file. (Optional) client (ChatCompletionsClient | None): The Azure AI Inference client to use. (Optional) + instruction_role (str | None): The role to use for 'instruction' messages, for example, summarization + prompts could use `developer` or `system`. (Optional) Raises: ServiceInitializationError: If an error occurs during initialization. """ - super().__init__( - ai_model_id=ai_model_id, - service_id=service_id or ai_model_id, - client_type=AzureAIInferenceClientType.ChatCompletions, - api_key=api_key, - endpoint=endpoint, - env_file_path=env_file_path, - env_file_encoding=env_file_encoding, - client=client, - ) + args: dict[str, Any] = { + "ai_model_id": ai_model_id, + "api_key": api_key, + "client_type": AzureAIInferenceClientType.ChatCompletions, + "client": client, + "endpoint": endpoint, + "env_file_path": env_file_path, + "env_file_encoding": env_file_encoding, + } + + if service_id: + args["service_id"] = service_id + + if instruction_role: + args["instruction_role"] = instruction_role + + super().__init__(**args) # region Overriding base class methods @@ -179,7 +189,7 @@ def _verify_function_choice_settings(self, settings: "PromptExecutionSettings") @override def _update_function_choice_settings_callback( self, - ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]: + ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]: return update_settings_from_function_call_configuration @override @@ -199,7 +209,13 @@ def _prepare_chat_history_for_request( chat_request_messages: list[ChatRequestMessage] = [] for message in chat_history.messages: - chat_request_messages.append(MESSAGE_CONVERTERS[message.role](message)) + # If instruction_role is 'developer' and the message role is 'system', change it to 'developer' + role = ( + AuthorRole.DEVELOPER + if self.instruction_role == "developer" and message.role == AuthorRole.SYSTEM + else message.role + ) + chat_request_messages.append(MESSAGE_CONVERTERS[role](message)) return chat_request_messages diff --git a/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py b/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py index c163b6ffda74..5c4f3e6cd192 100644 --- a/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py @@ -30,7 +30,6 @@ ) from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase from semantic_kernel.connectors.ai.completion_usage import CompletionUsage -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType from semantic_kernel.contents.chat_message_content import ITEM_TYPES, ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent @@ -52,6 +51,7 @@ ) if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import 
FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents.chat_history import ChatHistory @@ -160,7 +160,7 @@ async def _inner_get_streaming_chat_message_contents( @override def _update_function_choice_settings_callback( self, - ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]: + ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]: return update_settings_from_function_choice_configuration @override diff --git a/python/semantic_kernel/connectors/ai/bedrock/services/model_provider/utils.py b/python/semantic_kernel/connectors/ai/bedrock/services/model_provider/utils.py index 6274bdb01ffe..7607696559c5 100644 --- a/python/semantic_kernel/connectors/ai/bedrock/services/model_provider/utils.py +++ b/python/semantic_kernel/connectors/ai/bedrock/services/model_provider/utils.py @@ -4,12 +4,10 @@ import json from collections.abc import Callable, Mapping from functools import partial -from typing import Any +from typing import TYPE_CHECKING, Any from semantic_kernel.connectors.ai.bedrock.bedrock_prompt_execution_settings import BedrockChatPromptExecutionSettings -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.const import DEFAULT_FULLY_QUALIFIED_NAME_SEPARATOR from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent @@ -20,6 +18,10 @@ from semantic_kernel.contents.utils.finish_reason import FinishReason from semantic_kernel.exceptions.service_exceptions import ServiceInvalidRequestError +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration + from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings + async def run_in_executor(executor, func, *args, **kwargs): """Run a function in an executor.""" @@ -177,8 +179,8 @@ def format_bedrock_function_name_to_kernel_function_fully_qualified_name(bedrock def update_settings_from_function_choice_configuration( - function_choice_configuration: FunctionCallChoiceConfiguration, - settings: PromptExecutionSettings, + function_choice_configuration: "FunctionCallChoiceConfiguration", + settings: "PromptExecutionSettings", type: FunctionChoiceType, ) -> None: """Update the settings from a FunctionChoiceConfiguration.""" diff --git a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py index de9edf36c268..5c527e994564 100644 --- a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py +++ b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py @@ -9,14 +9,9 @@ from typing import TYPE_CHECKING, Any, ClassVar from opentelemetry.trace import Span, Tracer, get_tracer, use_span +from pydantic import Field -from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration -from semantic_kernel.connectors.ai.function_calling_utils import ( - merge_function_results, - 
merge_streaming_function_results, -) -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior, FunctionChoiceType +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType from semantic_kernel.const import AUTO_FUNCTION_INVOCATION_SPAN_NAME from semantic_kernel.contents.annotation_content import AnnotationContent from semantic_kernel.contents.file_reference_content import FileReferenceContent @@ -26,6 +21,7 @@ from semantic_kernel.utils.telemetry.model_diagnostics.gen_ai_attributes import AVAILABLE_FUNCTIONS if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.chat_message_content import ChatMessageContent @@ -41,6 +37,7 @@ class ChatCompletionClientBase(AIServiceClientBase, ABC): # Connectors that support function calling should set this to True SUPPORTS_FUNCTION_CALLING: ClassVar[bool] = False + instruction_role: str = Field(default_factory=lambda: "system", description="The role for instructions.") # region Internal methods to be implemented by the derived classes @@ -102,6 +99,10 @@ async def get_chat_message_contents( Returns: A list of chat message contents representing the response(s) from the LLM. """ + from semantic_kernel.connectors.ai.function_calling_utils import ( + merge_function_results, + ) + # Create a copy of the settings to avoid modifying the original settings settings = copy.deepcopy(settings) # Later on, we already use the tools or equivalent settings, we cast here. @@ -111,15 +112,6 @@ async def get_chat_message_contents( if not self.SUPPORTS_FUNCTION_CALLING: return await self._inner_get_chat_message_contents(chat_history, settings) - # For backwards compatibility we need to convert the `FunctionCallBehavior` to `FunctionChoiceBehavior` - # if this method is called with a `FunctionCallBehavior` object as part of the settings - if hasattr(settings, "function_call_behavior") and isinstance( - settings.function_call_behavior, FunctionCallBehavior - ): - settings.function_choice_behavior = FunctionChoiceBehavior.from_function_call_behavior( - settings.function_call_behavior - ) - kernel: "Kernel" = kwargs.get("kernel") # type: ignore if settings.function_choice_behavior is not None: if kernel is None: @@ -217,6 +209,10 @@ async def get_streaming_chat_message_contents( Yields: A stream representing the response(s) from the LLM. """ + from semantic_kernel.connectors.ai.function_calling_utils import ( + merge_streaming_function_results, + ) + # Create a copy of the settings to avoid modifying the original settings settings = copy.deepcopy(settings) # Later on, we already use the tools or equivalent settings, we cast here. 
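With the `FunctionCallBehavior` to `FunctionChoiceBehavior` conversion removed from both the regular and streaming paths, callers that still set the deprecated attribute no longer get an automatic mapping. A hedged migration sketch, using the settings import path common in the samples:

```python
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
from semantic_kernel.connectors.ai.open_ai import OpenAIChatPromptExecutionSettings

# Before (deprecated, no longer converted after this PR):
#   settings.function_call_behavior = FunctionCallBehavior.AutoInvokeKernelFunctions()

# After: state the choice behavior directly on the execution settings.
settings = OpenAIChatPromptExecutionSettings(
    function_choice_behavior=FunctionChoiceBehavior.Auto(),  # auto-invoke kernel functions
)
```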
@@ -230,15 +226,6 @@ async def get_streaming_chat_message_contents( yield streaming_chat_message_contents return - # For backwards compatibility we need to convert the `FunctionCallBehavior` to `FunctionChoiceBehavior` - # if this method is called with a `FunctionCallBehavior` object as part of the settings - if hasattr(settings, "function_call_behavior") and isinstance( - settings.function_call_behavior, FunctionCallBehavior - ): - settings.function_choice_behavior = FunctionChoiceBehavior.from_function_call_behavior( - settings.function_call_behavior - ) - kernel: "Kernel" = kwargs.get("kernel") # type: ignore if settings.function_choice_behavior is not None: if kernel is None: @@ -397,7 +384,7 @@ def _verify_function_choice_settings(self, settings: "PromptExecutionSettings") def _update_function_choice_settings_callback( self, - ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]: + ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]: """Return the callback function to update the settings from a function call configuration. Override this method to provide a custom callback function to diff --git a/python/semantic_kernel/connectors/ai/function_call_behavior.py b/python/semantic_kernel/connectors/ai/function_call_behavior.py deleted file mode 100644 index 913df72f7853..000000000000 --- a/python/semantic_kernel/connectors/ai/function_call_behavior.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -from collections.abc import Callable -from typing import TYPE_CHECKING, Literal - -from pydantic.dataclasses import dataclass -from typing_extensions import deprecated - -from semantic_kernel.functions.kernel_function_metadata import KernelFunctionMetadata -from semantic_kernel.kernel_pydantic import KernelBaseModel - -if TYPE_CHECKING: - from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings - from semantic_kernel.kernel import Kernel - -DEFAULT_MAX_AUTO_INVOKE_ATTEMPTS = 5 - - -@dataclass -class FunctionCallConfiguration: - """Class that holds the configured functions for function calling.""" - - available_functions: list["KernelFunctionMetadata"] | None = None - required_functions: list["KernelFunctionMetadata"] | None = None - - -@deprecated("The `FunctionCallBehavior` class is deprecated; use `FunctionChoiceBehavior` instead.", category=None) -class FunctionCallBehavior(KernelBaseModel): - """Class that controls function calling behavior. - - DEPRECATED: This class has been replaced by FunctionChoiceBehavior. - - Args: - enable_kernel_functions (bool): Enable kernel functions. - max_auto_invoke_attempts (int): The maximum number of auto invoke attempts. - - Attributes: - enable_kernel_functions (bool): Enable kernel functions. - max_auto_invoke_attempts (int): The maximum number of auto invoke attempts. - - Properties: - auto_invoke_kernel_functions: Check if the kernel functions should be auto-invoked. - Determined as max_auto_invoke_attempts > 0. - - Methods: - configure: Configures the settings for the function call behavior, - the default version in this class, does nothing, use subclasses for different behaviors. - - Class methods: - AutoInvokeKernelFunctions: Returns KernelFunctions class with auto_invoke enabled, all functions. - EnableKernelFunctions: Returns KernelFunctions class with auto_invoke disabled, all functions. 
- EnableFunctions: Set the enable kernel functions flag, filtered functions, auto_invoke optional. - RequiredFunction: Set the required function flag, auto_invoke optional. - - """ - - enable_kernel_functions: bool = True - max_auto_invoke_attempts: int = DEFAULT_MAX_AUTO_INVOKE_ATTEMPTS - - @property - def auto_invoke_kernel_functions(self): - """Check if the kernel functions should be auto-invoked.""" - return self.max_auto_invoke_attempts > 0 - - @auto_invoke_kernel_functions.setter - def auto_invoke_kernel_functions(self, value: bool): - """Set the auto_invoke_kernel_functions flag.""" - if not value: - self.max_auto_invoke_attempts = 0 - else: - if self.max_auto_invoke_attempts == 0: - self.max_auto_invoke_attempts = DEFAULT_MAX_AUTO_INVOKE_ATTEMPTS - - def configure( - self, - kernel: "Kernel", - update_settings_callback: Callable[..., None], - settings: "PromptExecutionSettings", - ) -> None: - """Configures the settings for the function call behavior. - - Using the base ToolCallBehavior means that you manually have to set tool_choice and tools. - - For different behaviors, use the subclasses of ToolCallBehavior: - KernelFunctions (all functions in the Kernel) - EnabledFunctions (filtered set of functions from the Kernel) - RequiredFunction (a single function) - - By default, the update_settings_callback is called with FunctionCallConfiguration, - which contains a list of available functions or a list of required functions, it also - takes the PromptExecutionSettings object. - - It should update the prompt execution settings with the available functions or required functions. - - Alternatively you can override this class and add your own logic in the configure method. - """ - return - - @classmethod - @deprecated("Use the `FunctionChoiceBehavior` `Auto` class instead.") - def AutoInvokeKernelFunctions(cls) -> "KernelFunctions": - """Returns KernelFunctions class with auto_invoke enabled.""" - return KernelFunctions(max_auto_invoke_attempts=DEFAULT_MAX_AUTO_INVOKE_ATTEMPTS) - - @classmethod - @deprecated("Use the `FunctionChoiceBehavior` `Auto` class method instead.") - def EnableKernelFunctions(cls) -> "KernelFunctions": - """Returns KernelFunctions class with auto_invoke disabled. - - Function calls are enabled in this case, just not invoked. 
- """ - return KernelFunctions(max_auto_invoke_attempts=0) - - @classmethod - @deprecated("Use the `FunctionChoiceBehavior` `Auto` class method instead.") - def EnableFunctions( - cls, - auto_invoke: bool = False, - *, - filters: dict[ - Literal["excluded_plugins", "included_plugins", "excluded_functions", "included_functions"], list[str] - ] - | None = {}, - ) -> "EnabledFunctions": - """Set the enable kernel functions flag.""" - return EnabledFunctions( - filters=filters, max_auto_invoke_attempts=DEFAULT_MAX_AUTO_INVOKE_ATTEMPTS if auto_invoke else 0 - ) - - @classmethod - @deprecated("Use the `FunctionChoiceBehavior` `Required` class method instead.") - def RequiredFunction( - cls, - auto_invoke: bool = False, - *, - function_fully_qualified_name: str, - ) -> "RequiredFunction": - """Set the required function flag.""" - return RequiredFunction( - function_fully_qualified_name=function_fully_qualified_name, - max_auto_invoke_attempts=1 if auto_invoke else 0, - ) - - -@deprecated("Use the `FunctionChoiceBehavior` `Auto` class instead.") -class KernelFunctions(FunctionCallBehavior): - """Function call behavior for making all kernel functions available for tool calls.""" - - def configure( - self, - kernel: "Kernel", - update_settings_callback: Callable[..., None], - settings: "PromptExecutionSettings", - ) -> None: - """Set the options for the tool call behavior in the settings.""" - if self.enable_kernel_functions: - update_settings_callback( - FunctionCallConfiguration(available_functions=kernel.get_full_list_of_function_metadata()), settings - ) - - -@deprecated("Use the `FunctionChoiceBehavior` `Auto` class instead.") -class EnabledFunctions(FunctionCallBehavior): - """Function call behavior for making a filtered set of functions available for tool calls.""" - - filters: dict[ - Literal["excluded_plugins", "included_plugins", "excluded_functions", "included_functions"], list[str] - ] - - def configure( - self, - kernel: "Kernel", - update_settings_callback: Callable[..., None], - settings: "PromptExecutionSettings", - ) -> None: - """Set the options for the tool call behavior in the settings.""" - if self.enable_kernel_functions: - update_settings_callback( - FunctionCallConfiguration(available_functions=kernel.get_list_of_function_metadata(self.filters)), - settings, - ) - - -@deprecated("Use the `FunctionChoiceBehavior` `Required` class instead.") -class RequiredFunction(FunctionCallBehavior): - """Function call behavior for making a single function available for tool calls.""" - - function_fully_qualified_name: str - - def configure( - self, - kernel: "Kernel", - update_settings_callback: Callable[..., None], - settings: "PromptExecutionSettings", - ) -> None: - """Set the options for the tool call behavior in the settings.""" - if not self.enable_kernel_functions: - return - # since using this always calls this single function, we do not want to allow repeated calls - if self.max_auto_invoke_attempts > 1: - self.max_auto_invoke_attempts = 1 - update_settings_callback( - FunctionCallConfiguration( - required_functions=kernel.get_list_of_function_metadata({ - "included_functions": [self.function_fully_qualified_name] - }) - ), - settings, - ) diff --git a/python/semantic_kernel/connectors/ai/function_calling_utils.py b/python/semantic_kernel/connectors/ai/function_calling_utils.py index c7ab3dba6b39..7a5c2950c4e0 100644 --- a/python/semantic_kernel/connectors/ai/function_calling_utils.py +++ b/python/semantic_kernel/connectors/ai/function_calling_utils.py @@ -3,9 +3,6 @@ from 
collections import OrderedDict from typing import TYPE_CHECKING, Any -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.function_result_content import FunctionResultContent -from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError @@ -15,6 +12,8 @@ FunctionChoiceType, ) from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings + from semantic_kernel.contents.chat_message_content import ChatMessageContent + from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent from semantic_kernel.functions.kernel_function_metadata import KernelFunctionMetadata @@ -80,13 +79,16 @@ def _combine_filter_dicts(*dicts: dict[str, list[str]]) -> dict: def merge_function_results( - messages: list[ChatMessageContent], -) -> list[ChatMessageContent]: + messages: list["ChatMessageContent"], +) -> list["ChatMessageContent"]: """Combine multiple function result content types to one chat message content type. This method combines the FunctionResultContent items from separate ChatMessageContent messages, and is used in the event that the `context.terminate = True` condition is met. """ + from semantic_kernel.contents.chat_message_content import ChatMessageContent + from semantic_kernel.contents.function_result_content import FunctionResultContent + items: list[Any] = [] for message in messages: items.extend([item for item in message.items if isinstance(item, FunctionResultContent)]) @@ -99,10 +101,10 @@ def merge_function_results( def merge_streaming_function_results( - messages: list[ChatMessageContent | StreamingChatMessageContent], + messages: list["ChatMessageContent | StreamingChatMessageContent"], ai_model_id: str, function_invoke_attempt: int, -) -> list[StreamingChatMessageContent]: +) -> list["StreamingChatMessageContent"]: """Combine multiple streaming function result content types to one streaming chat message content type. This method combines the FunctionResultContent items from separate StreamingChatMessageContent messages, @@ -116,6 +118,9 @@ def merge_streaming_function_results( Returns: The combined streaming chat message content type. 
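For the merge helpers above, a sketch of the intended effect: several TOOL messages each carrying one `FunctionResultContent` collapse into a single TOOL-role message. The call ids and results below are made up:

```python
from semantic_kernel.connectors.ai.function_calling_utils import merge_function_results
from semantic_kernel.contents.chat_message_content import ChatMessageContent
from semantic_kernel.contents.function_result_content import FunctionResultContent
from semantic_kernel.contents.utils.author_role import AuthorRole

messages = [
    ChatMessageContent(role=AuthorRole.TOOL, items=[FunctionResultContent(id="call_1", result="42")]),
    ChatMessageContent(role=AuthorRole.TOOL, items=[FunctionResultContent(id="call_2", result="ok")]),
]

merged = merge_function_results(messages)
# One TOOL message whose items hold both function results.
assert len(merged) == 1 and len(merged[0].items) == 2
```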
""" + from semantic_kernel.contents.function_result_content import FunctionResultContent + from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent + items: list[Any] = [] for message in messages: items.extend([item for item in message.items if isinstance(item, FunctionResultContent)]) diff --git a/python/semantic_kernel/connectors/ai/function_choice_behavior.py b/python/semantic_kernel/connectors/ai/function_choice_behavior.py index 759274d632f2..f32a57e26952 100644 --- a/python/semantic_kernel/connectors/ai/function_choice_behavior.py +++ b/python/semantic_kernel/connectors/ai/function_choice_behavior.py @@ -2,18 +2,14 @@ import logging from collections.abc import Callable -from enum import Enum from typing import TYPE_CHECKING, Literal, TypeVar -from typing_extensions import deprecated - -from semantic_kernel.connectors.ai.function_calling_utils import _combine_filter_dicts +from semantic_kernel.connectors.ai.function_choice_type import FunctionChoiceType from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError from semantic_kernel.kernel_pydantic import KernelBaseModel from semantic_kernel.utils.experimental_decorator import experimental_class if TYPE_CHECKING: - from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.kernel import Kernel @@ -27,15 +23,6 @@ _T = TypeVar("_T", bound="FunctionChoiceBehavior") -@experimental_class -class FunctionChoiceType(Enum): - """The type of function choice behavior.""" - - AUTO = "auto" - NONE = "none" - REQUIRED = "required" - - @experimental_class class FunctionChoiceBehavior(KernelBaseModel): """Class that controls function choice behavior. 
@@ -75,31 +62,6 @@ class FunctionChoiceBehavior(KernelBaseModel): ) = None type_: FunctionChoiceType | None = None - @classmethod - @deprecated("The `FunctionCallBehavior` class is deprecated; use `FunctionChoiceBehavior` instead.") - def from_function_call_behavior(cls: type[_T], behavior: "FunctionCallBehavior") -> _T: - """Create a FunctionChoiceBehavior from a FunctionCallBehavior.""" - from semantic_kernel.connectors.ai.function_call_behavior import ( - EnabledFunctions, - KernelFunctions, - RequiredFunction, - ) - - if isinstance(behavior, (EnabledFunctions, KernelFunctions)): - return cls.Auto( - auto_invoke=behavior.auto_invoke_kernel_functions, - filters=behavior.filters if hasattr(behavior, "filters") else None, - ) - if isinstance(behavior, (RequiredFunction)): - return cls.Required( - auto_invoke=behavior.auto_invoke_kernel_functions, - filters={"included_functions": [behavior.function_fully_qualified_name]}, - ) - return cls( - enable_kernel_functions=behavior.enable_kernel_functions, - maximum_auto_invoke_attempts=behavior.max_auto_invoke_attempts, - ) - @property def auto_invoke_kernel_functions(self): """Return True if auto_invoke_kernel_functions is enabled.""" @@ -218,6 +180,8 @@ def Required( @classmethod def from_dict(cls: type[_T], data: dict) -> _T: """Create a FunctionChoiceBehavior from a dictionary.""" + from semantic_kernel.connectors.ai.function_calling_utils import _combine_filter_dicts + type_map = { "auto": cls.Auto, "none": cls.NoneInvoke, diff --git a/python/semantic_kernel/connectors/ai/function_choice_type.py b/python/semantic_kernel/connectors/ai/function_choice_type.py new file mode 100644 index 000000000000..d4bc2b3a598f --- /dev/null +++ b/python/semantic_kernel/connectors/ai/function_choice_type.py @@ -0,0 +1,14 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +from enum import Enum + +from semantic_kernel.utils.experimental_decorator import experimental_class + + +@experimental_class +class FunctionChoiceType(Enum): + """The type of function choice behavior.""" + + AUTO = "auto" + NONE = "none" + REQUIRED = "required" diff --git a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py index df8f64cf4c6c..b7005c3c1f5d 100644 --- a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py @@ -5,6 +5,11 @@ from collections.abc import AsyncGenerator, Callable from typing import TYPE_CHECKING, Any, ClassVar +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover + import google.generativeai as genai from google.generativeai import GenerativeModel from google.generativeai.protos import Candidate, Content @@ -13,7 +18,6 @@ from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase from semantic_kernel.connectors.ai.completion_usage import CompletionUsage -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType from semantic_kernel.connectors.ai.google.google_ai.google_ai_prompt_execution_settings import ( GoogleAIChatPromptExecutionSettings, @@ -50,13 +54,9 @@ ) if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings -if sys.version_info >= (3, 12): - from typing import override # pragma: no cover -else: - from typing_extensions import override # pragma: no cover - logger: logging.Logger = logging.getLogger(__name__) @@ -186,7 +186,7 @@ def _verify_function_choice_settings(self, settings: "PromptExecutionSettings") @override def _update_function_choice_settings_callback( self, - ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]: + ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]: return update_settings_from_function_choice_configuration @override diff --git a/python/semantic_kernel/connectors/ai/google/google_ai/services/utils.py b/python/semantic_kernel/connectors/ai/google/google_ai/services/utils.py index 6086d0167694..77c88526eedb 100644 --- a/python/semantic_kernel/connectors/ai/google/google_ai/services/utils.py +++ b/python/semantic_kernel/connectors/ai/google/google_ai/services/utils.py @@ -2,11 +2,10 @@ import json import logging -from typing import Any +from typing import TYPE_CHECKING, Any from google.generativeai.protos import Blob, Candidate, FunctionCall, FunctionResponse, Part -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType from semantic_kernel.connectors.ai.google.google_ai.google_ai_prompt_execution_settings import ( GoogleAIChatPromptExecutionSettings, @@ -15,7 +14,6 @@ FUNCTION_CHOICE_TYPE_TO_GOOGLE_FUNCTION_CALLING_MODE, GEMINI_FUNCTION_NAME_SEPARATOR, ) -from 
semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.function_result_content import FunctionResultContent @@ -25,6 +23,10 @@ from semantic_kernel.exceptions.service_exceptions import ServiceInvalidRequestError from semantic_kernel.functions.kernel_function_metadata import KernelFunctionMetadata +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration + from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings + logger: logging.Logger = logging.getLogger(__name__) @@ -148,8 +150,8 @@ def kernel_function_metadata_to_google_ai_function_call_format(metadata: KernelF def update_settings_from_function_choice_configuration( - function_choice_configuration: FunctionCallChoiceConfiguration, - settings: PromptExecutionSettings, + function_choice_configuration: "FunctionCallChoiceConfiguration", + settings: "PromptExecutionSettings", type: FunctionChoiceType, ) -> None: """Update the settings from a FunctionChoiceConfiguration.""" diff --git a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/utils.py b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/utils.py index 18f5b2feb6ca..f3211066d466 100644 --- a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/utils.py +++ b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/utils.py @@ -2,12 +2,12 @@ import json import logging +from typing import TYPE_CHECKING from google.cloud.aiplatform_v1beta1.types.content import Blob, Candidate, Part from google.cloud.aiplatform_v1beta1.types.tool import FunctionCall, FunctionResponse from vertexai.generative_models import FunctionDeclaration, Tool, ToolConfig -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType from semantic_kernel.connectors.ai.google.shared_utils import ( FUNCTION_CHOICE_TYPE_TO_GOOGLE_FUNCTION_CALLING_MODE, @@ -16,7 +16,6 @@ from semantic_kernel.connectors.ai.google.vertex_ai.vertex_ai_prompt_execution_settings import ( VertexAIChatPromptExecutionSettings, ) -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.function_result_content import FunctionResultContent @@ -26,6 +25,10 @@ from semantic_kernel.exceptions.service_exceptions import ServiceInvalidRequestError from semantic_kernel.functions.kernel_function_metadata import KernelFunctionMetadata +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration + from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings + logger: logging.Logger = logging.getLogger(__name__) @@ -149,8 +152,8 @@ def kernel_function_metadata_to_vertex_ai_function_call_format(metadata: KernelF def update_settings_from_function_choice_configuration( - function_choice_configuration: FunctionCallChoiceConfiguration, - settings: PromptExecutionSettings, + function_choice_configuration: "FunctionCallChoiceConfiguration", + settings: 
"PromptExecutionSettings", type: FunctionChoiceType, ) -> None: """Update the settings from a FunctionChoiceConfiguration.""" diff --git a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py index 6372c71c5b1c..bd7c1346accf 100644 --- a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py @@ -2,7 +2,12 @@ import sys from collections.abc import AsyncGenerator, AsyncIterable, Callable -from typing import Any, ClassVar +from typing import TYPE_CHECKING, Any, ClassVar + +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover import vertexai from google.cloud.aiplatform_v1beta1.types.content import Content @@ -11,7 +16,6 @@ from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase from semantic_kernel.connectors.ai.completion_usage import CompletionUsage -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType from semantic_kernel.connectors.ai.google.shared_utils import ( filter_system_message, @@ -29,7 +33,6 @@ VertexAIChatPromptExecutionSettings, ) from semantic_kernel.connectors.ai.google.vertex_ai.vertex_ai_settings import VertexAISettings -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.chat_message_content import ITEM_TYPES, ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent @@ -48,10 +51,9 @@ trace_streaming_chat_completion, ) -if sys.version_info >= (3, 12): - from typing import override # pragma: no cover -else: - from typing_extensions import override # pragma: no cover +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration + from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings class VertexAIChatCompletion(VertexAIBase, ChatCompletionClientBase): @@ -181,7 +183,7 @@ def _verify_function_choice_settings(self, settings: "PromptExecutionSettings") @override def _update_function_choice_settings_callback( self, - ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]: + ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]: return update_settings_from_function_choice_configuration @override diff --git a/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py index b374235225a4..2405897a6c39 100644 --- a/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py @@ -3,7 +3,7 @@ import logging import sys from collections.abc import AsyncGenerator, Callable -from typing import Any, ClassVar +from typing import TYPE_CHECKING, Any, ClassVar if sys.version_info >= (3, 12): from typing import override # pragma: no cover @@ -24,7 +24,6 @@ from 
semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase from semantic_kernel.connectors.ai.completion_usage import CompletionUsage -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_calling_utils import kernel_function_metadata_to_function_call_format from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType from semantic_kernel.connectors.ai.mistral_ai.prompt_execution_settings.mistral_ai_prompt_execution_settings import ( @@ -32,7 +31,6 @@ ) from semantic_kernel.connectors.ai.mistral_ai.services.mistral_ai_base import MistralAIBase from semantic_kernel.connectors.ai.mistral_ai.settings.mistral_ai_settings import MistralAISettings -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents import ( ChatMessageContent, FunctionCallContent, @@ -50,6 +48,10 @@ trace_streaming_chat_completion, ) +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration + from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings + logger: logging.Logger = logging.getLogger(__name__) @@ -315,7 +317,7 @@ def update_settings_from_function_call_configuration_mistral( @override def _update_function_choice_settings_callback( self, - ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]: + ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]: return self.update_settings_from_function_call_configuration_mistral @override diff --git a/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py b/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py index baf2d04f2914..103133af2c9f 100644 --- a/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py @@ -17,7 +17,6 @@ from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase from semantic_kernel.connectors.ai.completion_usage import CompletionUsage -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType from semantic_kernel.connectors.ai.ollama.ollama_prompt_execution_settings import OllamaChatPromptExecutionSettings from semantic_kernel.connectors.ai.ollama.ollama_settings import OllamaSettings @@ -45,6 +44,7 @@ ) if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings CMC_TYPE = TypeVar("CMC_TYPE", bound=ChatMessageContent) @@ -137,7 +137,7 @@ def _verify_function_choice_settings(self, settings: "PromptExecutionSettings") @override def _update_function_choice_settings_callback( self, - ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]: + ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]: return update_settings_from_function_choice_configuration @override diff --git a/python/semantic_kernel/connectors/ai/ollama/services/utils.py 
b/python/semantic_kernel/connectors/ai/ollama/services/utils.py index 7cf0e6e225cb..9745a4063484 100644 --- a/python/semantic_kernel/connectors/ai/ollama/services/utils.py +++ b/python/semantic_kernel/connectors/ai/ollama/services/utils.py @@ -2,19 +2,22 @@ import json from collections.abc import Callable, Mapping +from typing import TYPE_CHECKING from ollama._types import Message -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_calling_utils import kernel_function_metadata_to_function_call_format from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.function_result_content import FunctionResultContent from semantic_kernel.contents.image_content import ImageContent from semantic_kernel.contents.utils.author_role import AuthorRole +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration + from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings + def _format_system_message(message: ChatMessageContent) -> Message: """Format a system message to the expected object for the client. @@ -110,8 +113,8 @@ def _format_tool_message(message: ChatMessageContent) -> Message: def update_settings_from_function_choice_configuration( - function_choice_configuration: FunctionCallChoiceConfiguration, - settings: PromptExecutionSettings, + function_choice_configuration: "FunctionCallChoiceConfiguration", + settings: "PromptExecutionSettings", type: FunctionChoiceType, ) -> None: """Update the settings from a FunctionChoiceConfiguration. diff --git a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py index 425639d6a291..d2b37d44bb40 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py @@ -1,17 +1,10 @@ # Copyright (c) Microsoft. All rights reserved. import logging -import sys from typing import Annotated, Any, Literal -if sys.version_info >= (3, 11): - from typing import Self # pragma: no cover -else: - from typing_extensions import Self # pragma: no cover - from pydantic import BaseModel, Field, field_validator, model_validator -from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.exceptions import ServiceInvalidExecutionSettingsError @@ -73,7 +66,6 @@ class OpenAIChatPromptExecutionSettings(OpenAIPromptExecutionSettings): messages: Annotated[ list[dict[str, Any]] | None, Field(description="Do not set this manually. 
It is set by the service.") ] = None - function_call_behavior: Annotated[FunctionCallBehavior | None, Field(exclude=True)] = None parallel_tool_calls: bool | None = True tools: Annotated[ list[dict[str, Any]] | None, @@ -153,32 +145,6 @@ def validate_response_format_and_set_flag(cls, values: Any) -> Any: return values - @model_validator(mode="before") - @classmethod - def validate_function_calling_behaviors(cls, data: Any) -> Any: - """Check if function_call_behavior is set and if so, move to use function_choice_behavior instead.""" - # In an attempt to phase out the use of `function_call_behavior` in favor of `function_choice_behavior`, - # we are syncing the `function_call_behavior` with `function_choice_behavior` if the former is set. - # This allows us to make decisions off of `function_choice_behavior`. Anytime the `function_call_behavior` - # is updated, this validation will run to ensure the `function_choice_behavior` stays in sync. - from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior - - if isinstance(data, dict) and "function_call_behavior" in data.get("extension_data", {}): - data["function_choice_behavior"] = FunctionChoiceBehavior.from_function_call_behavior( - data.get("extension_data", {}).get("function_call_behavior") - ) - return data - - @field_validator("function_call_behavior", mode="after") - @classmethod - def check_for_function_call_behavior(cls, v) -> Self: - """Check if function_choice_behavior is set, if not, set it to default.""" - if v is not None: - logger.warning( - "The `function_call_behavior` parameter is deprecated. Please use the `function_choice_behavior` parameter instead." # noqa: E501 - ) - return v - class OpenAIEmbeddingPromptExecutionSettings(PromptExecutionSettings): """Specific settings for the text embedding endpoint.""" diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py b/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py index 03289fd45d58..2549f9027961 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py @@ -54,6 +54,7 @@ def __init__( async_client: AsyncAzureOpenAI | None = None, env_file_path: str | None = None, env_file_encoding: str | None = None, + instruction_role: str | None = None, ) -> None: """Initialize an AzureChatCompletion service. @@ -77,6 +78,8 @@ def __init__( async_client (AsyncAzureOpenAI | None): An existing client to use. (Optional) env_file_path (str | None): Use the environment settings file as a fallback to using env vars. env_file_encoding (str | None): The encoding of the environment settings file, defaults to 'utf-8'. + instruction_role (str | None): The role to use for 'instruction' messages, for example, summarization + prompts could use `developer` or `system`. 
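A usage sketch for the new parameter; endpoint, deployment, and key are assumed to come from environment settings, as in the samples:

```python
from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion

# With instruction_role="developer", instruction (system) messages are sent
# under the `developer` role, which newer reasoning-model APIs expect.
service = AzureChatCompletion(
    service_id="chat",
    instruction_role="developer",
)
```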
(Optional) """ try: azure_openai_settings = AzureOpenAISettings.create( @@ -108,6 +111,7 @@ def __init__( default_headers=default_headers, ai_model_type=OpenAIModelTypes.CHAT, client=async_client, + instruction_role=instruction_role, ) @classmethod diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/azure_config_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/azure_config_base.py index 93662af62579..da50e4ee56b6 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/azure_config_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/azure_config_base.py @@ -36,6 +36,7 @@ def __init__( token_endpoint: str | None = None, default_headers: Mapping[str, str] | None = None, client: AsyncAzureOpenAI | None = None, + instruction_role: str | None = None, ) -> None: """Internal class for configuring a connection to an Azure OpenAI service. @@ -56,6 +57,8 @@ def __init__( token_endpoint (str): Azure AD token endpoint use to get the token. (Optional) default_headers (Union[Mapping[str, str], None]): Default headers for HTTP requests. (Optional) client (AsyncAzureOpenAI): An existing client to use. (Optional) + instruction_role (str | None): The role to use for 'instruction' messages, for example, summarization + prompts could use `developer` or `system`. (Optional) """ # Merge APP_INFO into the headers if it exists @@ -95,6 +98,8 @@ def __init__( } if service_id: args["service_id"] = service_id + if instruction_role: + args["instruction_role"] = instruction_role super().__init__(**args) def to_dict(self) -> dict[str, str]: diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion.py index c643f11859a7..6d59561377ba 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion.py @@ -30,6 +30,7 @@ def __init__( async_client: AsyncOpenAI | None = None, env_file_path: str | None = None, env_file_encoding: str | None = None, + instruction_role: str | None = None, ) -> None: """Initialize an OpenAIChatCompletion service. @@ -47,6 +48,7 @@ def __init__( env_file_path (str | None): Use the environment settings file as a fallback to environment variables. (Optional) env_file_encoding (str | None): The encoding of the environment settings file. 
(Optional) + instruction_role (str | None): The role to use for 'instruction' messages, for example, summarization + prompts could use `developer` or `system`. (Optional) """ try: openai_settings = OpenAISettings.create( @@ -72,6 +74,7 @@ def __init__( ai_model_type=OpenAIModelTypes.CHAT, default_headers=default_headers, client=async_client, + instruction_role=instruction_role, ) @classmethod diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py index 0c1e843c5d47..605b78812ae5 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py @@ -19,17 +19,16 @@ from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase from semantic_kernel.connectors.ai.completion_usage import CompletionUsage -from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior -from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.function_calling_utils import update_settings_from_function_call_configuration from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior, FunctionChoiceType from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import ( OpenAIChatPromptExecutionSettings, ) from semantic_kernel.connectors.ai.open_ai.services.open_ai_handler import OpenAIHandler -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings +from semantic_kernel.contents.annotation_content import AnnotationContent from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.file_reference_content import FileReferenceContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent from semantic_kernel.contents.streaming_text_content import StreamingTextContent @@ -46,6 +45,7 @@ ) if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.functions.kernel_arguments import KernelArguments from semantic_kernel.kernel import Kernel @@ -150,7 +150,7 @@ def _verify_function_choice_settings(self, settings: "PromptExecutionSettings") @override def _update_function_choice_settings_callback( self, - ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]: + ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]: return update_settings_from_function_call_configuration @override @@ -267,6 +267,41 @@ def _get_function_call_from_chat_choice(self, choice: Choice | ChunkChoice) -> l # When you enable asynchronous content filtering in Azure OpenAI, you may receive empty deltas + return [] + def _prepare_chat_history_for_request( + self, + chat_history: "ChatHistory", + role_key: str = "role", + content_key: str = "content", + ) -> Any: + """Prepare the chat history for a request. + + Allows customization of the key names for role/author, and optionally overrides the role.
+ + ChatRole.TOOL messages need to be formatted differently than system/user/assistant messages: + They require a "tool_call_id" and (function) "name" key, and the "metadata" key should + be removed. The "encoding" key should also be removed. + + Override this method to customize the formatting of the chat history for a request. + + Args: + chat_history (ChatHistory): The chat history to prepare. + role_key (str): The key name for the role/author. + content_key (str): The key name for the content/message. + + Returns: + prepared_chat_history (Any): The prepared chat history for a request. + """ + return [ + { + **message.to_dict(role_key=role_key, content_key=content_key), + role_key: "developer" + if self.instruction_role == "developer" and message.to_dict(role_key=role_key)[role_key] == "system" + else message.to_dict(role_key=role_key)[role_key], + } + for message in chat_history.messages + if not isinstance(message, (AnnotationContent, FileReferenceContent)) + ] + # endregion # region function calling @@ -279,15 +314,9 @@ async def _process_function_call( arguments: "KernelArguments | None", function_call_count: int, request_index: int, - function_call_behavior: FunctionChoiceBehavior | FunctionCallBehavior, + function_call_behavior: FunctionChoiceBehavior, ) -> "AutoFunctionInvocationContext | None": """Processes the tool calls in the result and update the chat history.""" - # deprecated and might not even be used anymore, hard to trigger directly - if isinstance(function_call_behavior, FunctionCallBehavior): # pragma: no cover - # We need to still support a `FunctionCallBehavior` input so it doesn't break current - # customers. Map from `FunctionCallBehavior` -> `FunctionChoiceBehavior` - function_call_behavior = FunctionChoiceBehavior.from_function_call_behavior(function_call_behavior) - return await kernel.invoke_function_call( function_call=function_call, chat_history=chat_history, diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_config_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_config_base.py index 7ead64865445..d3d72795665b 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_config_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_config_base.py @@ -29,6 +29,7 @@ def __init__( service_id: str | None = None, default_headers: Mapping[str, str] | None = None, client: AsyncOpenAI | None = None, + instruction_role: str | None = None, ) -> None: """Initialize a client for OpenAI services. @@ -48,6 +49,8 @@ def __init__( default_headers (Mapping[str, str]): Default headers for HTTP requests. (Optional) client (AsyncOpenAI): An existing OpenAI client, optional. + instruction_role (str): The role to use for 'instruction' + messages, for example, summarization prompts could use `developer` or `system`.
(Optional) """ # Merge APP_INFO into the headers if it exists @@ -71,6 +74,8 @@ def __init__( } if service_id: args["service_id"] = service_id + if instruction_role: + args["instruction_role"] = instruction_role super().__init__(**args) def to_dict(self) -> dict[str, str]: diff --git a/python/semantic_kernel/contents/__init__.py b/python/semantic_kernel/contents/__init__.py index 352a5915cc68..c326115ccd86 100644 --- a/python/semantic_kernel/contents/__init__.py +++ b/python/semantic_kernel/contents/__init__.py @@ -6,6 +6,8 @@ from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.function_result_content import FunctionResultContent +from semantic_kernel.contents.history_reducer.chat_history_summarization_reducer import ChatHistorySummarizationReducer +from semantic_kernel.contents.history_reducer.chat_history_truncation_reducer import ChatHistoryTruncationReducer from semantic_kernel.contents.image_content import ImageContent from semantic_kernel.contents.streaming_annotation_content import StreamingAnnotationContent from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent @@ -20,6 +22,8 @@ "AudioContent", "AuthorRole", "ChatHistory", + "ChatHistorySummarizationReducer", + "ChatHistoryTruncationReducer", "ChatMessageContent", "FinishReason", "FunctionCallContent", diff --git a/python/semantic_kernel/contents/function_call_content.py b/python/semantic_kernel/contents/function_call_content.py index 08b9c9e19757..7067311f4c8a 100644 --- a/python/semantic_kernel/contents/function_call_content.py +++ b/python/semantic_kernel/contents/function_call_content.py @@ -221,4 +221,13 @@ def to_dict(self) -> dict[str, str | Any]: def __hash__(self) -> int: """Return the hash of the function call content.""" - return hash((self.tag, self.id, self.index, self.name, self.function_name, self.plugin_name, self.arguments)) + args_hashable = frozenset(self.arguments.items()) if isinstance(self.arguments, Mapping) else None + return hash(( + self.tag, + self.id, + self.index, + self.name, + self.function_name, + self.plugin_name, + args_hashable, + )) diff --git a/python/semantic_kernel/contents/history_reducer/__init__.py b/python/semantic_kernel/contents/history_reducer/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/semantic_kernel/contents/history_reducer/chat_history_reducer.py b/python/semantic_kernel/contents/history_reducer/chat_history_reducer.py new file mode 100644 index 000000000000..bc05c705ceda --- /dev/null +++ b/python/semantic_kernel/contents/history_reducer/chat_history_reducer.py @@ -0,0 +1,31 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +import sys +from abc import ABC, abstractmethod + +if sys.version_info < (3, 11): + from typing_extensions import Self # pragma: no cover +else: + from typing import Self # type: ignore # pragma: no cover + +from pydantic import Field + +from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.utils.experimental_decorator import experimental_class + + +@experimental_class +class ChatHistoryReducer(ChatHistory, ABC): + """Defines a contract for reducing chat history.""" + + target_count: int = Field(..., gt=0, description="Target message count.") + threshold_count: int = Field(0, ge=0, description="Threshold count to avoid orphaning messages.") + + @abstractmethod + async def reduce(self) -> Self | None: + """Reduce the chat history in some way (e.g., truncate, summarize). + + Returns: + The possibly shorter chat history (self), or None if no change is needed. + """ + ... diff --git a/python/semantic_kernel/contents/history_reducer/chat_history_reducer_utils.py b/python/semantic_kernel/contents/history_reducer/chat_history_reducer_utils.py new file mode 100644 index 000000000000..6742c0b56816 --- /dev/null +++ b/python/semantic_kernel/contents/history_reducer/chat_history_reducer_utils.py @@ -0,0 +1,211 @@ +# Copyright (c) Microsoft. All rights reserved. + +import logging +from collections.abc import Callable + +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.function_call_content import FunctionCallContent +from semantic_kernel.contents.function_result_content import FunctionResultContent +from semantic_kernel.contents.utils.author_role import AuthorRole +from semantic_kernel.utils.experimental_decorator import experimental_function + +logger = logging.getLogger(__name__) + + +SUMMARY_METADATA_KEY = "__summary__" + + +@experimental_function +def get_call_result_pairs(history: list[ChatMessageContent]) -> list[tuple[int, int]]: + """Identify all (FunctionCallContent, FunctionResultContent) pairs in the history. + + Return a list of (call_index, result_index) pairs for safe referencing. + """ + pairs: list[tuple[int, int]] = [] # (call_index, result_index) pairs + call_ids_seen: dict[str, int] = {} # Map call IDs (str) to their indices (int) + + # Gather all function-call IDs and their indices. + for i, msg in enumerate(history): + for item in msg.items: + if isinstance(item, FunctionCallContent) and item.id is not None: + call_ids_seen[item.id] = i + + # Now, match each FunctionResultContent to the earliest unmatched call with the same ID. + for j, msg in enumerate(history): + for item in msg.items: + if isinstance(item, FunctionResultContent) and item.id is not None: + call_id = item.id + if call_id in call_ids_seen: + call_index = call_ids_seen[call_id] + pairs.append((call_index, j)) + # Remove the call ID so we don't match it a second time + del call_ids_seen[call_id] + break + + return pairs + + +@experimental_function +def locate_summarization_boundary(history: list[ChatMessageContent]) -> int: + """Identify the index of the first message that is not a summary message. + + This is indicated by the presence of the SUMMARY_METADATA_KEY in the message metadata. + + Returns: + The insertion point index for normal history messages (i.e., after all summary messages).
+ """ + for idx, msg in enumerate(history): + if not msg.metadata or SUMMARY_METADATA_KEY not in msg.metadata: + return idx + return len(history) + + +@experimental_function +def locate_safe_reduction_index( + history: list[ChatMessageContent], + target_count: int, + threshold_count: int = 0, + offset_count: int = 0, +) -> int | None: + """Identify the index of the first message at or beyond the specified target_count. + + This index does not orphan sensitive content (function calls/results). + + This method ensures that the presence of a function-call always follows with its result, + so the function-call and its function-result are never separated. + + In addition, it attempts to locate a user message within the threshold window so that + context with the subsequent assistant response is preserved. + + Args: + history: The entire chat history. + target_count: The desired message count after reduction. + threshold_count: The threshold beyond target_count required to trigger reduction. + If total messages <= (target_count + threshold_count), no reduction occurs. + offset_count: Optional number of messages to skip at the start (e.g. existing summary messages). + + Returns: + The index that identifies the starting point for a reduced history that does not orphan + sensitive content. Returns None if reduction is not needed. + """ + total_count = len(history) + threshold_index = total_count - (threshold_count or 0) - target_count + if threshold_index <= offset_count: + return None + + message_index = total_count - target_count + + # Move backward to avoid cutting function calls / results + while message_index >= offset_count: + if not any( + isinstance(item, (FunctionCallContent, FunctionResultContent)) for item in history[message_index].items + ): + break + message_index -= 1 + + # This is our initial target truncation index + target_index = message_index + + # Attempt to see if there's a user message in the threshold window + while message_index >= threshold_index: + if history[message_index].role == AuthorRole.USER: + return message_index + message_index -= 1 + + return target_index + + +@experimental_function +def extract_range( + history: list[ChatMessageContent], + start: int, + end: int | None = None, + filter_func: Callable[[ChatMessageContent], bool] | None = None, + preserve_pairs: bool = False, +) -> list[ChatMessageContent]: + """Extract a range of messages from the source history, skipping any message for which we do not want to keep. + + For example, function calls/results, if desired. + + Args: + history: The source history. + start: The index of the first message to extract (inclusive). + end: The index of the last message to extract (exclusive). If None, extracts through end. + filter_func: A function that takes a ChatMessageContent and returns True if the message should + be skipped, False otherwise. + preserve_pairs: If True, ensures that function call and result pairs are either both kept or both skipped. + + Returns: + A list of extracted messages. 
+ """ + if end is None: + end = len(history) + + sliced = list(range(start, end)) + + # If we need to preserve call->result pairs, gather them + pair_map = {} + if preserve_pairs: + pairs = get_call_result_pairs(history) + # store in a dict for quick membership checking + # call_idx -> result_idx, and also result_idx -> call_idx + for cidx, ridx in pairs: + pair_map[cidx] = ridx + pair_map[ridx] = cidx + + extracted: list[ChatMessageContent] = [] + i = 0 + while i < len(sliced): + idx = sliced[i] + msg = history[idx] + + # If filter_func excludes it, skip it + if filter_func and filter_func(msg): + i += 1 + continue + + # If preserve_pairs is on, and there's a paired index, skip or include them both + if preserve_pairs and idx in pair_map: + paired_idx = pair_map[idx] + # If the pair is within [start, end), we must keep or skip them together + if start <= paired_idx < end: + # Check if the pair or itself fails filter_func + if filter_func and (filter_func(history[paired_idx]) or filter_func(msg)): + # skip both + i += 1 + # Also skip the paired index if it's in our current slice + if paired_idx in sliced: + # remove it from the slice so we don't process it again + sliced.remove(paired_idx) + continue + # keep both + extracted.append(msg) + if paired_idx > idx: + # We'll skip the pair in the normal iteration by removing from slice + # but add it to extracted right now + extracted.append(history[paired_idx]) + if paired_idx in sliced: + sliced.remove(paired_idx) + else: + # if paired_idx < idx, it might appear later, so skip for now + # but we may have already processed it if i was the 2nd item + # either way, do not add duplicates + pass + i += 1 + continue + # If the paired_idx is outside [start, end), there's no conflict + # so we can just do normal logic + extracted.append(msg) + i += 1 + else: + # keep it if filter_func not triggered + extracted.append(msg) + i += 1 + + return extracted + + +@experimental_function +def contains_function_call_or_result(msg: ChatMessageContent) -> bool: + """Return True if the message has any function call or function result.""" + return any(isinstance(item, (FunctionCallContent, FunctionResultContent)) for item in msg.items) diff --git a/python/semantic_kernel/contents/history_reducer/chat_history_summarization_reducer.py b/python/semantic_kernel/contents/history_reducer/chat_history_summarization_reducer.py new file mode 100644 index 000000000000..1feaf1a839ad --- /dev/null +++ b/python/semantic_kernel/contents/history_reducer/chat_history_summarization_reducer.py @@ -0,0 +1,226 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +import logging +import sys +from typing import Any + +from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings +from semantic_kernel.utils.experimental_decorator import experimental_class + +if sys.version_info < (3, 11): + from typing_extensions import Self # pragma: no cover +else: + from typing import Self # type: ignore # pragma: no cover + +from pydantic import Field + +from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase +from semantic_kernel.const import DEFAULT_SERVICE_NAME +from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer +from semantic_kernel.contents.history_reducer.chat_history_reducer_utils import ( + SUMMARY_METADATA_KEY, + contains_function_call_or_result, + extract_range, + locate_safe_reduction_index, + locate_summarization_boundary, +) +from semantic_kernel.exceptions.content_exceptions import ChatHistoryReducerException + +logger = logging.getLogger(__name__) + +DEFAULT_SUMMARIZATION_PROMPT = """ +Provide a concise and complete summarization of the entire dialog that does not exceed 5 sentences. + +This summary must always: +- Consider both user and assistant interactions +- Maintain continuity for the purpose of further dialog +- Include details from any existing summary +- Focus on the most significant aspects of the dialog + +This summary must never: +- Critique, correct, interpret, presume, or assume +- Identify faults, mistakes, misunderstanding, or correctness +- Analyze what has not occurred +- Exclude details from any existing summary +""" + + +@experimental_class +class ChatHistorySummarizationReducer(ChatHistoryReducer): + """A ChatHistory with logic to summarize older messages past a target count.""" + + service: ChatCompletionClientBase + summarization_instructions: str = Field( + default_factory=lambda: DEFAULT_SUMMARIZATION_PROMPT, + description="The summarization instructions.", + ) + use_single_summary: bool = Field(True, description="Whether to use a single summary message.") + fail_on_error: bool = Field(True, description="Raise error if summarization fails.") + service_id: str = Field( + default_factory=lambda: DEFAULT_SERVICE_NAME, description="The ID of the chat completion service." + ) + include_function_content_in_summary: bool = Field( + False, description="Whether to include function calls/results in the summary." + ) + execution_settings: PromptExecutionSettings | None = None + + def __init__( + self, + service: ChatCompletionClientBase, + target_count: int, + service_id: str | None = None, + threshold_count: int | None = None, + summarization_instructions: str | None = None, + use_single_summary: bool | None = None, + fail_on_error: bool | None = None, + include_function_content_in_summary: bool | None = None, + execution_settings: PromptExecutionSettings | None = None, + **kwargs: Any, + ): + """Initialize the ChatHistorySummarizationReducer. + + Args: + service (ChatCompletionClientBase): The chat completion service. + target_count (int): The target number of messages to retain after applying summarization. + service_id (str | None): The ID of the chat completion service. + threshold_count (int | None): The threshold beyond target_count required to trigger reduction. + summarization_instructions (str | None): The summarization instructions.
+ use_single_summary (bool | None): Whether to use a single summary message. + fail_on_error (bool | None): Raise error if summarization fails. + include_function_content_in_summary (bool | None): Whether to include function calls/results in the summary. + execution_settings (PromptExecutionSettings | None): The prompt execution settings. + **kwargs (Any): Additional keyword arguments. + """ + args: dict[str, Any] = { + "service": service, + "target_count": target_count, + } + if service_id is not None: + args["service_id"] = service_id + if threshold_count is not None: + args["threshold_count"] = threshold_count + if summarization_instructions is not None: + args["summarization_instructions"] = summarization_instructions + if use_single_summary is not None: + args["use_single_summary"] = use_single_summary + if fail_on_error is not None: + args["fail_on_error"] = fail_on_error + if include_function_content_in_summary is not None: + args["include_function_content_in_summary"] = include_function_content_in_summary + if execution_settings is not None: + args["execution_settings"] = execution_settings + + super().__init__(**args, **kwargs) + + async def reduce(self) -> Self | None: + """Summarize the older messages past the target message count.""" + history = self.messages + if len(history) <= self.target_count + (self.threshold_count or 0): + return None # No summarization needed + + logger.info("Performing chat history summarization check...") + + # 1. Identify where existing summary messages end + insertion_point = locate_summarization_boundary(history) + if insertion_point == len(history): + # fallback fix: force boundary to something reasonable + logger.warning("All messages are summaries, forcing boundary to 0.") + insertion_point = 0 + + # 2. Locate the safe reduction index + truncation_index = locate_safe_reduction_index( + history, + self.target_count, + self.threshold_count, + offset_count=insertion_point, + ) + if truncation_index is None: + logger.info("No valid truncation index found.") + return None + + # 3. Extract only the chunk of messages that need summarizing + # If include_function_content_in_summary=False, skip function calls/results + # Otherwise, keep them but never split pairs. + messages_to_summarize = extract_range( + history, + start=0 if self.use_single_summary else insertion_point, + end=truncation_index, + filter_func=(contains_function_call_or_result if not self.include_function_content_in_summary else None), + preserve_pairs=self.include_function_content_in_summary, + ) + + if not messages_to_summarize: + logger.info("No messages to summarize.") + return None + + try: + # 4. Summarize the extracted messages + summary_msg = await self._summarize(messages_to_summarize) + logger.info("Chat History Summarization completed.") + if not summary_msg: + return None + + # Mark the newly-created summary with metadata + summary_msg.metadata[SUMMARY_METADATA_KEY] = True + + # 5. 
Reassemble the new history + keep_existing_summaries = [] + if insertion_point > 0 and not self.use_single_summary: + keep_existing_summaries = history[:insertion_point] + + remainder = history[truncation_index:] + new_history = [*keep_existing_summaries, summary_msg, *remainder] + self.messages = new_history + + return self + + except Exception as ex: + if self.fail_on_error: + raise ChatHistoryReducerException("Chat History Summarization failed.") from ex + logger.warning("Summarization failed, continuing without summary.") + return None + + async def _summarize(self, messages: list[ChatMessageContent]) -> ChatMessageContent | None: + """Use the ChatCompletion service to generate a single summary message.""" + from semantic_kernel.contents.utils.author_role import AuthorRole + + chat_history = ChatHistory(messages=messages) + + role = ( + getattr(self.execution_settings, "instruction_role", AuthorRole.SYSTEM) + if self.execution_settings + else AuthorRole.SYSTEM + ) + + chat_history.add_message(ChatMessageContent(role=role, content=self.summarization_instructions)) + + execution_settings = self.execution_settings or self.service.get_prompt_execution_settings_class()( + service_id=self.service_id + ) + + return await self.service.get_chat_message_content(chat_history=chat_history, settings=execution_settings) + + def __eq__(self, other: object) -> bool: + """Check if two ChatHistorySummarizationReducer objects are equal.""" + if not isinstance(other, ChatHistorySummarizationReducer): + return False + return ( + self.threshold_count == other.threshold_count + and self.target_count == other.target_count + and self.use_single_summary == other.use_single_summary + and self.summarization_instructions == other.summarization_instructions + ) + + def __hash__(self) -> int: + """Hash the object based on the same properties used for equality.""" + return hash(( + self.__class__.__name__, + self.threshold_count, + self.target_count, + self.summarization_instructions, + self.use_single_summary, + )) diff --git a/python/semantic_kernel/contents/history_reducer/chat_history_truncation_reducer.py b/python/semantic_kernel/contents/history_reducer/chat_history_truncation_reducer.py new file mode 100644 index 000000000000..4faf28876748 --- /dev/null +++ b/python/semantic_kernel/contents/history_reducer/chat_history_truncation_reducer.py @@ -0,0 +1,83 @@ +# Copyright (c) Microsoft. All rights reserved. + +import logging +import sys +from typing import Any + +from semantic_kernel.utils.experimental_decorator import experimental_class + +if sys.version_info < (3, 11): + from typing_extensions import Self # pragma: no cover +else: + from typing import Self # type: ignore # pragma: no cover + +from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer +from semantic_kernel.contents.history_reducer.chat_history_reducer_utils import ( + extract_range, + locate_safe_reduction_index, +) + +logger = logging.getLogger(__name__) + + +@experimental_class
class ChatHistoryTruncationReducer(ChatHistoryReducer): + """A ChatHistory that supports truncation logic. + + Because this class inherits from ChatHistoryReducer (which in turn inherits from ChatHistory), + it can also be used anywhere a ChatHistory is expected, while adding truncation capability.
+ """ + + def __init__(self, target_count: int, threshold_count: int | None = None, **kwargs: Any): + """Initialize the truncation reducer.""" + args: dict[str, Any] = { + "target_count": target_count, + } + if threshold_count is not None: + args["threshold_count"] = threshold_count + super().__init__(**args, **kwargs) + + async def reduce(self) -> Self | None: + """Truncate the chat history to the target message count, avoiding orphaned calls. + + Returns: + The truncated list of messages if truncation occurred, or None otherwise. + """ + history = self.messages + if len(history) <= self.target_count + (self.threshold_count or 0): + # No need to reduce + return None + + logger.info("Performing chat history truncation check...") + + truncation_index = locate_safe_reduction_index(history, self.target_count, self.threshold_count) + if truncation_index is None: + logger.info( + f"No truncation index found. Target count: {self.target_count}, Threshold: {self.threshold_count}" + ) + return None + + logger.info(f"Truncating history to {truncation_index} messages.") + truncated_list = extract_range(history, start=truncation_index) + self.messages = truncated_list + return self + + def __eq__(self, other: object) -> bool: + """Compare equality based on truncation settings. + + (We don't factor in the actual ChatHistory messages themselves.) + + Returns: + True if the other object is a ChatHistoryTruncationReducer with the same truncation settings. + """ + if not isinstance(other, ChatHistoryTruncationReducer): + return False + return self.threshold_count == other.threshold_count and self.target_count == other.target_count + + def __hash__(self) -> int: + """Return a hash code based on truncation settings. + + Returns: + A hash code based on the truncation settings. 
+ """ + return hash((self.__class__.__name__, self.threshold_count, self.target_count)) diff --git a/python/semantic_kernel/exceptions/agent_exceptions.py b/python/semantic_kernel/exceptions/agent_exceptions.py index 1c6b5bb897cf..0f13d6ddd368 100644 --- a/python/semantic_kernel/exceptions/agent_exceptions.py +++ b/python/semantic_kernel/exceptions/agent_exceptions.py @@ -38,3 +38,9 @@ class AgentChatException(AgentException): """An error occurred while invoking the agent chat.""" pass + + +class AgentChatHistoryReducerException(AgentException): + """An error occurred while reducing the chat history.""" + + pass diff --git a/python/semantic_kernel/exceptions/content_exceptions.py b/python/semantic_kernel/exceptions/content_exceptions.py index d9c3f5aa10c5..4ad619951f4f 100644 --- a/python/semantic_kernel/exceptions/content_exceptions.py +++ b/python/semantic_kernel/exceptions/content_exceptions.py @@ -39,7 +39,14 @@ class FunctionCallInvalidArgumentsException(ContentException): pass +class ChatHistoryReducerException(ContentException): + """An error occurred while reducing chat history.""" + + pass + + __all__ = [ + "ChatHistoryReducerException", "ContentAdditionException", "ContentException", "ContentInitializationError", diff --git a/python/tests/unit/agents/test_chat_completion_agent.py b/python/tests/unit/agents/test_chat_completion_agent.py index 191826aa23a9..01f9813acf83 100644 --- a/python/tests/unit/agents/test_chat_completion_agent.py +++ b/python/tests/unit/agents/test_chat_completion_agent.py @@ -206,7 +206,8 @@ def test_get_channel_keys(): agent = ChatCompletionAgent() keys = agent.get_channel_keys() - assert keys == [ChatHistoryChannel.__name__] + for key in keys: + assert isinstance(key, str) async def test_create_channel(): diff --git a/python/tests/unit/agents/test_chat_history_channel.py b/python/tests/unit/agents/test_chat_history_channel.py index acb563b9ca7c..4ba15f01a062 100644 --- a/python/tests/unit/agents/test_chat_history_channel.py +++ b/python/tests/unit/agents/test_chat_history_channel.py @@ -23,6 +23,9 @@ async def invoke_stream(self, history: list[ChatMessageContent]) -> AsyncIterabl for message in history: yield ChatMessageContent(role=AuthorRole.SYSTEM, content=f"Processed: {message.content}") + async def reduce_history(self, history: list[ChatMessageContent]) -> list[ChatMessageContent]: + return history + class MockNonChatHistoryHandler: """Mock agent to test incorrect instance handling.""" diff --git a/python/tests/unit/agents/test_sequential_strategy_selection.py b/python/tests/unit/agents/test_sequential_strategy_selection.py index 1a2db9d7409d..17754bd389fd 100644 --- a/python/tests/unit/agents/test_sequential_strategy_selection.py +++ b/python/tests/unit/agents/test_sequential_strategy_selection.py @@ -1,12 +1,13 @@ # Copyright (c) Microsoft. All rights reserved. 
-from unittest.mock import AsyncMock +from unittest.mock import AsyncMock, MagicMock import pytest from semantic_kernel.agents.agent import Agent from semantic_kernel.agents.channels.agent_channel import AgentChannel from semantic_kernel.agents.strategies.selection.sequential_selection_strategy import SequentialSelectionStrategy +from semantic_kernel.exceptions.agent_exceptions import AgentExecutionException class MockAgent(Agent): @@ -78,13 +79,47 @@ async def test_sequential_selection_exceeds_length(agents): selected_agent = await strategy.next(agents, []) assert selected_agent.id == "agent-0" + assert strategy._index == 0 + + selected_agent = await strategy.next(agents, []) + + assert selected_agent.id == "agent-1" assert strategy._index == 1 async def test_sequential_selection_empty_agents(): strategy = SequentialSelectionStrategy() - with pytest.raises(ValueError) as excinfo: + with pytest.raises(AgentExecutionException) as excinfo: await strategy.next([], []) - assert "No agents to select from" in str(excinfo.value) + assert "Agent Failure - No agents present to select." in str(excinfo.value) + + +async def test_sequential_selection_avoid_selecting_same_agent_twice(): + # Arrange + agent_0 = MagicMock(spec=Agent) + agent_0.id = "agent-0" + agent_0.name = "Agent0" + + agent_1 = MagicMock(spec=Agent) + agent_1.id = "agent-1" + agent_1.name = "Agent1" + + agents = [agent_0, agent_1] + + strategy = SequentialSelectionStrategy() + # Simulate that we've already selected an agent once: + strategy.has_selected = True + # Set the initial agent to the first agent + strategy.initial_agent = agent_0 + # Ensure the internal index is set to -1 + strategy._index = -1 + + # Act + selected_agent = await strategy.next(agents, []) + + # Assert + # According to the condition, we should skip selecting agent_0 again + assert selected_agent.id == "agent-1" + assert strategy._index == 1 diff --git a/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py b/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py index 942322bf5153..05fa5773729a 100644 --- a/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py +++ b/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py @@ -27,11 +27,12 @@ # region init def test_azure_ai_inference_chat_completion_init(azure_ai_inference_unit_test_env, model_id) -> None: """Test initialization of AzureAIInferenceChatCompletion""" - azure_ai_inference = AzureAIInferenceChatCompletion(model_id) + azure_ai_inference = AzureAIInferenceChatCompletion(model_id, instruction_role="developer") assert azure_ai_inference.ai_model_id == model_id assert azure_ai_inference.service_id == model_id assert isinstance(azure_ai_inference.client, ChatCompletionsClient) + assert azure_ai_inference.instruction_role == "developer" @patch("azure.ai.inference.aio.ChatCompletionsClient.__init__", return_value=None) diff --git a/python/tests/unit/connectors/ai/open_ai/services/test_azure_chat_completion.py b/python/tests/unit/connectors/ai/open_ai/services/test_azure_chat_completion.py index 731f0b04d2d3..c512a38f1b10 100644 --- a/python/tests/unit/connectors/ai/open_ai/services/test_azure_chat_completion.py +++ b/python/tests/unit/connectors/ai/open_ai/services/test_azure_chat_completion.py @@ -17,7 +17,6 @@ from openai.types.chat.chat_completion_message import ChatCompletionMessage from 
semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase -from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion from semantic_kernel.connectors.ai.open_ai.exceptions.content_filter_ai_exception import ( @@ -193,6 +192,30 @@ async def test_cmc( ) +@patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) +async def test_cmc_with_developer_instruction_role_propagates( + mock_create, + kernel: Kernel, + azure_openai_unit_test_env, + chat_history: ChatHistory, + mock_chat_completion_response: ChatCompletion, +) -> None: + mock_create.return_value = mock_chat_completion_response + chat_history.add_user_message("hello world") + complete_prompt_execution_settings = AzureChatPromptExecutionSettings(service_id="test_service_id") + + azure_chat_completion = AzureChatCompletion(instruction_role="developer") + await azure_chat_completion.get_chat_message_contents( + chat_history=chat_history, settings=complete_prompt_execution_settings, kernel=kernel + ) + mock_create.assert_awaited_once_with( + model=azure_openai_unit_test_env["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"], + stream=False, + messages=azure_chat_completion._prepare_chat_history_for_request(chat_history), + ) + assert azure_chat_completion.instruction_role == "developer" + + @patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) async def test_cmc_with_logit_bias( mock_create, @@ -865,7 +888,7 @@ async def test_no_kernel_provided_throws_error( prompt = "some prompt that would trigger the content filtering" chat_history.add_user_message(prompt) complete_prompt_execution_settings = AzureChatPromptExecutionSettings( - function_call_behavior=FunctionCallBehavior.AutoInvokeKernelFunctions() + function_choice_behavior=FunctionChoiceBehavior.Auto() ) test_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") @@ -891,7 +914,7 @@ async def test_auto_invoke_false_no_kernel_provided_throws_error( prompt = "some prompt that would trigger the content filtering" chat_history.add_user_message(prompt) complete_prompt_execution_settings = AzureChatPromptExecutionSettings( - function_call_behavior=FunctionCallBehavior.EnableFunctions(auto_invoke=False, filters={}) + function_choice_behavior=FunctionChoiceBehavior.Auto(auto_invoke=False) ) test_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") diff --git a/python/tests/unit/connectors/ai/open_ai/services/test_openai_chat_completion_base.py b/python/tests/unit/connectors/ai/open_ai/services/test_openai_chat_completion_base.py index db432e4db8eb..c0b1000ae159 100644 --- a/python/tests/unit/connectors/ai/open_ai/services/test_openai_chat_completion_base.py +++ b/python/tests/unit/connectors/ai/open_ai/services/test_openai_chat_completion_base.py @@ -12,7 +12,6 @@ from openai.types.chat.chat_completion_chunk import ChoiceDelta as ChunkChoiceDelta from openai.types.chat.chat_completion_message import ChatCompletionMessage -from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import ( OpenAIChatPromptExecutionSettings, @@ -115,6 +114,30 @@ async def test_cmc_singular( ) +@patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) +async def 
test_cmc_singular_with_developer_instruction_propagates( + mock_create, + kernel: Kernel, + chat_history: ChatHistory, + mock_chat_completion_response: ChatCompletion, + openai_unit_test_env, +): + mock_create.return_value = mock_chat_completion_response + chat_history.add_user_message("hello world") + complete_prompt_execution_settings = OpenAIChatPromptExecutionSettings(service_id="test_service_id") + + openai_chat_completion = OpenAIChatCompletion(instruction_role="developer") + await openai_chat_completion.get_chat_message_content( + chat_history=chat_history, settings=complete_prompt_execution_settings, kernel=kernel + ) + mock_create.assert_awaited_once_with( + model=openai_unit_test_env["OPENAI_CHAT_MODEL_ID"], + stream=False, + messages=openai_chat_completion._prepare_chat_history_for_request(chat_history), + ) + assert openai_chat_completion.instruction_role == "developer" + + @patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) async def test_cmc_prompt_execution_settings( mock_create, @@ -167,7 +190,7 @@ async def test_cmc_function_call_behavior( chat_history.add_user_message("hello world") orig_chat_history = deepcopy(chat_history) complete_prompt_execution_settings = OpenAIChatPromptExecutionSettings( - service_id="test_service_id", function_call_behavior=FunctionCallBehavior.AutoInvokeKernelFunctions() + service_id="test_service_id", function_choice_behavior=FunctionChoiceBehavior.Auto() ) with patch( "semantic_kernel.kernel.Kernel.invoke_function_call", @@ -673,7 +696,7 @@ async def test_scmc_function_call_behavior( chat_history.add_user_message("hello world") orig_chat_history = deepcopy(chat_history) complete_prompt_execution_settings = OpenAIChatPromptExecutionSettings( - service_id="test_service_id", function_call_behavior=FunctionCallBehavior.AutoInvokeKernelFunctions() + service_id="test_service_id", function_choice_behavior=FunctionChoiceBehavior.Auto() ) with patch( "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._process_function_call", diff --git a/python/tests/unit/connectors/ai/test_function_call_behavior.py b/python/tests/unit/connectors/ai/test_function_call_behavior.py deleted file mode 100644 index f9e27d6ad85c..000000000000 --- a/python/tests/unit/connectors/ai/test_function_call_behavior.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. 
- -from typing import TYPE_CHECKING -from unittest.mock import Mock - -import pytest - -from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior - -if TYPE_CHECKING: - from semantic_kernel.kernel import Kernel - - -@pytest.fixture -def function_call_behavior(): - return FunctionCallBehavior() - - -@pytest.fixture -def update_settings_callback(): - mock = Mock() - mock.return_value = None - return mock - - -def test_function_call_behavior(): - fcb = FunctionCallBehavior() - assert fcb is not None - assert fcb.enable_kernel_functions is True - assert fcb.max_auto_invoke_attempts == 5 - assert fcb.auto_invoke_kernel_functions is True - - -def test_function_call_behavior_get_set(function_call_behavior: FunctionCallBehavior): - function_call_behavior.enable_kernel_functions = False - assert function_call_behavior.enable_kernel_functions is False - function_call_behavior.max_auto_invoke_attempts = 10 - assert function_call_behavior.max_auto_invoke_attempts == 10 - assert function_call_behavior.auto_invoke_kernel_functions is True - function_call_behavior.auto_invoke_kernel_functions = False - assert function_call_behavior.auto_invoke_kernel_functions is False - assert function_call_behavior.max_auto_invoke_attempts == 0 - function_call_behavior.auto_invoke_kernel_functions = True - assert function_call_behavior.auto_invoke_kernel_functions is True - assert function_call_behavior.max_auto_invoke_attempts == 5 - - -def test_auto_invoke_kernel_functions(): - fcb = FunctionCallBehavior.AutoInvokeKernelFunctions() - assert fcb is not None - assert fcb.enable_kernel_functions is True - assert fcb.max_auto_invoke_attempts == 5 - assert fcb.auto_invoke_kernel_functions is True - - -def test_enable_kernel_functions(): - fcb = FunctionCallBehavior.EnableKernelFunctions() - assert fcb is not None - assert fcb.enable_kernel_functions is True - assert fcb.max_auto_invoke_attempts == 0 - assert fcb.auto_invoke_kernel_functions is False - - -def test_enable_functions(): - fcb = FunctionCallBehavior.EnableFunctions(auto_invoke=True, filters={"excluded_plugins": ["test"]}) - assert fcb is not None - assert fcb.enable_kernel_functions is True - assert fcb.max_auto_invoke_attempts == 5 - assert fcb.auto_invoke_kernel_functions is True - assert fcb.filters == {"excluded_plugins": ["test"]} - - -def test_required_function(): - fcb = FunctionCallBehavior.RequiredFunction(auto_invoke=True, function_fully_qualified_name="test") - assert fcb is not None - assert fcb.enable_kernel_functions is True - assert fcb.max_auto_invoke_attempts == 1 - assert fcb.auto_invoke_kernel_functions is True - assert fcb.function_fully_qualified_name == "test" - - -def test_configure_default(function_call_behavior: FunctionCallBehavior, update_settings_callback, kernel: "Kernel"): - function_call_behavior.configure(kernel, update_settings_callback, None) - assert not update_settings_callback.called - - -def test_configure_kernel_functions(update_settings_callback, kernel: "Kernel"): - fcb = FunctionCallBehavior.AutoInvokeKernelFunctions() - fcb.configure(kernel, update_settings_callback, None) - assert update_settings_callback.called - - -def test_configure_kernel_functions_skip(update_settings_callback, kernel: "Kernel"): - fcb = FunctionCallBehavior.AutoInvokeKernelFunctions() - fcb.enable_kernel_functions = False - fcb.configure(kernel, update_settings_callback, None) - assert not update_settings_callback.called - - -def test_configure_enable_kernel_functions(update_settings_callback, kernel: "Kernel"): - 
fcb = FunctionCallBehavior.EnableKernelFunctions() - fcb.configure(kernel, update_settings_callback, None) - assert update_settings_callback.called - - -def test_configure_enable_kernel_functions_skip(update_settings_callback, kernel: "Kernel"): - fcb = FunctionCallBehavior.EnableKernelFunctions() - fcb.enable_kernel_functions = False - fcb.configure(kernel, update_settings_callback, None) - assert not update_settings_callback.called - - -def test_configure_enable_functions(update_settings_callback, kernel: "Kernel"): - fcb = FunctionCallBehavior.EnableFunctions(auto_invoke=True, filters={"excluded_plugins": ["test"]}) - fcb.configure(kernel, update_settings_callback, None) - assert update_settings_callback.called - - -def test_configure_enable_functions_skip(update_settings_callback, kernel: "Kernel"): - fcb = FunctionCallBehavior.EnableFunctions(auto_invoke=True, filters={"excluded_plugins": ["test"]}) - fcb.enable_kernel_functions = False - fcb.configure(kernel, update_settings_callback, None) - assert not update_settings_callback.called - - -def test_configure_required_function(update_settings_callback, kernel: "Kernel"): - fcb = FunctionCallBehavior.RequiredFunction(auto_invoke=True, function_fully_qualified_name="test") - fcb.configure(kernel, update_settings_callback, None) - assert update_settings_callback.called - - -def test_configure_required_function_max_invoke_updated(update_settings_callback, kernel: "Kernel"): - fcb = FunctionCallBehavior.RequiredFunction(auto_invoke=True, function_fully_qualified_name="test") - fcb.max_auto_invoke_attempts = 10 - fcb.configure(kernel, update_settings_callback, None) - assert update_settings_callback.called - assert fcb.max_auto_invoke_attempts == 1 - - -def test_configure_required_function_skip(update_settings_callback, kernel: "Kernel"): - fcb = FunctionCallBehavior.RequiredFunction(auto_invoke=True, function_fully_qualified_name="test") - fcb.enable_kernel_functions = False - fcb.configure(kernel, update_settings_callback, None) - assert not update_settings_callback.called diff --git a/python/tests/unit/connectors/ai/test_function_choice_behavior.py b/python/tests/unit/connectors/ai/test_function_choice_behavior.py index 89e211881c08..58787be12988 100644 --- a/python/tests/unit/connectors/ai/test_function_choice_behavior.py +++ b/python/tests/unit/connectors/ai/test_function_choice_behavior.py @@ -8,12 +8,11 @@ if TYPE_CHECKING: from semantic_kernel.kernel import Kernel -from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior +from semantic_kernel.connectors.ai.function_calling_utils import _combine_filter_dicts from semantic_kernel.connectors.ai.function_choice_behavior import ( DEFAULT_MAX_AUTO_INVOKE_ATTEMPTS, FunctionChoiceBehavior, FunctionChoiceType, - _combine_filter_dicts, ) from semantic_kernel.exceptions import ServiceInitializationError @@ -50,38 +49,6 @@ def test_function_choice_behavior_required(): assert behavior.filters == expected_filters -def test_from_function_call_behavior_kernel_functions(): - behavior = FunctionCallBehavior.AutoInvokeKernelFunctions() - new_behavior = FunctionChoiceBehavior.from_function_call_behavior(behavior) - assert new_behavior.type_ == FunctionChoiceType.AUTO - assert new_behavior.auto_invoke_kernel_functions is True - - -def test_from_function_call_behavior_required(): - behavior = FunctionCallBehavior.RequiredFunction(auto_invoke=True, function_fully_qualified_name="plugin1-func1") - new_behavior = FunctionChoiceBehavior.from_function_call_behavior(behavior) - 
assert new_behavior.type_ == FunctionChoiceType.REQUIRED - assert new_behavior.auto_invoke_kernel_functions is True - assert new_behavior.filters == {"included_functions": ["plugin1-func1"]} - - -def test_from_function_call_behavior_enabled_functions(): - expected_filters = {"included_functions": ["plugin1-func1"]} - behavior = FunctionCallBehavior.EnableFunctions(auto_invoke=True, filters=expected_filters) - new_behavior = FunctionChoiceBehavior.from_function_call_behavior(behavior) - assert new_behavior.type_ == FunctionChoiceType.AUTO - assert new_behavior.auto_invoke_kernel_functions is True - assert new_behavior.filters == expected_filters - - -def test_from_function_call_behavior(): - behavior = FunctionCallBehavior() - new_behavior = FunctionChoiceBehavior.from_function_call_behavior(behavior) - assert new_behavior is not None - assert new_behavior.enable_kernel_functions == behavior.enable_kernel_functions - assert new_behavior.maximum_auto_invoke_attempts == behavior.max_auto_invoke_attempts - - @pytest.mark.parametrize(("type", "max_auto_invoke_attempts"), [("auto", 5), ("none", 0), ("required", 1)]) def test_auto_function_choice_behavior_from_dict(type: str, max_auto_invoke_attempts: int): data = { diff --git a/python/tests/unit/contents/test_chat_history_reducer_utils.py b/python/tests/unit/contents/test_chat_history_reducer_utils.py new file mode 100644 index 000000000000..b2f6ac2e282f --- /dev/null +++ b/python/tests/unit/contents/test_chat_history_reducer_utils.py @@ -0,0 +1,196 @@ +# Copyright (c) Microsoft. All rights reserved. + +import pytest + +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.function_call_content import FunctionCallContent +from semantic_kernel.contents.function_result_content import FunctionResultContent +from semantic_kernel.contents.history_reducer.chat_history_reducer_utils import ( + SUMMARY_METADATA_KEY, + contains_function_call_or_result, + extract_range, + get_call_result_pairs, + locate_safe_reduction_index, + locate_summarization_boundary, +) +from semantic_kernel.contents.utils.author_role import AuthorRole + + +@pytest.fixture +def chat_messages_with_pairs(): + msgs = [] + + # 1) Summary message at index 0 (system) + msg_summary = ChatMessageContent(role=AuthorRole.SYSTEM, content="Summary so far.") + msg_summary.metadata[SUMMARY_METADATA_KEY] = True + msgs.append(msg_summary) + + # 2) Normal user message + msgs.append(ChatMessageContent(role=AuthorRole.USER, content="User says hello.")) + + # 3) Function call (call ID = "call1") + msg_func_call_1 = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Function call #1") + func_call_content_1 = FunctionCallContent(id="call1", function_name="funcA", arguments={"param": "valA"}) + msg_func_call_1.items.append(func_call_content_1) + msgs.append(msg_func_call_1) + + # 4) Function result for call1 + msg_func_result_1 = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Result for call #1") + func_result_content_1 = FunctionResultContent(id="call1", content="Function #1 result text") + msg_func_result_1.items.append(func_result_content_1) + msgs.append(msg_func_result_1) + + # 5) Another user message + msgs.append(ChatMessageContent(role=AuthorRole.USER, content="Another user message.")) + + # 6) Another function call (call ID = "call2") + msg_func_call_2 = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Function call #2") + func_call_content_2 = FunctionCallContent(id="call2", function_name="funcB", arguments={"param": 
"valB"}) + msg_func_call_2.items.append(func_call_content_2) + msgs.append(msg_func_call_2) + + # 7) Another user message (no result yet for "call2") + msgs.append(ChatMessageContent(role=AuthorRole.USER, content="Wait, function result not yet?")) + + # 8) Unrelated function result (call ID = "callX" doesn't match any prior call) + msg_func_result_x = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Result for unknown call") + func_result_content_x = FunctionResultContent(id="callX", content="No matching call.") + msg_func_result_x.items.append(func_result_content_x) + msgs.append(msg_func_result_x) + + # 9) Function result for call2 + msg_func_result_2 = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Result for call #2") + func_result_content_2 = FunctionResultContent(id="call2", content="Function #2 result text") + msg_func_result_2.items.append(func_result_content_2) + msgs.append(msg_func_result_2) + + return msgs + + +def test_get_call_result_pairs_fixture_has_pairs(chat_messages_with_pairs): + """ + Since 'chat_messages_with_pairs' includes function calls with IDs, + we expect pairs. Specifically: + - (2,3) for call1 + - (5,8) for call2 + """ + pairs = get_call_result_pairs(chat_messages_with_pairs) + assert (2, 3) in pairs, "Expected pair for (call1) in indexes (2,3)." + assert (5, 8) in pairs, "Expected pair for (call2) in indexes (5,8)." + assert len(pairs) == 2, "Fixture should produce exactly two matched call->result pairs." + + +@pytest.mark.parametrize( + "message_items,expected", + [ + ([], False), + ([FunctionCallContent(function_name="funcA", arguments={})], True), + ([FunctionResultContent(id="test", content="Result")], True), + ], +) +def test_contains_function_call_or_result(message_items, expected): + msg = ChatMessageContent(role=AuthorRole.USER, content="Test") + msg.items.extend(message_items) + assert contains_function_call_or_result(msg) == expected + + +def test_extract_range_preserve_pairs(chat_messages_with_pairs): + """ + Tests that extract_range with preserve_pairs=True keeps or skips + call/result pairs together. We'll slice from index=2 to index=9 + in the updated fixture. + """ + extracted = extract_range( + chat_messages_with_pairs, + start=2, + end=9, # exclusive of index=9 + preserve_pairs=True, + ) + + # Indices in range(2..9) => 2,3,4,5,6,7,8 + # The code should preserve both pairs if they're fully in the slice. + # Pairs are (2,3) and (5,8). They are indeed fully inside [2..9). + # So we expect to keep them plus indices 4,6,7. That totals 7 messages. + assert len(extracted) == 7 + + # Instead of asserting exact positional equality, just check we + # have the same set of messages from 2..9 (no duplicates or omissions). + expected_slice = chat_messages_with_pairs[2:9] # indexes 2..8 + assert set(extracted) == set(expected_slice), "Expected messages 2..8 to be returned." + + +def test_extract_range_preserve_pairs_call_outside_slice(chat_messages_with_pairs): + """ + If a function call is outside the start/end range but the result is inside, + we do NOT have to preserve that pair since it's partially out of range. + We'll pick start=4, end=9 => indices 4..8. + """ + extracted = extract_range(chat_messages_with_pairs, start=4, end=9, preserve_pairs=True) + + # Indices in range(4..9) => 4,5,6,7,8 + # Pairs: (2,3) is outside, (5,8) is fully inside. So (5,8) is kept together. + # The final set of messages is [4,5,6,7,8] => 5 total. 
+ assert len(extracted) == 5 + + expected_slice = chat_messages_with_pairs[4:9] # indexes 4..8 + assert set(extracted) == set(expected_slice), "Expected messages 4..8 to be returned." + + # (2,3) do not appear, and that's correct since they're outside this slice. + + +def test_locate_summarization_boundary_empty(): + # Edge case: empty history => boundary = 0 + empty_history = [] + assert locate_summarization_boundary(empty_history) == 0 + + +def test_locate_safe_reduction_index_multiple_calls(chat_messages_with_pairs): + """ + If we set a small target_count, the code will attempt to find a safe + reduction index that doesn't orphan a function call/result pair. + """ + total_count = len(chat_messages_with_pairs) # 9 + target_count = 4 + idx = locate_safe_reduction_index( + chat_messages_with_pairs, + target_count=target_count, + threshold_count=0, + offset_count=0, + ) + # We expect a valid index because total_count (9) > target_count (4). + assert idx is not None and 0 < idx < total_count + + # Verify that from idx onward, we haven't split a matched call->result pair. + pairs = get_call_result_pairs(chat_messages_with_pairs) + for call_i, result_i in pairs: + if call_i >= idx: + # If the call is in the reduced set, the result must be in the reduced set: + assert result_i >= idx + if result_i >= idx: + # If the result is in the reduced set, the call must be in the reduced set: + assert call_i >= idx + + +def test_locate_safe_reduction_index_high_offset(chat_messages_with_pairs): + """ + If offset_count is large, we might not be able to reduce. Then the function + can return None if no valid reduction can be found after skipping the offset. + """ + target_count = 3 + threshold_count = 0 + offset_count = 5 + + idx = locate_safe_reduction_index( + chat_messages_with_pairs, + target_count=target_count, + threshold_count=threshold_count, + offset_count=offset_count, + ) + + # Possibly None if we cannot reduce after skipping the first 5 messages. + if idx is not None: + # Then it must be >= offset_count + assert idx >= offset_count + else: + # It's fine if it returns None, meaning no valid safe reduction was found. + pass diff --git a/python/tests/unit/contents/test_chat_history_summarization_reducer.py b/python/tests/unit/contents/test_chat_history_summarization_reducer.py new file mode 100644 index 000000000000..35e13c969522 --- /dev/null +++ b/python/tests/unit/contents/test_chat_history_summarization_reducer.py @@ -0,0 +1,202 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase +from semantic_kernel.const import DEFAULT_SERVICE_NAME +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.history_reducer.chat_history_reducer_utils import SUMMARY_METADATA_KEY +from semantic_kernel.contents.history_reducer.chat_history_summarization_reducer import ( + DEFAULT_SUMMARIZATION_PROMPT, + ChatHistorySummarizationReducer, +) +from semantic_kernel.contents.utils.author_role import AuthorRole +from semantic_kernel.exceptions.content_exceptions import ChatHistoryReducerException + + +@pytest.fixture +def mock_service(): + """Returns a mock ChatCompletionClientBase with required methods.""" + service = MagicMock(spec=ChatCompletionClientBase) + # Mock the get_prompt_execution_settings_class to return a placeholder + service.get_prompt_execution_settings_class.return_value = MagicMock(return_value=MagicMock(service_id="foo")) + # Mock the async call get_chat_message_content + service.get_chat_message_content = AsyncMock() + return service + + +@pytest.fixture +def chat_messages(): + """Returns a list of ChatMessageContent objects with default roles.""" + msgs = [] + + # Existing summary + summary_msg = ChatMessageContent(role=AuthorRole.SYSTEM, content="Prior summary.") + summary_msg.metadata[SUMMARY_METADATA_KEY] = True + msgs.append(summary_msg) + + # Normal user messages + msgs.append(ChatMessageContent(role=AuthorRole.USER, content="Hello!")) + msgs.append(ChatMessageContent(role=AuthorRole.ASSISTANT, content="Hi there.")) + msgs.append(ChatMessageContent(role=AuthorRole.USER, content="What can you do?")) + msgs.append(ChatMessageContent(role=AuthorRole.ASSISTANT, content="I can help with tasks.")) + msgs.append(ChatMessageContent(role=AuthorRole.USER, content="Ok, let's do something.")) + return msgs + + +def test_summarization_reducer_init(mock_service): + reducer = ChatHistorySummarizationReducer( + service=mock_service, + target_count=10, + service_id="my_service", + threshold_count=5, + summarization_instructions="Custom instructions", + use_single_summary=False, + fail_on_error=False, + ) + + assert reducer.service == mock_service + assert reducer.target_count == 10 + assert reducer.service_id == "my_service" + assert reducer.threshold_count == 5 + assert reducer.summarization_instructions == "Custom instructions" + assert reducer.use_single_summary is False + assert reducer.fail_on_error is False + + +def test_summarization_reducer_defaults(mock_service): + reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=5) + # Check default property values + assert reducer.threshold_count == 0 + assert reducer.summarization_instructions == DEFAULT_SUMMARIZATION_PROMPT + assert reducer.use_single_summary is True + assert reducer.fail_on_error is True + assert reducer.service_id == DEFAULT_SERVICE_NAME + + +def test_summarization_reducer_eq_and_hash(mock_service): + r1 = ChatHistorySummarizationReducer(service=mock_service, target_count=5, threshold_count=2) + r2 = ChatHistorySummarizationReducer(service=mock_service, target_count=5, threshold_count=2) + r3 = ChatHistorySummarizationReducer(service=mock_service, target_count=6, threshold_count=2) + assert r1 == r2 + assert r1 != r3 + + # Test hash + assert hash(r1) == hash(r2) + assert hash(r1) != hash(r3) + + +async def test_summarization_reducer_reduce_no_need(chat_messages, mock_service): + reducer =
ChatHistorySummarizationReducer(service=mock_service, target_count=10, threshold_count=0) + + # If len(history) <= target_count => None + result = await reducer.reduce() + assert result is None + mock_service.get_chat_message_content.assert_not_awaited() + + +async def test_summarization_reducer_reduce_needed(mock_service): + messages = [ + # A summary message (as in the original test) + ChatMessageContent(role=AuthorRole.SYSTEM, content="Existing summary", metadata={SUMMARY_METADATA_KEY: True}), + # Enough additional messages so total is > 4 + ChatMessageContent(role=AuthorRole.USER, content="User says hello"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds"), + ChatMessageContent(role=AuthorRole.USER, content="User says more"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds again"), + ChatMessageContent(role=AuthorRole.USER, content="User says more"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds again"), + ] + + reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3, threshold_count=1) + reducer.messages = messages # Set the chat history + + # Mock that the service will return a single summary message + summary_content = ChatMessageContent(role=AuthorRole.ASSISTANT, content="This is a summary.") + mock_service.get_chat_message_content.return_value = summary_content + + result = await reducer.reduce() + assert result is not None, "We expect a shortened list with a new summary inserted." + assert len(result) <= 5, "The resulting list should be shortened to around target_count + threshold_count." + assert any(msg.metadata.get(SUMMARY_METADATA_KEY) for msg in result), ( + "We expect to see a newly inserted summary message." + ) + + +async def test_summarization_reducer_reduce_no_messages_to_summarize(mock_service): + # If we do use_single_summary=False, the older_range_start is insertion_point + # In that scenario, if insertion_point == older_range_end => no messages to summarize => return None + reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=1, use_single_summary=False) + + # Provide just one message flagged as summary => insertion_point=0, so older_range_start=0, older_range_end=0 + only_summary = [ + ChatMessageContent(role=AuthorRole.SYSTEM, content="Only summary.", metadata={SUMMARY_METADATA_KEY: True}) + ] + + reducer.add_message(only_summary[0]) + result = await reducer.reduce() + assert result is None + mock_service.get_chat_message_content.assert_not_awaited() + + +async def test_summarization_reducer_reduce_summarizer_returns_none(mock_service): + # If the summarizer yields no messages, we return None + reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3) + # Provide enough messages that summarization would normally occur + messages = [ + ChatMessageContent(role=AuthorRole.SYSTEM, content="Existing summary", metadata={SUMMARY_METADATA_KEY: True}), + ChatMessageContent(role=AuthorRole.USER, content="User asks something"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant replies"), + ChatMessageContent(role=AuthorRole.USER, content="Another user query"), + ] + reducer.messages = messages + + # Summarizer returns None + mock_service.get_chat_message_content.return_value = None + + result = await reducer.reduce() + assert result is None, "If the summarizer yields no message, we return None." 
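+
+
+async def test_summarization_reducer_threshold_arithmetic_sketch(mock_service):
+    """A worked sketch of the trigger arithmetic (assumption, consistent with the
+    tests above: reduce() is a no-op while len(messages) <= target_count + threshold_count)."""
+    reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3, threshold_count=1)
+    # Exactly at the limit: 4 messages, and 4 <= 3 + 1 => no reduction, no service call
+    reducer.messages = [ChatMessageContent(role=AuthorRole.USER, content=f"m{i}") for i in range(4)]
+    assert await reducer.reduce() is None
+    mock_service.get_chat_message_content.assert_not_awaited()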
+
+
+async def test_summarization_reducer_reduce_summarization_fails(mock_service):
+    # If summarization fails and fail_on_error=True, we raise
+    reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3, fail_on_error=True)
+    # Enough messages to trigger summarization
+    messages = [
+        ChatMessageContent(role=AuthorRole.USER, content="Msg1"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Msg2"),
+        ChatMessageContent(role=AuthorRole.USER, content="Msg3"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Msg4"),
+    ]
+    reducer.messages = messages
+
+    mock_service.get_chat_message_content.side_effect = Exception("Summarizer error")
+
+    with pytest.raises(ChatHistoryReducerException, match="failed"):
+        await reducer.reduce()
+
+
+async def test_summarization_reducer_reduce_summarization_fails_no_raise(chat_messages, mock_service):
+    # If summarization fails but fail_on_error=False, we just log and return None
+    reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3, fail_on_error=False)
+    mock_service.get_chat_message_content.side_effect = Exception("Summarizer error")
+    reducer.messages = chat_messages
+    result = await reducer.reduce()
+    assert result is None
+
+
+async def test_summarization_reducer_private_summarize(mock_service):
+    """Directly test the _summarize method for coverage."""
+    reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=5)
+    chat_messages = [
+        ChatMessageContent(role=AuthorRole.USER, content="Message1"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Message2"),
+    ]
+
+    summary_content = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Mock Summary")
+    mock_service.get_chat_message_content.return_value = summary_content
+
+    actual_summary = await reducer._summarize(chat_messages)
+    assert actual_summary is not None, "We should get a summary message back."
+    assert actual_summary.content == "Mock Summary", "We expect the mock summary content."
diff --git a/python/tests/unit/contents/test_chat_history_truncation_reducer.py b/python/tests/unit/contents/test_chat_history_truncation_reducer.py
new file mode 100644
index 000000000000..7f94eccf8518
--- /dev/null
+++ b/python/tests/unit/contents/test_chat_history_truncation_reducer.py
@@ -0,0 +1,71 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import pytest
+
+from semantic_kernel.contents.chat_message_content import ChatMessageContent
+from semantic_kernel.contents.history_reducer.chat_history_truncation_reducer import ChatHistoryTruncationReducer
+from semantic_kernel.contents.utils.author_role import AuthorRole
+
+
+@pytest.fixture
+def chat_messages():
+    msgs = []
+    msgs.append(ChatMessageContent(role=AuthorRole.SYSTEM, content="System message."))
+    msgs.append(ChatMessageContent(role=AuthorRole.USER, content="User message 1"))
+    msgs.append(ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant message 1"))
+    msgs.append(ChatMessageContent(role=AuthorRole.USER, content="User message 2"))
+    msgs.append(ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant message 2"))
+    return msgs
+
+
+def test_truncation_reducer_init():
+    reducer = ChatHistoryTruncationReducer(target_count=5, threshold_count=2)
+    assert reducer.target_count == 5
+    assert reducer.threshold_count == 2
+
+
+def test_truncation_reducer_defaults():
+    reducer = ChatHistoryTruncationReducer(target_count=5)
+    assert reducer.threshold_count == 0
+
+
+def test_truncation_reducer_eq_and_hash():
+    r1 = ChatHistoryTruncationReducer(target_count=5, threshold_count=2)
+    r2 = ChatHistoryTruncationReducer(target_count=5, threshold_count=2)
+    r3 = ChatHistoryTruncationReducer(target_count=5, threshold_count=1)
+    assert r1 == r2
+    assert r1 != r3
+    assert hash(r1) == hash(r2)
+    assert hash(r1) != hash(r3)
+
+
+async def test_truncation_reducer_no_need(chat_messages):
+    # If total <= target + threshold => returns None
+    reducer = ChatHistoryTruncationReducer(target_count=5, threshold_count=0)
+    reducer.messages = chat_messages  # 5 messages <= 5 + 0 => no reduction
+    result = await reducer.reduce()
+    assert result is None
+
+
+async def test_truncation_reducer_no_truncation_index_found():
+    # With a huge threshold_count, total (10) <= target_count + threshold_count (13),
+    # so no reduction takes place and reduce() returns None.
+    msgs = [ChatMessageContent(role=AuthorRole.USER, content="Msg")] * 10
+    reducer = ChatHistoryTruncationReducer(target_count=3, threshold_count=10)
+    reducer.messages = msgs
+    result = await reducer.reduce()
+    assert result is None
+
+
+async def test_truncation_reducer_truncation(chat_messages):
+    # Force a smaller target so reduction is required
+    reducer = ChatHistoryTruncationReducer(target_count=2)
+    reducer.messages = chat_messages
+    result = await reducer.reduce()
+    # We expect only 2 messages to remain after truncation
+    assert result is not None
+    assert len(result) == 2
+    # They should be the last 2 messages
+    assert result[0] == chat_messages[-2]
+    assert result[1] == chat_messages[-1]
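+
+
+async def test_truncation_reducer_keeps_newest_messages_sketch():
+    """An end-to-end sketch (it assumes only behavior asserted above: truncation
+    keeps the newest target_count messages and returns the shortened history)."""
+    history = [ChatMessageContent(role=AuthorRole.USER, content=f"turn {i}") for i in range(10)]
+    reducer = ChatHistoryTruncationReducer(target_count=2)
+    reducer.messages = history
+    reduced = await reducer.reduce()
+    assert reduced is not None
+    assert len(reduced) == 2
+    # Only the two most recent turns survive
+    assert reduced[0] == history[-2]
+    assert reduced[1] == history[-1]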