From 4d29656780a16b0a121163cf88653b041b89e15b Mon Sep 17 00:00:00 2001
From: Evan Mattson <35585003+moonbox3@users.noreply.github.com>
Date: Fri, 24 Jan 2025 07:30:54 +0900
Subject: [PATCH] Python: Introduce the chat history reducer (#10190)

### Motivation and Context

The SK Python framework has been missing the ability to configure a chat
history reducer of type `ChatHistoryTruncationReducer` or
`ChatHistorySummarizationReducer`, both of which have existed in the .NET
SK Agent framework for some time.

The goal of this PR is to introduce the chat history reducers and make
them usable not only by the agent framework, but also by anything else
that uses a chat history (chat completion, for example). The
ChatHistoryReducer extends the ChatHistory class, so it is simple to
include a reducer and the logic to reduce messages as one manages the
chat history, either in an agent framework setting or in a chat
completion setting.

<!-- Thank you for your contribution to the semantic-kernel repo!
Please help reviewers and future users, providing the following
information:
  1. Why is this change required?
  2. What problem does it solve?
  3. What scenario does it contribute to?
  4. If it fixes an open issue, please link to the issue here.
-->

### Description

This PR:
- Introduces the chat history reducer functionality in Python -- both
the `ChatHistoryTruncationReducer` and
`ChatHistorySummarizationReducer`.
- Adds unit tests for code coverage.
- Adds a sample `Chat Completion History Reducer` to show how to
configure both reducers and what each parameter does.
- Adds chat completion samples showing how to manage a chat history
reducer, including how to specify that function call content and
function result content should be part of the summarization payload.
- Updates the agent `SelectionStrategy`, `KernelFunctionSelectionStrategy`,
and `KernelFunctionTerminationStrategy` to use the reducer.
- Additionally updates the classes above to use a new `select_agent`
abstract method so that one can define an initial agent to run in a
particular scenario.
- Removes the deprecated `FunctionCallBehavior` class, and removes some
nasty circular dependencies that we had lurking in the code base for
some time. This `FunctionCallBehavior` has been marked with a
deprecation warning for 6+ months now. All samples and docs have moved
over to use `FunctionChoiceBehavior` - developers using
`FunctionCallBehavior` should have had enough time to switch.
- Closes #7969
- Closes #10102

<!-- Describe your changes, the overall approach, the underlying design.
These notes will help understanding how your code works. Thanks! -->

### Contribution Checklist

<!-- Before submitting this PR, please make sure: -->

- [X] The code builds clean without any errors or warnings
- [X] The PR follows the [SK Contribution
Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md)
and the [pre-submission formatting
script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts)
raises no violations
- [X] All unit tests pass, and I have added new tests where possible
- [X] I didn't break anyone :smile:
---
 python/samples/concepts/README.md             |   4 +
 .../agents/chat_completion_history_reducer.py | 298 ++++++++++++++++++
 ...le_chatbot_with_summary_history_reducer.py | 156 +++++++++
 ...mmary_history_reducer_keep_func_content.py | 200 ++++++++++++
 ...chatbot_with_truncation_history_reducer.py | 160 ++++++++++
 ...nai_function_calling_with_custom_plugin.py |   5 +-
 .../simple_reasoning_function_calling.py      |   4 +-
 .../setup/chat_completion_services.py         |  71 +++--
 python/semantic_kernel/agents/agent.py        |  28 +-
 .../agents/channels/chat_history_channel.py   |   6 +
 .../chat_completion/chat_completion_agent.py  |   5 +
 .../kernel_function_selection_strategy.py     |  20 +-
 .../selection/selection_strategy.py           |  42 ++-
 .../sequential_selection_strategy.py          |  63 +++-
 .../kernel_function_termination_strategy.py   |   8 +
 .../anthropic_prompt_execution_settings.py    |   2 +-
 .../services/anthropic_chat_completion.py     |   8 +-
 .../connectors/ai/anthropic/services/utils.py |  13 +-
 .../services/azure_ai_inference_base.py       |  15 +-
 .../azure_ai_inference_chat_completion.py     |  42 ++-
 .../services/bedrock_chat_completion.py       |   4 +-
 .../bedrock/services/model_provider/utils.py  |  12 +-
 .../ai/chat_completion_client_base.py         |  39 +--
 .../connectors/ai/function_call_behavior.py   | 209 ------------
 .../connectors/ai/function_calling_utils.py   |  19 +-
 .../connectors/ai/function_choice_behavior.py |  42 +--
 .../connectors/ai/function_choice_type.py     |  14 +
 .../services/google_ai_chat_completion.py     |  14 +-
 .../ai/google/google_ai/services/utils.py     |  12 +-
 .../ai/google/vertex_ai/services/utils.py     |  11 +-
 .../services/vertex_ai_chat_completion.py     |  18 +-
 .../services/mistral_ai_chat_completion.py    |  10 +-
 .../ollama/services/ollama_chat_completion.py |   4 +-
 .../connectors/ai/ollama/services/utils.py    |  11 +-
 .../open_ai_prompt_execution_settings.py      |  34 --
 .../open_ai/services/azure_chat_completion.py |   4 +
 .../ai/open_ai/services/azure_config_base.py  |   5 +
 .../services/open_ai_chat_completion.py       |   3 +
 .../services/open_ai_chat_completion_base.py  |  51 ++-
 .../open_ai/services/open_ai_config_base.py   |   5 +
 python/semantic_kernel/contents/__init__.py   |   4 +
 .../contents/function_call_content.py         |  11 +-
 .../contents/history_reducer/__init__.py      |   0
 .../history_reducer/chat_history_reducer.py   |  31 ++
 .../chat_history_reducer_utils.py             | 211 +++++++++++++
 .../chat_history_summarization_reducer.py     | 226 +++++++++++++
 .../chat_history_truncation_reducer.py        |  83 +++++
 .../exceptions/agent_exceptions.py            |   6 +
 .../exceptions/content_exceptions.py          |   7 +
 .../unit/agents/test_chat_completion_agent.py |   3 +-
 .../unit/agents/test_chat_history_channel.py  |   3 +
 .../test_sequential_strategy_selection.py     |  41 ++-
 ...test_azure_ai_inference_chat_completion.py |   3 +-
 .../services/test_azure_chat_completion.py    |  29 +-
 .../test_openai_chat_completion_base.py       |  29 +-
 .../ai/test_function_call_behavior.py         | 144 ---------
 .../ai/test_function_choice_behavior.py       |  35 +-
 .../test_chat_history_reducer_utils.py        | 196 ++++++++++++
 ...test_chat_history_summarization_reducer.py | 202 ++++++++++++
 .../test_chat_history_truncation_reducer.py   |  71 +++++
 60 files changed, 2372 insertions(+), 634 deletions(-)
 create mode 100644 python/samples/concepts/agents/chat_completion_history_reducer.py
 create mode 100644 python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer.py
 create mode 100644 python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer_keep_func_content.py
 create mode 100644 python/samples/concepts/chat_completion/simple_chatbot_with_truncation_history_reducer.py
 delete mode 100644 python/semantic_kernel/connectors/ai/function_call_behavior.py
 create mode 100644 python/semantic_kernel/connectors/ai/function_choice_type.py
 create mode 100644 python/semantic_kernel/contents/history_reducer/__init__.py
 create mode 100644 python/semantic_kernel/contents/history_reducer/chat_history_reducer.py
 create mode 100644 python/semantic_kernel/contents/history_reducer/chat_history_reducer_utils.py
 create mode 100644 python/semantic_kernel/contents/history_reducer/chat_history_summarization_reducer.py
 create mode 100644 python/semantic_kernel/contents/history_reducer/chat_history_truncation_reducer.py
 delete mode 100644 python/tests/unit/connectors/ai/test_function_call_behavior.py
 create mode 100644 python/tests/unit/contents/test_chat_history_reducer_utils.py
 create mode 100644 python/tests/unit/contents/test_chat_history_summarization_reducer.py
 create mode 100644 python/tests/unit/contents/test_chat_history_truncation_reducer.py

diff --git a/python/samples/concepts/README.md b/python/samples/concepts/README.md
index 3c62b4156cf7..22f0496e43e6 100644
--- a/python/samples/concepts/README.md
+++ b/python/samples/concepts/README.md
@@ -10,6 +10,7 @@
 - [Assistant Agent Retrieval](./agents/assistant_agent_retrieval.py)
 - [Assistant Agent Streaming](./agents/assistant_agent_streaming.py)
 - [Chat Completion Function Termination](./agents/chat_completion_function_termination.py)
+- [Chat Completion History Reducer](./agents/chat_completion_history_reducer.py)
 - [Mixed Chat Agents](./agents/mixed_chat_agents.py)
 - [Mixed Chat Agents Plugins](./agents/mixed_chat_agents_plugins.py)
 - [Mixed Chat Files](./agents/mixed_chat_files.py)
@@ -45,6 +46,9 @@
 - [Simple Chatbot Store Metadata](./chat_completion/simple_chatbot_store_metadata.py)
 - [Simple Chatbot Streaming](./chat_completion/simple_chatbot_streaming.py)
 - [Simple Chatbot with Image](./chat_completion/simple_chatbot_with_image.py)
+- [Simple Chatbot with Summary History Reducer Keeping Function Content](./chat_completion/simple_chatbot_with_summary_history_reducer_keep_func_content.py)
+- [Simple Chatbot with Summary History Reducer](./chat_completion/simple_chatbot_with_summary_history_reducer.py)
+- [Simple Chatbot with Truncation History Reducer](./chat_completion/simple_chatbot_with_truncation_history_reducer.py)
 
 ### ChatHistory - Using and serializing the [`ChatHistory`](https://github.com/microsoft/semantic-kernel/blob/main/python/semantic_kernel/contents/chat_history.py)
 
diff --git a/python/samples/concepts/agents/chat_completion_history_reducer.py b/python/samples/concepts/agents/chat_completion_history_reducer.py
new file mode 100644
index 000000000000..1cdffefe7b78
--- /dev/null
+++ b/python/samples/concepts/agents/chat_completion_history_reducer.py
@@ -0,0 +1,298 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+import logging
+from typing import TYPE_CHECKING
+
+from semantic_kernel.agents import (
+    AgentGroupChat,
+    ChatCompletionAgent,
+)
+from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion, OpenAIChatCompletion
+from semantic_kernel.contents import AuthorRole, ChatHistory, ChatMessageContent
+from semantic_kernel.contents.history_reducer.chat_history_summarization_reducer import ChatHistorySummarizationReducer
+from semantic_kernel.contents.history_reducer.chat_history_truncation_reducer import ChatHistoryTruncationReducer
+from semantic_kernel.kernel import Kernel
+
+if TYPE_CHECKING:
+    from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer
+
+#####################################################################
+# The following sample demonstrates how to implement a chat history #
+# reducer as part of the Semantic Kernel Agent Framework. It        #
+# covers two types of reducers: summarization reduction and a       #
+# truncation reduction. For this sample, the ChatCompletionAgent    #
+# is used.                                                          #
+#####################################################################
+
+
+# Initialize the logger for debugging and information messages
+logger = logging.getLogger(__name__)
+
+# Flag to determine whether to use Azure OpenAI services or OpenAI
+# Set this to True if using Azure OpenAI (requires appropriate configuration)
+use_azure_openai = True
+
+
+# Helper function to create and configure a Kernel with the desired chat completion service
+def _create_kernel_with_chat_completion(service_id: str) -> Kernel:
+    """A helper function to create a kernel with a chat completion service."""
+    kernel = Kernel()
+    if use_azure_openai:
+        # Add Azure OpenAI service to the kernel
+        kernel.add_service(AzureChatCompletion(service_id=service_id))
+    else:
+        # Add OpenAI service to the kernel
+        kernel.add_service(OpenAIChatCompletion(service_id=service_id))
+    return kernel
+
+
+class HistoryReducerExample:
+    """
+    Demonstrates how to create a ChatCompletionAgent with a ChatHistoryReducer
+    (either truncation or summarization) and how to invoke that agent
+    multiple times while applying the history reduction.
+    """
+
+    # Agent-specific settings
+    TRANSLATOR_NAME = "NumeroTranslator"  # Name of the agent
+    TRANSLATOR_INSTRUCTIONS = "Add one to the latest user number and spell it in Spanish without explanation."
+
+    def create_truncating_agent(
+        self, reducer_msg_count: int, reducer_threshold: int
+    ) -> tuple[ChatCompletionAgent, "ChatHistoryReducer"]:
+        """
+        Creates a ChatCompletionAgent with a truncation-based history reducer.
+
+        Parameters:
+        - reducer_msg_count: Target number of messages to retain after truncation.
+        - reducer_threshold: Threshold number of messages to trigger truncation.
+
+        Returns:
+        - A tuple of the configured ChatCompletionAgent and the truncation reducer it uses.
+        """
+        truncation_reducer = ChatHistoryTruncationReducer(
+            target_count=reducer_msg_count, threshold_count=reducer_threshold
+        )
+
+        return ChatCompletionAgent(
+            name=self.TRANSLATOR_NAME,
+            instructions=self.TRANSLATOR_INSTRUCTIONS,
+            kernel=_create_kernel_with_chat_completion("truncate_agent"),
+            history_reducer=truncation_reducer,
+        ), truncation_reducer
+
+    def create_summarizing_agent(
+        self, reducer_msg_count: int, reducer_threshold: int
+    ) -> tuple[ChatCompletionAgent, "ChatHistoryReducer"]:
+        """
+        Creates a ChatCompletionAgent with a summarization-based history reducer.
+
+        Parameters:
+        - reducer_msg_count: Target number of messages to retain after summarization.
+        - reducer_threshold: Threshold number of messages to trigger summarization.
+
+        Returns:
+        - A tuple of the configured ChatCompletionAgent and the summarization reducer it uses.
+        """
+        kernel = _create_kernel_with_chat_completion("summarize_agent")
+
+        summarization_reducer = ChatHistorySummarizationReducer(
+            service=kernel.get_service(service_id="summarize_agent"),
+            target_count=reducer_msg_count,
+            threshold_count=reducer_threshold,
+        )
+
+        return ChatCompletionAgent(
+            name=self.TRANSLATOR_NAME,
+            instructions=self.TRANSLATOR_INSTRUCTIONS,
+            kernel=kernel,
+            history_reducer=summarization_reducer,
+        ), summarization_reducer
+
+    async def invoke_agent(self, agent: ChatCompletionAgent, chat_history: ChatHistory, message_count: int):
+        """
+        Demonstrates agent invocation with direct history management and reduction.
+
+        Parameters:
+        - agent: The ChatCompletionAgent to invoke.
+        - message_count: The number of messages to simulate in the conversation.
+        """
+
+        index = 1
+        while index <= message_count:
+            # Provide user input
+            user_message = ChatMessageContent(role=AuthorRole.USER, content=str(index))
+            chat_history.messages.append(user_message)
+            print(f"# User: '{index}'")
+
+            # Attempt history reduction if a reducer is present
+            is_reduced = False
+            if agent.history_reducer is not None:
+                reduced = await agent.history_reducer.reduce()
+                if reduced is not None:
+                    chat_history.messages.clear()
+                    chat_history.messages.extend(reduced)
+                    is_reduced = True
+                    print("@ (History was reduced!)")
+
+            # Invoke the agent and display its response
+            async for response in agent.invoke(chat_history):
+                chat_history.messages.append(response)
+                print(f"# {response.role} - {response.name}: '{response.content}'")
+
+            # The index is incremented by 2 because the agent is told to:
+            # "Add one to the latest user number and spell it in Spanish without explanation."
+            # The user sends 1, 3, 5, etc., and the agent responds with 2, 4, 6, etc. (in Spanish)
+            index += 2
+            print(f"@ Message Count: {len(chat_history.messages)}\n")
+
+            # If history was reduced, and the chat history is of type `ChatHistorySummarizationReducer`,
+            # print summaries as it will contain the __summary__ metadata key.
+            if is_reduced and isinstance(chat_history, ChatHistorySummarizationReducer):
+                self._print_summaries_from_front(chat_history.messages)
+
+    async def invoke_chat(self, agent: ChatCompletionAgent, message_count: int):
+        """
+        Demonstrates agent invocation within a group chat.
+
+        Parameters:
+        - agent: The ChatCompletionAgent to invoke.
+        - message_count: The number of messages to simulate in the conversation.
+        """
+        chat = AgentGroupChat()  # Initialize a new group chat
+        last_history_count = 0
+
+        index = 1
+        while index <= message_count:
+            # Add user message to the chat
+            user_msg = ChatMessageContent(role=AuthorRole.USER, content=str(index))
+            await chat.add_chat_message(user_msg)
+            print(f"# User: '{index}'")
+
+            # Invoke the agent and display its response
+            async for message in chat.invoke(agent):
+                print(f"# {message.role} - {message.name or '*'}: '{message.content}'")
+
+            # The index is incremented by 2 because the agent is told to:
+            # "Add one to the latest user number and spell it in Spanish without explanation."
+            # The user sends 1, 3, 5, etc., and the agent responds with 2, 4, 6, etc. (in Spanish)
+            index += 2
+
+            # Retrieve chat messages in descending order (newest first)
+            msgs = []
+            async for m in chat.get_chat_messages(agent):
+                msgs.append(m)
+
+            print(f"@ Message Count: {len(msgs)}\n")
+
+            # Check for reduction in message count and print summaries
+            if len(msgs) < last_history_count:
+                self._print_summaries_from_back(msgs)
+
+            last_history_count = len(msgs)
+
+    def _print_summaries_from_front(self, messages: list[ChatMessageContent]):
+        """
+        Prints summaries from the front of the message list.
+
+        Parameters:
+        - messages: List of chat messages to process.
+        """
+        summary_index = 0
+        while summary_index < len(messages):
+            msg = messages[summary_index]
+            if msg.metadata and msg.metadata.get("__summary__"):
+                print(f"\tSummary: {msg.content}")
+                summary_index += 1
+            else:
+                break
+
+    def _print_summaries_from_back(self, messages: list[ChatMessageContent]):
+        """
+        Prints summaries from the back of the message list.
+
+        Parameters:
+        - messages: List of chat messages to process.
+        """
+        summary_index = len(messages) - 1
+        while summary_index >= 0:
+            msg = messages[summary_index]
+            if msg.metadata and msg.metadata.get("__summary__"):
+                print(f"\tSummary: {msg.content}")
+                summary_index -= 1
+            else:
+                break
+
+
+# Main entry point for the script
+async def main():
+    # Initialize the example class
+    example = HistoryReducerExample()
+
+    # Demonstrate truncation-based reduction
+    trunc_agent, history_reducer = example.create_truncating_agent(
+        # reducer_msg_count:
+        # Purpose: Defines the target number of messages to retain after applying truncation or summarization.
+        # What it controls: This parameter determines how much of the most recent conversation history
+        #                   is preserved while discarding or summarizing older messages.
+        # Why change it?:
+        # - Smaller values: Use when memory constraints are tight, or the assistant only needs a brief history
+        #   to maintain context.
+        # - Larger values: Use when retaining more conversational context is critical for accurate responses
+        #   or maintaining a richer dialogue.
+        reducer_msg_count=10,
+        # reducer_threshold:
+        # Purpose: Acts as a buffer to avoid reducing history prematurely when the current message count exceeds
+        #          reducer_msg_count by a small margin.
+        # What it controls: Helps ensure that essential paired messages (like a user query and the assistant’s response)
+        #                   are not "orphaned" or lost during truncation or summarization.
+        # Why change it?:
+        # - Smaller values: Use when you want stricter reduction criteria and are okay with possibly cutting older
+        #   pairs of messages sooner.
+        # - Larger values: Use when you want to minimize the risk of cutting a critical part of the conversation,
+        #   especially for sensitive interactions like API function calls or complex responses.
+        reducer_threshold=10,
+    )
+    # print("===TruncatedAgentReduction Demo===")
+    # await example.invoke_agent(trunc_agent, chat_history=history_reducer, message_count=50)
+
+    # Demonstrate summarization-based reduction
+    sum_agent, history_reducer = example.create_summarizing_agent(
+        # Same configuration for summarization-based reduction
+        reducer_msg_count=10,  # Target number of messages to retain
+        reducer_threshold=10,  # Buffer to avoid premature reduction
+    )
+    print("\n===SummarizedAgentReduction Demo===")
+    await example.invoke_agent(sum_agent, chat_history=history_reducer, message_count=50)
+
+    # Demonstrate group chat with truncation
+    print("\n===TruncatedChatReduction Demo===")
+    trunc_agent.history_reducer.messages.clear()
+    await example.invoke_chat(trunc_agent, message_count=50)
+
+    # Demonstrate group chat with summarization
+    print("\n===SummarizedChatReduction Demo===")
+    sum_agent.history_reducer.messages.clear()
+    await example.invoke_chat(sum_agent, message_count=50)
+
+
+# Interaction between reducer_msg_count and reducer_threshold:
+# The combination of these values determines when reduction occurs and how much history is kept.
+# Example:
+# If reducer_msg_count = 10 and reducer_threshold = 5, history will not be truncated until the total message count
+# exceeds 15. This approach ensures flexibility in retaining conversational context while still adhering to memory
+# constraints.
+
+# Recommendations:
+# - Adjust for performance: Use a lower reducer_msg_count in environments with limited memory or when the assistant
+#   needs faster processing times.
+# - Context sensitivity: Increase reducer_msg_count and reducer_threshold in use cases where maintaining continuity
+#   across multiple interactions is essential (e.g., multi-turn conversations or complex workflows).
+# - Experiment: Start with the default values (10 and 10) and refine based on your application's behavior and the
+#   assistant's response quality.
+
+
+# Execute the main function if the script is run directly
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer.py b/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer.py
new file mode 100644
index 000000000000..338c76519b0e
--- /dev/null
+++ b/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer.py
@@ -0,0 +1,156 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+
+from samples.concepts.setup.chat_completion_services import (
+    Services,
+    get_chat_completion_service_and_request_settings,
+)
+from semantic_kernel import Kernel
+from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
+from semantic_kernel.contents import ChatHistorySummarizationReducer
+from semantic_kernel.core_plugins.time_plugin import TimePlugin
+from semantic_kernel.functions import KernelArguments
+
+# This sample shows how to create a chatbot using a kernel function and leverage a chat history
+# summarization reducer.
+# This sample uses the following main components:
+# - a ChatCompletionService: This component is responsible for generating responses to user messages.
+# - a Chat History Reducer: This component is responsible for keeping track of and reducing the chat history.
+#                           A Chat History Reducer is a subclass of ChatHistory that provides additional
+#                           functionality to reduce the history.
+# - a KernelFunction: This function will be a prompt function, meaning the function is composed of
+#                     a prompt and will be invoked by Semantic Kernel.
+# The chatbot in this sample is called Mosscap, who responds to user messages with long flowery prose.
+
+# [NOTE]
+# The purpose of this sample is to demonstrate how to use a kernel function and use a chat history reducer.
+# To build a basic chatbot, it is sufficient to use a ChatCompletionService with a chat history directly.
+
+# You can select from the following chat completion services:
+# - Services.OPENAI
+# - Services.AZURE_OPENAI
+# - Services.AZURE_AI_INFERENCE
+# - Services.ANTHROPIC
+# - Services.BEDROCK
+# - Services.GOOGLE_AI
+# - Services.MISTRAL_AI
+# - Services.OLLAMA
+# - Services.ONNX
+# - Services.VERTEX_AI
+# Please make sure you have configured your environment correctly for the selected chat completion service.
+chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI)
+
+# This is the system message that gives the chatbot its personality.
+system_message = """
+You are a chat bot. Your name is Mosscap and
+you have one goal: figure out what people need.
+Your full name, should you need to know it, is
+Splendid Speckled Mosscap. You communicate
+effectively, but you tend to answer with long
+flowery prose.
+"""
+
+# Create a kernel and register a prompt function.
+# The prompt here contains two variables: chat_history and user_input.
+# They will be replaced by the kernel with the actual values when the function is invoked.
+# [NOTE]
+# The chat_history, which is a ChatHistory object, will be serialized to a string internally
+# to create/render the final prompt.
+# Since this sample uses a chat completion service, the prompt will be deserialized back to
+# a ChatHistory object that gets passed to the chat completion service. This new chat history
+# object will contain the original messages and the user input.
+kernel = Kernel()
+chat_function = kernel.add_function(
+    plugin_name="ChatBot",
+    function_name="Chat",
+    prompt="{{$chat_history}}{{$user_input}}",
+    template_format="semantic-kernel",
+    # You can attach the request settings to the function or
+    # pass the settings to the kernel.invoke method via the kernel arguments.
+    # If you specify the settings in both places, the settings in the kernel arguments will
+    # take precedence given the same service id.
+    # prompt_execution_settings=request_settings,
+)
+
+# Invoking a kernel function requires a service, so we add the chat completion service to the kernel.
+kernel.add_service(chat_completion_service)
+
+# The chat history reducer is responsible for summarizing the chat history.
+# It's a subclass of ChatHistory that provides additional functionality to reduce the history.
+# You may use it just like a regular ChatHistory object.
+summarization_reducer = ChatHistorySummarizationReducer(
+    service=kernel.get_service(),
+    # target_count:
+    # Purpose: Defines the target number of messages to retain after applying summarization.
+    # What it controls: This parameter determines how much of the most recent conversation history
+    #                   is preserved while discarding or summarizing older messages.
+    # Why change it?:
+    # - Smaller values: Use when memory constraints are tight, or the assistant only needs a brief history
+    #   to maintain context.
+    # - Larger values: Use when retaining more conversational context is critical for accurate responses
+    #   or maintaining a richer dialogue.
+    target_count=3,
+    # threshold_count:
+    # Purpose: Acts as a buffer to avoid reducing history prematurely when the current message count exceeds
+    #          target_count by a small margin.
+    # What it controls: Helps ensure that essential paired messages (like a user query and the assistant’s response)
+    #                   are not "orphaned" or lost during truncation or summarization.
+    # Why change it?:
+    # - Smaller values: Use when you want stricter reduction criteria and are okay with possibly cutting older
+    #   pairs of messages sooner.
+    # - Larger values: Use when you want to minimize the risk of cutting a critical part of the conversation,
+    #   especially for sensitive interactions like API function calls or complex responses.
+    threshold_count=2,
+)
+
+summarization_reducer.add_system_message(system_message)
+
+kernel.add_plugin(plugin=TimePlugin(), plugin_name="TimePlugin")
+
+request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto()
+
+
+async def chat() -> bool:
+    try:
+        user_input = input("User:> ")
+    except (KeyboardInterrupt, EOFError):
+        print("\n\nExiting chat...")
+        return False
+
+    if user_input == "exit":
+        print("\n\nExiting chat...")
+        return False
+
+    await summarization_reducer.reduce()
+
+    kernel_arguments = KernelArguments(
+        settings=request_settings,
+        chat_history=summarization_reducer,
+        user_input=user_input,
+    )
+    answer = await kernel.invoke(plugin_name="ChatBot", function_name="Chat", arguments=kernel_arguments)
+
+    if answer:
+        print(f"Mosscap:> {answer}")
+        summarization_reducer.add_user_message(user_input)
+        summarization_reducer.add_message(answer.value[0])
+
+    return True
+
+
+async def main() -> None:
+    # Start the chat loop. The chat loop will continue until the user types "exit".
+    chatting = True
+    while chatting:
+        chatting = await chat()
+
+    # Sample output:
+    # User:> Why is the sky blue in one sentence?
+    # Mosscap:> The sky is blue due to the scattering of sunlight by the molecules in the Earth's atmosphere,
+    #           a phenomenon known as Rayleigh scattering, which causes shorter blue wavelengths to become more
+    #           prominent in our visual perception.
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer_keep_func_content.py b/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer_keep_func_content.py
new file mode 100644
index 000000000000..b5d0eae75d24
--- /dev/null
+++ b/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer_keep_func_content.py
@@ -0,0 +1,200 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+
+from samples.concepts.setup.chat_completion_services import (
+    Services,
+    get_chat_completion_service_and_request_settings,
+)
+from semantic_kernel import Kernel
+from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
+from semantic_kernel.contents import ChatHistorySummarizationReducer
+from semantic_kernel.contents.chat_history import ChatHistory
+from semantic_kernel.contents.function_call_content import FunctionCallContent
+from semantic_kernel.contents.function_result_content import FunctionResultContent
+from semantic_kernel.core_plugins.time_plugin import TimePlugin
+from semantic_kernel.functions import KernelArguments
+
+# This sample shows how to create a chatbot using a kernel function and leverage a chat history
+# summarization reducer.
+# This sample uses the following main components:
+# - a ChatCompletionService: This component is responsible for generating responses to user messages.
+# - a Chat History Reducer: This component is responsible for keeping track of and reducing the chat history.
+#                           A Chat History Reducer is a subclass of ChatHistory that provides additional
+#                           functionality to reduce the history.
+#    - The Chat History Reducer configuration includes a flag `include_function_content_in_summary` that
+#      allows the reducer to include function call and result content in the summary.
+# - a KernelFunction: This function will be a prompt function, meaning the function is composed of
+#                     a prompt and will be invoked by Semantic Kernel.
+# The chatbot in this sample is called Mosscap, who responds to user messages with long flowery prose.
+
+# [NOTE]
+# The purpose of this sample is to demonstrate how to use a kernel function and use a chat history reducer.
+# To build a basic chatbot, it is sufficient to use a ChatCompletionService with a chat history directly.
+
+# You can select from the following chat completion services:
+# - Services.OPENAI
+# - Services.AZURE_OPENAI
+# - Services.AZURE_AI_INFERENCE
+# - Services.ANTHROPIC
+# - Services.BEDROCK
+# - Services.GOOGLE_AI
+# - Services.MISTRAL_AI
+# - Services.OLLAMA
+# - Services.ONNX
+# - Services.VERTEX_AI
+# Please make sure you have configured your environment correctly for the selected chat completion service.
+chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI)
+
+# This is the system message that gives the chatbot its personality.
+system_message = """
+You are a chat bot. Your name is Mosscap and
+you have one goal: figure out what people need.
+Your full name, should you need to know it, is
+Splendid Speckled Mosscap. You communicate
+effectively, but you tend to answer with long
+flowery prose.
+"""
+
+# Create a kernel and register a prompt function.
+# The prompt here contains two variables: chat_history and user_input.
+# They will be replaced by the kernel with the actual values when the function is invoked.
+# [NOTE]
+# The chat_history, which is a ChatHistory object, will be serialized to a string internally
+# to create/render the final prompt.
+# Since this sample uses a chat completion service, the prompt will be deserialized back to
+# a ChatHistory object that gets passed to the chat completion service. This new chat history
+# object will contain the original messages and the user input.
+kernel = Kernel()
+chat_function = kernel.add_function(
+    plugin_name="ChatBot",
+    function_name="Chat",
+    prompt="{{$chat_history}}{{$user_input}}",
+    template_format="semantic-kernel",
+    # You can attach the request settings to the function or
+    # pass the settings to the kernel.invoke method via the kernel arguments.
+    # If you specify the settings in both places, the settings in the kernel arguments will
+    # take precedence given the same service id.
+    # prompt_execution_settings=request_settings,
+)
+
+# Invoking a kernel function requires a service, so we add the chat completion service to the kernel.
+kernel.add_service(chat_completion_service)
+
+# The chat history reducer is responsible for summarizing the chat history.
+# It's a subclass of ChatHistory that provides additional functionality to reduce the history.
+# You may use it just like a regular ChatHistory object.
+summarization_reducer = ChatHistorySummarizationReducer(
+    service=kernel.get_service(),
+    # target_count:
+    # Purpose: Defines the target number of messages to retain after applying summarization.
+    # What it controls: This parameter determines how much of the most recent conversation history
+    #                   is preserved while discarding or summarizing older messages.
+    # Why change it?:
+    # - Smaller values: Use when memory constraints are tight, or the assistant only needs a brief history
+    #   to maintain context.
+    # - Larger values: Use when retaining more conversational context is critical for accurate responses
+    #   or maintaining a richer dialogue.
+    target_count=3,
+    # threshold_count:
+    # Purpose: Acts as a buffer to avoid reducing history prematurely when the current message count exceeds
+    #          target_count by a small margin.
+    # What it controls: Helps ensure that essential paired messages (like a user query and the assistant’s response)
+    #                   are not "orphaned" or lost during truncation or summarization.
+    # Why change it?:
+    # - Smaller values: Use when you want stricter reduction criteria and are okay with possibly cutting older
+    #   pairs of messages sooner.
+    # - Larger values: Use when you want to minimize the risk of cutting a critical part of the conversation,
+    #   especially for sensitive interactions like API function calls or complex responses.
+    threshold_count=2,
+    include_function_content_in_summary=True,
+)
+
+summarization_reducer.add_system_message(system_message)
+
+kernel.add_plugin(plugin=TimePlugin(), plugin_name="TimePlugin")
+
+request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto()
+
+
+# The following sets hold the ids of the FunctionCallContent and FunctionResultContent items
+# that have been previously added to the chat history.
+processed_fccs: set[str | None] = set()
+processed_frcs: set[str | None] = set()
+
+
+async def chat() -> bool:
+    """Run one chat turn, recording new function call/result content; return False on exit."""
+    try:
+        user_input = input("User:> ")
+    except (KeyboardInterrupt, EOFError):
+        print("\n\nExiting chat...")
+        return False
+
+    if user_input == "exit":
+        print("\n\nExiting chat...")
+        return False
+
+    # Attempt a reduction before this turn's prompt is rendered from the history.
+    await summarization_reducer.reduce()
+
+    kernel_arguments = KernelArguments(
+        settings=request_settings,
+        chat_history=summarization_reducer,
+        user_input=user_input,
+    )
+    answer = await kernel.invoke(plugin_name="ChatBot", function_name="Chat", arguments=kernel_arguments)
+    if not answer:
+        return True
+
+    print(f"Mosscap:> {answer}")
+    # Record both sides of the turn; the rendered user input is not part of the history yet.
+    summarization_reducer.add_user_message(user_input)
+    summarization_reducer.add_message(answer.value[0])
+
+    # Since this example is showing how to include FunctionCallContent and FunctionResultContent
+    # in the summary, we need to add them to the chat history and also to the processed sets.
+    # The chat history of the invocation is read from the FunctionResult's metadata.
+    chat_history: ChatHistory | None = answer.metadata.get("messages")
+    if not chat_history:
+        return True
+
+    # Collect the function calls (in order) and the function results (keyed by id) whose ids
+    # have not been recorded by a previous turn.
+    new_calls: list[FunctionCallContent] = []
+    new_results: dict[str | None, FunctionResultContent] = {}
+    for msg in chat_history.messages:
+        for item in msg.items or []:
+            match item:
+                case FunctionCallContent() if item.id not in processed_fccs:
+                    new_calls.append(item)
+                case FunctionResultContent() if item.id not in processed_frcs:
+                    new_results[item.id] = item
+
+    for call in new_calls:
+        summarization_reducer.add_assistant_message_list([call])
+        processed_fccs.add(call.id)
+        # Pair each call with its result by id rather than by list position, so a missing or
+        # out-of-order result can neither trip an assert nor be attached to the wrong call.
+        if (result := new_results.get(call.id)) is not None:
+            summarization_reducer.add_tool_message_list([result])
+            processed_frcs.add(result.id)
+
+    return True
+
+
+async def main() -> None:
+    """Drive the interactive chat loop until the user ends the conversation."""
+    while True:
+        if not await chat():
+            break
+
+    # Sample output:
+    # User:> Why is the sky blue in one sentence?
+    # Mosscap:> The sky is blue due to the scattering of sunlight by the molecules in the Earth's atmosphere,
+    #           a phenomenon known as Rayleigh scattering, which causes shorter blue wavelengths to become more
+    #           prominent in our visual perception.
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/python/samples/concepts/chat_completion/simple_chatbot_with_truncation_history_reducer.py b/python/samples/concepts/chat_completion/simple_chatbot_with_truncation_history_reducer.py
new file mode 100644
index 000000000000..075cbb8620c0
--- /dev/null
+++ b/python/samples/concepts/chat_completion/simple_chatbot_with_truncation_history_reducer.py
@@ -0,0 +1,160 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+
+from samples.concepts.setup.chat_completion_services import (
+    Services,
+    get_chat_completion_service_and_request_settings,
+)
+from semantic_kernel import Kernel
+from semantic_kernel.contents import ChatHistoryTruncationReducer
+from semantic_kernel.functions import KernelArguments
+
+# This sample shows how to create a chatbot using a kernel function and leverage a chat history
+# truncation reducer.
+# This sample uses the following main components:
+# - a ChatCompletionService: This component is responsible for generating responses to user messages.
+# - a Chat History Reducer: This component is responsible for keeping track of and reducing the chat history.
+#                           A Chat History Reducer is a subclass of ChatHistory that provides additional
+#                           functionality to reduce the history.
+# - a KernelFunction: This function will be a prompt function, meaning the function is composed of
+#                     a prompt and will be invoked by Semantic Kernel.
+# The chatbot in this sample is called Mosscap, who responds to user messages with long flowery prose.
+
+# [NOTE]
+# The purpose of this sample is to demonstrate how to use a kernel function and use a chat history reducer.
+# To build a basic chatbot, it is sufficient to use a ChatCompletionService with a chat history directly.
+
+# You can select from the following chat completion services:
+# - Services.OPENAI
+# - Services.AZURE_OPENAI
+# - Services.AZURE_AI_INFERENCE
+# - Services.ANTHROPIC
+# - Services.BEDROCK
+# - Services.GOOGLE_AI
+# - Services.MISTRAL_AI
+# - Services.OLLAMA
+# - Services.ONNX
+# - Services.VERTEX_AI
+# Please make sure you have configured your environment correctly for the selected chat completion service.
+chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI)
+
+# This is the system message that gives the chatbot its personality.
+system_message = """
+You are a chat bot. Your name is Mosscap and
+you have one goal: figure out what people need.
+Your full name, should you need to know it, is
+Splendid Speckled Mosscap. You communicate
+effectively, but you tend to answer with long
+flowery prose.
+"""
+
+# Create a kernel and register a prompt function.
+# The prompt here contains two variables: chat_history and user_input.
+# They will be replaced by the kernel with the actual values when the function is invoked.
+# [NOTE]
+# The chat_history, which is a ChatHistory object, will be serialized to a string internally
+# to create/render the final prompt.
+# Since this sample uses a chat completion service, the prompt will be deserialized back to
+# a ChatHistory object that gets passed to the chat completion service. This new chat history
+# object will contain the original messages and the user input.
+kernel = Kernel()
+chat_function = kernel.add_function(
+    plugin_name="ChatBot",
+    function_name="Chat",
+    prompt="{{$chat_history}}{{$user_input}}",
+    template_format="semantic-kernel",
+    # You can attach the request settings to the function or
+    # pass the settings to the kernel.invoke method via the kernel arguments.
+    # If you specify the settings in both places, the settings in the kernel arguments will
+    # take precedence given the same service id.
+    # prompt_execution_settings=request_settings,
+)
+
+# Invoking a kernel function requires a service, so we add the chat completion service to the kernel.
+kernel.add_service(chat_completion_service)
+
+# The chat history reducer is responsible for truncating the chat history.
+# It's a subclass of ChatHistory that provides additional functionality to reduce the history.
+# You may use it just like a regular ChatHistory object.
+truncation_reducer = ChatHistoryTruncationReducer(
+    service=kernel.get_service(),
+    # target_count:
+    # Purpose: Defines the target number of messages to retain after applying truncation.
+    # What it controls: This parameter determines how much of the most recent conversation history
+    #                   is preserved while discarding older messages.
+    # Why change it?:
+    # - Smaller values: Use when memory constraints are tight, or the assistant only needs a brief history
+    #   to maintain context.
+    # - Larger values: Use when retaining more conversational context is critical for accurate responses
+    #   or maintaining a richer dialogue.
+    target_count=3,
+    # threshold_count:
+    # Purpose: Acts as a buffer to avoid reducing history prematurely when the current message count exceeds
+    #          target_count by a small margin.
+    # What it controls: Helps ensure that essential paired messages (like a user query and the assistant’s response)
+    #                   are not "orphaned" or lost during truncation.
+    # Why change it?:
+    # - Smaller values: Use when you want stricter reduction criteria and are okay with possibly cutting older
+    #   pairs of messages sooner.
+    # - Larger values: Use when you want to minimize the risk of cutting a critical part of the conversation,
+    #   especially for sensitive interactions like API function calls or complex responses.
+    threshold_count=2,
+)
+
+truncation_reducer.add_system_message(system_message)
+
+
+async def chat() -> bool:
+    """Run one chat turn using the truncation reducer as the chat history.
+
+    Returns False when the user exits ("exit", Ctrl+C or end-of-input), True otherwise.
+    """
+    try:
+        user_input = input("User:> ")
+    except (KeyboardInterrupt, EOFError):
+        print("\n\nExiting chat...")
+        return False
+
+    if user_input == "exit":
+        print("\n\nExiting chat...")
+        return False
+
+    # Attempt to reduce before adding the user message to the chat history.
+    await truncation_reducer.reduce()
+
+    # Get the chat message content from the chat completion service.
+    kernel_arguments = KernelArguments(
+        settings=request_settings,
+        # Use keyword arguments to pass the chat history and user input to the kernel function.
+        chat_history=truncation_reducer,
+        user_input=user_input,
+    )
+    answer = await kernel.invoke(plugin_name="ChatBot", function_name="Chat", arguments=kernel_arguments)
+    # Alternatively, you can invoke the function directly with the kernel as an argument:
+    # answer = await chat_function.invoke(kernel, kernel_arguments)
+    if answer:
+        print(f"Mosscap:> {answer}")
+        # Since the user_input is rendered by the template, it is not yet part of the chat history, so we add it here.
+        truncation_reducer.add_user_message(user_input)
+        # Add the chat message to the chat history to keep track of the conversation.
+        truncation_reducer.add_message(answer.value[0])
+
+    return True
+
+
+async def main() -> None:
+    """Run the chat loop, one `chat()` turn at a time, until a turn returns False."""
+    keep_going = await chat()
+    while keep_going:
+        keep_going = await chat()
+
+    # Sample output:
+    # User:> Why is the sky blue in one sentence?
+    # Mosscap:> The sky is blue due to the scattering of sunlight by the molecules in the Earth's atmosphere,
+    #           a phenomenon known as Rayleigh scattering, which causes shorter blue wavelengths to become more
+    #           prominent in our visual perception.
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/python/samples/concepts/plugins/openai_function_calling_with_custom_plugin.py b/python/samples/concepts/plugins/openai_function_calling_with_custom_plugin.py
index 6adde925a390..c556b7e9820c 100644
--- a/python/samples/concepts/plugins/openai_function_calling_with_custom_plugin.py
+++ b/python/samples/concepts/plugins/openai_function_calling_with_custom_plugin.py
@@ -120,14 +120,13 @@ async def main():
 
         chat_history.add_message(result)
         for item in result.items:
-            await chat._process_function_call(
+            await kernel.invoke_function_call(
                 function_call=item,
-                kernel=kernel,
                 chat_history=chat_history,
                 arguments=KernelArguments(),
                 function_call_count=1,
                 request_index=0,
-                function_call_behavior=settings.function_choice_behavior,
+                function_behavior=settings.function_choice_behavior,
             )
 
 
diff --git a/python/samples/concepts/reasoning/simple_reasoning_function_calling.py b/python/samples/concepts/reasoning/simple_reasoning_function_calling.py
index 238d69753f88..0da02adacefe 100644
--- a/python/samples/concepts/reasoning/simple_reasoning_function_calling.py
+++ b/python/samples/concepts/reasoning/simple_reasoning_function_calling.py
@@ -70,7 +70,9 @@
 Note: Unsupported features may be added in future updates.
 """
 
-chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.OPENAI)
+chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(
+    Services.OPENAI, instruction_role="developer"
+)
 
 # This is the system message that gives the chatbot its personality.
 developer_message = """
diff --git a/python/samples/concepts/setup/chat_completion_services.py b/python/samples/concepts/setup/chat_completion_services.py
index 40dd127eda47..ee4d6d2dfa67 100644
--- a/python/samples/concepts/setup/chat_completion_services.py
+++ b/python/samples/concepts/setup/chat_completion_services.py
@@ -32,28 +32,50 @@ class Services(str, Enum):
 
 def get_chat_completion_service_and_request_settings(
     service_name: Services,
+    instruction_role: str | None = None,
 ) -> tuple["ChatCompletionClientBase", "PromptExecutionSettings"]:
-    """Return service and request settings."""
+    """Return service and request settings.
+
+    Args:
+        service_name (Services): The service name.
+        instruction_role (str | None): The role to use for 'instruction' messages, for example,
+            'system' or 'developer'. Defaults to 'system'. Currently only supported for OpenAI reasoning models.
+    """
+    # Use lambdas or functions to delay instantiation
     chat_services = {
-        Services.OPENAI: get_openai_chat_completion_service_and_request_settings,
-        Services.AZURE_OPENAI: get_azure_openai_chat_completion_service_and_request_settings,
-        Services.AZURE_AI_INFERENCE: get_azure_ai_inference_chat_completion_service_and_request_settings,
-        Services.ANTHROPIC: get_anthropic_chat_completion_service_and_request_settings,
-        Services.BEDROCK: get_bedrock_chat_completion_service_and_request_settings,
-        Services.GOOGLE_AI: get_google_ai_chat_completion_service_and_request_settings,
-        Services.MISTRAL_AI: get_mistral_ai_chat_completion_service_and_request_settings,
-        Services.OLLAMA: get_ollama_chat_completion_service_and_request_settings,
-        Services.ONNX: get_onnx_chat_completion_service_and_request_settings,
-        Services.VERTEX_AI: get_vertex_ai_chat_completion_service_and_request_settings,
+        Services.OPENAI: lambda: get_openai_chat_completion_service_and_request_settings(
+            instruction_role=instruction_role
+        ),
+        Services.AZURE_OPENAI: lambda: get_azure_openai_chat_completion_service_and_request_settings(
+            instruction_role=instruction_role
+        ),
+        Services.AZURE_AI_INFERENCE: lambda: get_azure_ai_inference_chat_completion_service_and_request_settings(
+            instruction_role=instruction_role
+        ),
+        Services.ANTHROPIC: lambda: get_anthropic_chat_completion_service_and_request_settings(),
+        Services.BEDROCK: lambda: get_bedrock_chat_completion_service_and_request_settings(),
+        Services.GOOGLE_AI: lambda: get_google_ai_chat_completion_service_and_request_settings(),
+        Services.MISTRAL_AI: lambda: get_mistral_ai_chat_completion_service_and_request_settings(),
+        Services.OLLAMA: lambda: get_ollama_chat_completion_service_and_request_settings(),
+        Services.ONNX: lambda: get_onnx_chat_completion_service_and_request_settings(),
+        Services.VERTEX_AI: lambda: get_vertex_ai_chat_completion_service_and_request_settings(),
     }
+
+    # Call the appropriate lambda or function based on the service name
+    if service_name not in chat_services:
+        raise ValueError(f"Unsupported service name: {service_name}")
     return chat_services[service_name]()
 
 
-def get_openai_chat_completion_service_and_request_settings() -> tuple[
-    "ChatCompletionClientBase", "PromptExecutionSettings"
-]:
+def get_openai_chat_completion_service_and_request_settings(
+    instruction_role: str | None = None,
+) -> tuple["ChatCompletionClientBase", "PromptExecutionSettings"]:
     """Return OpenAI chat completion service and request settings.
 
+    Args:
+        instruction_role (str | None): The role to use for 'instruction' messages, for example,
+            'developer' or 'system'. (Optional)
+
     The service credentials can be read by 3 ways:
     1. Via the constructor
     2. Via the environment variables
@@ -70,7 +92,7 @@ def get_openai_chat_completion_service_and_request_settings() -> tuple[
         OpenAIChatPromptExecutionSettings,
     )
 
-    chat_service = OpenAIChatCompletion(service_id=service_id)
+    chat_service = OpenAIChatCompletion(service_id=service_id, instruction_role=instruction_role)
     request_settings = OpenAIChatPromptExecutionSettings(
         service_id=service_id, max_tokens=2000, temperature=0.7, top_p=0.8
     )
@@ -78,11 +100,15 @@ def get_openai_chat_completion_service_and_request_settings() -> tuple[
     return chat_service, request_settings
 
 
-def get_azure_openai_chat_completion_service_and_request_settings() -> tuple[
-    "ChatCompletionClientBase", "PromptExecutionSettings"
-]:
+def get_azure_openai_chat_completion_service_and_request_settings(
+    instruction_role: str | None = None,
+) -> tuple["ChatCompletionClientBase", "PromptExecutionSettings"]:
     """Return Azure OpenAI chat completion service and request settings.
 
+    Args:
+        instruction_role (str | None): The role to use for 'instruction' messages, for example,
+            'developer' or 'system'. (Optional)
+
     The service credentials can be read by 3 ways:
     1. Via the constructor
     2. Via the environment variables
@@ -99,15 +125,15 @@ def get_azure_openai_chat_completion_service_and_request_settings() -> tuple[
         AzureChatPromptExecutionSettings,
     )
 
-    chat_service = AzureChatCompletion(service_id=service_id)
+    chat_service = AzureChatCompletion(service_id=service_id, instruction_role=instruction_role)
     request_settings = AzureChatPromptExecutionSettings(service_id=service_id)
 
     return chat_service, request_settings
 
 
-def get_azure_ai_inference_chat_completion_service_and_request_settings() -> tuple[
-    "ChatCompletionClientBase", "PromptExecutionSettings"
-]:
+def get_azure_ai_inference_chat_completion_service_and_request_settings(
+    instruction_role: str | None = None,
+) -> tuple["ChatCompletionClientBase", "PromptExecutionSettings"]:
     """Return Azure AI Inference chat completion service and request settings.
 
     The service credentials can be read by 3 ways:
@@ -129,6 +155,7 @@ def get_azure_ai_inference_chat_completion_service_and_request_settings() -> tup
     chat_service = AzureAIInferenceChatCompletion(
         service_id=service_id,
         ai_model_id="id",  # The model ID is simply an identifier as the model id cannot be obtained programmatically.
+        instruction_role=instruction_role,
     )
     request_settings = AzureAIInferenceChatPromptExecutionSettings(service_id=service_id)
 
diff --git a/python/semantic_kernel/agents/agent.py b/python/semantic_kernel/agents/agent.py
index 71728feb8362..56cd115a7751 100644
--- a/python/semantic_kernel/agents/agent.py
+++ b/python/semantic_kernel/agents/agent.py
@@ -2,17 +2,21 @@
 
 import uuid
 from collections.abc import Iterable
-from typing import ClassVar
+from typing import TYPE_CHECKING, ClassVar
 
 from pydantic import Field
 
 from semantic_kernel.agents.channels.agent_channel import AgentChannel
+from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer
 from semantic_kernel.kernel import Kernel
 from semantic_kernel.kernel_pydantic import KernelBaseModel
 from semantic_kernel.utils.experimental_decorator import experimental_class
 from semantic_kernel.utils.naming import generate_random_ascii_name
 from semantic_kernel.utils.validation import AGENT_NAME_REGEX
 
+if TYPE_CHECKING:
+    from semantic_kernel.contents.chat_history import ChatHistory
+
 
 @experimental_class
 class Agent(KernelBaseModel):
@@ -37,6 +41,22 @@ class Agent(KernelBaseModel):
     instructions: str | None = None
     kernel: Kernel = Field(default_factory=Kernel)
     channel_type: ClassVar[type[AgentChannel] | None] = None
+    history_reducer: ChatHistoryReducer | None = None
+
+    async def reduce_history(self, history: "ChatHistory") -> bool:
+        """Reduce `history` in place via `history_reducer`; return True only if a reduction occurred."""
+        if self.history_reducer is None:
+            return False
+
+        self.history_reducer.messages = history.messages
+        reduced = await self.history_reducer.reduce()
+        if reduced is None:
+            return False
+
+        # Snapshot the result before overwriting: if the reducer still shares its list with `history`,
+        # clearing `history.messages` first would also empty the reduced result.
+        history.messages[:] = list(reduced)
+        return True
 
     def get_channel_keys(self) -> Iterable[str]:
         """Get the channel keys.
@@ -46,7 +66,11 @@ def get_channel_keys(self) -> Iterable[str]:
         """
         if not self.channel_type:
             raise NotImplementedError("Unable to get channel keys. Channel type not configured.")
-        return [self.channel_type.__name__]
+        yield self.channel_type.__name__
+
+        if self.history_reducer is not None:
+            yield self.history_reducer.__class__.__name__
+            yield str(self.history_reducer.__hash__)
 
     async def create_channel(self) -> AgentChannel:
         """Create a channel.
diff --git a/python/semantic_kernel/agents/channels/chat_history_channel.py b/python/semantic_kernel/agents/channels/chat_history_channel.py
index 563efeaef610..057c005b3d3d 100644
--- a/python/semantic_kernel/agents/channels/chat_history_channel.py
+++ b/python/semantic_kernel/agents/channels/chat_history_channel.py
@@ -64,6 +64,9 @@ async def invoke(
                 f"Invalid channel binding for agent with id: `{id}` with name: ({type(agent).__name__})"
             )
 
+        # pre-process history reduction
+        await agent.reduce_history(self)
+
         message_count = len(self.messages)
         mutated_history = set()
         message_queue: Deque[ChatMessageContent] = deque()
@@ -119,6 +122,9 @@ async def invoke_stream(
                 f"Invalid channel binding for agent with id: `{id}` with name: ({type(agent).__name__})"
             )
 
+        # pre-process history reduction
+        await agent.reduce_history(self)
+
         message_count = len(self.messages)
 
         async for response_message in agent.invoke_stream(self):
diff --git a/python/semantic_kernel/agents/chat_completion/chat_completion_agent.py b/python/semantic_kernel/agents/chat_completion/chat_completion_agent.py
index 352787e81d8c..cbdb218ad616 100644
--- a/python/semantic_kernel/agents/chat_completion/chat_completion_agent.py
+++ b/python/semantic_kernel/agents/chat_completion/chat_completion_agent.py
@@ -12,6 +12,7 @@
 from semantic_kernel.const import DEFAULT_SERVICE_NAME
 from semantic_kernel.contents.chat_history import ChatHistory
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
+from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
 from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.exceptions import KernelServiceNotFoundError
@@ -46,6 +47,7 @@ def __init__(
         description: str | None = None,
         instructions: str | None = None,
         execution_settings: PromptExecutionSettings | None = None,
+        history_reducer: ChatHistoryReducer | None = None,
     ) -> None:
         """Initialize a new instance of ChatCompletionAgent.
 
@@ -59,6 +61,7 @@ def __init__(
             description: The description of the agent. (optional)
             instructions: The instructions for the agent. (optional)
             execution_settings: The execution settings for the agent. (optional)
+            history_reducer: The history reducer for the agent. (optional)
         """
         if not service_id:
             service_id = DEFAULT_SERVICE_NAME
@@ -75,6 +78,8 @@ def __init__(
             args["id"] = id
         if kernel is not None:
             args["kernel"] = kernel
+        if history_reducer is not None:
+            args["history_reducer"] = history_reducer
         super().__init__(**args)
 
     @trace_agent_invocation
diff --git a/python/semantic_kernel/agents/strategies/selection/kernel_function_selection_strategy.py b/python/semantic_kernel/agents/strategies/selection/kernel_function_selection_strategy.py
index 3879fab95aca..65f7dfb2ae0b 100644
--- a/python/semantic_kernel/agents/strategies/selection/kernel_function_selection_strategy.py
+++ b/python/semantic_kernel/agents/strategies/selection/kernel_function_selection_strategy.py
@@ -1,6 +1,13 @@
 # Copyright (c) Microsoft. All rights reserved.
 
 import logging
+import sys
+
+if sys.version_info >= (3, 12):
+    from typing import override  # pragma: no cover
+else:
+    from typing_extensions import override  # pragma: no cover
+
 from collections.abc import Callable
 from inspect import isawaitable
 from typing import TYPE_CHECKING, ClassVar
@@ -9,6 +16,7 @@
 
 from semantic_kernel.agents.strategies.selection.selection_strategy import SelectionStrategy
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
+from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer
 from semantic_kernel.exceptions.agent_exceptions import AgentExecutionException
 from semantic_kernel.functions.kernel_arguments import KernelArguments
 from semantic_kernel.functions.kernel_function import KernelFunction
@@ -34,9 +42,11 @@ class KernelFunctionSelectionStrategy(SelectionStrategy):
     function: KernelFunction
     kernel: Kernel
     result_parser: Callable[..., str] = Field(default_factory=lambda: (lambda: ""))
+    history_reducer: ChatHistoryReducer | None = None
 
-    async def next(self, agents: list["Agent"], history: list[ChatMessageContent]) -> "Agent":
-        """Check if the agent should terminate.
+    @override
+    async def select_agent(self, agents: list["Agent"], history: list[ChatMessageContent]) -> "Agent":
+        """Select the next agent to interact with.
 
         Args:
             agents: The list of agents to select from.
@@ -48,6 +58,12 @@ async def next(self, agents: list["Agent"], history: list[ChatMessageContent]) -
         Raises:
             AgentExecutionException: If the strategy fails to execute the function or select the next agent
         """
+        if self.history_reducer is not None:
+            self.history_reducer.messages = history
+            reduced_history = await self.history_reducer.reduce()
+            if reduced_history is not None:
+                history = reduced_history.messages
+
         original_arguments = self.arguments or KernelArguments()
         execution_settings = original_arguments.execution_settings or {}
 
diff --git a/python/semantic_kernel/agents/strategies/selection/selection_strategy.py b/python/semantic_kernel/agents/strategies/selection/selection_strategy.py
index cef5625432c9..6f453a50a876 100644
--- a/python/semantic_kernel/agents/strategies/selection/selection_strategy.py
+++ b/python/semantic_kernel/agents/strategies/selection/selection_strategy.py
@@ -1,22 +1,29 @@
 # Copyright (c) Microsoft. All rights reserved.
 
-from abc import ABC, abstractmethod
+from abc import ABC
 from typing import TYPE_CHECKING
 
+from semantic_kernel.agents import Agent
+from semantic_kernel.exceptions.agent_exceptions import AgentExecutionException
 from semantic_kernel.kernel_pydantic import KernelBaseModel
 from semantic_kernel.utils.experimental_decorator import experimental_class
 
 if TYPE_CHECKING:
-    from semantic_kernel.agents import Agent
     from semantic_kernel.contents.chat_message_content import ChatMessageContent
 
 
 @experimental_class
 class SelectionStrategy(KernelBaseModel, ABC):
-    """Contract for an agent selection strategy."""
+    """Base strategy class for selecting the next agent in a chat."""
 
-    @abstractmethod
-    async def next(self, agents: list["Agent"], history: list["ChatMessageContent"]) -> "Agent":
+    has_selected: bool = False
+    initial_agent: Agent | None = None
+
+    async def next(
+        self,
+        agents: list[Agent],
+        history: list["ChatMessageContent"],
+    ) -> Agent:
         """Select the next agent to interact with.
 
         Args:
@@ -24,6 +31,32 @@ async def next(self, agents: list["Agent"], history: list["ChatMessageContent"])
             history: The history of messages in the conversation.
 
         Returns:
-            The next agent to interact with.
+            The agent who takes the next turn.
+
+        Raises:
+            AgentExecutionException: If no agent is available to take the turn.
+        """
+        # The initial agent can only stand in for an empty agent list on the very first
+        # selection; afterwards select_agent() needs at least one agent to choose from.
+        if not agents and (self.has_selected or self.initial_agent is None):
+            raise AgentExecutionException("Agent Failure - No agents present to select.")
+
+        # If it's the first selection and we have an initial agent, use it
+        if not self.has_selected and self.initial_agent is not None:
+            agent = self.initial_agent
+        else:
+            agent = await self.select_agent(agents, history)
+
+        self.has_selected = True
+        return agent
+
+    async def select_agent(
+        self,
+        agents: list[Agent],
+        history: list["ChatMessageContent"],
+    ) -> Agent:
+        """Determine which agent goes next. Override for custom logic.
+
+        By default, this fallback returns the first agent in the list.
         """
-        ...
+        return agents[0]
diff --git a/python/semantic_kernel/agents/strategies/selection/sequential_selection_strategy.py b/python/semantic_kernel/agents/strategies/selection/sequential_selection_strategy.py
index 8304f405df7e..b60fc5f0f21f 100644
--- a/python/semantic_kernel/agents/strategies/selection/sequential_selection_strategy.py
+++ b/python/semantic_kernel/agents/strategies/selection/sequential_selection_strategy.py
@@ -1,5 +1,13 @@
 # Copyright (c) Microsoft. All rights reserved.
 
+import logging
+import sys
+
+if sys.version_info >= (3, 12):
+    from typing import override  # pragma: no cover
+else:
+    from typing_extensions import override  # pragma: no cover
+
 from typing import TYPE_CHECKING
 
 from pydantic import PrivateAttr
@@ -12,34 +20,68 @@
     from semantic_kernel.contents.chat_message_content import ChatMessageContent
 
 
+logger: logging.Logger = logging.getLogger(__name__)
+
+
 @experimental_class
 class SequentialSelectionStrategy(SelectionStrategy):
-    """A selection strategy that selects agents in a sequential order."""
+    """Round-robin turn-taking strategy. Agent order is based on the order in which they joined."""
+
+    _index: int = PrivateAttr(default=-1)
 
-    _index: int = PrivateAttr(default=0)
+    def reset(self) -> None:
+        """Reset selection to the initial/first agent."""
+        self._index = -1
 
-    def reset(self):
-        """Reset the index."""
-        self._index = 0
+    def _increment_index(self, agent_count: int) -> None:
+        """Increment the index in a circular manner."""
+        self._index = (self._index + 1) % agent_count
 
-    async def next(self, agents: list["Agent"], history: list["ChatMessageContent"]) -> "Agent":
-        """Select the next agent to interact with.
+    @override
+    async def select_agent(
+        self,
+        agents: list["Agent"],
+        history: list["ChatMessageContent"],
+    ) -> "Agent":
+        """Select the next agent in a round-robin fashion.
 
         Args:
             agents: The list of agents to select from.
             history: The history of messages in the conversation.
 
         Returns:
-            The next agent to interact with.
+            The agent who takes the next turn.
+
+        Raises:
+            ValueError: If there are no agents to select from.
         """
+        # Guard first: _increment_index() computes `% len(agents)`, which fails on an empty list.
         if len(agents) == 0:
             raise ValueError("No agents to select from")
 
         if self._index >= len(agents):
-            self.reset()
+            self._index = -1
 
-        agent = agents[self._index]
+        if (
+            self.has_selected
+            and self.initial_agent is not None
+            and len(agents) > 0
+            and agents[0] == self.initial_agent
+            and self._index < 0
+        ):
+            # Avoid selecting the same agent twice in a row
+            self._increment_index(len(agents))
 
-        self._index = (self._index + 1) % len(agents)
+        # Main index increment
+        self._increment_index(len(agents))
+
+        # Pick the agent
+        agent = agents[self._index]
 
+        logger.info(
+            "Selected agent at index %d (ID: %s, name: %s)",
+            self._index,
+            agent.id,
+            agent.name,
+        )
         return agent
diff --git a/python/semantic_kernel/agents/strategies/termination/kernel_function_termination_strategy.py b/python/semantic_kernel/agents/strategies/termination/kernel_function_termination_strategy.py
index f46cd79704ef..93c59e10ed84 100644
--- a/python/semantic_kernel/agents/strategies/termination/kernel_function_termination_strategy.py
+++ b/python/semantic_kernel/agents/strategies/termination/kernel_function_termination_strategy.py
@@ -9,6 +9,7 @@
 
 from semantic_kernel.agents.strategies.termination.termination_strategy import TerminationStrategy
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
+from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer
 from semantic_kernel.functions.kernel_arguments import KernelArguments
 from semantic_kernel.functions.kernel_function import KernelFunction
 from semantic_kernel.kernel import Kernel
@@ -33,6 +34,7 @@ class KernelFunctionTerminationStrategy(TerminationStrategy):
     function: KernelFunction
     kernel: Kernel
     result_parser: Callable[..., bool] = Field(default_factory=lambda: (lambda: True))
+    history_reducer: ChatHistoryReducer | None = None
 
     async def should_agent_terminate(
         self,
@@ -48,6 +50,12 @@ async def should_agent_terminate(
         Returns:
             True if the agent should terminate, False otherwise
         """
+        if self.history_reducer is not None:
+            self.history_reducer.messages = history
+            reduced_history = await self.history_reducer.reduce()
+            if reduced_history is not None:
+                history = reduced_history.messages
+
         original_arguments = self.arguments or KernelArguments()
         execution_settings = original_arguments.execution_settings or {}
 
diff --git a/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py
index 8541fd0dc651..c18fcb30c732 100644
--- a/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py
+++ b/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py
@@ -5,7 +5,7 @@
 
 from pydantic import Field, model_validator
 
-from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType
+from semantic_kernel.connectors.ai.function_choice_type import FunctionChoiceType
 from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 from semantic_kernel.exceptions import ServiceInvalidExecutionSettingsError
 
diff --git a/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py b/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py
index 87e967184234..f5baec134528 100644
--- a/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py
@@ -4,7 +4,7 @@
 import logging
 import sys
 from collections.abc import AsyncGenerator, Callable
-from typing import Any, ClassVar
+from typing import TYPE_CHECKING, Any, ClassVar
 
 if sys.version_info >= (3, 12):
     from typing import override  # pragma: no cover
@@ -32,7 +32,6 @@
 )
 from semantic_kernel.connectors.ai.anthropic.settings.anthropic_settings import AnthropicSettings
 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
-from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
 from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType
 from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 from semantic_kernel.contents.chat_history import ChatHistory
@@ -56,6 +55,9 @@
     trace_streaming_chat_completion,
 )
 
+if TYPE_CHECKING:
+    from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
+
 # map finish reasons from Anthropic to Semantic Kernel
 ANTHROPIC_TO_SEMANTIC_KERNEL_FINISH_REASON_MAP = {
     "end_turn": SemanticKernelFinishReason.STOP,
@@ -136,7 +138,7 @@ def service_url(self) -> str | None:
     @override
     def _update_function_choice_settings_callback(
         self,
-    ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]:
+    ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]:
         return update_settings_from_function_call_configuration
 
     @override
diff --git a/python/semantic_kernel/connectors/ai/anthropic/services/utils.py b/python/semantic_kernel/connectors/ai/anthropic/services/utils.py
index 31acecb0468f..e41905e1cc91 100644
--- a/python/semantic_kernel/connectors/ai/anthropic/services/utils.py
+++ b/python/semantic_kernel/connectors/ai/anthropic/services/utils.py
@@ -3,11 +3,9 @@
 import json
 import logging
 from collections.abc import Callable, Mapping
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
-from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
 from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType
-from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.function_result_content import FunctionResultContent
@@ -18,6 +16,11 @@
 logger: logging.Logger = logging.getLogger(__name__)
 
 
+if TYPE_CHECKING:
+    from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
+    from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
+
+
 def _format_user_message(message: ChatMessageContent) -> dict[str, Any]:
     """Format a user message to the expected object for the Anthropic client.
 
@@ -118,8 +121,8 @@ def _format_tool_message(message: ChatMessageContent) -> dict[str, Any]:
 
 
 def update_settings_from_function_call_configuration(
-    function_choice_configuration: FunctionCallChoiceConfiguration,
-    settings: PromptExecutionSettings,
+    function_choice_configuration: "FunctionCallChoiceConfiguration",
+    settings: "PromptExecutionSettings",
     type: FunctionChoiceType,
 ) -> None:
     """Update the settings from a FunctionChoiceConfiguration."""
diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_base.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_base.py
index 772ddb28e6c7..64e0806804e1 100644
--- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_base.py
+++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_base.py
@@ -52,6 +52,7 @@ def __init__(
         env_file_path: str | None = None,
         env_file_encoding: str | None = None,
         client: ChatCompletionsClient | EmbeddingsClient | None = None,
+        instruction_role: str | None = None,
         **kwargs: Any,
     ) -> None:
         """Initialize the Azure AI Inference Chat Completion service.
@@ -68,6 +69,7 @@ def __init__(
             env_file_path (str | None): The path to the environment file. (Optional)
             env_file_encoding (str | None): The encoding of the environment file. (Optional)
             client (ChatCompletionsClient | None): The Azure AI Inference client to use. (Optional)
+            instruction_role (str | None): The role to use for 'instruction' messages. (Optional)
             **kwargs: Additional keyword arguments.
 
         Raises:
@@ -100,11 +102,16 @@ def __init__(
                     user_agent=SEMANTIC_KERNEL_USER_AGENT,
                 )
 
-        super().__init__(
-            client=client,
-            managed_client=managed_client,
+        args: dict[str, Any] = {
+            "client": client,
+            "managed_client": managed_client,
             **kwargs,
-        )
+        }
+
+        if instruction_role:
+            args["instruction_role"] = instruction_role
+
+        super().__init__(**args)
 
     def __del__(self) -> None:
         """Close the client when the object is deleted."""
diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
index 8ac10561f142..9a43591938e6 100644
--- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
@@ -30,7 +30,6 @@
 from semantic_kernel.connectors.ai.azure_ai_inference.services.utils import MESSAGE_CONVERTERS
 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
 from semantic_kernel.connectors.ai.completion_usage import CompletionUsage
-from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
 from semantic_kernel.connectors.ai.function_calling_utils import update_settings_from_function_call_configuration
 from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType
 from semantic_kernel.contents.chat_history import ChatHistory
@@ -46,6 +45,7 @@
 from semantic_kernel.utils.experimental_decorator import experimental_class
 
 if TYPE_CHECKING:
+    from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
     from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 
 logger: logging.Logger = logging.getLogger(__name__)
@@ -66,6 +66,7 @@ def __init__(
         env_file_path: str | None = None,
         env_file_encoding: str | None = None,
         client: ChatCompletionsClient | None = None,
+        instruction_role: str | None = None,
     ) -> None:
         """Initialize the Azure AI Inference Chat Completion service.
 
@@ -82,20 +83,29 @@ def __init__(
             env_file_path (str | None): The path to the environment file. (Optional)
             env_file_encoding (str | None): The encoding of the environment file. (Optional)
             client (ChatCompletionsClient | None): The Azure AI Inference client to use. (Optional)
+            instruction_role (str | None): The role to use for 'instruction' messages, for example, summarization
+                prompts could use `developer` or `system`. (Optional)
 
         Raises:
             ServiceInitializationError: If an error occurs during initialization.
         """
-        super().__init__(
-            ai_model_id=ai_model_id,
-            service_id=service_id or ai_model_id,
-            client_type=AzureAIInferenceClientType.ChatCompletions,
-            api_key=api_key,
-            endpoint=endpoint,
-            env_file_path=env_file_path,
-            env_file_encoding=env_file_encoding,
-            client=client,
-        )
+        args: dict[str, Any] = {
+            "ai_model_id": ai_model_id,
+            "api_key": api_key,
+            "client_type": AzureAIInferenceClientType.ChatCompletions,
+            "client": client,
+            "endpoint": endpoint,
+            "env_file_path": env_file_path,
+            "env_file_encoding": env_file_encoding,
+        }
+
+        if service_id:
+            args["service_id"] = service_id
+
+        if instruction_role:
+            args["instruction_role"] = instruction_role
+
+        super().__init__(**args)
 
     # region Overriding base class methods
 
@@ -179,7 +189,7 @@ def _verify_function_choice_settings(self, settings: "PromptExecutionSettings")
     @override
     def _update_function_choice_settings_callback(
         self,
-    ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]:
+    ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]:
         return update_settings_from_function_call_configuration
 
     @override
@@ -199,7 +209,13 @@ def _prepare_chat_history_for_request(
         chat_request_messages: list[ChatRequestMessage] = []
 
         for message in chat_history.messages:
-            chat_request_messages.append(MESSAGE_CONVERTERS[message.role](message))
+            # If instruction_role is 'developer' and the message role is 'system', change it to 'developer'
+            role = (
+                AuthorRole.DEVELOPER
+                if self.instruction_role == "developer" and message.role == AuthorRole.SYSTEM
+                else message.role
+            )
+            chat_request_messages.append(MESSAGE_CONVERTERS[role](message))
 
         return chat_request_messages
 
diff --git a/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py b/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py
index c163b6ffda74..5c4f3e6cd192 100644
--- a/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py
@@ -30,7 +30,6 @@
 )
 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
 from semantic_kernel.connectors.ai.completion_usage import CompletionUsage
-from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
 from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType
 from semantic_kernel.contents.chat_message_content import ITEM_TYPES, ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
@@ -52,6 +51,7 @@
 )
 
 if TYPE_CHECKING:
+    from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
     from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
     from semantic_kernel.contents.chat_history import ChatHistory
 
@@ -160,7 +160,7 @@ async def _inner_get_streaming_chat_message_contents(
     @override
     def _update_function_choice_settings_callback(
         self,
-    ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]:
+    ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]:
         return update_settings_from_function_choice_configuration
 
     @override
diff --git a/python/semantic_kernel/connectors/ai/bedrock/services/model_provider/utils.py b/python/semantic_kernel/connectors/ai/bedrock/services/model_provider/utils.py
index 6274bdb01ffe..7607696559c5 100644
--- a/python/semantic_kernel/connectors/ai/bedrock/services/model_provider/utils.py
+++ b/python/semantic_kernel/connectors/ai/bedrock/services/model_provider/utils.py
@@ -4,12 +4,10 @@
 import json
 from collections.abc import Callable, Mapping
 from functools import partial
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 from semantic_kernel.connectors.ai.bedrock.bedrock_prompt_execution_settings import BedrockChatPromptExecutionSettings
-from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
 from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType
-from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 from semantic_kernel.const import DEFAULT_FULLY_QUALIFIED_NAME_SEPARATOR
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
@@ -20,6 +18,10 @@
 from semantic_kernel.contents.utils.finish_reason import FinishReason
 from semantic_kernel.exceptions.service_exceptions import ServiceInvalidRequestError
 
+if TYPE_CHECKING:
+    from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
+    from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
+
 
 async def run_in_executor(executor, func, *args, **kwargs):
     """Run a function in an executor."""
@@ -177,8 +179,8 @@ def format_bedrock_function_name_to_kernel_function_fully_qualified_name(bedrock
 
 
 def update_settings_from_function_choice_configuration(
-    function_choice_configuration: FunctionCallChoiceConfiguration,
-    settings: PromptExecutionSettings,
+    function_choice_configuration: "FunctionCallChoiceConfiguration",
+    settings: "PromptExecutionSettings",
     type: FunctionChoiceType,
 ) -> None:
     """Update the settings from a FunctionChoiceConfiguration."""
diff --git a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py
index de9edf36c268..5c527e994564 100644
--- a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py
+++ b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py
@@ -9,14 +9,9 @@
 from typing import TYPE_CHECKING, Any, ClassVar
 
 from opentelemetry.trace import Span, Tracer, get_tracer, use_span
+from pydantic import Field
 
-from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior
-from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
-from semantic_kernel.connectors.ai.function_calling_utils import (
-    merge_function_results,
-    merge_streaming_function_results,
-)
-from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior, FunctionChoiceType
+from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType
 from semantic_kernel.const import AUTO_FUNCTION_INVOCATION_SPAN_NAME
 from semantic_kernel.contents.annotation_content import AnnotationContent
 from semantic_kernel.contents.file_reference_content import FileReferenceContent
@@ -26,6 +21,7 @@
 from semantic_kernel.utils.telemetry.model_diagnostics.gen_ai_attributes import AVAILABLE_FUNCTIONS
 
 if TYPE_CHECKING:
+    from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
     from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
     from semantic_kernel.contents.chat_history import ChatHistory
     from semantic_kernel.contents.chat_message_content import ChatMessageContent
@@ -41,6 +37,7 @@ class ChatCompletionClientBase(AIServiceClientBase, ABC):
 
     # Connectors that support function calling should set this to True
     SUPPORTS_FUNCTION_CALLING: ClassVar[bool] = False
+    instruction_role: str = Field(default="system", description="The role for instructions.")
 
     # region Internal methods to be implemented by the derived classes
 
@@ -102,6 +99,10 @@ async def get_chat_message_contents(
         Returns:
             A list of chat message contents representing the response(s) from the LLM.
         """
+        from semantic_kernel.connectors.ai.function_calling_utils import (
+            merge_function_results,
+        )
+
         # Create a copy of the settings to avoid modifying the original settings
         settings = copy.deepcopy(settings)
         # Later on, we already use the tools or equivalent settings, we cast here.
@@ -111,15 +112,6 @@ async def get_chat_message_contents(
         if not self.SUPPORTS_FUNCTION_CALLING:
             return await self._inner_get_chat_message_contents(chat_history, settings)
 
-        # For backwards compatibility we need to convert the `FunctionCallBehavior` to `FunctionChoiceBehavior`
-        # if this method is called with a `FunctionCallBehavior` object as part of the settings
-        if hasattr(settings, "function_call_behavior") and isinstance(
-            settings.function_call_behavior, FunctionCallBehavior
-        ):
-            settings.function_choice_behavior = FunctionChoiceBehavior.from_function_call_behavior(
-                settings.function_call_behavior
-            )
-
         kernel: "Kernel" = kwargs.get("kernel")  # type: ignore
         if settings.function_choice_behavior is not None:
             if kernel is None:
@@ -217,6 +209,10 @@ async def get_streaming_chat_message_contents(
         Yields:
             A stream representing the response(s) from the LLM.
         """
+        from semantic_kernel.connectors.ai.function_calling_utils import (
+            merge_streaming_function_results,
+        )
+
         # Create a copy of the settings to avoid modifying the original settings
         settings = copy.deepcopy(settings)
         # Later on, we already use the tools or equivalent settings, we cast here.
@@ -230,15 +226,6 @@ async def get_streaming_chat_message_contents(
                 yield streaming_chat_message_contents
             return
 
-        # For backwards compatibility we need to convert the `FunctionCallBehavior` to `FunctionChoiceBehavior`
-        # if this method is called with a `FunctionCallBehavior` object as part of the settings
-        if hasattr(settings, "function_call_behavior") and isinstance(
-            settings.function_call_behavior, FunctionCallBehavior
-        ):
-            settings.function_choice_behavior = FunctionChoiceBehavior.from_function_call_behavior(
-                settings.function_call_behavior
-            )
-
         kernel: "Kernel" = kwargs.get("kernel")  # type: ignore
         if settings.function_choice_behavior is not None:
             if kernel is None:
@@ -397,7 +384,7 @@ def _verify_function_choice_settings(self, settings: "PromptExecutionSettings")
 
     def _update_function_choice_settings_callback(
         self,
-    ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]:
+    ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]:
         """Return the callback function to update the settings from a function call configuration.
 
         Override this method to provide a custom callback function to
diff --git a/python/semantic_kernel/connectors/ai/function_call_behavior.py b/python/semantic_kernel/connectors/ai/function_call_behavior.py
deleted file mode 100644
index 913df72f7853..000000000000
--- a/python/semantic_kernel/connectors/ai/function_call_behavior.py
+++ /dev/null
@@ -1,209 +0,0 @@
-# Copyright (c) Microsoft. All rights reserved.
-
-from collections.abc import Callable
-from typing import TYPE_CHECKING, Literal
-
-from pydantic.dataclasses import dataclass
-from typing_extensions import deprecated
-
-from semantic_kernel.functions.kernel_function_metadata import KernelFunctionMetadata
-from semantic_kernel.kernel_pydantic import KernelBaseModel
-
-if TYPE_CHECKING:
-    from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
-    from semantic_kernel.kernel import Kernel
-
-DEFAULT_MAX_AUTO_INVOKE_ATTEMPTS = 5
-
-
-@dataclass
-class FunctionCallConfiguration:
-    """Class that holds the configured functions for function calling."""
-
-    available_functions: list["KernelFunctionMetadata"] | None = None
-    required_functions: list["KernelFunctionMetadata"] | None = None
-
-
-@deprecated("The `FunctionCallBehavior` class is deprecated; use `FunctionChoiceBehavior` instead.", category=None)
-class FunctionCallBehavior(KernelBaseModel):
-    """Class that controls function calling behavior.
-
-    DEPRECATED: This class has been replaced by FunctionChoiceBehavior.
-
-    Args:
-        enable_kernel_functions (bool): Enable kernel functions.
-        max_auto_invoke_attempts (int): The maximum number of auto invoke attempts.
-
-    Attributes:
-        enable_kernel_functions (bool): Enable kernel functions.
-        max_auto_invoke_attempts (int): The maximum number of auto invoke attempts.
-
-    Properties:
-        auto_invoke_kernel_functions: Check if the kernel functions should be auto-invoked.
-            Determined as max_auto_invoke_attempts > 0.
-
-    Methods:
-        configure: Configures the settings for the function call behavior,
-            the default version in this class, does nothing, use subclasses for different behaviors.
-
-    Class methods:
-        AutoInvokeKernelFunctions: Returns KernelFunctions class with auto_invoke enabled, all functions.
-        EnableKernelFunctions: Returns KernelFunctions class with auto_invoke disabled, all functions.
-        EnableFunctions: Set the enable kernel functions flag, filtered functions, auto_invoke optional.
-        RequiredFunction: Set the required function flag, auto_invoke optional.
-
-    """
-
-    enable_kernel_functions: bool = True
-    max_auto_invoke_attempts: int = DEFAULT_MAX_AUTO_INVOKE_ATTEMPTS
-
-    @property
-    def auto_invoke_kernel_functions(self):
-        """Check if the kernel functions should be auto-invoked."""
-        return self.max_auto_invoke_attempts > 0
-
-    @auto_invoke_kernel_functions.setter
-    def auto_invoke_kernel_functions(self, value: bool):
-        """Set the auto_invoke_kernel_functions flag."""
-        if not value:
-            self.max_auto_invoke_attempts = 0
-        else:
-            if self.max_auto_invoke_attempts == 0:
-                self.max_auto_invoke_attempts = DEFAULT_MAX_AUTO_INVOKE_ATTEMPTS
-
-    def configure(
-        self,
-        kernel: "Kernel",
-        update_settings_callback: Callable[..., None],
-        settings: "PromptExecutionSettings",
-    ) -> None:
-        """Configures the settings for the function call behavior.
-
-        Using the base ToolCallBehavior means that you manually have to set tool_choice and tools.
-
-        For different behaviors, use the subclasses of ToolCallBehavior:
-            KernelFunctions (all functions in the Kernel)
-            EnabledFunctions (filtered set of functions from the Kernel)
-            RequiredFunction (a single function)
-
-        By default, the update_settings_callback is called with FunctionCallConfiguration,
-        which contains a list of available functions or a list of required functions, it also
-        takes the PromptExecutionSettings object.
-
-        It should update the prompt execution settings with the available functions or required functions.
-
-        Alternatively you can override this class and add your own logic in the configure method.
-        """
-        return
-
-    @classmethod
-    @deprecated("Use the `FunctionChoiceBehavior` `Auto` class instead.")
-    def AutoInvokeKernelFunctions(cls) -> "KernelFunctions":
-        """Returns KernelFunctions class with auto_invoke enabled."""
-        return KernelFunctions(max_auto_invoke_attempts=DEFAULT_MAX_AUTO_INVOKE_ATTEMPTS)
-
-    @classmethod
-    @deprecated("Use the `FunctionChoiceBehavior` `Auto` class method instead.")
-    def EnableKernelFunctions(cls) -> "KernelFunctions":
-        """Returns KernelFunctions class with auto_invoke disabled.
-
-        Function calls are enabled in this case, just not invoked.
-        """
-        return KernelFunctions(max_auto_invoke_attempts=0)
-
-    @classmethod
-    @deprecated("Use the `FunctionChoiceBehavior` `Auto` class method instead.")
-    def EnableFunctions(
-        cls,
-        auto_invoke: bool = False,
-        *,
-        filters: dict[
-            Literal["excluded_plugins", "included_plugins", "excluded_functions", "included_functions"], list[str]
-        ]
-        | None = {},
-    ) -> "EnabledFunctions":
-        """Set the enable kernel functions flag."""
-        return EnabledFunctions(
-            filters=filters, max_auto_invoke_attempts=DEFAULT_MAX_AUTO_INVOKE_ATTEMPTS if auto_invoke else 0
-        )
-
-    @classmethod
-    @deprecated("Use the `FunctionChoiceBehavior` `Required` class method instead.")
-    def RequiredFunction(
-        cls,
-        auto_invoke: bool = False,
-        *,
-        function_fully_qualified_name: str,
-    ) -> "RequiredFunction":
-        """Set the required function flag."""
-        return RequiredFunction(
-            function_fully_qualified_name=function_fully_qualified_name,
-            max_auto_invoke_attempts=1 if auto_invoke else 0,
-        )
-
-
-@deprecated("Use the `FunctionChoiceBehavior` `Auto` class instead.")
-class KernelFunctions(FunctionCallBehavior):
-    """Function call behavior for making all kernel functions available for tool calls."""
-
-    def configure(
-        self,
-        kernel: "Kernel",
-        update_settings_callback: Callable[..., None],
-        settings: "PromptExecutionSettings",
-    ) -> None:
-        """Set the options for the tool call behavior in the settings."""
-        if self.enable_kernel_functions:
-            update_settings_callback(
-                FunctionCallConfiguration(available_functions=kernel.get_full_list_of_function_metadata()), settings
-            )
-
-
-@deprecated("Use the `FunctionChoiceBehavior` `Auto` class instead.")
-class EnabledFunctions(FunctionCallBehavior):
-    """Function call behavior for making a filtered set of functions available for tool calls."""
-
-    filters: dict[
-        Literal["excluded_plugins", "included_plugins", "excluded_functions", "included_functions"], list[str]
-    ]
-
-    def configure(
-        self,
-        kernel: "Kernel",
-        update_settings_callback: Callable[..., None],
-        settings: "PromptExecutionSettings",
-    ) -> None:
-        """Set the options for the tool call behavior in the settings."""
-        if self.enable_kernel_functions:
-            update_settings_callback(
-                FunctionCallConfiguration(available_functions=kernel.get_list_of_function_metadata(self.filters)),
-                settings,
-            )
-
-
-@deprecated("Use the `FunctionChoiceBehavior` `Required` class instead.")
-class RequiredFunction(FunctionCallBehavior):
-    """Function call behavior for making a single function available for tool calls."""
-
-    function_fully_qualified_name: str
-
-    def configure(
-        self,
-        kernel: "Kernel",
-        update_settings_callback: Callable[..., None],
-        settings: "PromptExecutionSettings",
-    ) -> None:
-        """Set the options for the tool call behavior in the settings."""
-        if not self.enable_kernel_functions:
-            return
-        # since using this always calls this single function, we do not want to allow repeated calls
-        if self.max_auto_invoke_attempts > 1:
-            self.max_auto_invoke_attempts = 1
-        update_settings_callback(
-            FunctionCallConfiguration(
-                required_functions=kernel.get_list_of_function_metadata({
-                    "included_functions": [self.function_fully_qualified_name]
-                })
-            ),
-            settings,
-        )
diff --git a/python/semantic_kernel/connectors/ai/function_calling_utils.py b/python/semantic_kernel/connectors/ai/function_calling_utils.py
index c7ab3dba6b39..7a5c2950c4e0 100644
--- a/python/semantic_kernel/connectors/ai/function_calling_utils.py
+++ b/python/semantic_kernel/connectors/ai/function_calling_utils.py
@@ -3,9 +3,6 @@
 from collections import OrderedDict
 from typing import TYPE_CHECKING, Any
 
-from semantic_kernel.contents.chat_message_content import ChatMessageContent
-from semantic_kernel.contents.function_result_content import FunctionResultContent
-from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
 from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError
 
@@ -15,6 +12,8 @@
         FunctionChoiceType,
     )
     from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
+    from semantic_kernel.contents.chat_message_content import ChatMessageContent
+    from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
     from semantic_kernel.functions.kernel_function_metadata import KernelFunctionMetadata
 
 
@@ -80,13 +79,16 @@ def _combine_filter_dicts(*dicts: dict[str, list[str]]) -> dict:
 
 
 def merge_function_results(
-    messages: list[ChatMessageContent],
-) -> list[ChatMessageContent]:
+    messages: list["ChatMessageContent"],
+) -> list["ChatMessageContent"]:
     """Combine multiple function result content types to one chat message content type.
 
     This method combines the FunctionResultContent items from separate ChatMessageContent messages,
     and is used in the event that the `context.terminate = True` condition is met.
     """
+    from semantic_kernel.contents.chat_message_content import ChatMessageContent
+    from semantic_kernel.contents.function_result_content import FunctionResultContent
+
     items: list[Any] = []
     for message in messages:
         items.extend([item for item in message.items if isinstance(item, FunctionResultContent)])
@@ -99,10 +101,10 @@ def merge_function_results(
 
 
 def merge_streaming_function_results(
-    messages: list[ChatMessageContent | StreamingChatMessageContent],
+    messages: list["ChatMessageContent | StreamingChatMessageContent"],
     ai_model_id: str,
     function_invoke_attempt: int,
-) -> list[StreamingChatMessageContent]:
+) -> list["StreamingChatMessageContent"]:
     """Combine multiple streaming function result content types to one streaming chat message content type.
 
     This method combines the FunctionResultContent items from separate StreamingChatMessageContent messages,
@@ -116,6 +118,9 @@ def merge_streaming_function_results(
     Returns:
         The combined streaming chat message content type.
     """
+    from semantic_kernel.contents.function_result_content import FunctionResultContent
+    from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+
     items: list[Any] = []
     for message in messages:
         items.extend([item for item in message.items if isinstance(item, FunctionResultContent)])
diff --git a/python/semantic_kernel/connectors/ai/function_choice_behavior.py b/python/semantic_kernel/connectors/ai/function_choice_behavior.py
index 759274d632f2..f32a57e26952 100644
--- a/python/semantic_kernel/connectors/ai/function_choice_behavior.py
+++ b/python/semantic_kernel/connectors/ai/function_choice_behavior.py
@@ -2,18 +2,14 @@
 
 import logging
 from collections.abc import Callable
-from enum import Enum
 from typing import TYPE_CHECKING, Literal, TypeVar
 
-from typing_extensions import deprecated
-
-from semantic_kernel.connectors.ai.function_calling_utils import _combine_filter_dicts
+from semantic_kernel.connectors.ai.function_choice_type import FunctionChoiceType
 from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError
 from semantic_kernel.kernel_pydantic import KernelBaseModel
 from semantic_kernel.utils.experimental_decorator import experimental_class
 
 if TYPE_CHECKING:
-    from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior
     from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
     from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
     from semantic_kernel.kernel import Kernel
@@ -27,15 +23,6 @@
 _T = TypeVar("_T", bound="FunctionChoiceBehavior")
 
 
-@experimental_class
-class FunctionChoiceType(Enum):
-    """The type of function choice behavior."""
-
-    AUTO = "auto"
-    NONE = "none"
-    REQUIRED = "required"
-
-
 @experimental_class
 class FunctionChoiceBehavior(KernelBaseModel):
     """Class that controls function choice behavior.
@@ -75,31 +62,6 @@ class FunctionChoiceBehavior(KernelBaseModel):
     ) = None
     type_: FunctionChoiceType | None = None
 
-    @classmethod
-    @deprecated("The `FunctionCallBehavior` class is deprecated; use `FunctionChoiceBehavior` instead.")
-    def from_function_call_behavior(cls: type[_T], behavior: "FunctionCallBehavior") -> _T:
-        """Create a FunctionChoiceBehavior from a FunctionCallBehavior."""
-        from semantic_kernel.connectors.ai.function_call_behavior import (
-            EnabledFunctions,
-            KernelFunctions,
-            RequiredFunction,
-        )
-
-        if isinstance(behavior, (EnabledFunctions, KernelFunctions)):
-            return cls.Auto(
-                auto_invoke=behavior.auto_invoke_kernel_functions,
-                filters=behavior.filters if hasattr(behavior, "filters") else None,
-            )
-        if isinstance(behavior, (RequiredFunction)):
-            return cls.Required(
-                auto_invoke=behavior.auto_invoke_kernel_functions,
-                filters={"included_functions": [behavior.function_fully_qualified_name]},
-            )
-        return cls(
-            enable_kernel_functions=behavior.enable_kernel_functions,
-            maximum_auto_invoke_attempts=behavior.max_auto_invoke_attempts,
-        )
-
     @property
     def auto_invoke_kernel_functions(self):
         """Return True if auto_invoke_kernel_functions is enabled."""
@@ -218,6 +180,8 @@ def Required(
     @classmethod
     def from_dict(cls: type[_T], data: dict) -> _T:
         """Create a FunctionChoiceBehavior from a dictionary."""
+        from semantic_kernel.connectors.ai.function_calling_utils import _combine_filter_dicts
+
         type_map = {
             "auto": cls.Auto,
             "none": cls.NoneInvoke,
diff --git a/python/semantic_kernel/connectors/ai/function_choice_type.py b/python/semantic_kernel/connectors/ai/function_choice_type.py
new file mode 100644
index 000000000000..d4bc2b3a598f
--- /dev/null
+++ b/python/semantic_kernel/connectors/ai/function_choice_type.py
@@ -0,0 +1,14 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+from enum import Enum
+
+from semantic_kernel.utils.experimental_decorator import experimental_class
+
+
+@experimental_class
+class FunctionChoiceType(Enum):  # function_choice_behavior.py imports this enum from this module
+    """The type of function choice behavior."""
+
+    AUTO = "auto"  # same string that FunctionChoiceBehavior.from_dict's type_map maps to cls.Auto
+    NONE = "none"  # same string that FunctionChoiceBehavior.from_dict's type_map maps to cls.NoneInvoke
+    REQUIRED = "required"  # NOTE(review): presumably the type_map key for cls.Required; confirm in from_dict
diff --git a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py
index df8f64cf4c6c..b7005c3c1f5d 100644
--- a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py
@@ -5,6 +5,11 @@
 from collections.abc import AsyncGenerator, Callable
 from typing import TYPE_CHECKING, Any, ClassVar
 
+if sys.version_info >= (3, 12):
+    from typing import override  # pragma: no cover
+else:
+    from typing_extensions import override  # pragma: no cover
+
 import google.generativeai as genai
 from google.generativeai import GenerativeModel
 from google.generativeai.protos import Candidate, Content
@@ -13,7 +18,6 @@
 
 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
 from semantic_kernel.connectors.ai.completion_usage import CompletionUsage
-from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
 from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType
 from semantic_kernel.connectors.ai.google.google_ai.google_ai_prompt_execution_settings import (
     GoogleAIChatPromptExecutionSettings,
@@ -50,13 +54,9 @@
 )
 
 if TYPE_CHECKING:
+    from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
     from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 
-if sys.version_info >= (3, 12):
-    from typing import override  # pragma: no cover
-else:
-    from typing_extensions import override  # pragma: no cover
-
 logger: logging.Logger = logging.getLogger(__name__)
 
 
@@ -186,7 +186,7 @@ def _verify_function_choice_settings(self, settings: "PromptExecutionSettings")
     @override
     def _update_function_choice_settings_callback(
         self,
-    ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]:
+    ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]:
         return update_settings_from_function_choice_configuration
 
     @override
diff --git a/python/semantic_kernel/connectors/ai/google/google_ai/services/utils.py b/python/semantic_kernel/connectors/ai/google/google_ai/services/utils.py
index 6086d0167694..77c88526eedb 100644
--- a/python/semantic_kernel/connectors/ai/google/google_ai/services/utils.py
+++ b/python/semantic_kernel/connectors/ai/google/google_ai/services/utils.py
@@ -2,11 +2,10 @@
 
 import json
 import logging
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 from google.generativeai.protos import Blob, Candidate, FunctionCall, FunctionResponse, Part
 
-from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
 from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType
 from semantic_kernel.connectors.ai.google.google_ai.google_ai_prompt_execution_settings import (
     GoogleAIChatPromptExecutionSettings,
@@ -15,7 +14,6 @@
     FUNCTION_CHOICE_TYPE_TO_GOOGLE_FUNCTION_CALLING_MODE,
     GEMINI_FUNCTION_NAME_SEPARATOR,
 )
-from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.function_result_content import FunctionResultContent
@@ -25,6 +23,10 @@
 from semantic_kernel.exceptions.service_exceptions import ServiceInvalidRequestError
 from semantic_kernel.functions.kernel_function_metadata import KernelFunctionMetadata
 
+if TYPE_CHECKING:
+    from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
+    from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
+
 logger: logging.Logger = logging.getLogger(__name__)
 
 
@@ -148,8 +150,8 @@ def kernel_function_metadata_to_google_ai_function_call_format(metadata: KernelF
 
 
 def update_settings_from_function_choice_configuration(
-    function_choice_configuration: FunctionCallChoiceConfiguration,
-    settings: PromptExecutionSettings,
+    function_choice_configuration: "FunctionCallChoiceConfiguration",
+    settings: "PromptExecutionSettings",
     type: FunctionChoiceType,
 ) -> None:
     """Update the settings from a FunctionChoiceConfiguration."""
diff --git a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/utils.py b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/utils.py
index 18f5b2feb6ca..f3211066d466 100644
--- a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/utils.py
+++ b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/utils.py
@@ -2,12 +2,12 @@
 
 import json
 import logging
+from typing import TYPE_CHECKING
 
 from google.cloud.aiplatform_v1beta1.types.content import Blob, Candidate, Part
 from google.cloud.aiplatform_v1beta1.types.tool import FunctionCall, FunctionResponse
 from vertexai.generative_models import FunctionDeclaration, Tool, ToolConfig
 
-from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
 from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType
 from semantic_kernel.connectors.ai.google.shared_utils import (
     FUNCTION_CHOICE_TYPE_TO_GOOGLE_FUNCTION_CALLING_MODE,
@@ -16,7 +16,6 @@
 from semantic_kernel.connectors.ai.google.vertex_ai.vertex_ai_prompt_execution_settings import (
     VertexAIChatPromptExecutionSettings,
 )
-from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.function_result_content import FunctionResultContent
@@ -26,6 +25,10 @@
 from semantic_kernel.exceptions.service_exceptions import ServiceInvalidRequestError
 from semantic_kernel.functions.kernel_function_metadata import KernelFunctionMetadata
 
+if TYPE_CHECKING:
+    from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
+    from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
+
 logger: logging.Logger = logging.getLogger(__name__)
 
 
@@ -149,8 +152,8 @@ def kernel_function_metadata_to_vertex_ai_function_call_format(metadata: KernelF
 
 
 def update_settings_from_function_choice_configuration(
-    function_choice_configuration: FunctionCallChoiceConfiguration,
-    settings: PromptExecutionSettings,
+    function_choice_configuration: "FunctionCallChoiceConfiguration",
+    settings: "PromptExecutionSettings",
     type: FunctionChoiceType,
 ) -> None:
     """Update the settings from a FunctionChoiceConfiguration."""
diff --git a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py
index 6372c71c5b1c..bd7c1346accf 100644
--- a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py
@@ -2,7 +2,12 @@
 
 import sys
 from collections.abc import AsyncGenerator, AsyncIterable, Callable
-from typing import Any, ClassVar
+from typing import TYPE_CHECKING, Any, ClassVar
+
+if sys.version_info >= (3, 12):
+    from typing import override  # pragma: no cover
+else:
+    from typing_extensions import override  # pragma: no cover
 
 import vertexai
 from google.cloud.aiplatform_v1beta1.types.content import Content
@@ -11,7 +16,6 @@
 
 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
 from semantic_kernel.connectors.ai.completion_usage import CompletionUsage
-from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
 from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType
 from semantic_kernel.connectors.ai.google.shared_utils import (
     filter_system_message,
@@ -29,7 +33,6 @@
     VertexAIChatPromptExecutionSettings,
 )
 from semantic_kernel.connectors.ai.google.vertex_ai.vertex_ai_settings import VertexAISettings
-from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 from semantic_kernel.contents.chat_history import ChatHistory
 from semantic_kernel.contents.chat_message_content import ITEM_TYPES, ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
@@ -48,10 +51,9 @@
     trace_streaming_chat_completion,
 )
 
-if sys.version_info >= (3, 12):
-    from typing import override  # pragma: no cover
-else:
-    from typing_extensions import override  # pragma: no cover
+if TYPE_CHECKING:
+    from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
+    from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 
 
 class VertexAIChatCompletion(VertexAIBase, ChatCompletionClientBase):
@@ -181,7 +183,7 @@ def _verify_function_choice_settings(self, settings: "PromptExecutionSettings")
     @override
     def _update_function_choice_settings_callback(
         self,
-    ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]:
+    ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]:
         return update_settings_from_function_choice_configuration
 
     @override
diff --git a/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py
index b374235225a4..2405897a6c39 100644
--- a/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py
@@ -3,7 +3,7 @@
 import logging
 import sys
 from collections.abc import AsyncGenerator, Callable
-from typing import Any, ClassVar
+from typing import TYPE_CHECKING, Any, ClassVar
 
 if sys.version_info >= (3, 12):
     from typing import override  # pragma: no cover
@@ -24,7 +24,6 @@
 
 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
 from semantic_kernel.connectors.ai.completion_usage import CompletionUsage
-from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
 from semantic_kernel.connectors.ai.function_calling_utils import kernel_function_metadata_to_function_call_format
 from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType
 from semantic_kernel.connectors.ai.mistral_ai.prompt_execution_settings.mistral_ai_prompt_execution_settings import (
@@ -32,7 +31,6 @@
 )
 from semantic_kernel.connectors.ai.mistral_ai.services.mistral_ai_base import MistralAIBase
 from semantic_kernel.connectors.ai.mistral_ai.settings.mistral_ai_settings import MistralAISettings
-from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 from semantic_kernel.contents import (
     ChatMessageContent,
     FunctionCallContent,
@@ -50,6 +48,10 @@
     trace_streaming_chat_completion,
 )
 
+if TYPE_CHECKING:
+    from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
+    from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
+
 logger: logging.Logger = logging.getLogger(__name__)
 
 
@@ -315,7 +317,7 @@ def update_settings_from_function_call_configuration_mistral(
     @override
     def _update_function_choice_settings_callback(
         self,
-    ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]:
+    ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]:
         return self.update_settings_from_function_call_configuration_mistral
 
     @override
diff --git a/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py b/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py
index baf2d04f2914..103133af2c9f 100644
--- a/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py
@@ -17,7 +17,6 @@
 
 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
 from semantic_kernel.connectors.ai.completion_usage import CompletionUsage
-from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
 from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType
 from semantic_kernel.connectors.ai.ollama.ollama_prompt_execution_settings import OllamaChatPromptExecutionSettings
 from semantic_kernel.connectors.ai.ollama.ollama_settings import OllamaSettings
@@ -45,6 +44,7 @@
 )
 
 if TYPE_CHECKING:
+    from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
     from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 
 CMC_TYPE = TypeVar("CMC_TYPE", bound=ChatMessageContent)
@@ -137,7 +137,7 @@ def _verify_function_choice_settings(self, settings: "PromptExecutionSettings")
     @override
     def _update_function_choice_settings_callback(
         self,
-    ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]:
+    ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]:
         return update_settings_from_function_choice_configuration
 
     @override
diff --git a/python/semantic_kernel/connectors/ai/ollama/services/utils.py b/python/semantic_kernel/connectors/ai/ollama/services/utils.py
index 7cf0e6e225cb..9745a4063484 100644
--- a/python/semantic_kernel/connectors/ai/ollama/services/utils.py
+++ b/python/semantic_kernel/connectors/ai/ollama/services/utils.py
@@ -2,19 +2,22 @@
 
 import json
 from collections.abc import Callable, Mapping
+from typing import TYPE_CHECKING
 
 from ollama._types import Message
 
-from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
 from semantic_kernel.connectors.ai.function_calling_utils import kernel_function_metadata_to_function_call_format
 from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType
-from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.function_result_content import FunctionResultContent
 from semantic_kernel.contents.image_content import ImageContent
 from semantic_kernel.contents.utils.author_role import AuthorRole
 
+if TYPE_CHECKING:
+    from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
+    from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
+
 
 def _format_system_message(message: ChatMessageContent) -> Message:
     """Format a system message to the expected object for the client.
@@ -110,8 +113,8 @@ def _format_tool_message(message: ChatMessageContent) -> Message:
 
 
 def update_settings_from_function_choice_configuration(
-    function_choice_configuration: FunctionCallChoiceConfiguration,
-    settings: PromptExecutionSettings,
+    function_choice_configuration: "FunctionCallChoiceConfiguration",
+    settings: "PromptExecutionSettings",
     type: FunctionChoiceType,
 ) -> None:
     """Update the settings from a FunctionChoiceConfiguration.
diff --git a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py
index 425639d6a291..d2b37d44bb40 100644
--- a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py
+++ b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py
@@ -1,17 +1,10 @@
 # Copyright (c) Microsoft. All rights reserved.
 
 import logging
-import sys
 from typing import Annotated, Any, Literal
 
-if sys.version_info >= (3, 11):
-    from typing import Self  # pragma: no cover
-else:
-    from typing_extensions import Self  # pragma: no cover
-
 from pydantic import BaseModel, Field, field_validator, model_validator
 
-from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior
 from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 from semantic_kernel.exceptions import ServiceInvalidExecutionSettingsError
 
@@ -73,7 +66,6 @@ class OpenAIChatPromptExecutionSettings(OpenAIPromptExecutionSettings):
     messages: Annotated[
         list[dict[str, Any]] | None, Field(description="Do not set this manually. It is set by the service.")
     ] = None
-    function_call_behavior: Annotated[FunctionCallBehavior | None, Field(exclude=True)] = None
     parallel_tool_calls: bool | None = True
     tools: Annotated[
         list[dict[str, Any]] | None,
@@ -153,32 +145,6 @@ def validate_response_format_and_set_flag(cls, values: Any) -> Any:
 
         return values
 
-    @model_validator(mode="before")
-    @classmethod
-    def validate_function_calling_behaviors(cls, data: Any) -> Any:
-        """Check if function_call_behavior is set and if so, move to use function_choice_behavior instead."""
-        # In an attempt to phase out the use of `function_call_behavior` in favor of `function_choice_behavior`,
-        # we are syncing the `function_call_behavior` with `function_choice_behavior` if the former is set.
-        # This allows us to make decisions off of `function_choice_behavior`. Anytime the `function_call_behavior`
-        # is updated, this validation will run to ensure the `function_choice_behavior` stays in sync.
-        from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
-
-        if isinstance(data, dict) and "function_call_behavior" in data.get("extension_data", {}):
-            data["function_choice_behavior"] = FunctionChoiceBehavior.from_function_call_behavior(
-                data.get("extension_data", {}).get("function_call_behavior")
-            )
-        return data
-
-    @field_validator("function_call_behavior", mode="after")
-    @classmethod
-    def check_for_function_call_behavior(cls, v) -> Self:
-        """Check if function_choice_behavior is set, if not, set it to default."""
-        if v is not None:
-            logger.warning(
-                "The `function_call_behavior` parameter is deprecated. Please use the `function_choice_behavior` parameter instead."  # noqa: E501
-            )
-        return v
-
 
 class OpenAIEmbeddingPromptExecutionSettings(PromptExecutionSettings):
     """Specific settings for the text embedding endpoint."""
diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py b/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py
index 03289fd45d58..2549f9027961 100644
--- a/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py
@@ -54,6 +54,7 @@ def __init__(
         async_client: AsyncAzureOpenAI | None = None,
         env_file_path: str | None = None,
         env_file_encoding: str | None = None,
+        instruction_role: str | None = None,
     ) -> None:
         """Initialize an AzureChatCompletion service.
 
@@ -77,6 +78,8 @@ def __init__(
             async_client (AsyncAzureOpenAI | None): An existing client to use. (Optional)
             env_file_path (str | None): Use the environment settings file as a fallback to using env vars.
             env_file_encoding (str | None): The encoding of the environment settings file, defaults to 'utf-8'.
+            instruction_role (str | None): The role to use for 'instruction' messages, for example, summarization
+                prompts could use `developer` or `system`. (Optional)
         """
         try:
             azure_openai_settings = AzureOpenAISettings.create(
@@ -108,6 +111,7 @@ def __init__(
             default_headers=default_headers,
             ai_model_type=OpenAIModelTypes.CHAT,
             client=async_client,
+            instruction_role=instruction_role,
         )
 
     @classmethod
diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/azure_config_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/azure_config_base.py
index 93662af62579..da50e4ee56b6 100644
--- a/python/semantic_kernel/connectors/ai/open_ai/services/azure_config_base.py
+++ b/python/semantic_kernel/connectors/ai/open_ai/services/azure_config_base.py
@@ -36,6 +36,7 @@ def __init__(
         token_endpoint: str | None = None,
         default_headers: Mapping[str, str] | None = None,
         client: AsyncAzureOpenAI | None = None,
+        instruction_role: str | None = None,
     ) -> None:
         """Internal class for configuring a connection to an Azure OpenAI service.
 
@@ -56,6 +57,8 @@ def __init__(
             token_endpoint (str): Azure AD token endpoint use to get the token. (Optional)
             default_headers (Union[Mapping[str, str], None]): Default headers for HTTP requests. (Optional)
             client (AsyncAzureOpenAI): An existing client to use. (Optional)
+            instruction_role (str | None): The role to use for 'instruction' messages, for example, summarization
+                prompts could use `developer` or `system`. (Optional)
 
         """
         # Merge APP_INFO into the headers if it exists
@@ -95,6 +98,8 @@ def __init__(
         }
         if service_id:
             args["service_id"] = service_id
+        if instruction_role:
+            args["instruction_role"] = instruction_role
         super().__init__(**args)
 
     def to_dict(self) -> dict[str, str]:
diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion.py
index c643f11859a7..6d59561377ba 100644
--- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion.py
@@ -30,6 +30,7 @@ def __init__(
         async_client: AsyncOpenAI | None = None,
         env_file_path: str | None = None,
         env_file_encoding: str | None = None,
+        instruction_role: str | None = None,
     ) -> None:
         """Initialize an OpenAIChatCompletion service.
 
@@ -47,6 +48,7 @@ def __init__(
             env_file_path (str | None): Use the environment settings file as a fallback
                 to environment variables. (Optional)
             env_file_encoding (str | None): The encoding of the environment settings file. (Optional)
+            instruction_role (str | None): The role to use for 'instruction' messages, e.g. `developer` or `system`.
         """
         try:
             openai_settings = OpenAISettings.create(
@@ -72,6 +74,7 @@ def __init__(
             ai_model_type=OpenAIModelTypes.CHAT,
             default_headers=default_headers,
             client=async_client,
+            instruction_role=instruction_role,
         )
 
     @classmethod
diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py
index 0c1e843c5d47..605b78812ae5 100644
--- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py
+++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py
@@ -19,17 +19,16 @@
 
 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
 from semantic_kernel.connectors.ai.completion_usage import CompletionUsage
-from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior
-from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
 from semantic_kernel.connectors.ai.function_calling_utils import update_settings_from_function_call_configuration
 from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior, FunctionChoiceType
 from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import (
     OpenAIChatPromptExecutionSettings,
 )
 from semantic_kernel.connectors.ai.open_ai.services.open_ai_handler import OpenAIHandler
-from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
+from semantic_kernel.contents.annotation_content import AnnotationContent
 from semantic_kernel.contents.chat_history import ChatHistory
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
+from semantic_kernel.contents.file_reference_content import FileReferenceContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
 from semantic_kernel.contents.streaming_text_content import StreamingTextContent
@@ -46,6 +45,7 @@
 )
 
 if TYPE_CHECKING:
+    from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
     from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
     from semantic_kernel.functions.kernel_arguments import KernelArguments
     from semantic_kernel.kernel import Kernel
@@ -150,7 +150,7 @@ def _verify_function_choice_settings(self, settings: "PromptExecutionSettings")
     @override
     def _update_function_choice_settings_callback(
         self,
-    ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]:
+    ) -> Callable[["FunctionCallChoiceConfiguration", "PromptExecutionSettings", FunctionChoiceType], None]:
         return update_settings_from_function_call_configuration
 
     @override
@@ -267,6 +267,41 @@ def _get_function_call_from_chat_choice(self, choice: Choice | ChunkChoice) -> l
         # When you enable asynchronous content filtering in Azure OpenAI, you may receive empty deltas
         return []
 
+    def _prepare_chat_history_for_request(
+        self,
+        chat_history: "ChatHistory",
+        role_key: str = "role",
+        content_key: str = "content",
+    ) -> Any:
+        """Prepare the chat history for a request.
+
+        Key names are customizable; a "system" role is emitted as "developer" when instruction_role is "developer".
+
+        ChatRole.TOOL messages need to be formatted differently from system/user/assistant messages:
+            They require a "tool_call_id" and (function) "name" key, and the "metadata" key should
+            be removed. The "encoding" key should also be removed.
+
+        Override this method to customize the formatting of the chat history for a request.
+
+        Args:
+            chat_history (ChatHistory): The chat history to prepare.
+            role_key (str): The key name for the role/author.
+            content_key (str): The key name for the content/message.
+
+        Returns:
+            prepared_chat_history (Any): A list with one dict per message, from message.to_dict() plus the role key.
+        """
+        return [
+            {
+                **message.to_dict(role_key=role_key, content_key=content_key),
+                role_key: "developer"
+                if self.instruction_role == "developer" and message.to_dict(role_key=role_key)[role_key] == "system"
+                else message.to_dict(role_key=role_key)[role_key],
+            }
+            for message in chat_history.messages
+            if not isinstance(message, (AnnotationContent, FileReferenceContent))
+        ]
+
     # endregion
 
     # region function calling
@@ -279,15 +314,9 @@ async def _process_function_call(
         arguments: "KernelArguments | None",
         function_call_count: int,
         request_index: int,
-        function_call_behavior: FunctionChoiceBehavior | FunctionCallBehavior,
+        function_call_behavior: FunctionChoiceBehavior,
     ) -> "AutoFunctionInvocationContext | None":
         """Processes the tool calls in the result and update the chat history."""
-        # deprecated and might not even be used anymore, hard to trigger directly
-        if isinstance(function_call_behavior, FunctionCallBehavior):  # pragma: no cover
-            # We need to still support a `FunctionCallBehavior` input so it doesn't break current
-            # customers. Map from `FunctionCallBehavior` -> `FunctionChoiceBehavior`
-            function_call_behavior = FunctionChoiceBehavior.from_function_call_behavior(function_call_behavior)
-
         return await kernel.invoke_function_call(
             function_call=function_call,
             chat_history=chat_history,
diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_config_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_config_base.py
index 7ead64865445..d3d72795665b 100644
--- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_config_base.py
+++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_config_base.py
@@ -29,6 +29,7 @@ def __init__(
         service_id: str | None = None,
         default_headers: Mapping[str, str] | None = None,
         client: AsyncOpenAI | None = None,
+        instruction_role: str | None = None,
     ) -> None:
         """Initialize a client for OpenAI services.
 
@@ -48,6 +49,8 @@ def __init__(
             default_headers (Mapping[str, str]): Default headers
                 for HTTP requests. (Optional)
             client (AsyncOpenAI): An existing OpenAI client, optional.
+            instruction_role (str): The role to use for 'instruction'
+                messages, for example, summarization prompts could use `developer` or `system`. (Optional)
 
         """
         # Merge APP_INFO into the headers if it exists
@@ -71,6 +74,8 @@ def __init__(
         }
         if service_id:
             args["service_id"] = service_id
+        if instruction_role:
+            args["instruction_role"] = instruction_role
         super().__init__(**args)
 
     def to_dict(self) -> dict[str, str]:
diff --git a/python/semantic_kernel/contents/__init__.py b/python/semantic_kernel/contents/__init__.py
index 352a5915cc68..c326115ccd86 100644
--- a/python/semantic_kernel/contents/__init__.py
+++ b/python/semantic_kernel/contents/__init__.py
@@ -6,6 +6,8 @@
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.function_result_content import FunctionResultContent
+from semantic_kernel.contents.history_reducer.chat_history_summarization_reducer import ChatHistorySummarizationReducer
+from semantic_kernel.contents.history_reducer.chat_history_truncation_reducer import ChatHistoryTruncationReducer
 from semantic_kernel.contents.image_content import ImageContent
 from semantic_kernel.contents.streaming_annotation_content import StreamingAnnotationContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
@@ -20,6 +22,8 @@
     "AudioContent",
     "AuthorRole",
     "ChatHistory",
+    "ChatHistorySummarizationReducer",
+    "ChatHistoryTruncationReducer",
     "ChatMessageContent",
     "FinishReason",
     "FunctionCallContent",
diff --git a/python/semantic_kernel/contents/function_call_content.py b/python/semantic_kernel/contents/function_call_content.py
index 08b9c9e19757..7067311f4c8a 100644
--- a/python/semantic_kernel/contents/function_call_content.py
+++ b/python/semantic_kernel/contents/function_call_content.py
@@ -221,4 +221,13 @@ def to_dict(self) -> dict[str, str | Any]:
 
     def __hash__(self) -> int:
         """Return the hash of the function call content."""
-        return hash((self.tag, self.id, self.index, self.name, self.function_name, self.plugin_name, self.arguments))
+        args_hashable = frozenset(self.arguments.items()) if isinstance(self.arguments, Mapping) else None
+        return hash((
+            self.tag,
+            self.id,
+            self.index,
+            self.name,
+            self.function_name,
+            self.plugin_name,
+            args_hashable,
+        ))
diff --git a/python/semantic_kernel/contents/history_reducer/__init__.py b/python/semantic_kernel/contents/history_reducer/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/python/semantic_kernel/contents/history_reducer/chat_history_reducer.py b/python/semantic_kernel/contents/history_reducer/chat_history_reducer.py
new file mode 100644
index 000000000000..bc05c705ceda
--- /dev/null
+++ b/python/semantic_kernel/contents/history_reducer/chat_history_reducer.py
@@ -0,0 +1,31 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import sys
+from abc import ABC, abstractmethod
+
+if sys.version_info < (3, 11):  # tuple compare; the sys.version string sorts "3.9" after "3.11"
+    from typing_extensions import Self  # pragma: no cover
+else:
+    from typing import Self  # type: ignore # pragma: no cover
+
+from pydantic import Field
+
+from semantic_kernel.contents.chat_history import ChatHistory
+from semantic_kernel.utils.experimental_decorator import experimental_class
+
+
+@experimental_class
+class ChatHistoryReducer(ChatHistory, ABC):
+    """Abstract ChatHistory that defines the contract for reducing its own messages."""
+
+    target_count: int = Field(..., gt=0, description="Target message count.")
+    threshold_count: int = Field(0, ge=0, description="Threshold count to avoid orphaning messages.")
+
+    @abstractmethod
+    async def reduce(self) -> Self | None:
+        """Reduce the chat history in some way (e.g., truncate, summarize).
+
+        Returns:
+            The reducer itself (per the `Self` annotation) once reduced, or None if no change is needed.
+        """
+        ...
diff --git a/python/semantic_kernel/contents/history_reducer/chat_history_reducer_utils.py b/python/semantic_kernel/contents/history_reducer/chat_history_reducer_utils.py
new file mode 100644
index 000000000000..6742c0b56816
--- /dev/null
+++ b/python/semantic_kernel/contents/history_reducer/chat_history_reducer_utils.py
@@ -0,0 +1,211 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import logging
+from collections.abc import Callable
+
+from semantic_kernel.contents.chat_message_content import ChatMessageContent
+from semantic_kernel.contents.function_call_content import FunctionCallContent
+from semantic_kernel.contents.function_result_content import FunctionResultContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
+from semantic_kernel.utils.experimental_decorator import experimental_function
+
+logger = logging.getLogger(__name__)
+
+
+SUMMARY_METADATA_KEY = "__summary__"
+
+
+@experimental_function
+def get_call_result_pairs(history: list[ChatMessageContent]) -> list[tuple[int, int]]:
+    """Identify all (FunctionCallContent, FunctionResultContent) pairs in the history.
+
+    Return a list of (call_index, result_index) message-index pairs, matched on the items' shared ID.
+    """
+    pairs: list[tuple[int, int]] = []  # (call message index, result message index)
+    call_ids_seen: dict[str, int] = {}  # call ID -> index of the last message seen with that call
+
+    # Gather all function-call IDs and their indices.
+    for i, msg in enumerate(history):
+        for item in msg.items:
+            if isinstance(item, FunctionCallContent) and item.id is not None:
+                call_ids_seen[item.id] = i
+
+    # Now, match each FunctionResultContent to the recorded call with the same ID.
+    for j, msg in enumerate(history):
+        for item in msg.items:
+            if isinstance(item, FunctionResultContent) and item.id is not None:
+                call_id = item.id
+                if call_id in call_ids_seen:
+                    call_index = call_ids_seen[call_id]
+                    pairs.append((call_index, j))
+                    # Remove the call ID so we don't match it a second time
+                    del call_ids_seen[call_id]
+                    break  # at most one pair is recorded per result message
+
+    return pairs
+
+
+@experimental_function
+def locate_summarization_boundary(history: list[ChatMessageContent]) -> int:
+    """Identify the index of the first message that is not a summary message.
+
+    This is indicated by the presence of the SUMMARY_METADATA_KEY in the message metadata.
+
+    Returns:
+        The index of the first non-summary message, or len(history) if every message is a summary.
+    """
+    for idx, msg in enumerate(history):
+        if not msg.metadata or SUMMARY_METADATA_KEY not in msg.metadata:
+            return idx
+    return len(history)
+
+
+@experimental_function
+def locate_safe_reduction_index(
+    history: list[ChatMessageContent],
+    target_count: int,
+    threshold_count: int = 0,
+    offset_count: int = 0,
+) -> int | None:
+    """Identify the index of the first message at or beyond the specified target_count.
+
+    This index does not orphan sensitive content (function calls/results).
+
+    This method ensures that the presence of a function-call always follows with its result,
+    so the function-call and its function-result are never separated.
+
+    In addition, it attempts to locate a user message within the threshold window so that
+    context with the subsequent assistant response is preserved.
+
+    Args:
+        history: The entire chat history.
+        target_count: The desired message count after reduction.
+        threshold_count: The threshold beyond target_count required to trigger reduction.
+                         If total messages <= (target_count + threshold_count + offset_count), no reduction occurs.
+        offset_count: Optional number of messages to skip at the start (e.g. existing summary messages).
+
+    Returns:
+        The index that identifies the starting point for a reduced history that does not orphan
+        sensitive content. Returns None if reduction is not needed.
+    """
+    total_count = len(history)
+    threshold_index = total_count - (threshold_count or 0) - target_count
+    if threshold_index <= offset_count:
+        return None
+
+    message_index = total_count - target_count
+
+    # Move backward to avoid cutting function calls / results
+    while message_index >= offset_count:
+        if not any(
+            isinstance(item, (FunctionCallContent, FunctionResultContent)) for item in history[message_index].items
+        ):
+            break
+        message_index -= 1
+
+    # Initial truncation index. NOTE: this is offset_count - 1 when every scanned message has function content.
+    target_index = message_index
+
+    # Attempt to see if there's a user message in the threshold window
+    while message_index >= threshold_index:
+        if history[message_index].role == AuthorRole.USER:
+            return message_index
+        message_index -= 1
+
+    return target_index
+
+
+@experimental_function
+def extract_range(
+    history: list[ChatMessageContent],
+    start: int,
+    end: int | None = None,
+    filter_func: Callable[[ChatMessageContent], bool] | None = None,
+    preserve_pairs: bool = False,
+) -> list[ChatMessageContent]:
+    """Extract a range of messages from the source history, skipping any message we do not want to keep.
+
+    For example, function calls/results, if desired.
+
+    Args:
+        history: The source history.
+        start: The index of the first message to extract (inclusive).
+        end: The index of the last message to extract (exclusive). If None, extracts through end.
+        filter_func: A function that takes a ChatMessageContent and returns True if the message should
+                        be skipped, False otherwise.
+        preserve_pairs: If True, ensures that function call and result pairs are either both kept or both skipped.
+
+    Returns:
+        A list of extracted messages.
+    """
+    if end is None:
+        end = len(history)
+
+    sliced = list(range(start, end))
+
+    # If we need to preserve call->result pairs, gather them
+    pair_map = {}
+    if preserve_pairs:
+        pairs = get_call_result_pairs(history)
+        # store both directions for quick lookup; a message in several pairs keeps only its last partner
+        # call_idx -> result_idx, and also result_idx -> call_idx
+        for cidx, ridx in pairs:
+            pair_map[cidx] = ridx
+            pair_map[ridx] = cidx
+
+    extracted: list[ChatMessageContent] = []
+    i = 0
+    while i < len(sliced):
+        idx = sliced[i]
+        msg = history[idx]
+
+        # If filter_func excludes it, skip it
+        if filter_func and filter_func(msg):
+            i += 1
+            continue
+
+        # If preserve_pairs is on, and there's a paired index, skip or include them both
+        if preserve_pairs and idx in pair_map:
+            paired_idx = pair_map[idx]
+            # If the pair is within [start, end), we must keep or skip them together
+            if start <= paired_idx < end:
+                # Skip both if the partner is filtered out (msg itself already passed the filter above)
+                if filter_func and (filter_func(history[paired_idx]) or filter_func(msg)):
+                    # skip both
+                    i += 1
+                    # Also skip the paired index if it's in our current slice
+                    if paired_idx in sliced:
+                        # remove it from the slice so we don't process it again
+                        sliced.remove(paired_idx)
+                    continue
+                # keep both
+                extracted.append(msg)
+                if paired_idx > idx:
+                    # We'll skip the pair in the normal iteration by removing from slice
+                    # but add it to extracted right now
+                    extracted.append(history[paired_idx])
+                    if paired_idx in sliced:
+                        sliced.remove(paired_idx)
+                else:
+                    # paired_idx < idx: the partner sits earlier in the range and is
+                    # not re-added here; only the current message was appended above,
+                    # so no duplicate of the partner is produced
+                    pass
+                i += 1
+                continue
+            # If the paired_idx is outside [start, end), there's no conflict
+            # so we can just do normal logic
+            extracted.append(msg)
+            i += 1
+        else:
+            # keep it if filter_func not triggered
+            extracted.append(msg)
+            i += 1
+
+    return extracted
+
+
+@experimental_function
+def contains_function_call_or_result(msg: ChatMessageContent) -> bool:
+    """Return True if any item in the message is a FunctionCallContent or FunctionResultContent."""
+    return any(isinstance(item, (FunctionCallContent, FunctionResultContent)) for item in msg.items)
diff --git a/python/semantic_kernel/contents/history_reducer/chat_history_summarization_reducer.py b/python/semantic_kernel/contents/history_reducer/chat_history_summarization_reducer.py
new file mode 100644
index 000000000000..1feaf1a839ad
--- /dev/null
+++ b/python/semantic_kernel/contents/history_reducer/chat_history_summarization_reducer.py
@@ -0,0 +1,226 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import logging
+import sys
+from typing import Any
+
+from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
+from semantic_kernel.utils.experimental_decorator import experimental_class
+
+if sys.version_info < (3, 11):
+    from typing_extensions import Self  # pragma: no cover
+else:
+    from typing import Self  # type: ignore # pragma: no cover
+
+from pydantic import Field
+
+from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
+from semantic_kernel.const import DEFAULT_SERVICE_NAME
+from semantic_kernel.contents.chat_history import ChatHistory
+from semantic_kernel.contents.chat_message_content import ChatMessageContent
+from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer
+from semantic_kernel.contents.history_reducer.chat_history_reducer_utils import (
+    SUMMARY_METADATA_KEY,
+    contains_function_call_or_result,
+    extract_range,
+    locate_safe_reduction_index,
+    locate_summarization_boundary,
+)
+from semantic_kernel.exceptions.content_exceptions import ChatHistoryReducerException
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_SUMMARIZATION_PROMPT = """
+Provide a concise and complete summarization of the entire dialog that does not exceed 5 sentences.
+
+This summary must always:
+- Consider both user and assistant interactions
+- Maintain continuity for the purpose of further dialog
+- Include details from any existing summary
+- Focus on the most significant aspects of the dialog
+
+This summary must never:
+- Critique, correct, interpret, presume, or assume
+- Identify faults, mistakes, misunderstanding, or correctness
+- Analyze what has not occurred
+- Exclude details from any existing summary
+"""
+
+
+@experimental_class
+class ChatHistorySummarizationReducer(ChatHistoryReducer):
+    """A ChatHistory that summarizes its older messages once it grows past a target count."""
+
+    service: ChatCompletionClientBase  # chat completion service asked to write the summary
+    summarization_instructions: str = Field(
+        default_factory=lambda: DEFAULT_SUMMARIZATION_PROMPT,
+        description="The summarization instructions.",
+    )
+    use_single_summary: bool = Field(True, description="Whether to use a single summary message.")
+    fail_on_error: bool = Field(True, description="Raise error if summarization fails.")
+    service_id: str = Field(
+        default_factory=lambda: DEFAULT_SERVICE_NAME, description="The ID of the chat completion service."
+    )
+    include_function_content_in_summary: bool = Field(
+        False, description="Whether to include function calls/results in the summary."
+    )
+    execution_settings: PromptExecutionSettings | None = None  # when unset, built from service_id on demand
+
+    def __init__(
+        self,
+        service: ChatCompletionClientBase,
+        target_count: int,
+        service_id: str | None = None,
+        threshold_count: int | None = None,
+        summarization_instructions: str | None = None,
+        use_single_summary: bool | None = None,
+        fail_on_error: bool | None = None,
+        include_function_content_in_summary: bool | None = None,
+        execution_settings: PromptExecutionSettings | None = None,
+        **kwargs: Any,
+    ):
+        """Create a summarization reducer.
+
+        Every optional argument that is left as None is not forwarded to the parent
+        constructor, so that field is not set explicitly by this initializer.
+
+        Args:
+            service (ChatCompletionClientBase): The chat completion service that writes the summary.
+            target_count (int): The target number of messages to retain after applying summarization.
+            service_id (str | None): The ID of the chat completion service.
+            threshold_count (int | None): How far past target_count the history must grow before it is reduced.
+            summarization_instructions (str | None): The instructions given to the service for summarizing.
+            use_single_summary (bool | None): Whether to use a single summary message.
+            fail_on_error (bool | None): Whether a failed summarization raises an error.
+            include_function_content_in_summary (bool | None): Whether function calls/results are summarized too.
+            execution_settings (PromptExecutionSettings | None): The prompt execution settings.
+            **kwargs (Any): Additional keyword arguments passed on to the parent constructor.
+        """
+        optional_fields: dict[str, Any] = {
+            "service_id": service_id,
+            "threshold_count": threshold_count,
+            "summarization_instructions": summarization_instructions,
+            "use_single_summary": use_single_summary,
+            "fail_on_error": fail_on_error,
+            "include_function_content_in_summary": include_function_content_in_summary,
+            "execution_settings": execution_settings,
+        }
+        # Keep only the options the caller actually supplied
+        supplied = {name: value for name, value in optional_fields.items() if value is not None}
+        super().__init__(
+            service=service,
+            target_count=target_count,
+            **supplied,
+            **kwargs,
+        )
+
+    async def reduce(self) -> Self | None:
+        """Summarize the older messages past the target message count.
+
+        Returns:
+            This reducer if its messages were replaced by a summarized history, otherwise None.
+
+        Raises:
+            ChatHistoryReducerException: If summarization fails and fail_on_error is set.
+        """
+        history = self.messages
+        if len(history) <= self.target_count + (self.threshold_count or 0):
+            return None  # No summarization needed
+
+        logger.info("Performing chat history summarization check...")
+
+        # 1. Identify where existing summary messages end
+        insertion_point = locate_summarization_boundary(history)
+        if insertion_point == len(history):
+            # fallback fix: force boundary to something reasonable
+            logger.warning("All messages are summaries, forcing boundary to 0.")
+            insertion_point = 0
+
+        # 2. Locate the safe reduction index
+        truncation_index = locate_safe_reduction_index(
+            history,
+            self.target_count,
+            self.threshold_count,
+            offset_count=insertion_point,
+        )
+        if truncation_index is None:
+            logger.info("No valid truncation index found.")
+            return None
+
+        # 3. Extract only the chunk of messages that need summarizing. Function calls/results are
+        #    skipped unless include_function_content_in_summary is set; then pairs are never split.
+        messages_to_summarize = extract_range(
+            history,
+            start=0 if self.use_single_summary else insertion_point,
+            end=truncation_index,
+            filter_func=(contains_function_call_or_result if not self.include_function_content_in_summary else None),
+            preserve_pairs=self.include_function_content_in_summary,
+        )
+
+        if not messages_to_summarize:
+            logger.info("No messages to summarize.")
+            return None
+
+        try:
+            # 4. Summarize the extracted messages
+            summary_msg = await self._summarize(messages_to_summarize)
+            if not summary_msg:
+                return None
+            logger.info("Chat History Summarization completed.")
+
+            # Mark the newly-created summary with metadata
+            summary_msg.metadata[SUMMARY_METADATA_KEY] = True
+
+            # 5. Reassemble the new history
+            kept_summaries = history[:insertion_point] if insertion_point > 0 and not self.use_single_summary else []
+            self.messages = [*kept_summaries, summary_msg, *history[truncation_index:]]
+            return self
+        except Exception as ex:
+            if self.fail_on_error:
+                raise ChatHistoryReducerException("Chat History Summarization failed.") from ex
+            # Best effort: keep the history unchanged, but record why summarization failed
+            logger.warning("Summarization failed, continuing without summary.", exc_info=ex)
+            return None
+
+    async def _summarize(self, messages: list[ChatMessageContent]) -> ChatMessageContent | None:
+        """Ask the chat completion service for one message that summarizes the given messages."""
+        from semantic_kernel.contents.utils.author_role import AuthorRole
+
+        summary_request = ChatHistory(messages=messages)
+        # The instructions use the settings' instruction_role when one is available, else SYSTEM
+        configured_settings = self.execution_settings
+        if configured_settings:
+            instruction_role = getattr(configured_settings, "instruction_role", AuthorRole.SYSTEM)
+        else:
+            instruction_role = AuthorRole.SYSTEM
+        instructions = ChatMessageContent(role=instruction_role, content=self.summarization_instructions)
+        summary_request.add_message(instructions)
+
+        # Without configured settings, build them from the service's settings class and service_id
+        request_settings = configured_settings or self.service.get_prompt_execution_settings_class()(
+            service_id=self.service_id
+        )
+        return await self.service.get_chat_message_content(chat_history=summary_request, settings=request_settings)
+
+    def __eq__(self, other: object) -> bool:
+        """Check if two ChatHistorySummarizationReducer objects share the same settings.
+
+        The messages held by the reducers are not compared.
+        """
+        if not isinstance(other, ChatHistorySummarizationReducer):
+            return False
+        compared = ("threshold_count", "target_count", "use_single_summary", "summarization_instructions")
+        # all() stops at the first setting that differs
+        return all(getattr(self, name) == getattr(other, name) for name in compared)
+
+    def __hash__(self) -> int:
+        """Hash the object based on the same properties that __eq__ compares."""
+        # fail_on_error and include_function_content_in_summary are deliberately left out:
+        # __eq__ ignores them, and objects that compare equal must produce equal hashes.
+        return hash((
+            self.__class__.__name__,
+            self.threshold_count,
+            self.target_count,
+            self.summarization_instructions,
+            self.use_single_summary,
+        ))
diff --git a/python/semantic_kernel/contents/history_reducer/chat_history_truncation_reducer.py b/python/semantic_kernel/contents/history_reducer/chat_history_truncation_reducer.py
new file mode 100644
index 000000000000..4faf28876748
--- /dev/null
+++ b/python/semantic_kernel/contents/history_reducer/chat_history_truncation_reducer.py
@@ -0,0 +1,83 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import logging
+import sys
+from typing import Any
+
+from semantic_kernel.utils.experimental_decorator import experimental_class
+
+if sys.version_info < (3, 11):
+    from typing_extensions import Self  # pragma: no cover
+else:
+    from typing import Self  # type: ignore # pragma: no cover
+
+from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer
+from semantic_kernel.contents.history_reducer.chat_history_reducer_utils import (
+    extract_range,
+    locate_safe_reduction_index,
+)
+
+logger = logging.getLogger(__name__)
+
+
+@experimental_class
+class ChatHistoryTruncationReducer(ChatHistoryReducer):
+    """A ChatHistory that supports truncation logic.
+
+    Because this class inherits from ChatHistoryReducer (which in turn inherits from ChatHistory),
+    it can also be used anywhere a ChatHistory is expected, while adding truncation capability.
+    """
+
+    def __init__(self, target_count: int, threshold_count: int | None = None, **kwargs: Any):
+        """Create the truncation reducer, forwarding threshold_count only when it is not None."""
+        optional_args: dict[str, Any] = {}
+        if threshold_count is not None:
+            # Only forward the threshold when the caller provided one
+            optional_args["threshold_count"] = threshold_count
+
+        super().__init__(target_count=target_count, **optional_args, **kwargs)
+
+    async def reduce(self) -> Self | None:
+        """Truncate the chat history to the target message count, avoiding orphaned calls.
+
+        Returns:
+            This reducer if its messages were truncated, or None if no truncation took place.
+        """
+        history = self.messages
+        if len(history) <= self.target_count + (self.threshold_count or 0):
+            return None  # No need to reduce
+
+        logger.info("Performing chat history truncation check...")
+
+        truncation_index = locate_safe_reduction_index(history, self.target_count, self.threshold_count)
+        if truncation_index is None:
+            logger.info(
+                f"No truncation index found. Target count: {self.target_count}, Threshold: {self.threshold_count}"
+            )
+            return None
+
+        # truncation_index is where the kept messages start, so log the count that actually remains
+        truncated_list = extract_range(history, start=truncation_index)
+        logger.info(f"Truncating history to {len(truncated_list)} messages.")
+        self.messages = truncated_list
+        return self
+
+    def __eq__(self, other: object) -> bool:
+        """Decide equality from the truncation settings alone.
+
+        The messages stored in either reducer play no part in the comparison.
+
+        Returns:
+            True when the other object is a ChatHistoryTruncationReducer whose counts match this one's.
+        """
+        if isinstance(other, ChatHistoryTruncationReducer):
+            return (self.threshold_count, self.target_count) == (other.threshold_count, other.target_count)
+        return False
+
+    def __hash__(self) -> int:
+        """Hash the reducer by its class name and truncation settings.
+
+        Returns:
+            The hash of the class name combined with the threshold and target counts.
+        """
+        return hash((type(self).__name__, self.threshold_count, self.target_count))
diff --git a/python/semantic_kernel/exceptions/agent_exceptions.py b/python/semantic_kernel/exceptions/agent_exceptions.py
index 1c6b5bb897cf..0f13d6ddd368 100644
--- a/python/semantic_kernel/exceptions/agent_exceptions.py
+++ b/python/semantic_kernel/exceptions/agent_exceptions.py
@@ -38,3 +38,9 @@ class AgentChatException(AgentException):
     """An error occurred while invoking the agent chat."""
 
     pass
+
+
+class AgentChatHistoryReducerException(AgentException):
+    """An error occurred while reducing the chat history."""
+
+    pass
diff --git a/python/semantic_kernel/exceptions/content_exceptions.py b/python/semantic_kernel/exceptions/content_exceptions.py
index d9c3f5aa10c5..4ad619951f4f 100644
--- a/python/semantic_kernel/exceptions/content_exceptions.py
+++ b/python/semantic_kernel/exceptions/content_exceptions.py
@@ -39,7 +39,14 @@ class FunctionCallInvalidArgumentsException(ContentException):
     pass
 
 
+class ChatHistoryReducerException(ContentException):
+    """An error occurred while reducing the chat history."""
+
+    pass
+
+
 __all__ = [
+    "ChatHistoryReducerException",
     "ContentAdditionException",
     "ContentException",
     "ContentInitializationError",
diff --git a/python/tests/unit/agents/test_chat_completion_agent.py b/python/tests/unit/agents/test_chat_completion_agent.py
index 191826aa23a9..01f9813acf83 100644
--- a/python/tests/unit/agents/test_chat_completion_agent.py
+++ b/python/tests/unit/agents/test_chat_completion_agent.py
@@ -206,7 +206,8 @@ def test_get_channel_keys():
     agent = ChatCompletionAgent()
     keys = agent.get_channel_keys()
 
-    assert keys == [ChatHistoryChannel.__name__]
+    for key in keys:
+        assert isinstance(key, str)
 
 
 async def test_create_channel():
diff --git a/python/tests/unit/agents/test_chat_history_channel.py b/python/tests/unit/agents/test_chat_history_channel.py
index acb563b9ca7c..4ba15f01a062 100644
--- a/python/tests/unit/agents/test_chat_history_channel.py
+++ b/python/tests/unit/agents/test_chat_history_channel.py
@@ -23,6 +23,9 @@ async def invoke_stream(self, history: list[ChatMessageContent]) -> AsyncIterabl
         for message in history:
             yield ChatMessageContent(role=AuthorRole.SYSTEM, content=f"Processed: {message.content}")
 
+    async def reduce_history(self, history: list[ChatMessageContent]) -> list[ChatMessageContent]:
+        return history  # mock: hand the history back without reducing it
+
 
 class MockNonChatHistoryHandler:
     """Mock agent to test incorrect instance handling."""
diff --git a/python/tests/unit/agents/test_sequential_strategy_selection.py b/python/tests/unit/agents/test_sequential_strategy_selection.py
index 1a2db9d7409d..17754bd389fd 100644
--- a/python/tests/unit/agents/test_sequential_strategy_selection.py
+++ b/python/tests/unit/agents/test_sequential_strategy_selection.py
@@ -1,12 +1,13 @@
 # Copyright (c) Microsoft. All rights reserved.
 
-from unittest.mock import AsyncMock
+from unittest.mock import AsyncMock, MagicMock
 
 import pytest
 
 from semantic_kernel.agents.agent import Agent
 from semantic_kernel.agents.channels.agent_channel import AgentChannel
 from semantic_kernel.agents.strategies.selection.sequential_selection_strategy import SequentialSelectionStrategy
+from semantic_kernel.exceptions.agent_exceptions import AgentExecutionException
 
 
 class MockAgent(Agent):
@@ -78,13 +79,47 @@ async def test_sequential_selection_exceeds_length(agents):
     selected_agent = await strategy.next(agents, [])
 
     assert selected_agent.id == "agent-0"
+    assert strategy._index == 0
+
+    selected_agent = await strategy.next(agents, [])
+
+    assert selected_agent.id == "agent-1"
     assert strategy._index == 1
 
 
 async def test_sequential_selection_empty_agents():
     strategy = SequentialSelectionStrategy()
 
-    with pytest.raises(ValueError) as excinfo:
+    with pytest.raises(AgentExecutionException) as excinfo:
         await strategy.next([], [])
 
-    assert "No agents to select from" in str(excinfo.value)
+    assert "Agent Failure - No agents present to select." in str(excinfo.value)
+
+
+async def test_sequential_selection_avoid_selecting_same_agent_twice():
+    """When the initial agent was already selected, next() is expected to return the other agent."""
+    # Arrange
+    first_agent = MagicMock(spec=Agent)
+    first_agent.id = "agent-0"
+    first_agent.name = "Agent0"
+
+    second_agent = MagicMock(spec=Agent)
+    second_agent.id = "agent-1"
+    second_agent.name = "Agent1"
+
+    agents = [first_agent, second_agent]
+
+    strategy = SequentialSelectionStrategy()
+    # Put the strategy in the state where a selection already happened,
+    # the first agent is the initial agent, and the internal index is -1
+    strategy.has_selected = True
+    strategy.initial_agent = first_agent
+    strategy._index = -1
+
+    # Act
+    chosen_agent = await strategy.next(agents, [])
+
+    # Assert
+    # The first agent must not be picked again, so the second one is expected
+    assert chosen_agent.id == "agent-1"
+    assert strategy._index == 1
diff --git a/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py b/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py
index 942322bf5153..05fa5773729a 100644
--- a/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py
+++ b/python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py
@@ -27,11 +27,12 @@
 # region init
 def test_azure_ai_inference_chat_completion_init(azure_ai_inference_unit_test_env, model_id) -> None:
     """Test initialization of AzureAIInferenceChatCompletion"""
-    azure_ai_inference = AzureAIInferenceChatCompletion(model_id)
+    azure_ai_inference = AzureAIInferenceChatCompletion(model_id, instruction_role="developer")
 
     assert azure_ai_inference.ai_model_id == model_id
     assert azure_ai_inference.service_id == model_id
     assert isinstance(azure_ai_inference.client, ChatCompletionsClient)
+    assert azure_ai_inference.instruction_role == "developer"
 
 
 @patch("azure.ai.inference.aio.ChatCompletionsClient.__init__", return_value=None)
diff --git a/python/tests/unit/connectors/ai/open_ai/services/test_azure_chat_completion.py b/python/tests/unit/connectors/ai/open_ai/services/test_azure_chat_completion.py
index 731f0b04d2d3..c512a38f1b10 100644
--- a/python/tests/unit/connectors/ai/open_ai/services/test_azure_chat_completion.py
+++ b/python/tests/unit/connectors/ai/open_ai/services/test_azure_chat_completion.py
@@ -17,7 +17,6 @@
 from openai.types.chat.chat_completion_message import ChatCompletionMessage
 
 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
-from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior
 from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
 from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion
 from semantic_kernel.connectors.ai.open_ai.exceptions.content_filter_ai_exception import (
@@ -193,6 +192,30 @@ async def test_cmc(
     )
 
 
+@patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock)
+async def test_cmc_with_developer_instruction_role_propagates(
+    mock_create,
+    kernel: Kernel,
+    azure_openai_unit_test_env,
+    chat_history: ChatHistory,
+    mock_chat_completion_response: ChatCompletion,
+) -> None:
+    """A service built with instruction_role="developer" sends the request and keeps that role."""
+    mock_create.return_value = mock_chat_completion_response
+    chat_history.add_user_message("hello world")
+    settings = AzureChatPromptExecutionSettings(service_id="test_service_id")
+    service = AzureChatCompletion(instruction_role="developer")
+
+    await service.get_chat_message_contents(chat_history=chat_history, settings=settings, kernel=kernel)
+    expected_request = {
+        "model": azure_openai_unit_test_env["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"],
+        "stream": False,
+        "messages": service._prepare_chat_history_for_request(chat_history),
+    }
+    mock_create.assert_awaited_once_with(**expected_request)
+    assert service.instruction_role == "developer"
+
+
 @patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock)
 async def test_cmc_with_logit_bias(
     mock_create,
@@ -865,7 +888,7 @@ async def test_no_kernel_provided_throws_error(
     prompt = "some prompt that would trigger the content filtering"
     chat_history.add_user_message(prompt)
     complete_prompt_execution_settings = AzureChatPromptExecutionSettings(
-        function_call_behavior=FunctionCallBehavior.AutoInvokeKernelFunctions()
+        function_choice_behavior=FunctionChoiceBehavior.Auto()
     )
 
     test_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
@@ -891,7 +914,7 @@ async def test_auto_invoke_false_no_kernel_provided_throws_error(
     prompt = "some prompt that would trigger the content filtering"
     chat_history.add_user_message(prompt)
     complete_prompt_execution_settings = AzureChatPromptExecutionSettings(
-        function_call_behavior=FunctionCallBehavior.EnableFunctions(auto_invoke=False, filters={})
+        function_choice_behavior=FunctionChoiceBehavior.Auto(auto_invoke=False)
     )
 
     test_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
diff --git a/python/tests/unit/connectors/ai/open_ai/services/test_openai_chat_completion_base.py b/python/tests/unit/connectors/ai/open_ai/services/test_openai_chat_completion_base.py
index db432e4db8eb..c0b1000ae159 100644
--- a/python/tests/unit/connectors/ai/open_ai/services/test_openai_chat_completion_base.py
+++ b/python/tests/unit/connectors/ai/open_ai/services/test_openai_chat_completion_base.py
@@ -12,7 +12,6 @@
 from openai.types.chat.chat_completion_chunk import ChoiceDelta as ChunkChoiceDelta
 from openai.types.chat.chat_completion_message import ChatCompletionMessage
 
-from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior
 from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
 from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import (
     OpenAIChatPromptExecutionSettings,
@@ -115,6 +114,30 @@ async def test_cmc_singular(
     )
 
 
+@patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock)
+async def test_cmc_singular_with_developer_instruction_propagates(
+    mock_create,
+    kernel: Kernel,
+    chat_history: ChatHistory,
+    mock_chat_completion_response: ChatCompletion,
+    openai_unit_test_env,
+):
+    """A service built with instruction_role="developer" sends the request and keeps that role."""
+    mock_create.return_value = mock_chat_completion_response
+    chat_history.add_user_message("hello world")
+    settings = OpenAIChatPromptExecutionSettings(service_id="test_service_id")
+    service = OpenAIChatCompletion(instruction_role="developer")
+
+    await service.get_chat_message_content(chat_history=chat_history, settings=settings, kernel=kernel)
+    expected_request = {
+        "model": openai_unit_test_env["OPENAI_CHAT_MODEL_ID"],
+        "stream": False,
+        "messages": service._prepare_chat_history_for_request(chat_history),
+    }
+    mock_create.assert_awaited_once_with(**expected_request)
+    assert service.instruction_role == "developer"
+
+
 @patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock)
 async def test_cmc_prompt_execution_settings(
     mock_create,
@@ -167,7 +190,7 @@ async def test_cmc_function_call_behavior(
     chat_history.add_user_message("hello world")
     orig_chat_history = deepcopy(chat_history)
     complete_prompt_execution_settings = OpenAIChatPromptExecutionSettings(
-        service_id="test_service_id", function_call_behavior=FunctionCallBehavior.AutoInvokeKernelFunctions()
+        service_id="test_service_id", function_choice_behavior=FunctionChoiceBehavior.Auto()
     )
     with patch(
         "semantic_kernel.kernel.Kernel.invoke_function_call",
@@ -673,7 +696,7 @@ async def test_scmc_function_call_behavior(
     chat_history.add_user_message("hello world")
     orig_chat_history = deepcopy(chat_history)
     complete_prompt_execution_settings = OpenAIChatPromptExecutionSettings(
-        service_id="test_service_id", function_call_behavior=FunctionCallBehavior.AutoInvokeKernelFunctions()
+        service_id="test_service_id", function_choice_behavior=FunctionChoiceBehavior.Auto()
     )
     with patch(
         "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._process_function_call",
diff --git a/python/tests/unit/connectors/ai/test_function_call_behavior.py b/python/tests/unit/connectors/ai/test_function_call_behavior.py
deleted file mode 100644
index f9e27d6ad85c..000000000000
--- a/python/tests/unit/connectors/ai/test_function_call_behavior.py
+++ /dev/null
@@ -1,144 +0,0 @@
-# Copyright (c) Microsoft. All rights reserved.
-
-from typing import TYPE_CHECKING
-from unittest.mock import Mock
-
-import pytest
-
-from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior
-
-if TYPE_CHECKING:
-    from semantic_kernel.kernel import Kernel
-
-
-@pytest.fixture
-def function_call_behavior():
-    return FunctionCallBehavior()
-
-
-@pytest.fixture
-def update_settings_callback():
-    mock = Mock()
-    mock.return_value = None
-    return mock
-
-
-def test_function_call_behavior():
-    fcb = FunctionCallBehavior()
-    assert fcb is not None
-    assert fcb.enable_kernel_functions is True
-    assert fcb.max_auto_invoke_attempts == 5
-    assert fcb.auto_invoke_kernel_functions is True
-
-
-def test_function_call_behavior_get_set(function_call_behavior: FunctionCallBehavior):
-    function_call_behavior.enable_kernel_functions = False
-    assert function_call_behavior.enable_kernel_functions is False
-    function_call_behavior.max_auto_invoke_attempts = 10
-    assert function_call_behavior.max_auto_invoke_attempts == 10
-    assert function_call_behavior.auto_invoke_kernel_functions is True
-    function_call_behavior.auto_invoke_kernel_functions = False
-    assert function_call_behavior.auto_invoke_kernel_functions is False
-    assert function_call_behavior.max_auto_invoke_attempts == 0
-    function_call_behavior.auto_invoke_kernel_functions = True
-    assert function_call_behavior.auto_invoke_kernel_functions is True
-    assert function_call_behavior.max_auto_invoke_attempts == 5
-
-
-def test_auto_invoke_kernel_functions():
-    fcb = FunctionCallBehavior.AutoInvokeKernelFunctions()
-    assert fcb is not None
-    assert fcb.enable_kernel_functions is True
-    assert fcb.max_auto_invoke_attempts == 5
-    assert fcb.auto_invoke_kernel_functions is True
-
-
-def test_enable_kernel_functions():
-    fcb = FunctionCallBehavior.EnableKernelFunctions()
-    assert fcb is not None
-    assert fcb.enable_kernel_functions is True
-    assert fcb.max_auto_invoke_attempts == 0
-    assert fcb.auto_invoke_kernel_functions is False
-
-
-def test_enable_functions():
-    fcb = FunctionCallBehavior.EnableFunctions(auto_invoke=True, filters={"excluded_plugins": ["test"]})
-    assert fcb is not None
-    assert fcb.enable_kernel_functions is True
-    assert fcb.max_auto_invoke_attempts == 5
-    assert fcb.auto_invoke_kernel_functions is True
-    assert fcb.filters == {"excluded_plugins": ["test"]}
-
-
-def test_required_function():
-    fcb = FunctionCallBehavior.RequiredFunction(auto_invoke=True, function_fully_qualified_name="test")
-    assert fcb is not None
-    assert fcb.enable_kernel_functions is True
-    assert fcb.max_auto_invoke_attempts == 1
-    assert fcb.auto_invoke_kernel_functions is True
-    assert fcb.function_fully_qualified_name == "test"
-
-
-def test_configure_default(function_call_behavior: FunctionCallBehavior, update_settings_callback, kernel: "Kernel"):
-    function_call_behavior.configure(kernel, update_settings_callback, None)
-    assert not update_settings_callback.called
-
-
-def test_configure_kernel_functions(update_settings_callback, kernel: "Kernel"):
-    fcb = FunctionCallBehavior.AutoInvokeKernelFunctions()
-    fcb.configure(kernel, update_settings_callback, None)
-    assert update_settings_callback.called
-
-
-def test_configure_kernel_functions_skip(update_settings_callback, kernel: "Kernel"):
-    fcb = FunctionCallBehavior.AutoInvokeKernelFunctions()
-    fcb.enable_kernel_functions = False
-    fcb.configure(kernel, update_settings_callback, None)
-    assert not update_settings_callback.called
-
-
-def test_configure_enable_kernel_functions(update_settings_callback, kernel: "Kernel"):
-    fcb = FunctionCallBehavior.EnableKernelFunctions()
-    fcb.configure(kernel, update_settings_callback, None)
-    assert update_settings_callback.called
-
-
-def test_configure_enable_kernel_functions_skip(update_settings_callback, kernel: "Kernel"):
-    fcb = FunctionCallBehavior.EnableKernelFunctions()
-    fcb.enable_kernel_functions = False
-    fcb.configure(kernel, update_settings_callback, None)
-    assert not update_settings_callback.called
-
-
-def test_configure_enable_functions(update_settings_callback, kernel: "Kernel"):
-    fcb = FunctionCallBehavior.EnableFunctions(auto_invoke=True, filters={"excluded_plugins": ["test"]})
-    fcb.configure(kernel, update_settings_callback, None)
-    assert update_settings_callback.called
-
-
-def test_configure_enable_functions_skip(update_settings_callback, kernel: "Kernel"):
-    fcb = FunctionCallBehavior.EnableFunctions(auto_invoke=True, filters={"excluded_plugins": ["test"]})
-    fcb.enable_kernel_functions = False
-    fcb.configure(kernel, update_settings_callback, None)
-    assert not update_settings_callback.called
-
-
-def test_configure_required_function(update_settings_callback, kernel: "Kernel"):
-    fcb = FunctionCallBehavior.RequiredFunction(auto_invoke=True, function_fully_qualified_name="test")
-    fcb.configure(kernel, update_settings_callback, None)
-    assert update_settings_callback.called
-
-
-def test_configure_required_function_max_invoke_updated(update_settings_callback, kernel: "Kernel"):
-    fcb = FunctionCallBehavior.RequiredFunction(auto_invoke=True, function_fully_qualified_name="test")
-    fcb.max_auto_invoke_attempts = 10
-    fcb.configure(kernel, update_settings_callback, None)
-    assert update_settings_callback.called
-    assert fcb.max_auto_invoke_attempts == 1
-
-
-def test_configure_required_function_skip(update_settings_callback, kernel: "Kernel"):
-    fcb = FunctionCallBehavior.RequiredFunction(auto_invoke=True, function_fully_qualified_name="test")
-    fcb.enable_kernel_functions = False
-    fcb.configure(kernel, update_settings_callback, None)
-    assert not update_settings_callback.called
diff --git a/python/tests/unit/connectors/ai/test_function_choice_behavior.py b/python/tests/unit/connectors/ai/test_function_choice_behavior.py
index 89e211881c08..58787be12988 100644
--- a/python/tests/unit/connectors/ai/test_function_choice_behavior.py
+++ b/python/tests/unit/connectors/ai/test_function_choice_behavior.py
@@ -8,12 +8,11 @@
 if TYPE_CHECKING:
     from semantic_kernel.kernel import Kernel
 
-from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior
+from semantic_kernel.connectors.ai.function_calling_utils import _combine_filter_dicts
 from semantic_kernel.connectors.ai.function_choice_behavior import (
     DEFAULT_MAX_AUTO_INVOKE_ATTEMPTS,
     FunctionChoiceBehavior,
     FunctionChoiceType,
-    _combine_filter_dicts,
 )
 from semantic_kernel.exceptions import ServiceInitializationError
 
@@ -50,38 +49,6 @@ def test_function_choice_behavior_required():
     assert behavior.filters == expected_filters
 
 
-def test_from_function_call_behavior_kernel_functions():
-    behavior = FunctionCallBehavior.AutoInvokeKernelFunctions()
-    new_behavior = FunctionChoiceBehavior.from_function_call_behavior(behavior)
-    assert new_behavior.type_ == FunctionChoiceType.AUTO
-    assert new_behavior.auto_invoke_kernel_functions is True
-
-
-def test_from_function_call_behavior_required():
-    behavior = FunctionCallBehavior.RequiredFunction(auto_invoke=True, function_fully_qualified_name="plugin1-func1")
-    new_behavior = FunctionChoiceBehavior.from_function_call_behavior(behavior)
-    assert new_behavior.type_ == FunctionChoiceType.REQUIRED
-    assert new_behavior.auto_invoke_kernel_functions is True
-    assert new_behavior.filters == {"included_functions": ["plugin1-func1"]}
-
-
-def test_from_function_call_behavior_enabled_functions():
-    expected_filters = {"included_functions": ["plugin1-func1"]}
-    behavior = FunctionCallBehavior.EnableFunctions(auto_invoke=True, filters=expected_filters)
-    new_behavior = FunctionChoiceBehavior.from_function_call_behavior(behavior)
-    assert new_behavior.type_ == FunctionChoiceType.AUTO
-    assert new_behavior.auto_invoke_kernel_functions is True
-    assert new_behavior.filters == expected_filters
-
-
-def test_from_function_call_behavior():
-    behavior = FunctionCallBehavior()
-    new_behavior = FunctionChoiceBehavior.from_function_call_behavior(behavior)
-    assert new_behavior is not None
-    assert new_behavior.enable_kernel_functions == behavior.enable_kernel_functions
-    assert new_behavior.maximum_auto_invoke_attempts == behavior.max_auto_invoke_attempts
-
-
 @pytest.mark.parametrize(("type", "max_auto_invoke_attempts"), [("auto", 5), ("none", 0), ("required", 1)])
 def test_auto_function_choice_behavior_from_dict(type: str, max_auto_invoke_attempts: int):
     data = {
diff --git a/python/tests/unit/contents/test_chat_history_reducer_utils.py b/python/tests/unit/contents/test_chat_history_reducer_utils.py
new file mode 100644
index 000000000000..b2f6ac2e282f
--- /dev/null
+++ b/python/tests/unit/contents/test_chat_history_reducer_utils.py
@@ -0,0 +1,196 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import pytest
+
+from semantic_kernel.contents.chat_message_content import ChatMessageContent
+from semantic_kernel.contents.function_call_content import FunctionCallContent
+from semantic_kernel.contents.function_result_content import FunctionResultContent
+from semantic_kernel.contents.history_reducer.chat_history_reducer_utils import (
+    SUMMARY_METADATA_KEY,
+    contains_function_call_or_result,
+    extract_range,
+    get_call_result_pairs,
+    locate_safe_reduction_index,
+    locate_summarization_boundary,
+)
+from semantic_kernel.contents.utils.author_role import AuthorRole
+
+
+@pytest.fixture
+def chat_messages_with_pairs():
+    msgs = []
+
+    # [0] Summary message (system), flagged via SUMMARY_METADATA_KEY; [n] = 0-based index in the returned list
+    msg_summary = ChatMessageContent(role=AuthorRole.SYSTEM, content="Summary so far.")
+    msg_summary.metadata[SUMMARY_METADATA_KEY] = True
+    msgs.append(msg_summary)
+
+    # [1] Normal user message
+    msgs.append(ChatMessageContent(role=AuthorRole.USER, content="User says hello."))
+
+    # [2] Function call (call ID = "call1")
+    msg_func_call_1 = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Function call #1")
+    func_call_content_1 = FunctionCallContent(id="call1", function_name="funcA", arguments={"param": "valA"})
+    msg_func_call_1.items.append(func_call_content_1)
+    msgs.append(msg_func_call_1)
+
+    # [3] Function result for call1
+    msg_func_result_1 = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Result for call #1")
+    func_result_content_1 = FunctionResultContent(id="call1", content="Function #1 result text")
+    msg_func_result_1.items.append(func_result_content_1)
+    msgs.append(msg_func_result_1)
+
+    # [4] Another user message
+    msgs.append(ChatMessageContent(role=AuthorRole.USER, content="Another user message."))
+
+    # [5] Another function call (call ID = "call2")
+    msg_func_call_2 = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Function call #2")
+    func_call_content_2 = FunctionCallContent(id="call2", function_name="funcB", arguments={"param": "valB"})
+    msg_func_call_2.items.append(func_call_content_2)
+    msgs.append(msg_func_call_2)
+
+    # [6] Another user message (no result yet for "call2")
+    msgs.append(ChatMessageContent(role=AuthorRole.USER, content="Wait, function result not yet?"))
+
+    # [7] Unrelated function result (call ID = "callX" doesn't match any prior call)
+    msg_func_result_x = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Result for unknown call")
+    func_result_content_x = FunctionResultContent(id="callX", content="No matching call.")
+    msg_func_result_x.items.append(func_result_content_x)
+    msgs.append(msg_func_result_x)
+
+    # [8] Function result for call2
+    msg_func_result_2 = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Result for call #2")
+    func_result_content_2 = FunctionResultContent(id="call2", content="Function #2 result text")
+    msg_func_result_2.items.append(func_result_content_2)
+    msgs.append(msg_func_result_2)
+
+    return msgs
+
+
+def test_get_call_result_pairs_fixture_has_pairs(chat_messages_with_pairs):
+    """
+    The fixture holds two function calls whose IDs have a matching result,
+    so exactly two (call_index, result_index) pairs are expected:
+      - (2,3) for call1
+      - (5,8) for call2
+    """
+    found_pairs = get_call_result_pairs(chat_messages_with_pairs)
+    assert len(found_pairs) == 2, "Fixture should produce exactly two matched call->result pairs."
+    assert (2, 3) in found_pairs, "Expected pair for (call1) in indexes (2,3)."
+    assert (5, 8) in found_pairs, "Expected pair for (call2) in indexes (5,8)."
+
+
+@pytest.mark.parametrize(
+    ("message_items", "expected"),
+    [
+        ([], False),  # no function call/result item is added
+        ([FunctionCallContent(function_name="funcA", arguments={})], True),  # a function call item
+        ([FunctionResultContent(id="test", content="Result")], True),  # a function result item
+    ],
+)
+def test_contains_function_call_or_result(message_items, expected):
+    message = ChatMessageContent(role=AuthorRole.USER, content="Test")
+    message.items.extend(message_items)
+    assert contains_function_call_or_result(message) == expected
+
+
+def test_extract_range_preserve_pairs(chat_messages_with_pairs):
+    """
+    With preserve_pairs=True, extract_range keeps or skips each call/result
+    pair as a unit. The slice used here covers indexes 2..8 of the fixture
+    and fully contains both pairs.
+    """
+    start_index, end_index = 2, 9  # the end index is exclusive
+    extracted = extract_range(
+        chat_messages_with_pairs,
+        start=start_index,
+        end=end_index,
+        preserve_pairs=True,
+    )
+    expected_slice = chat_messages_with_pairs[start_index:end_index]  # indexes 2..8
+
+    # Indices in range(2..9) => 2,3,4,5,6,7,8
+    # Pairs (2,3) and (5,8) both lie fully inside [2..9), so none is dropped.
+    # Together with the unpaired indexes 4, 6 and 7 that totals 7 messages.
+    assert len(extracted) == 7
+
+    # Positional order is deliberately not asserted; comparing as sets checks
+    # only that exactly the messages 2..8 came back (nothing missing or extra).
+    assert set(extracted) == set(expected_slice), "Expected messages 2..8 to be returned."
+
+
+def test_extract_range_preserve_pairs_call_outside_slice(chat_messages_with_pairs):
+    """
+    Slice start=4, end=9 => indices 4..8. Pair (2,3) lies entirely before the
+    slice, so neither of its messages is expected back; pair (5,8) lies fully
+    inside it. NOTE: no pair straddles the slice boundary in this test.
+    """
+    extracted = extract_range(chat_messages_with_pairs, start=4, end=9, preserve_pairs=True)
+
+    # Indices in range(4..9) => 4,5,6,7,8
+    # Pairs: (2,3) is outside, (5,8) is fully inside. So (5,8) is kept together.
+    # The final set of messages is [4,5,6,7,8] => 5 total.
+    assert len(extracted) == 5
+
+    expected_slice = chat_messages_with_pairs[4:9]  # indexes 4..8
+    assert set(extracted) == set(expected_slice), "Expected messages 4..8 to be returned."
+
+    # Messages 2 and 3 do not appear, which is correct: both are outside this slice.
+
+
+def test_locate_summarization_boundary_empty():
+    """Edge case: for an empty history the summarization boundary is index 0."""
+    boundary = locate_summarization_boundary([])
+    assert boundary == 0
+
+
+def test_locate_safe_reduction_index_multiple_calls(chat_messages_with_pairs):
+    """
+    With a target_count smaller than the history, the returned cut index
+    must not separate any matched function call from its result.
+    """
+    history_size = len(chat_messages_with_pairs)  # 9
+    cut = locate_safe_reduction_index(
+        chat_messages_with_pairs,
+        target_count=4,
+        threshold_count=0,
+        offset_count=0,
+    )
+    # history_size (9) exceeds target_count (4), so a cut index is expected ...
+    assert cut is not None
+    # ... and it has to fall strictly inside the history.
+    assert 0 < cut < history_size
+
+    # A pair is intact when call and result sit on the same side of the cut,
+    # i.e. "call kept => result kept" and "result kept => call kept" both hold.
+    pairs = get_call_result_pairs(chat_messages_with_pairs)
+
+    for call_index, result_index in pairs:
+        call_kept = call_index >= cut
+        result_kept = result_index >= cut
+        assert call_kept == result_kept
+
+
+def test_locate_safe_reduction_index_high_offset(chat_messages_with_pairs):
+    """
+    A large offset_count may leave no valid place to cut. Both outcomes are
+    accepted here: None (nothing found) or an index at/after the offset.
+    """
+    target_count = 3
+    threshold_count = 0
+    offset_count = 5
+
+    reduction_index = locate_safe_reduction_index(
+        chat_messages_with_pairs,
+        target_count=target_count,
+        threshold_count=threshold_count,
+        offset_count=offset_count,
+    )
+
+    # None is fine: it means no valid safe reduction was found after
+    # skipping the first `offset_count` messages.
+    if reduction_index is None:
+        return
+
+    # Otherwise the index must not point into the skipped prefix.
+    assert reduction_index >= offset_count
diff --git a/python/tests/unit/contents/test_chat_history_summarization_reducer.py b/python/tests/unit/contents/test_chat_history_summarization_reducer.py
new file mode 100644
index 000000000000..35e13c969522
--- /dev/null
+++ b/python/tests/unit/contents/test_chat_history_summarization_reducer.py
@@ -0,0 +1,202 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
+from semantic_kernel.const import DEFAULT_SERVICE_NAME
+from semantic_kernel.contents.chat_message_content import ChatMessageContent
+from semantic_kernel.contents.history_reducer.chat_history_reducer_utils import SUMMARY_METADATA_KEY
+from semantic_kernel.contents.history_reducer.chat_history_summarization_reducer import (
+    ChatHistorySummarizationReducer,
+)
+from semantic_kernel.contents.utils.author_role import AuthorRole
+from semantic_kernel.exceptions.content_exceptions import ChatHistoryReducerException
+
+
+@pytest.fixture
+def mock_service():
+    """Provide a MagicMock restricted to the ChatCompletionClientBase spec."""
+    # Placeholder settings class: calling it yields an object with service_id="foo".
+    settings_class = MagicMock(return_value=MagicMock(service_id="foo"))
+    service = MagicMock(spec=ChatCompletionClientBase)
+    service.get_prompt_execution_settings_class.return_value = settings_class
+    service.get_chat_message_content = AsyncMock()  # awaited by the code under test
+    return service
+
+
+@pytest.fixture
+def chat_messages():
+    """Build a six-message history: a flagged summary followed by user/assistant turns."""
+    # The leading system message is marked as an existing summary.
+    summary_msg = ChatMessageContent(role=AuthorRole.SYSTEM, content="Prior summary.")
+    summary_msg.metadata[SUMMARY_METADATA_KEY] = True
+
+    # Alternating turns that start and end with the user.
+    conversation = [
+        (AuthorRole.USER, "Hello!"),
+        (AuthorRole.ASSISTANT, "Hi there."),
+        (AuthorRole.USER, "What can you do?"),
+        (AuthorRole.ASSISTANT, "I can help with tasks."),
+        (AuthorRole.USER, "Ok, let's do something."),
+    ]
+    turns = [ChatMessageContent(role=role, content=text) for role, text in conversation]
+    return [summary_msg, *turns]
+
+
+def test_summarization_reducer_init(mock_service):
+    """Every constructor argument must be readable back from the reducer unchanged."""
+    init_kwargs = {
+        "target_count": 10,
+        "service_id": "my_service",
+        "threshold_count": 5,
+        "summarization_instructions": "Custom instructions",
+        "use_single_summary": False,
+        "fail_on_error": False,
+    }
+    reducer = ChatHistorySummarizationReducer(service=mock_service, **init_kwargs)
+
+    assert reducer.service == mock_service
+    # Value settings are compared by equality, the two flags by identity.
+    for name in ("target_count", "service_id", "threshold_count", "summarization_instructions"):
+        assert getattr(reducer, name) == init_kwargs[name]
+    assert reducer.use_single_summary is False
+    assert reducer.fail_on_error is False
+
+
+def test_summarization_reducer_defaults(mock_service):
+    reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=5)
+    assert reducer.threshold_count == 0
+    # The default instructions must be a non-empty string (an `x in x` check is true for any str).
+    assert isinstance(reducer.summarization_instructions, str) and reducer.summarization_instructions.strip()
+    assert reducer.use_single_summary is True
+    assert reducer.fail_on_error is True
+    assert reducer.service_id == DEFAULT_SERVICE_NAME
+
+
+def test_summarization_reducer_eq_and_hash(mock_service):
+    same_a = ChatHistorySummarizationReducer(service=mock_service, target_count=5, threshold_count=2)
+    same_b = ChatHistorySummarizationReducer(service=mock_service, target_count=5, threshold_count=2)
+    different = ChatHistorySummarizationReducer(service=mock_service, target_count=6, threshold_count=2)
+    # Identically configured reducers compare equal and hash alike ...
+    assert same_a == same_b
+    assert hash(same_a) == hash(same_b)
+    # ... while a different target_count changes both equality and hash.
+    assert same_a != different
+    assert hash(same_a) != hash(different)
+
+
+async def test_summarization_reducer_reduce_no_need(chat_messages, mock_service):
+    reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=10, threshold_count=0)
+    reducer.messages = chat_messages  # 6 fixture messages; otherwise reduce() only sees an empty history
+    # len(history) == 6 <= target_count == 10 => no reduction: None is returned and the service is not called
+    result = await reducer.reduce()
+    assert result is None
+    mock_service.get_chat_message_content.assert_not_awaited()
+
+
+async def test_summarization_reducer_reduce_needed(mock_service):
+    """A history longer than target_count + threshold_count comes back shortened and with a summary."""
+    # The mocked service answers the summarization request with this single message.
+    mock_service.get_chat_message_content.return_value = ChatMessageContent(
+        role=AuthorRole.ASSISTANT, content="This is a summary."
+    )
+
+    reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3, threshold_count=1)
+    # One pre-existing summary plus six regular turns: 7 messages > 3 + 1.
+    reducer.messages = [
+        ChatMessageContent(role=AuthorRole.SYSTEM, content="Existing summary", metadata={SUMMARY_METADATA_KEY: True}),
+        ChatMessageContent(role=AuthorRole.USER, content="User says hello"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds"),
+        ChatMessageContent(role=AuthorRole.USER, content="User says more"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds again"),
+        ChatMessageContent(role=AuthorRole.USER, content="User says more"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds again"),
+    ]
+
+    result = await reducer.reduce()
+
+    assert result is not None, "We expect a shortened list with a new summary inserted."
+    assert len(result) <= 5, "The resulting list should be shortened to around target_count + threshold_count."
+    # At least one remaining message must carry the summary flag.
+    has_summary = any(msg.metadata.get(SUMMARY_METADATA_KEY) for msg in result)
+    assert has_summary, "We expect to see a newly inserted summary message."
+
+
+async def test_summarization_reducer_reduce_no_messages_to_summarize(mock_service):
+    """Reducing a history that holds only a flagged summary returns None without calling the service."""
+    # With use_single_summary=False, the older_range_start is insertion_point.
+    # If insertion_point == older_range_end there are no messages to summarize => None.
+    reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=1, use_single_summary=False)
+
+    # A single message flagged as summary => insertion_point=0, so older_range_start=0, older_range_end=0
+    lone_summary = ChatMessageContent(
+        role=AuthorRole.SYSTEM, content="Only summary.", metadata={SUMMARY_METADATA_KEY: True}
+    )
+    reducer.add_message(lone_summary)
+
+    assert await reducer.reduce() is None
+    mock_service.get_chat_message_content.assert_not_awaited()
+
+
+async def test_summarization_reducer_reduce_summarizer_returns_none(mock_service):
+    """When the service yields no summary message, reduce() must return None."""
+    mock_service.get_chat_message_content.return_value = None  # summarizer returns None
+
+    reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3)
+    # Four messages against target_count=3: enough that summarization
+    # would normally occur.
+    reducer.messages = [
+        ChatMessageContent(role=AuthorRole.SYSTEM, content="Existing summary", metadata={SUMMARY_METADATA_KEY: True}),
+        ChatMessageContent(role=AuthorRole.USER, content="User asks something"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant replies"),
+        ChatMessageContent(role=AuthorRole.USER, content="Another user query"),
+    ]
+
+    outcome = await reducer.reduce()
+
+    assert outcome is None, "If the summarizer yields no message, we return None."
+
+
+async def test_summarization_reducer_reduce_summarization_fails(mock_service):
+    """With fail_on_error=True, a failing service call surfaces as ChatHistoryReducerException."""
+    mock_service.get_chat_message_content.side_effect = Exception("Summarizer error")
+
+    reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3, fail_on_error=True)
+    # Four messages against target_count=3: enough to trigger summarization.
+    reducer.messages = [
+        ChatMessageContent(role=AuthorRole.USER, content="Msg1"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Msg2"),
+        ChatMessageContent(role=AuthorRole.USER, content="Msg3"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Msg4"),
+    ]
+
+    # The raised exception's message is expected to contain "failed".
+    with pytest.raises(ChatHistoryReducerException, match="failed"):
+        await reducer.reduce()
+
+
+async def test_summarization_reducer_reduce_summarization_fails_no_raise(chat_messages, mock_service):
+    """With fail_on_error=False, a failing service call does not propagate; reduce() returns None."""
+    mock_service.get_chat_message_content.side_effect = Exception("Summarizer error")
+    reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3, fail_on_error=False)
+    reducer.messages = chat_messages
+
+    assert await reducer.reduce() is None
+
+
+async def test_summarization_reducer_private_summarize(mock_service):
+    """Call _summarize directly (for coverage) and expect the service's message back."""
+    mock_service.get_chat_message_content.return_value = ChatMessageContent(
+        role=AuthorRole.ASSISTANT, content="Mock Summary"
+    )
+    reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=5)
+    to_summarize = [
+        ChatMessageContent(role=AuthorRole.USER, content="Message1"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Message2"),
+    ]
+
+    actual_summary = await reducer._summarize(to_summarize)
+    assert actual_summary is not None, "We should get a summary message back."
+    assert actual_summary.content == "Mock Summary", "We expect the mock summary content."
diff --git a/python/tests/unit/contents/test_chat_history_truncation_reducer.py b/python/tests/unit/contents/test_chat_history_truncation_reducer.py
new file mode 100644
index 000000000000..7f94eccf8518
--- /dev/null
+++ b/python/tests/unit/contents/test_chat_history_truncation_reducer.py
@@ -0,0 +1,71 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import pytest
+
+from semantic_kernel.contents.chat_message_content import ChatMessageContent
+from semantic_kernel.contents.history_reducer.chat_history_truncation_reducer import ChatHistoryTruncationReducer
+from semantic_kernel.contents.utils.author_role import AuthorRole
+
+
+@pytest.fixture
+def chat_messages():
+    return [  # one system message, then two user/assistant exchanges
+        ChatMessageContent(role=AuthorRole.SYSTEM, content="System message."),
+        ChatMessageContent(role=AuthorRole.USER, content="User message 1"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant message 1"),
+        ChatMessageContent(role=AuthorRole.USER, content="User message 2"),
+        ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant message 2"),
+    ]
+
+
+def test_truncation_reducer_init():
+    reducer = ChatHistoryTruncationReducer(threshold_count=2, target_count=5)
+    # Both constructor arguments must be readable back unchanged.
+    assert (reducer.target_count, reducer.threshold_count) == (5, 2)
+
+
+def test_truncation_reducer_defaults():
+    """threshold_count is 0 when it is not supplied."""
+    assert ChatHistoryTruncationReducer(target_count=5).threshold_count == 0
+
+
+def test_truncation_reducer_eq_and_hash():
+    """Equality and hashing follow the reducer's settings."""
+    base = ChatHistoryTruncationReducer(target_count=5, threshold_count=2)
+    twin = ChatHistoryTruncationReducer(target_count=5, threshold_count=2)
+    other = ChatHistoryTruncationReducer(target_count=5, threshold_count=1)
+    assert base == twin and hash(base) == hash(twin)
+    # A different threshold_count must change both equality and hash.
+    assert base != other and hash(base) != hash(other)
+
+
+async def test_truncation_reducer_no_need(chat_messages):
+    # If total <= target + threshold => returns None (here: 5 fixture messages vs. target_count=5)
+    reducer = ChatHistoryTruncationReducer(target_count=5, threshold_count=0)
+    reducer.messages = chat_messages  # load the fixture; otherwise reduce() only sees an empty history
+    assert await reducer.reduce() is None
+
+
+async def test_truncation_reducer_no_truncation_index_found():
+    # Intended to cover the case where no safe index to cut at is found, so reduce() returns None.
+    # NOTE(review): 10 messages <= target_count + threshold_count (3 + 10), so this may exit through the
+    # same "no reduction needed" path as test_truncation_reducer_no_need - confirm against reduce().
+    msgs = [ChatMessageContent(role=AuthorRole.USER, content="Msg")] * 10  # ten references to one message object
+    # Suppose threshold_count is huge, so effectively we can't reduce
+    reducer = ChatHistoryTruncationReducer(target_count=3, threshold_count=10)
+    reducer.messages = msgs
+    result = await reducer.reduce()
+    assert result is None
+
+
+async def test_truncation_reducer_truncation(chat_messages):
+    """Five messages with target_count=2 must be cut down to the two most recent ones."""
+    reducer = ChatHistoryTruncationReducer(target_count=2)
+    reducer.messages = chat_messages
+    result = await reducer.reduce()
+
+    assert result is not None
+    assert len(result) == 2
+    # The survivors are the tail of the original history, in the same order.
+    newest_two = chat_messages[-2:]
+    assert (result[0], result[1]) == (newest_two[0], newest_two[1])