From 7de6367e5f504e3dfa47695d3a7e45a42485fb55 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Tue, 1 Oct 2024 14:51:41 -0700
Subject: [PATCH 01/51] Update task_query_response.prompty

remove required keys
---
 .../simulator/_prompty/task_query_response.prompty | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty
index 881d00493ff8..42a5d3fe4e37 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty
@@ -3,11 +3,6 @@ name: TaskSimulatorQueryResponse
 description: Gets queries and responses from a blob of text
 model:
   api: chat
-  configuration:
-    type: azure_openai
-    azure_deployment: ${env:AZURE_DEPLOYMENT}
-    api_key: ${env:AZURE_OPENAI_API_KEY}
-    azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT}
   parameters:
     temperature: 0.0
     top_p: 1.0

From f288b341820d9f54f7830dae8f841035b4f30df6 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Tue, 1 Oct 2024 14:51:54 -0700
Subject: [PATCH 02/51] Update task_simulate.prompty

---
 .../ai/evaluation/simulator/_prompty/task_simulate.prompty | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty
index 7dce5e28a6d1..1d8e360b56b9 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty
@@ -3,10 +3,6 @@ name: TaskSimulatorWithPersona
 description: Simulates a user to complete a conversation
 model:
   api: chat
-  configuration:
-    type: azure_openai
-    azure_deployment: ${env:AZURE_DEPLOYMENT}
-    azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT}
   parameters:
     temperature: 0.0
     top_p: 1.0

From 2a4b6f744a9a6c8faee8c742f0ad55d5cf82b922 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Wed, 2 Oct 2024 07:21:58 -0700
Subject: [PATCH 03/51] Update task_query_response.prompty

---
 .../evaluation/simulator/_prompty/task_query_response.prompty | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty
index 42a5d3fe4e37..b8c04fb19ef1 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty
@@ -3,6 +3,10 @@ name: TaskSimulatorQueryResponse
 description: Gets queries and responses from a blob of text
 model:
   api: chat
+  configuration:
+    type: azure_openai
+    azure_deployment: ${env:AZURE_DEPLOYMENT}
+    azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT}
   parameters:
     temperature: 0.0
     top_p: 1.0
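For context on patches 01-04: with the api_key reference gone from the .prompty files, callers can supply the model configuration at load time instead. A rough sketch, assuming AsyncPrompty (the loader _simulator.py in this package already uses) and placeholder deployment/endpoint values:

# Sketch only: load a prompty whose inline `configuration` block omits api_key,
# passing the Azure OpenAI settings at load time. Keyless (Entra ID) auth is
# assumed to be picked up; deployment and endpoint values are placeholders.
from promptflow.core import AsyncPrompty

model_config = {
    "configuration": {
        "type": "azure_openai",
        "azure_deployment": "my-gpt-4o-deployment",                # placeholder
        "azure_endpoint": "https://my-resource.openai.azure.com",  # placeholder
    },
}
flow = AsyncPrompty.load(source="task_query_response.prompty", model=model_config)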
From c8ce251bc34b2c3913f1d7e793ed65292e6a2e24 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Wed, 2 Oct 2024 07:22:17 -0700
Subject: [PATCH 04/51] Update task_simulate.prompty

---
 .../ai/evaluation/simulator/_prompty/task_simulate.prompty | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty
index 1d8e360b56b9..7dce5e28a6d1 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty
@@ -3,6 +3,10 @@ name: TaskSimulatorWithPersona
 description: Simulates a user to complete a conversation
 model:
   api: chat
+  configuration:
+    type: azure_openai
+    azure_deployment: ${env:AZURE_DEPLOYMENT}
+    azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT}
   parameters:
     temperature: 0.0
     top_p: 1.0

From e4cdd30f1189977531d90f89dff8248e41537f23 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Wed, 9 Oct 2024 14:24:35 -0700
Subject: [PATCH 05/51] Fix the api_key needed

---
 .../azure/ai/evaluation/_model_configurations.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_model_configurations.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_model_configurations.py
index 43114d3605c3..f9b8d64c9d5d 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_model_configurations.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_model_configurations.py
@@ -16,7 +16,7 @@ class AzureOpenAIModelConfiguration(TypedDict):
     """Name of Azure OpenAI deployment to make request to"""
     azure_endpoint: str
     """Endpoint of Azure OpenAI resource to make request to"""
-    api_key: str
+    api_key: NotRequired[str]
     """API key of Azure OpenAI resource"""
     api_version: NotRequired[str]
     """(Optional) API version to use in request to Azure OpenAI deployment"""

From b478651c1c77e137f535e92997770c4873edc917 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Wed, 16 Oct 2024 09:45:04 -0700
Subject: [PATCH 06/51] Update for release

---
 sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
index f7da251f03bd..0e92ee34a330 100644
--- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
+++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
@@ -1,8 +1,6 @@
 # Release History
 
-## 1.0.0b4 (Unreleased)
-
-### Features Added
+## 1.0.0b4 (2024-10-16)
 
 ### Breaking Changes
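Patch 05 is the library-side counterpart of the prompty changes: with api_key marked NotRequired, a configuration like the following sketch type-checks without a key (the import path is the package's public export; endpoint and deployment values are placeholders):

# Sketch: an AzureOpenAIModelConfiguration that omits api_key entirely,
# relying on keyless (Entra ID) authentication instead. Values are placeholders.
from azure.ai.evaluation import AzureOpenAIModelConfiguration

model_config: AzureOpenAIModelConfiguration = {
    "azure_endpoint": "https://my-resource.openai.azure.com",  # placeholder
    "azure_deployment": "my-gpt-4o-deployment",                # placeholder
    # "api_key" intentionally omitted; it is NotRequired as of patch 05
}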
From 8e5a264b835c184295c396e6816b747d64f158a0 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Wed, 16 Oct 2024 10:49:11 -0700
Subject: [PATCH 07/51] Black fix for file

---
 .../azure/ai/evaluation/simulator/_helpers/_experimental.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_experimental.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_experimental.py
index 6728a61649c6..ca676c9bcdc9 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_experimental.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_experimental.py
@@ -27,13 +27,11 @@
 
 
 @overload
-def experimental(wrapped: Type[T]) -> Type[T]:
-    ...
+def experimental(wrapped: Type[T]) -> Type[T]: ...
 
 
 @overload
-def experimental(wrapped: Callable[P, T]) -> Callable[P, T]:
-    ...
+def experimental(wrapped: Callable[P, T]) -> Callable[P, T]: ...
 
 
 def experimental(wrapped: Union[Type[T], Callable[P, T]]) -> Union[Type[T], Callable[P, T]]:

From 3a80606d08c319a9c6879e772d84aced41c2fd19 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Thu, 17 Oct 2024 14:12:06 -0700
Subject: [PATCH 08/51] Add original text in global context

---
 .../azure/ai/evaluation/simulator/_simulator.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py
index 06a62a97781a..1a4b52fa7a5f 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py
@@ -172,6 +172,7 @@ async def __call__(
             user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
             target=target,
             api_call_delay_sec=api_call_delay_sec,
+            text=text
         )
 
     async def _simulate_with_predefined_turns(
@@ -497,6 +498,7 @@ async def _create_conversations_from_query_responses(
         user_simulator_prompty_kwargs: Dict[str, Any],
         target: Callable,
         api_call_delay_sec: float,
+        text: str,
     ) -> List[JsonLineChatProtocol]:
         """
         Creates full conversations from query-response pairs.
@@ -515,6 +517,8 @@ async def _create_conversations_from_query_responses(
         :paramtype target: Callable
         :keyword api_call_delay_sec: Delay in seconds between API calls.
         :paramtype api_call_delay_sec: float
+        :keyword text: The initial input text for generating query responses.
+        :paramtype text: str
         :return: A list of simulated conversations represented as JsonLineChatProtocol objects.
         :rtype: List[JsonLineChatProtocol]
         """
@@ -552,6 +556,7 @@ async def _create_conversations_from_query_responses(
                         "task": task,
                         "expected_response": response,
                         "query": query,
+                        "original_text": text,
                     },
                     "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
                 }

From 6768f9a5f0a8449f1e172f3eaf68a1bd5afbc3b7 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Thu, 17 Oct 2024 14:13:47 -0700
Subject: [PATCH 09/51] Update test

---
 .../tests/unittests/test_non_adv_simulator.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py
index b98d5940bba6..592abfa0dde3 100644
--- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py
+++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py
@@ -353,6 +353,7 @@ async def test_create_conversations_from_query_responses(
             api_call_delay_sec=1,
             user_simulator_prompty=None,
             user_simulator_prompty_kwargs={},
+            text="some text",
         )
 
         assert len(result) == 1
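Patches 08-09 thread the seed text through to the serialized conversation. Roughly, a record emitted by _create_conversations_from_query_responses then takes this shape (field names from the diff; values illustrative, unrelated fields elided):

# Illustrative shape of one simulated conversation after patch 08: the seed
# text is echoed back under context["original_text"]. Values are made up.
record = {
    "messages": [
        {"role": "user", "content": "What does the text say about market share?"},
        {"role": "assistant", "content": "It reports an increase to about 6%."},
    ],
    "context": {
        "task": "the task description",
        "expected_response": "It reports an increase to about 6%.",
        "query": "What does the text say about market share?",
        "original_text": "the blob of text the query/response pairs came from",
    },
    "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
}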
From f7cc4bb1b3f7f8de6c73f41eeec20ed6702ea772 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Fri, 18 Oct 2024 16:38:43 -0700
Subject: [PATCH 10/51] Update the indirect attack simulator

---
 .../simulator/_indirect_attack_simulator.py | 107 ++++++++++++------
 1 file changed, 74 insertions(+), 33 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py
index 83f17254be3c..ce4178274fb1 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py
@@ -3,15 +3,18 @@
 # ---------------------------------------------------------
 # pylint: disable=C0301,C0114,R0913,R0903
 # noqa: E501
+import asyncio
 import logging
 from typing import Callable, cast
+
+from tqdm import tqdm
+
 from azure.ai.evaluation._common.utils import validate_azure_ai_project
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
-from azure.ai.evaluation.simulator import AdversarialScenario
+from azure.ai.evaluation.simulator import AdversarialScenario, SupportedLanguages
 from azure.core.credentials import TokenCredential
 
-from ._adversarial_simulator import AdversarialSimulator
+from ._adversarial_simulator import AdversarialSimulator, JsonLineList
 from ._helpers import experimental
 from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient, TokenScope
@@ -19,7 +22,7 @@
 
 
 @experimental
-class IndirectAttackSimulator:
+class IndirectAttackSimulator(AdversarialSimulator):
     """
     Initializes the XPIA (cross domain prompt injected attack) jailbreak adversarial simulator with a project scope.
 
@@ -69,29 +72,22 @@ def _ensure_service_dependencies(self):
     async def __call__(
         self,
         *,
-        scenario: AdversarialScenario,
         target: Callable,
-        max_conversation_turns: int = 1,
         max_simulation_results: int = 3,
         api_call_retry_limit: int = 3,
        api_call_retry_sleep_sec: int = 1,
         api_call_delay_sec: int = 0,
         concurrent_async_task: int = 3,
+        **kwargs,
     ):
         """
         Initializes the XPIA (cross domain prompt injected attack) jailbreak adversarial simulator with a project scope.
         This simulator converses with your AI system using prompts injected into the context to interrupt normal
         expected functionality by eliciting manipulated content, intrusion and attempting to gather information outside
         the scope of your AI system.
-
-        :keyword scenario: Enum value specifying the adversarial scenario used for generating inputs.
-        :paramtype scenario: azure.ai.evaluation.simulator.AdversarialScenario
         :keyword target: The target function to simulate adversarial inputs against.
             This function should be asynchronous and accept a dictionary representing the adversarial input.
         :paramtype target: Callable
-        :keyword max_conversation_turns: The maximum number of conversation turns to simulate.
-            Defaults to 1.
-        :paramtype max_conversation_turns: int
         :keyword max_simulation_results: The maximum number of simulation results to return.
             Defaults to 3.
         :paramtype max_simulation_results: int
@@ -128,11 +124,11 @@ async def __call__(
                 'template_parameters': {},
                 'messages': [
                     {
-                        'content': ' ',
+                        'content': '',
                         'role': 'user'
                     },
                     {
-                        'content': "",
+                        'content': "",
                         'role': 'assistant',
                         'context': None
                     }
@@ -141,25 +137,70 @@ async def __call__(
             }]
         }
         """
-        if scenario not in AdversarialScenario.__members__.values():
-            msg = f"Invalid scenario: {scenario}. Supported scenarios: {AdversarialScenario.__members__.values()}"
-            raise EvaluationException(
-                message=msg,
-                internal_message=msg,
-                target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
-                category=ErrorCategory.INVALID_VALUE,
-                blame=ErrorBlame.USER_ERROR,
+        # values that cannot be changed:
+        scenario = AdversarialScenario.ADVERSARIAL_INDIRECT_JAILBREAK
+        max_conversation_turns = 2
+        language = SupportedLanguages.English
+        self._ensure_service_dependencies()
+        templates = await self.adversarial_template_handler._get_content_harm_template_collections(scenario.value)
+        concurrent_async_task = min(concurrent_async_task, 1000)
+        semaphore = asyncio.Semaphore(concurrent_async_task)
+        sim_results = []
+        tasks = []
+        total_tasks = sum(len(t.template_parameters) for t in templates)
+        if max_simulation_results > total_tasks:
+            logger.warning(
+                "Cannot provide %s results due to maximum number of adversarial simulations that can be generated: %s."
+                "\n %s simulations will be generated.",
+                max_simulation_results,
+                total_tasks,
+                total_tasks,
             )
-        jb_sim = AdversarialSimulator(azure_ai_project=cast(dict, self.azure_ai_project), credential=self.credential)
-        jb_sim_results = await jb_sim(
-            scenario=scenario,
-            target=target,
-            max_conversation_turns=max_conversation_turns,
-            max_simulation_results=max_simulation_results,
-            api_call_retry_limit=api_call_retry_limit,
-            api_call_retry_sleep_sec=api_call_retry_sleep_sec,
-            api_call_delay_sec=api_call_delay_sec,
-            concurrent_async_task=concurrent_async_task,
-            _jailbreak_type="xpia",
+        total_tasks = min(total_tasks, max_simulation_results)
+        progress_bar = tqdm(
+            total=total_tasks,
+            desc="generating jailbreak simulations",
+            ncols=100,
+            unit="simulations",
         )
-        return jb_sim_results
+        for template in templates:
+            for parameter in template.template_parameters:
+                tasks.append(
+                    asyncio.create_task(
+                        self._simulate_async(
+                            target=target,
+                            template=template,
+                            parameters=parameter,
+                            max_conversation_turns=max_conversation_turns,
+                            api_call_retry_limit=api_call_retry_limit,
+                            api_call_retry_sleep_sec=api_call_retry_sleep_sec,
+                            api_call_delay_sec=api_call_delay_sec,
+                            language=language,
+                            semaphore=semaphore,
+                        )
+                    )
+                )
+                if len(tasks) >= max_simulation_results:
+                    break
+            if len(tasks) >= max_simulation_results:
+                break
+        for task in asyncio.as_completed(tasks):
+            completed_task = await task
+            template_parameters = completed_task.get("template_parameters", {})
+            xpia_attack_type = template_parameters.get("xpia_attack_type", "")
+            action = template_parameters.get("action", "")
+            document_type = template_parameters.get("document_type", "")
+            sim_results.append({
+                "messages": completed_task["messages"],
+                "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
+                "template_parameters": {
+                    "metadata": {
+                        "xpia_attack_type": xpia_attack_type,
+                        "action": action,
+                        "document_type": document_type,
+                    },
+                },
+            })
+            progress_bar.update(1)
+        progress_bar.close()
+        return JsonLineList(sim_results)
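Patch 10 hard-codes the scenario, so the simulator is now invoked without one. A sketch of the resulting call pattern, with a stub target and placeholder project values:

# Sketch only: invoking the updated IndirectAttackSimulator. The scenario and
# max_conversation_turns are now fixed internally; the caller supplies a target
# callback. Project values below are placeholders.
import asyncio
from azure.identity import DefaultAzureCredential
from azure.ai.evaluation.simulator import IndirectAttackSimulator

async def target(messages, stream=False, session_state=None, context=None):
    # Append a canned assistant reply; a real target would call the app under test.
    messages["messages"].append({"role": "assistant", "content": "ok"})
    return messages

azure_ai_project = {
    "subscription_id": "<subscription-id>",     # placeholder
    "resource_group_name": "<resource-group>",  # placeholder
    "project_name": "<project-name>",           # placeholder
}
simulator = IndirectAttackSimulator(
    azure_ai_project=azure_ai_project, credential=DefaultAzureCredential()
)
outputs = asyncio.run(simulator(target=target, max_simulation_results=2))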
From 07eb46678ecf0723ecbb6ea0265fdade0abb7185 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Fri, 18 Oct 2024 16:39:43 -0700
Subject: [PATCH 11/51] Black suggested fixes

---
 .../simulator/_indirect_attack_simulator.py | 22 ++++++++++---------
 .../ai/evaluation/simulator/_simulator.py   |  4 ++--
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py
index ce4178274fb1..79b987f2a595 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py
@@ -190,17 +190,19 @@ async def __call__(
             xpia_attack_type = template_parameters.get("xpia_attack_type", "")
             action = template_parameters.get("action", "")
             document_type = template_parameters.get("document_type", "")
-            sim_results.append({
-                "messages": completed_task["messages"],
-                "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
-                "template_parameters": {
-                    "metadata": {
-                        "xpia_attack_type": xpia_attack_type,
-                        "action": action,
-                        "document_type": document_type,
+            sim_results.append(
+                {
+                    "messages": completed_task["messages"],
+                    "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
+                    "template_parameters": {
+                        "metadata": {
+                            "xpia_attack_type": xpia_attack_type,
+                            "action": action,
+                            "document_type": document_type,
+                        },
                     },
-                },
-            })
+                }
+            )
             progress_bar.update(1)
         progress_bar.close()
         return JsonLineList(sim_results)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py
index 1a4b52fa7a5f..f2c529cd011c 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py
@@ -172,7 +172,7 @@ async def __call__(
             user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
             target=target,
             api_call_delay_sec=api_call_delay_sec,
-            text=text
+            text=text,
         )
 
     async def _simulate_with_predefined_turns(
@@ -517,7 +517,7 @@ async def _create_conversations_from_query_responses(
         :paramtype target: Callable
         :keyword api_call_delay_sec: Delay in seconds between API calls.
         :paramtype api_call_delay_sec: float
-        :keyword text: The initial input text for generating query responses. 
+        :keyword text: The initial input text for generating query responses.
         :paramtype text: str
         :return: A list of simulated conversations represented as JsonLineChatProtocol objects.
         :rtype: List[JsonLineChatProtocol]

From 942bfd59e68ffaae698369ccfd0bde89bad30a50 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Fri, 18 Oct 2024 16:41:06 -0700
Subject: [PATCH 12/51] Update simulator prompty

---
 .../ai/evaluation/simulator/_prompty/task_simulate.prompty | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty
index 1d8e360b56b9..4aa4af9d6a3e 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty
@@ -25,7 +25,7 @@ Output must be in JSON format
 Here's a sample output:
 {
   "content": "Here is my follow-up question.",
-  "user": "user"
+  "role": "user"
 }
 
 Output with a json object that continues the conversation, given the conversation history:

From 98cad972ce8d9d2012ffce1002f482f2be2212ad Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Fri, 18 Oct 2024 16:47:00 -0700
Subject: [PATCH 13/51] Update adversarial scenario enum to exclude XPIA

---
 .../azure/ai/evaluation/simulator/__init__.py              | 3 ++-
 .../azure/ai/evaluation/simulator/_adversarial_scenario.py | 5 +++++
 .../ai/evaluation/simulator/_indirect_attack_simulator.py  | 4 ++--
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/__init__.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/__init__.py
index 9011665f66b6..c05842651b2f 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/__init__.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/__init__.py
@@ -1,4 +1,4 @@
-from ._adversarial_scenario import AdversarialScenario
+from ._adversarial_scenario import AdversarialScenario, AdversarialScenarioJailbreak
 from ._adversarial_simulator import AdversarialSimulator
 from ._constants import SupportedLanguages
 from ._direct_attack_simulator import DirectAttackSimulator
@@ -8,6 +8,7 @@
 __all__ = [
     "AdversarialSimulator",
     "AdversarialScenario",
+    "AdversarialScenarioJailbreak",
     "DirectAttackSimulator",
     "IndirectAttackSimulator",
     "SupportedLanguages",

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_scenario.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_scenario.py
index 8588bf0d3947..a8b4489b130d 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_scenario.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_scenario.py
@@ -16,6 +16,11 @@ class AdversarialScenario(Enum):
     ADVERSARIAL_CONTENT_GEN_UNGROUNDED = "adv_content_gen_ungrounded"
     ADVERSARIAL_CONTENT_GEN_GROUNDED = "adv_content_gen_grounded"
     ADVERSARIAL_CONTENT_PROTECTED_MATERIAL = "adv_content_protected_material"
+
+
+class AdversarialScenarioJailbreak(Enum):
+    """Adversarial scenario types for XPIA Jailbreak"""
+
     ADVERSARIAL_INDIRECT_JAILBREAK = "adv_xpia"

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py
index 39ea74ece410..bcb4548d08bd 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py
@@ -12,7 +12,7 @@
 from azure.ai.evaluation._common.utils import validate_azure_ai_project
 from azure.ai.evaluation._common._experimental import experimental
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
-from azure.ai.evaluation.simulator import AdversarialScenario, SupportedLanguages
+from azure.ai.evaluation.simulator import AdversarialScenarioJailbreak, SupportedLanguages
 from azure.core.credentials import TokenCredential
 
 from ._adversarial_simulator import AdversarialSimulator, JsonLineList
@@ -140,7 +140,7 @@ async def __call__(
         }
         """
         # values that cannot be changed:
-        scenario = AdversarialScenario.ADVERSARIAL_INDIRECT_JAILBREAK
+        scenario = AdversarialScenarioJailbreak.ADVERSARIAL_INDIRECT_JAILBREAK
         max_conversation_turns = 2
         language = SupportedLanguages.English
         self._ensure_service_dependencies()
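After patch 13 the XPIA scenario lives in its own enum, and per patch 14's changelog note below it is no longer a member of AdversarialScenario. A minimal sketch of what the split means for callers:

# Sketch: XPIA is now selected via the jailbreak-specific enum.
from azure.ai.evaluation.simulator import AdversarialScenarioJailbreak

assert AdversarialScenarioJailbreak.ADVERSARIAL_INDIRECT_JAILBREAK.value == "adv_xpia"
# AdversarialScenario no longer carries ADVERSARIAL_INDIRECT_JAILBREAK;
# XPIA runs go through IndirectAttackSimulator instead.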
From d5103169f8dbb807dcf3cf143f4d04796912efff Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Fri, 18 Oct 2024 16:49:12 -0700
Subject: [PATCH 14/51] Update changelog

---
 sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
index e21b4b803103..152233879dac 100644
--- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
+++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
@@ -7,6 +7,7 @@
 
 ### Breaking Changes
 - Renamed environment variable `PF_EVALS_BATCH_USE_ASYNC` to `AI_EVALS_BATCH_USE_ASYNC`.
+- AdversarialScenario enum does not include `ADVERSARIAL_INDIRECT_JAILBREAK`, invoking IndirectJailbreak or XPIA should be done with `IndirectAttackSimulator`
 
 ### Bugs Fixed
 - Non adversarial simulator works with `gpt-4o` models using the `json_schema` response format

From 742943ef7ed2c26256570c8f55638ccee2a31ab5 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Fri, 18 Oct 2024 16:49:27 -0700
Subject: [PATCH 15/51] Black fixes

---
 .../azure/ai/evaluation/_common/_experimental.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/_experimental.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/_experimental.py
index 6728a61649c6..ca676c9bcdc9 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/_experimental.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/_experimental.py
@@ -27,13 +27,11 @@
 
 
 @overload
-def experimental(wrapped: Type[T]) -> Type[T]:
-    ...
+def experimental(wrapped: Type[T]) -> Type[T]: ...
 
 
 @overload
-def experimental(wrapped: Callable[P, T]) -> Callable[P, T]:
-    ...
+def experimental(wrapped: Callable[P, T]) -> Callable[P, T]: ...
 
 
 def experimental(wrapped: Union[Type[T], Callable[P, T]]) -> Union[Type[T], Callable[P, T]]:

From 12e06155f2b8068886b56ac5ad7c9c16787ddf87 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Fri, 18 Oct 2024 16:52:03 -0700
Subject: [PATCH 16/51] Remove duplicate import

---
 .../azure/ai/evaluation/simulator/_indirect_attack_simulator.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py
index bcb4548d08bd..dc3c92789330 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py
@@ -11,7 +11,6 @@
 
 from azure.ai.evaluation._common.utils import validate_azure_ai_project
 from azure.ai.evaluation._common._experimental import experimental
-from azure.ai.evaluation._common.utils import validate_azure_ai_project
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation.simulator import AdversarialScenarioJailbreak, SupportedLanguages
 from azure.core.credentials import TokenCredential
 
 from ._adversarial_simulator import AdversarialSimulator, JsonLineList

From de32b50eb491ad46b5c35fe333eebad9c7e852be Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Fri, 18 Oct 2024 18:16:38 -0700
Subject: [PATCH 17/51] Fix the mypy error

---
 .../simulator/_indirect_attack_simulator.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py
index dc3c92789330..e9426a309799 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py
@@ -5,7 +5,7 @@
 # noqa: E501
 import asyncio
 import logging
-from typing import Callable, cast
+from typing import Any, Callable, Dict, cast
 
 from tqdm import tqdm
 
@@ -58,6 +58,7 @@ def __init__(self, *, azure_ai_project: dict, credential):
         self.adversarial_template_handler = AdversarialTemplateHandler(
             azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
         )
+        super().__init__(azure_ai_project=azure_ai_project, credential=credential)
 
     def _ensure_service_dependencies(self):
         if self.rai_client is None:
@@ -186,11 +187,11 @@ async def __call__(
             if len(tasks) >= max_simulation_results:
                 break
         for task in asyncio.as_completed(tasks):
-            completed_task = await task
-            template_parameters = completed_task.get("template_parameters", {})
-            xpia_attack_type = template_parameters.get("xpia_attack_type", "")
-            action = template_parameters.get("action", "")
-            document_type = template_parameters.get("document_type", "")
+            completed_task: Dict[str, Any] = await task
+            template_parameters: Dict[str, Any] = completed_task.get("template_parameters", {})
+            xpia_attack_type: str = template_parameters.get("xpia_attack_type", "")
+            action: str = template_parameters.get("action", "")
+            document_type: str = template_parameters.get("document_type", "")
             sim_results.append(
                 {
                     "messages": completed_task["messages"],
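For orientation, the record that patches 11 and 17-18 build and annotate looks roughly like this (keys taken from the diffs; metadata values are hypothetical):

# Illustrative shape of one entry in the JsonLineList returned by
# IndirectAttackSimulator.__call__ after patches 10-17. Values are made up.
sim_result = {
    "messages": [
        {"role": "user", "content": "Summarize the attached document."},
        {"role": "assistant", "content": "Here is the summary..."},
    ],
    "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
    "template_parameters": {
        "metadata": {
            "xpia_attack_type": "information_gathering",  # hypothetical value
            "action": "exfiltrate_data",                  # hypothetical value
            "document_type": "email",                     # hypothetical value
        },
    },
}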
From 4b6413237d638bad6333e56127953a278096114e Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Mon, 21 Oct 2024 09:21:55 -0700
Subject: [PATCH 18/51] Mypy please be happy

---
 .../simulator/_indirect_attack_simulator.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py
index e9426a309799..3ffc559d18a6 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py
@@ -5,7 +5,7 @@
 # noqa: E501
 import asyncio
 import logging
-from typing import Any, Callable, Dict, cast
+from typing import Callable, cast
 
 from tqdm import tqdm
 
@@ -187,14 +187,14 @@ async def __call__(
             if len(tasks) >= max_simulation_results:
                 break
         for task in asyncio.as_completed(tasks):
-            completed_task: Dict[str, Any] = await task
-            template_parameters: Dict[str, Any] = completed_task.get("template_parameters", {})
-            xpia_attack_type: str = template_parameters.get("xpia_attack_type", "")
-            action: str = template_parameters.get("action", "")
-            document_type: str = template_parameters.get("document_type", "")
+            completed_task = await task  # type: ignore
+            template_parameters = completed_task.get("template_parameters", {})  # type: ignore
+            xpia_attack_type = template_parameters.get("xpia_attack_type", "")  # type: ignore
+            action = template_parameters.get("action", "")  # type: ignore
+            document_type = template_parameters.get("document_type", "")  # type: ignore
             sim_results.append(
                 {
-                    "messages": completed_task["messages"],
+                    "messages": completed_task["messages"],  # type: ignore
                     "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
                     "template_parameters": {
                         "metadata": {

From 1c0b4dd68c32d9c2363657616c6724eef0b2b238 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Tue, 22 Oct 2024 08:14:38 -0700
Subject: [PATCH 19/51] Updates to non adv simulator

---
 sdk/evaluation/azure-ai-evaluation/README.md  | 44 +++++++++----------
 .../_prompty/task_query_response.prompty      |  8 ++--
 .../simulator/_prompty/task_simulate.prompty  |  5 +++
 .../ai/evaluation/simulator/_simulator.py     | 29 +++++++-----
 4 files changed, 48 insertions(+), 38 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/README.md b/sdk/evaluation/azure-ai-evaluation/README.md
index a657c4d55577..bfe1a07e31df 100644
--- a/sdk/evaluation/azure-ai-evaluation/README.md
+++ b/sdk/evaluation/azure-ai-evaluation/README.md
@@ -199,28 +199,28 @@
 On January 24, 1984, former Apple CEO Steve Jobs introduced the first Macintosh. Some years later, research firms IDC and Gartner reported that Apple's market share in the U.S. had increased to about 6%.
 <|text_end|>
 Output with 5 QnAs:
-[
-    {
-        "q": "When did the former Apple CEO Steve Jobs introduced the first Macintosh?",
-        "r": "January 24, 1984"
-    },
-    {
-        "q": "Who was the former Apple CEO that introduced the first Macintosh on January 24, 1984?",
-        "r": "Steve Jobs"
-    },
-    {
-        "q": "What percent of the desktop share did Apple have in the United States in late 2003?",
-        "r": "2.06 percent"
-    },
-    {
-        "q": "What were the research firms that reported on Apple's market share in the U.S.?",
-        "r": "IDC and Gartner"
-    },
-    {
-        "q": "What was the percentage increase of Apple's market share in the U.S., as reported by research firms IDC and Gartner?",
-        "r": "6%"
-    }
-]
+{
+    "qna": [{
+        "q": "When did the former Apple CEO Steve Jobs introduced the first Macintosh?",
+        "r": "January 24, 1984"
+    },
+    {
+        "q": "Who was the former Apple CEO that introduced the first Macintosh on January 24, 1984?",
+        "r": "Steve Jobs"
+    },
+    {
+        "q": "What percent of the desktop share did Apple have in the United States in late 2003?",
+        "r": "2.06 percent"
+    },
+    {
+        "q": "What were the research firms that reported on Apple's market share in the U.S.?",
+        "r": "IDC and Gartner"
+    },
+    {
+        "q": "What was the percentage increase of Apple's market share in the U.S., as reported by research firms IDC and Gartner?",
+        "r": "6%"
+    }]
+}
 Text:
 <|text_start|>
 {{ text }}

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty
index 42a5d3fe4e37..08ed1fc8596b 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty
@@ -36,8 +36,8 @@
 On January 24, 1984, former Apple CEO Steve Jobs introduced the first Macintosh. Some years later, research firms IDC and Gartner reported that Apple's market share in the U.S. had increased to about 6%.
 <|text_end|>
 Output with 5 QnAs:
-[
-    {
+{
+    "qna":[{
         "q": "When did the former Apple CEO Steve Jobs introduced the first Macintosh?",
         "r": "January 24, 1984"
     },
@@ -56,8 +56,8 @@ Output with 5 QnAs:
     {
         "q": "What was the percentage increase of Apple's market share in the U.S., as reported by research firms IDC and Gartner?",
         "r": "6%"
-    }
-]
+    }]
+}
 Text:
 <|text_start|>
 {{ text }}

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty
index 4aa4af9d6a3e..225dc3904439 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty
@@ -16,6 +16,9 @@ inputs:
     type: string
   conversation_history:
     type: dict
+  action:
+    type: string
+    default: "continue the converasation and make sure the task is completed by asking relevant questions"
 
 ---
 system:
@@ -30,3 +33,5 @@ Here's a sample output:
 {
   "content": "Here is my follow-up question.",
   "role": "user"
 }
 
 Output with a json object that continues the conversation, given the conversation history:
 {{ conversation_history }}
+
+{{ action }}

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py
index 994b07228235..d46fe6c81340 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py
@@ -611,8 +611,6 @@ async def _complete_conversation(
         :rtype: List[Dict[str, Optional[str]]]
         """
         conversation_history = ConversationHistory()
-        # user_turn = Turn(role=ConversationRole.USER, content=conversation_starter)
-        # conversation_history.add_to_history(user_turn)
 
         while len(conversation_history) < max_conversation_turns:
             user_flow = self._load_user_simulation_flow(
@@ -620,16 +618,23 @@ async def _complete_conversation(
                 prompty_model_config=self.model_config,  # type: ignore
                 user_simulator_prompty_kwargs=user_simulator_prompty_kwargs,
             )
-            conversation_starter_from_simulated_user = await user_flow(
-                task=task,
-                conversation_history=[
-                    {
-                        "role": "assistant",
-                        "content": conversation_starter,
-                        "your_task": "Act as the user and translate the content into a user query.",
-                    }
-                ],
-            )
+            if len(conversation_history) == 0:
+                conversation_starter_from_simulated_user = await user_flow(
+                    task=task,
+                    conversation_history=[
+                        {
+                            "role": "assistant",
+                            "content": conversation_starter,
+                        }
+                    ],
+                    action="rewrite the assitant's message as you have to accomplish the task by asking the right questions. Make sure the original question is not lost in your rewrite.",
+                )
+            else:
+                conversation_starter_from_simulated_user = await user_flow(
+                    task=task,
+                    conversation_history=conversation_history.to_list(),
+                    action="Your goal is to make sure the task is completed by asking the right questions. Do not ask the same questions again.",
+                )
             if isinstance(conversation_starter_from_simulated_user, dict):
                 conversation_starter_from_simulated_user = conversation_starter_from_simulated_user["content"]
             user_turn = Turn(role=ConversationRole.USER, content=conversation_starter_from_simulated_user)
             conversation_history.add_to_history(user_turn)
             assistant_response = await self._get_target_response(
                 target=target, api_call_delay_sec=api_call_delay_sec, conversation_history=conversation_history
             )
             assistant_turn = Turn(role=ConversationRole.ASSISTANT, content=assistant_response)
             conversation_history.add_to_history(assistant_turn)
             progress_bar.update(1)
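Patch 19 wraps the few-shot output in a top-level "qna" object, which is what makes json_schema-style structured output workable (such schemas require an object, not an array, at the root). A small sketch of consuming that shape:

# Sketch: parsing the query/response payload in its new {"qna": [...]} shape.
import json

raw = '{"qna": [{"q": "When was the first Macintosh introduced?", "r": "January 24, 1984"}]}'
for pair in json.loads(raw)["qna"]:
    query, response = pair["q"], pair["r"]
    print(query, "->", response)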
From 6de617cd4786fe52d4382695cc65430c5596d21a Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Wed, 23 Oct 2024 10:50:55 -0700
Subject: [PATCH 20/51] accept context from assistant messages, exclude them
 when using them for conversation

---
 .../_helpers/_simulator_data_classes.py       | 23 ++++++++++++++++++-
 .../ai/evaluation/simulator/_simulator.py     | 24 +++++++++----------
 2 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py
index 109384bc2500..7f1b541a53e6 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py
@@ -30,7 +30,19 @@ def to_dict(self) -> Dict[str, Optional[str]]:
         return {
             "role": self.role.value if isinstance(self.role, ConversationRole) else self.role,
             "content": self.content,
-            "context": self.context,
+            "context": str(self.context),
+        }
+
+    def to_context_free_dict(self) -> Dict[str, Optional[str]]:
+        """
+        Convert the conversation turn to a dictionary without context.
+
+        :returns: A dictionary representation of the conversation turn without context.
+        :rtype: Dict[str, Optional[str]]
+        """
+        return {
+            "role": self.role.value if isinstance(self.role, ConversationRole) else self.role,
+            "content": self.content,
         }
 
     def __repr__(self):
@@ -65,6 +77,15 @@ def to_list(self) -> List[Dict[str, Optional[str]]]:
         :rtype: List[Dict[str, str]]
         """
         return [turn.to_dict() for turn in self.history]
+
+    def to_context_free_list(self) -> List[Dict[str, Optional[str]]]:
+        """
+        Converts the conversation history to a list of dictionaries without context.
+
+        :returns: A list of dictionaries representing the conversation turns without context.
+        :rtype: List[Dict[str, str]]
+        """
+        return [turn.to_context_free_dict() for turn in self.history]
 
     def __len__(self) -> int:
         return len(self.history)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py
index d46fe6c81340..61b1291e14bf 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py
@@ -222,10 +222,10 @@ async def _simulate_with_predefined_turns(
                 for simulated_turn in simulation:
                     user_turn = Turn(role=ConversationRole.USER, content=simulated_turn)
                     current_simulation.add_to_history(user_turn)
-                    assistant_response = await self._get_target_response(
+                    assistant_response, assistant_context = await self._get_target_response(
                         target=target, api_call_delay_sec=api_call_delay_sec, conversation_history=current_simulation
                     )
-                    assistant_turn = Turn(role=ConversationRole.ASSISTANT, content=assistant_response)
+                    assistant_turn = Turn(role=ConversationRole.ASSISTANT, content=assistant_response, context=assistant_context)
                     current_simulation.add_to_history(assistant_turn)
                     progress_bar.update(1)  # Update progress bar for both user and assistant turns
 
@@ -295,17 +295,17 @@ async def _extend_conversation_with_simulator(
         while len(current_simulation) < max_conversation_turns:
             user_response_content = await user_flow(
                 task="Continue the conversation",
-                conversation_history=current_simulation.to_list(),
+                conversation_history=current_simulation.to_context_free_list(),
                 **user_simulator_prompty_kwargs,
             )
             user_response = self._parse_prompty_response(response=user_response_content)
             user_turn = Turn(role=ConversationRole.USER, content=user_response["content"])
             current_simulation.add_to_history(user_turn)
             await asyncio.sleep(api_call_delay_sec)
-            assistant_response = await self._get_target_response(
+            assistant_response, assistant_context = await self._get_target_response(
                 target=target, api_call_delay_sec=api_call_delay_sec, conversation_history=current_simulation
             )
-            assistant_turn = Turn(role=ConversationRole.ASSISTANT, content=assistant_response)
+            assistant_turn = Turn(role=ConversationRole.ASSISTANT, content=assistant_response, context=assistant_context)
             current_simulation.add_to_history(assistant_turn)
             progress_bar.update(1)
 
@@ -632,17 +632,17 @@ async def _complete_conversation(
             else:
                 conversation_starter_from_simulated_user = await user_flow(
                     task=task,
-                    conversation_history=conversation_history.to_list(),
+                    conversation_history=conversation_history.to_context_free_list(),
                     action="Your goal is to make sure the task is completed by asking the right questions. Do not ask the same questions again.",
                 )
             if isinstance(conversation_starter_from_simulated_user, dict):
                 conversation_starter_from_simulated_user = conversation_starter_from_simulated_user["content"]
             user_turn = Turn(role=ConversationRole.USER, content=conversation_starter_from_simulated_user)
             conversation_history.add_to_history(user_turn)
-            assistant_response = await self._get_target_response(
+            assistant_response, assistant_context = await self._get_target_response(
                 target=target, api_call_delay_sec=api_call_delay_sec, conversation_history=conversation_history
             )
-            assistant_turn = Turn(role=ConversationRole.ASSISTANT, content=assistant_response)
+            assistant_turn = Turn(role=ConversationRole.ASSISTANT, content=assistant_response, context=assistant_context)
             conversation_history.add_to_history(assistant_turn)
             progress_bar.update(1)
 
@@ -653,7 +653,7 @@ async def _complete_conversation(
 
     async def _get_target_response(
         self, *, target: Callable, api_call_delay_sec: float, conversation_history: ConversationHistory
-    ) -> str:
+    ) -> str, Optional[str]:
         """
         Retrieves the response from the target callback based on the current conversation history.
 
@@ -663,8 +663,8 @@ async def _get_target_response(
         :paramtype api_call_delay_sec: float
         :keyword conversation_history: The current conversation history.
         :paramtype conversation_history: ConversationHistory
-        :return: The content of the response from the target.
-        :rtype: str
+        :return: The content of the response from the target and an optional context.
+        :rtype: str, Optional[str]
         """
         response = await target(
             messages={"messages": conversation_history.to_list()},
@@ -674,4 +674,4 @@ async def _get_target_response(
         )
         await asyncio.sleep(api_call_delay_sec)
         latest_message = response["messages"][-1]
-        return latest_message["content"]
+        return latest_message["content"], latest_message.get("context", "")  # type: ignore

From 1e5d40c74c3f5ba3b56d185f8c652ecc32e59819 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Wed, 23 Oct 2024 10:53:23 -0700
Subject: [PATCH 21/51] update changelog

---
 sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
index 4add9ed69184..1425828f73cc 100644
--- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
+++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
@@ -12,6 +12,7 @@
 ### Bugs Fixed
 - Non adversarial simulator works with `gpt-4o` models using the `json_schema` response format
 - Fix evaluate API failure when `trace.destination` is set to `none`
+- Non adversarial simulator now accepts context from the callback
 
 ### Other Changes
 - Improved error messages for the `evaluate` API by enhancing the validation of input parameters. This update provides more detailed and actionable error descriptions.
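With patch 20, a target callback can attach context to its assistant reply; the simulator keeps it on the assistant Turn but strips it from the history sent back to the simulated user. A sketch of such a callback (names follow the diff; the context string is illustrative):

# Sketch: a target callback whose assistant message carries "context".
# _get_target_response now returns (content, context) and stores the context
# on the assistant Turn; to_context_free_list() strips it for the user LLM.
async def target(messages, stream=False, session_state=None, context=None):
    messages["messages"].append(
        {
            "role": "assistant",
            "content": "The first Macintosh shipped in 1984.",
            "context": "retrieved: Apple press release, January 1984",  # illustrative
        }
    )
    return messages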
From 93b29c7d2a116e40f61a65668eb9053dec29ff82 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 23 Oct 2024 11:01:49 -0700 Subject: [PATCH 22/51] pylint fixes --- .../evaluation/simulator/_helpers/_simulator_data_classes.py | 4 ++-- .../azure/ai/evaluation/simulator/_simulator.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py index 7f1b541a53e6..6bd57db206bf 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py @@ -32,7 +32,7 @@ def to_dict(self) -> Dict[str, Optional[str]]: "content": self.content, "context": str(self.context), } - + def to_context_free_dict(self) -> Dict[str, Optional[str]]: """ Convert the conversation turn to a dictionary without context. @@ -77,7 +77,7 @@ def to_list(self) -> List[Dict[str, Optional[str]]]: :rtype: List[Dict[str, str]] """ return [turn.to_dict() for turn in self.history] - + def to_context_free_list(self) -> List[Dict[str, Optional[str]]]: """ Converts the conversation history to a list of dictionaries without context. diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py index 61b1291e14bf..94b708ca60de 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py @@ -9,7 +9,7 @@ import os import re import warnings -from typing import Any, Callable, Dict, List, Optional, Union +from typing import Any, Callable, Dict, List, Optional, Union, Tuple from promptflow.core import AsyncPrompty from tqdm import tqdm @@ -653,7 +653,7 @@ async def _complete_conversation( async def _get_target_response( self, *, target: Callable, api_call_delay_sec: float, conversation_history: ConversationHistory - ) -> str, Optional[str]: + ) -> Tuple[str, Optional[str]]: """ Retrieves the response from the target callback based on the current conversation history. From 8e3ddc316c8ecd0621db457cd44850508e1d015a Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 23 Oct 2024 11:04:00 -0700 Subject: [PATCH 23/51] pylint fixes --- .../evaluation/simulator/_helpers/_simulator_data_classes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py index 6bd57db206bf..a887e1d133b4 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py @@ -32,7 +32,7 @@ def to_dict(self) -> Dict[str, Optional[str]]: "content": self.content, "context": str(self.context), } - + def to_context_free_dict(self) -> Dict[str, Optional[str]]: """ Convert the conversation turn to a dictionary without context. 
@@ -77,7 +77,7 @@ def to_list(self) -> List[Dict[str, Optional[str]]]: :rtype: List[Dict[str, str]] """ return [turn.to_dict() for turn in self.history] - + def to_context_free_list(self) -> List[Dict[str, Optional[str]]]: """ Converts the conversation history to a list of dictionaries without context. From 4ccc7c8d449e6ff3374d8da205a8fafaf9047d5a Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 23 Oct 2024 11:24:45 -0700 Subject: [PATCH 24/51] remove redundant quotes --- .../ai/evaluation/simulator/_prompty/task_simulate.prompty | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty index 225dc3904439..00af8c580464 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty @@ -18,7 +18,7 @@ inputs: type: dict action: type: string - default: "continue the converasation and make sure the task is completed by asking relevant questions" + default: continue the converasation and make sure the task is completed by asking relevant questions --- system: From bed51962970e4949d739a9de72705761638d1ed0 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 23 Oct 2024 11:46:29 -0700 Subject: [PATCH 25/51] Fix typo --- .../azure/ai/evaluation/simulator/_simulator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py index 7bdbc8af24d1..814c3e4d369e 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py @@ -627,7 +627,7 @@ async def _complete_conversation( "content": conversation_starter, } ], - action="rewrite the assitant's message as you have to accomplish the task by asking the right questions. Make sure the original question is not lost in your rewrite.", + action="rewrite the assistant's message as you have to accomplish the task by asking the right questions. 
Make sure the original question is not lost in your rewrite.", ) else: conversation_starter_from_simulated_user = await user_flow( From 0fdd6441bd6fe9c866141974c3e6d7f6a461c69f Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 23 Oct 2024 12:10:58 -0700 Subject: [PATCH 26/51] pylint fix --- .../azure/ai/evaluation/simulator/_indirect_attack_simulator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py index 6575c0798a53..3ffc559d18a6 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py @@ -11,7 +11,6 @@ from azure.ai.evaluation._common.utils import validate_azure_ai_project from azure.ai.evaluation._common._experimental import experimental -from azure.ai.evaluation._common.utils import validate_azure_ai_project from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException from azure.ai.evaluation.simulator import AdversarialScenarioJailbreak, SupportedLanguages from azure.core.credentials import TokenCredential From 1f695ccab667d4c89d70e506ec3029d967bb30f6 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 23 Oct 2024 12:52:40 -0700 Subject: [PATCH 27/51] Update broken tests --- .../tests/unittests/test_non_adv_simulator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py index 592abfa0dde3..8be780461674 100644 --- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py @@ -161,7 +161,7 @@ async def test_complete_conversation( mock_user_flow = AsyncMock() mock_user_flow.return_value = {"content": "User response"} mock_load_user_simulation_flow.return_value = mock_user_flow - mock_get_target_response.return_value = "Assistant response" + mock_get_target_response.return_value = "Assistant response", "Assistant context" conversation = await simulator._complete_conversation( conversation_starter="Hello", @@ -185,7 +185,7 @@ async def test_get_target_response(self, valid_openai_model_config): mock_target = AsyncMock() mock_target.return_value = { "messages": [ - {"role": "assistant", "content": "Assistant response"}, + {"role": "assistant", "content": "Assistant response", "context": "assistant context"}, ] } response = await simulator._get_target_response( @@ -193,7 +193,7 @@ async def test_get_target_response(self, valid_openai_model_config): api_call_delay_sec=0, conversation_history=AsyncMock(), ) - assert response == "Assistant response" + assert response == ("Assistant response", "assistant context") @pytest.mark.asyncio async def test_call_with_both_conversation_turns_and_text_tasks(self, valid_openai_model_config): @@ -317,7 +317,7 @@ async def test_simulate_with_predefined_turns( self, mock_extend_conversation_with_simulator, mock_get_target_response, valid_openai_model_config ): simulator = Simulator(model_config=valid_openai_model_config) - mock_get_target_response.return_value = "assistant_response" + mock_get_target_response.return_value = "assistant_response", "assistant_context" 
mock_extend_conversation_with_simulator.return_value = None conversation_turns = [["user_turn"]] From 92c9a6d04bf01bebca18c9bb4fb0132a798da01b Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Fri, 25 Oct 2024 09:56:49 -0700 Subject: [PATCH 28/51] Include the grounding json in the manifest --- sdk/evaluation/azure-ai-evaluation/MANIFEST.in | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/evaluation/azure-ai-evaluation/MANIFEST.in b/sdk/evaluation/azure-ai-evaluation/MANIFEST.in index 1aeecacdfc11..7294aaa88864 100644 --- a/sdk/evaluation/azure-ai-evaluation/MANIFEST.in +++ b/sdk/evaluation/azure-ai-evaluation/MANIFEST.in @@ -4,3 +4,4 @@ include azure/__init__.py include azure/ai/__init__.py include azure/ai/evaluation/py.typed recursive-include azure/ai/evaluation *.prompty +include azure/ai/evaluation/simulator/_data_sources *.json From 0673cd5178450db9cf2f3a0b49df57702e98e347 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Fri, 25 Oct 2024 10:03:16 -0700 Subject: [PATCH 29/51] Fix typo --- sdk/evaluation/azure-ai-evaluation/MANIFEST.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/MANIFEST.in b/sdk/evaluation/azure-ai-evaluation/MANIFEST.in index 7294aaa88864..fa5dccd6c8f7 100644 --- a/sdk/evaluation/azure-ai-evaluation/MANIFEST.in +++ b/sdk/evaluation/azure-ai-evaluation/MANIFEST.in @@ -4,4 +4,4 @@ include azure/__init__.py include azure/ai/__init__.py include azure/ai/evaluation/py.typed recursive-include azure/ai/evaluation *.prompty -include azure/ai/evaluation/simulator/_data_sources *.json +include azure/ai/evaluation/simulator/_data_sources/grounding.json \ No newline at end of file From 7b360fce457b5bdb4c98e7fac27da75d448c9d54 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Fri, 25 Oct 2024 10:11:10 -0700 Subject: [PATCH 30/51] Come on package --- .../azure/ai/evaluation/simulator/_data_sources/__init__.py | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_data_sources/__init__.py diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_data_sources/__init__.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_data_sources/__init__.py new file mode 100644 index 000000000000..d540fd20468c --- /dev/null +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_data_sources/__init__.py @@ -0,0 +1,3 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- From c9f38c94a177b299d2d3ae15b5f58392b5534d58 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Mon, 28 Oct 2024 06:45:01 -0700 Subject: [PATCH 31/51] Release 1.0.0b5 --- sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index 33fbfa2096fc..eeece2d2ae9d 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -1,7 +1,6 @@ # Release History - -## 1.0.0b5 (Unreleased) +## 1.0.0b5 (2024-10-28) ### Features Added - Added `GroundednessProEvaluator`, which is a service-based evaluator for determining response groundedness. 
From ed7eed1129bc62ec4fc461c4d520b15329f7ebd7 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Mon, 28 Oct 2024 13:51:47 -0700 Subject: [PATCH 32/51] Notice from Chang --- sdk/evaluation/azure-ai-evaluation/NOTICE.txt | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/sdk/evaluation/azure-ai-evaluation/NOTICE.txt b/sdk/evaluation/azure-ai-evaluation/NOTICE.txt index 9dc8704c7f6e..ec5e545abaef 100644 --- a/sdk/evaluation/azure-ai-evaluation/NOTICE.txt +++ b/sdk/evaluation/azure-ai-evaluation/NOTICE.txt @@ -48,3 +48,23 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + + +License notice for [Is GPT-4 a reliable rater? Evaluating consistency in GPT-4's text ratings](https://www.frontiersin.org/journals/education/articles/10.3389/feduc.2023.1272229/full) +------------------------------------------------------------------------------------------------------------------ +Copyright © 2023 Hackl, Müller, Granitzer and Sailer. This work is openly licensed via [CC BY 4.0](http://creativecommons.org/licenses/by/4.0/). + + +License notice for [Is ChatGPT a Good NLG Evaluator? A Preliminary Study](https://aclanthology.org/2023.newsum-1.1) (Wang et al., NewSum 2023) +------------------------------------------------------------------------------------------------------------------ +Copyright © 2023. This work is openly licensed via [CC BY 4.0](http://creativecommons.org/licenses/by/4.0/). + + +License notice for [SummEval: Re-evaluating Summarization Evaluation.](https://doi.org/10.1162/tacl_a_00373) (Fabbri et al.) +------------------------------------------------------------------------------------------------------------------ +© 2021 Association for Computational Linguistics. This work is openly licensed via [CC BY 4.0](http://creativecommons.org/licenses/by/4.0/). + + +License notice for [Evaluation Metrics in the Era of GPT-4: Reliably Evaluating Large Language Models on Sequence to Sequence Tasks](https://aclanthology.org/2023.emnlp-main.543) (Sottana et al., EMNLP 2023) +------------------------------------------------------------------------------------------------------------------ +© 2023 Association for Computational Linguistics. This work is openly licensed via [CC BY 4.0](http://creativecommons.org/licenses/by/4.0/). 
\ No newline at end of file

From 3de5b660335a7508e162d07b48bde0483e340a02 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Mon, 28 Oct 2024 16:25:46 -0700
Subject: [PATCH 33/51] Remove adv_conv template parameters from the outputs

---
 .../azure/ai/evaluation/simulator/_adversarial_simulator.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py
index d96cb4df5cd3..a78de5a4778d 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py
@@ -276,6 +276,9 @@ def _to_chat_protocol(
             "target_population",
             "topic",
             "ch_template_placeholder",
+            "chatbot_name",
+            "name",
+            "group",
         ):
             template_parameters.pop(key, None)
         if conversation_category:

From f2e95d1313fdba0370ba6fcc0e21a226115d2e93 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Tue, 29 Oct 2024 06:52:58 -0700
Subject: [PATCH 34/51] Update changelog

---
 sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 10 ++++++++++
 .../azure/ai/evaluation/_version.py             |  2 +-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
index 2062a185f80f..d00c8a53f0a8 100644
--- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
+++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
@@ -1,5 +1,15 @@
 # Release History
 
+## 1.0.0b6 (Unreleased)
+
+### Features Added
+
+### Breaking Changes
+
+### Bugs Fixed
+
+### Other Changes
+
 ## 1.0.0b5 (2024-10-28)
 
 ### Features Added
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_version.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_version.py
index eecd2a8e450f..ffa055f43119 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_version.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_version.py
@@ -2,4 +2,4 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-VERSION = "1.0.0b5"
+VERSION = "1.0.0b6"

From f9ac10cac827d0db714919f83e0acc14c9fed5ce Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Tue, 29 Oct 2024 12:16:45 -0700
Subject: [PATCH 35/51] Experimental tags on adv scenarios

---
 .../azure/ai/evaluation/simulator/_adversarial_scenario.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_scenario.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_scenario.py
index a8b4489b130d..f75459dcf1c2 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_scenario.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_scenario.py
@@ -3,8 +3,10 @@
 from enum import Enum
 
+from azure.ai.evaluation._common._experimental import experimental
 
 
+@experimental
 class AdversarialScenario(Enum):
     """Adversarial scenario types"""
 
@@ -18,12 +20,14 @@ class AdversarialScenario(Enum):
     ADVERSARIAL_CONTENT_PROTECTED_MATERIAL = "adv_content_protected_material"
 
 
+@experimental
 class AdversarialScenarioJailbreak(Enum):
     """Adversarial scenario types for XPIA Jailbreak"""
 
     ADVERSARIAL_INDIRECT_JAILBREAK = "adv_xpia"
 
 
+@experimental
 class _UnstableAdversarialScenario(Enum):
     """Adversarial scenario types that we haven't published, but still want available for internal use
     Values listed here are subject to potential change, and/or migration to the main enum over time.

From 6c81cbbf2ca3af409d625d36915541fa3f545ef5 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Wed, 30 Oct 2024 08:08:29 -0700
Subject: [PATCH 36/51] Readme fix on breaking change

---
 sdk/evaluation/azure-ai-evaluation/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/README.md b/sdk/evaluation/azure-ai-evaluation/README.md
index ca507339c6b8..05898820b8f0 100644
--- a/sdk/evaluation/azure-ai-evaluation/README.md
+++ b/sdk/evaluation/azure-ai-evaluation/README.md
@@ -403,7 +403,7 @@ outputs = asyncio.run(
     )
 )
 
-print(outputs.to_eval_qa_json_lines())
+print(outputs.to_eval_qr_json_lines())
 ```
 
 #### Direct Attack Simulator

From b48f8ab2dbd0fc2fdcce81692721f87109e71169 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Wed, 30 Oct 2024 11:37:35 -0700
Subject: [PATCH 37/51] Add the category and both user and assistant context to the response of qr_json_lines

---
 .../azure/ai/evaluation/simulator/_utils.py | 32 +++++++++++++++----
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_utils.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_utils.py
index 8407b264fa2d..3416cf93e93e 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_utils.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_utils.py
@@ -44,23 +44,41 @@ def to_eval_qr_json_lines(self):
         for item in self:
             user_message = None
             assistant_message = None
-            context = None
+            user_context = None
+            assistant_context = None
+            template_parameters = item.get("template_parameters", {})
+            category = template_parameters.get("category", None)
             for message in item["messages"]:
                 if message["role"] == "user":
                     user_message = message["content"]
+                    user_context = message.get("context", "")
                 elif message["role"] == "assistant":
                     assistant_message = message["content"]
-                    if 
"context" in message: - context = message.get("context", None) + assistant_context = message.get("context", "") if user_message and assistant_message: - if context: + if user_context or assistant_context: json_lines += ( - json.dumps({"query": user_message, "response": assistant_message, "context": context}) + json.dumps( + { + "query": user_message, + "response": assistant_message, + "context": str( + { + "user_context": user_context, + "assistant_context": assistant_context, + } + ), + "category": category, + } + ) + "\n" ) - user_message = assistant_message = context = None + user_message = assistant_message = None else: - json_lines += json.dumps({"query": user_message, "response": assistant_message}) + "\n" + json_lines += ( + json.dumps({"query": user_message, "response": assistant_message, "category": category}) + + "\n" + ) user_message = assistant_message = None return json_lines From d422e05d22911efaf29d165712e9f02da90b1376 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 30 Oct 2024 11:42:03 -0700 Subject: [PATCH 38/51] Update changelog --- sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index 8235e9440c85..3a500849c3cb 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -7,6 +7,7 @@ ### Breaking Changes ### Bugs Fixed +- Output of adversarial simulators are of type `JsonLineList` and the helper function `to_eval_qr_json_lines` now outputs context from both user and assistant turns along with `category` if it exists in the conversation ### Other Changes - Refined error messages for serviced-based evaluators and simulators. From fb12fdd81846b844cf14182e41d42519f1cd5726 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Thu, 7 Nov 2024 08:34:05 -0800 Subject: [PATCH 39/51] Rename _kwargs to _options --- .../ai/evaluation/simulator/_simulator.py | 76 +++++++++---------- .../tests/unittests/test_non_adv_simulator.py | 10 +-- 2 files changed, 43 insertions(+), 43 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py index 67dde953a271..17ef893db8ad 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py @@ -96,8 +96,8 @@ async def __call__( query_response_generating_prompty: Optional[str] = None, user_simulator_prompty: Optional[str] = None, api_call_delay_sec: float = 1, - query_response_generating_prompty_kwargs: Dict[str, Any] = {}, - user_simulator_prompty_kwargs: Dict[str, Any] = {}, + query_response_generating_prompty_options: Dict[str, Any] = {}, + user_simulator_prompty_options: Dict[str, Any] = {}, conversation_turns: List[List[Union[str, Dict[str, Any]]]] = [], concurrent_async_tasks: int = 5, **kwargs, @@ -121,10 +121,10 @@ async def __call__( :paramtype user_simulator_prompty: Optional[str] :keyword api_call_delay_sec: Delay in seconds between API calls. :paramtype api_call_delay_sec: float - :keyword query_response_generating_prompty_kwargs: Additional keyword arguments for the query response generating prompty. - :paramtype query_response_generating_prompty_kwargs: Dict[str, Any] - :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty. 
- :paramtype user_simulator_prompty_kwargs: Dict[str, Any] + :keyword query_response_generating_prompty_options: Additional keyword arguments for the query response generating prompty. + :paramtype query_response_generating_prompty_options: Dict[str, Any] + :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty. + :paramtype user_simulator_prompty_options: Dict[str, Any] :keyword conversation_turns: Predefined conversation turns to simulate. :paramtype conversation_turns: List[List[Union[str, Dict[str, Any]]]] :keyword concurrent_async_tasks: The number of asynchronous tasks to run concurrently during the simulation. @@ -164,7 +164,7 @@ async def __call__( max_conversation_turns=max_conversation_turns, conversation_turns=conversation_turns, user_simulator_prompty=user_simulator_prompty, - user_simulator_prompty_kwargs=user_simulator_prompty_kwargs, + user_simulator_prompty_options=user_simulator_prompty_options, api_call_delay_sec=api_call_delay_sec, prompty_model_config=prompty_model_config, concurrent_async_tasks=concurrent_async_tasks, @@ -174,7 +174,7 @@ async def __call__( text=text, num_queries=num_queries, query_response_generating_prompty=query_response_generating_prompty, - query_response_generating_prompty_kwargs=query_response_generating_prompty_kwargs, + query_response_generating_prompty_options=query_response_generating_prompty_options, prompty_model_config=prompty_model_config, **kwargs, ) @@ -183,7 +183,7 @@ async def __call__( max_conversation_turns=max_conversation_turns, tasks=tasks, user_simulator_prompty=user_simulator_prompty, - user_simulator_prompty_kwargs=user_simulator_prompty_kwargs, + user_simulator_prompty_options=user_simulator_prompty_options, target=target, api_call_delay_sec=api_call_delay_sec, text=text, @@ -196,7 +196,7 @@ async def _simulate_with_predefined_turns( max_conversation_turns: int, conversation_turns: List[List[Union[str, Dict[str, Any]]]], user_simulator_prompty: Optional[str], - user_simulator_prompty_kwargs: Dict[str, Any], + user_simulator_prompty_options: Dict[str, Any], api_call_delay_sec: float, prompty_model_config: Any, concurrent_async_tasks: int, @@ -212,8 +212,8 @@ async def _simulate_with_predefined_turns( :paramtype conversation_turns: List[List[Union[str, Dict[str, Any]]]] :keyword user_simulator_prompty: Path to the user simulator prompty file. :paramtype user_simulator_prompty: Optional[str] - :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty. - :paramtype user_simulator_prompty_kwargs: Dict[str, Any] + :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty. + :paramtype user_simulator_prompty_options: Dict[str, Any] :keyword api_call_delay_sec: Delay in seconds between API calls. :paramtype api_call_delay_sec: float :keyword prompty_model_config: The configuration for the prompty model. 
@@ -264,7 +264,7 @@ async def run_simulation(simulation: List[Union[str, Dict[str, Any]]]) -> JsonLi current_simulation=current_simulation, max_conversation_turns=max_conversation_turns, user_simulator_prompty=user_simulator_prompty, - user_simulator_prompty_kwargs=user_simulator_prompty_kwargs, + user_simulator_prompty_options=user_simulator_prompty_options, api_call_delay_sec=api_call_delay_sec, prompty_model_config=prompty_model_config, target=target, @@ -291,7 +291,7 @@ async def _extend_conversation_with_simulator( current_simulation: ConversationHistory, max_conversation_turns: int, user_simulator_prompty: Optional[str], - user_simulator_prompty_kwargs: Dict[str, Any], + user_simulator_prompty_options: Dict[str, Any], api_call_delay_sec: float, prompty_model_config: Dict[str, Any], target: Callable, @@ -307,8 +307,8 @@ async def _extend_conversation_with_simulator( :paramtype max_conversation_turns: int, :keyword user_simulator_prompty: Path to the user simulator prompty file. :paramtype user_simulator_prompty: Optional[str], - :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty. - :paramtype user_simulator_prompty_kwargs: Dict[str, Any], + :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty. + :paramtype user_simulator_prompty_options: Dict[str, Any], :keyword api_call_delay_sec: Delay in seconds between API calls. :paramtype api_call_delay_sec: float, :keyword prompty_model_config: The configuration for the prompty model. @@ -323,14 +323,14 @@ async def _extend_conversation_with_simulator( user_flow = self._load_user_simulation_flow( user_simulator_prompty=user_simulator_prompty, # type: ignore prompty_model_config=prompty_model_config, - user_simulator_prompty_kwargs=user_simulator_prompty_kwargs, + user_simulator_prompty_options=user_simulator_prompty_options, ) while len(current_simulation) < max_conversation_turns: user_response_content = await user_flow( task="Continue the conversation", conversation_history=current_simulation.to_context_free_list(), - **user_simulator_prompty_kwargs, + **user_simulator_prompty_options, ) user_response = self._parse_prompty_response(response=user_response_content) user_turn = Turn(role=ConversationRole.USER, content=user_response["content"]) @@ -351,7 +351,7 @@ def _load_user_simulation_flow( *, user_simulator_prompty: Optional[Union[str, os.PathLike]], prompty_model_config: Dict[str, Any], - user_simulator_prompty_kwargs: Dict[str, Any], + user_simulator_prompty_options: Dict[str, Any], ) -> "AsyncPrompty": # type: ignore """ Loads the flow for simulating user interactions. @@ -360,8 +360,8 @@ def _load_user_simulation_flow( :paramtype user_simulator_prompty: Optional[Union[str, os.PathLike]] :keyword prompty_model_config: The configuration for the prompty model. :paramtype prompty_model_config: Dict[str, Any] - :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty. - :paramtype user_simulator_prompty_kwargs: Dict[str, Any] + :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty. + :paramtype user_simulator_prompty_options: Dict[str, Any] :return: The loaded flow for simulating user interactions. 
:rtype: AsyncPrompty """ @@ -394,7 +394,7 @@ def _load_user_simulation_flow( return AsyncPrompty.load( source=user_simulator_prompty, model=prompty_model_config, - **user_simulator_prompty_kwargs, + **user_simulator_prompty_options, ) # type: ignore def _parse_prompty_response(self, *, response: str) -> Dict[str, Any]: @@ -442,7 +442,7 @@ async def _generate_query_responses( text: str, num_queries: int, query_response_generating_prompty: Optional[str], - query_response_generating_prompty_kwargs: Dict[str, Any], + query_response_generating_prompty_options: Dict[str, Any], prompty_model_config: Any, **kwargs, ) -> List[Dict[str, str]]: @@ -455,8 +455,8 @@ async def _generate_query_responses( :paramtype num_queries: int :keyword query_response_generating_prompty: Path to the query response generating prompty file. :paramtype query_response_generating_prompty: Optional[str] - :keyword query_response_generating_prompty_kwargs: Additional keyword arguments for the query response generating prompty. - :paramtype query_response_generating_prompty_kwargs: Dict[str, Any] + :keyword query_response_generating_prompty_options: Additional keyword arguments for the query response generating prompty. + :paramtype query_response_generating_prompty_options: Dict[str, Any] :keyword prompty_model_config: The configuration for the prompty model. :paramtype prompty_model_config: Any :return: A list of query-response dictionaries. @@ -466,7 +466,7 @@ async def _generate_query_responses( query_flow = self._load_query_generation_flow( query_response_generating_prompty=query_response_generating_prompty, # type: ignore prompty_model_config=prompty_model_config, - query_response_generating_prompty_kwargs=query_response_generating_prompty_kwargs, + query_response_generating_prompty_options=query_response_generating_prompty_options, ) try: query_responses = await query_flow(text=text, num_queries=num_queries) @@ -490,7 +490,7 @@ def _load_query_generation_flow( *, query_response_generating_prompty: Optional[Union[str, os.PathLike]], prompty_model_config: Dict[str, Any], - query_response_generating_prompty_kwargs: Dict[str, Any], + query_response_generating_prompty_options: Dict[str, Any], ) -> "AsyncPrompty": """ Loads the flow for generating query responses. @@ -499,8 +499,8 @@ def _load_query_generation_flow( :paramtype query_response_generating_prompty: Optional[Union[str, os.PathLike]] :keyword prompty_model_config: The configuration for the prompty model. :paramtype prompty_model_config: Dict[str, Any] - :keyword query_response_generating_prompty_kwargs: Additional keyword arguments for the flow. - :paramtype query_response_generating_prompty_kwargs: Dict[str, Any] + :keyword query_response_generating_prompty_options: Additional keyword arguments for the flow. + :paramtype query_response_generating_prompty_options: Dict[str, Any] :return: The loaded flow for generating query responses. 
:rtype: AsyncPrompty """ @@ -533,7 +533,7 @@ def _load_query_generation_flow( return AsyncPrompty.load( source=query_response_generating_prompty, model=prompty_model_config, - **query_response_generating_prompty_kwargs, + **query_response_generating_prompty_options, ) # type: ignore async def _create_conversations_from_query_responses( @@ -543,7 +543,7 @@ async def _create_conversations_from_query_responses( max_conversation_turns: int, tasks: List[str], user_simulator_prompty: Optional[str], - user_simulator_prompty_kwargs: Dict[str, Any], + user_simulator_prompty_options: Dict[str, Any], target: Callable, api_call_delay_sec: float, text: str, @@ -559,8 +559,8 @@ async def _create_conversations_from_query_responses( :paramtype tasks: List[str] :keyword user_simulator_prompty: Path to the user simulator prompty file. :paramtype user_simulator_prompty: Optional[str] - :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty. - :paramtype user_simulator_prompty_kwargs: Dict[str, Any] + :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty. + :paramtype user_simulator_prompty_options: Dict[str, Any] :keyword target: The target function to call for responses. :paramtype target: Callable :keyword api_call_delay_sec: Delay in seconds between API calls. @@ -590,7 +590,7 @@ async def _create_conversations_from_query_responses( max_conversation_turns=max_conversation_turns, task=task, # type: ignore user_simulator_prompty=user_simulator_prompty, - user_simulator_prompty_kwargs=user_simulator_prompty_kwargs, + user_simulator_prompty_options=user_simulator_prompty_options, target=target, api_call_delay_sec=api_call_delay_sec, progress_bar=progress_bar, @@ -620,7 +620,7 @@ async def _complete_conversation( max_conversation_turns: int, task: str, user_simulator_prompty: Optional[str], - user_simulator_prompty_kwargs: Dict[str, Any], + user_simulator_prompty_options: Dict[str, Any], target: Callable, api_call_delay_sec: float, progress_bar: tqdm, @@ -636,8 +636,8 @@ async def _complete_conversation( :paramtype task: str :keyword user_simulator_prompty: Path to the user simulator prompty file. :paramtype user_simulator_prompty: Optional[str] - :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty. - :paramtype user_simulator_prompty_kwargs: Dict[str, Any] + :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty. + :paramtype user_simulator_prompty_options: Dict[str, Any] :keyword target: The target function to call for responses. :paramtype target: Callable :keyword api_call_delay_sec: Delay in seconds between API calls. 
@@ -653,7 +653,7 @@ async def _complete_conversation( user_flow = self._load_user_simulation_flow( user_simulator_prompty=user_simulator_prompty, # type: ignore prompty_model_config=self.model_config, # type: ignore - user_simulator_prompty_kwargs=user_simulator_prompty_kwargs, + user_simulator_prompty_options=user_simulator_prompty_options, ) if len(conversation_history) == 0: conversation_starter_from_simulated_user = await user_flow( diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py index 7a023f1bb0b2..6d3e26717a17 100644 --- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py @@ -136,7 +136,7 @@ async def test_generate_query_responses(self, mock_async_prompty_load, valid_azu text="Test text", num_queries=1, query_response_generating_prompty=None, - query_response_generating_prompty_kwargs={}, + query_response_generating_prompty_options={}, prompty_model_config={}, ) assert query_responses == [{"q": "query1", "r": "response1"}] @@ -148,7 +148,7 @@ def test_load_user_simulation_flow(self, mock_async_prompty_load, valid_azure_mo user_flow = simulator._load_user_simulation_flow( user_simulator_prompty=None, prompty_model_config={}, - user_simulator_prompty_kwargs={}, + user_simulator_prompty_options={}, ) assert user_flow is not None @@ -169,7 +169,7 @@ async def test_complete_conversation( max_conversation_turns=4, task="Test task", user_simulator_prompty=None, - user_simulator_prompty_kwargs={}, + user_simulator_prompty_options={}, target=AsyncMock(), api_call_delay_sec=0, progress_bar=AsyncMock(), @@ -329,7 +329,7 @@ async def test_simulate_with_predefined_turns( api_call_delay_sec=1, prompty_model_config={}, user_simulator_prompty=None, - user_simulator_prompty_kwargs={}, + user_simulator_prompty_options={}, concurrent_async_tasks=1, ) @@ -354,7 +354,7 @@ async def test_create_conversations_from_query_responses( target=AsyncMock(), api_call_delay_sec=1, user_simulator_prompty=None, - user_simulator_prompty_kwargs={}, + user_simulator_prompty_options={}, text="some text", ) From d912c52adf88bb5001659bb2713e578bfc100500 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Thu, 7 Nov 2024 09:00:54 -0800 Subject: [PATCH 40/51] _options as prefix --- .../azure-ai-evaluation/CHANGELOG.md | 1 + .../ai/evaluation/simulator/_simulator.py | 76 +++++++++---------- .../tests/unittests/test_non_adv_simulator.py | 10 +-- 3 files changed, 44 insertions(+), 43 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index ae2c3fabb825..5dfec60dc6a9 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -6,6 +6,7 @@ ### Breaking Changes - The `parallel` parameter has been removed from composite evaluators: `QAEvaluator`, `ContentSafetyChatEvaluator`, and `ContentSafetyMultimodalEvaluator`. To control evaluator parallelism, you can now use the `_parallel` keyword argument, though please note that this private parameter may change in the future. +- Parameters `query_response_generating_prompty_kwargs` and `user_simulator_prompty_kwargs` have been renamed to `_options_query_response_generating_prompty` and `_options_user_simulator_prompty` in the Simulator's __call__ method. 
### Bugs Fixed - Fixed an issue where the `output_path` parameter in the `evaluate` API did not support relative path. diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py index 17ef893db8ad..2d998dc62dba 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py @@ -96,8 +96,8 @@ async def __call__( query_response_generating_prompty: Optional[str] = None, user_simulator_prompty: Optional[str] = None, api_call_delay_sec: float = 1, - query_response_generating_prompty_options: Dict[str, Any] = {}, - user_simulator_prompty_options: Dict[str, Any] = {}, + _options_query_response_generating_prompty: Dict[str, Any] = {}, + _options_user_simulator_prompty: Dict[str, Any] = {}, conversation_turns: List[List[Union[str, Dict[str, Any]]]] = [], concurrent_async_tasks: int = 5, **kwargs, @@ -121,10 +121,10 @@ async def __call__( :paramtype user_simulator_prompty: Optional[str] :keyword api_call_delay_sec: Delay in seconds between API calls. :paramtype api_call_delay_sec: float - :keyword query_response_generating_prompty_options: Additional keyword arguments for the query response generating prompty. - :paramtype query_response_generating_prompty_options: Dict[str, Any] - :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty. - :paramtype user_simulator_prompty_options: Dict[str, Any] + :keyword _options_query_response_generating_prompty: Additional keyword arguments for the query response generating prompty. + :paramtype _options_query_response_generating_prompty: Dict[str, Any] + :keyword _options_user_simulator_prompty: Additional keyword arguments for the user simulator prompty. + :paramtype _options_user_simulator_prompty: Dict[str, Any] :keyword conversation_turns: Predefined conversation turns to simulate. :paramtype conversation_turns: List[List[Union[str, Dict[str, Any]]]] :keyword concurrent_async_tasks: The number of asynchronous tasks to run concurrently during the simulation. 
@@ -164,7 +164,7 @@ async def __call__( max_conversation_turns=max_conversation_turns, conversation_turns=conversation_turns, user_simulator_prompty=user_simulator_prompty, - user_simulator_prompty_options=user_simulator_prompty_options, + _options_user_simulator_prompty=_options_user_simulator_prompty, api_call_delay_sec=api_call_delay_sec, prompty_model_config=prompty_model_config, concurrent_async_tasks=concurrent_async_tasks, @@ -174,7 +174,7 @@ async def __call__( text=text, num_queries=num_queries, query_response_generating_prompty=query_response_generating_prompty, - query_response_generating_prompty_options=query_response_generating_prompty_options, + _options_query_response_generating_prompty=_options_query_response_generating_prompty, prompty_model_config=prompty_model_config, **kwargs, ) @@ -183,7 +183,7 @@ async def __call__( max_conversation_turns=max_conversation_turns, tasks=tasks, user_simulator_prompty=user_simulator_prompty, - user_simulator_prompty_options=user_simulator_prompty_options, + _options_user_simulator_prompty=_options_user_simulator_prompty, target=target, api_call_delay_sec=api_call_delay_sec, text=text, @@ -196,7 +196,7 @@ async def _simulate_with_predefined_turns( max_conversation_turns: int, conversation_turns: List[List[Union[str, Dict[str, Any]]]], user_simulator_prompty: Optional[str], - user_simulator_prompty_options: Dict[str, Any], + _options_user_simulator_prompty: Dict[str, Any], api_call_delay_sec: float, prompty_model_config: Any, concurrent_async_tasks: int, @@ -212,8 +212,8 @@ async def _simulate_with_predefined_turns( :paramtype conversation_turns: List[List[Union[str, Dict[str, Any]]]] :keyword user_simulator_prompty: Path to the user simulator prompty file. :paramtype user_simulator_prompty: Optional[str] - :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty. - :paramtype user_simulator_prompty_options: Dict[str, Any] + :keyword _options_user_simulator_prompty: Additional keyword arguments for the user simulator prompty. + :paramtype _options_user_simulator_prompty: Dict[str, Any] :keyword api_call_delay_sec: Delay in seconds between API calls. :paramtype api_call_delay_sec: float :keyword prompty_model_config: The configuration for the prompty model. @@ -264,7 +264,7 @@ async def run_simulation(simulation: List[Union[str, Dict[str, Any]]]) -> JsonLi current_simulation=current_simulation, max_conversation_turns=max_conversation_turns, user_simulator_prompty=user_simulator_prompty, - user_simulator_prompty_options=user_simulator_prompty_options, + _options_user_simulator_prompty=_options_user_simulator_prompty, api_call_delay_sec=api_call_delay_sec, prompty_model_config=prompty_model_config, target=target, @@ -291,7 +291,7 @@ async def _extend_conversation_with_simulator( current_simulation: ConversationHistory, max_conversation_turns: int, user_simulator_prompty: Optional[str], - user_simulator_prompty_options: Dict[str, Any], + _options_user_simulator_prompty: Dict[str, Any], api_call_delay_sec: float, prompty_model_config: Dict[str, Any], target: Callable, @@ -307,8 +307,8 @@ async def _extend_conversation_with_simulator( :paramtype max_conversation_turns: int, :keyword user_simulator_prompty: Path to the user simulator prompty file. :paramtype user_simulator_prompty: Optional[str], - :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty. 
- :paramtype user_simulator_prompty_options: Dict[str, Any], + :keyword _options_user_simulator_prompty: Additional keyword arguments for the user simulator prompty. + :paramtype _options_user_simulator_prompty: Dict[str, Any], :keyword api_call_delay_sec: Delay in seconds between API calls. :paramtype api_call_delay_sec: float, :keyword prompty_model_config: The configuration for the prompty model. @@ -323,14 +323,14 @@ async def _extend_conversation_with_simulator( user_flow = self._load_user_simulation_flow( user_simulator_prompty=user_simulator_prompty, # type: ignore prompty_model_config=prompty_model_config, - user_simulator_prompty_options=user_simulator_prompty_options, + _options_user_simulator_prompty=_options_user_simulator_prompty, ) while len(current_simulation) < max_conversation_turns: user_response_content = await user_flow( task="Continue the conversation", conversation_history=current_simulation.to_context_free_list(), - **user_simulator_prompty_options, + **_options_user_simulator_prompty, ) user_response = self._parse_prompty_response(response=user_response_content) user_turn = Turn(role=ConversationRole.USER, content=user_response["content"]) @@ -351,7 +351,7 @@ def _load_user_simulation_flow( *, user_simulator_prompty: Optional[Union[str, os.PathLike]], prompty_model_config: Dict[str, Any], - user_simulator_prompty_options: Dict[str, Any], + _options_user_simulator_prompty: Dict[str, Any], ) -> "AsyncPrompty": # type: ignore """ Loads the flow for simulating user interactions. @@ -360,8 +360,8 @@ def _load_user_simulation_flow( :paramtype user_simulator_prompty: Optional[Union[str, os.PathLike]] :keyword prompty_model_config: The configuration for the prompty model. :paramtype prompty_model_config: Dict[str, Any] - :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty. - :paramtype user_simulator_prompty_options: Dict[str, Any] + :keyword _options_user_simulator_prompty: Additional keyword arguments for the user simulator prompty. + :paramtype _options_user_simulator_prompty: Dict[str, Any] :return: The loaded flow for simulating user interactions. :rtype: AsyncPrompty """ @@ -394,7 +394,7 @@ def _load_user_simulation_flow( return AsyncPrompty.load( source=user_simulator_prompty, model=prompty_model_config, - **user_simulator_prompty_options, + **_options_user_simulator_prompty, ) # type: ignore def _parse_prompty_response(self, *, response: str) -> Dict[str, Any]: @@ -442,7 +442,7 @@ async def _generate_query_responses( text: str, num_queries: int, query_response_generating_prompty: Optional[str], - query_response_generating_prompty_options: Dict[str, Any], + _options_query_response_generating_prompty: Dict[str, Any], prompty_model_config: Any, **kwargs, ) -> List[Dict[str, str]]: @@ -455,8 +455,8 @@ async def _generate_query_responses( :paramtype num_queries: int :keyword query_response_generating_prompty: Path to the query response generating prompty file. :paramtype query_response_generating_prompty: Optional[str] - :keyword query_response_generating_prompty_options: Additional keyword arguments for the query response generating prompty. - :paramtype query_response_generating_prompty_options: Dict[str, Any] + :keyword _options_query_response_generating_prompty: Additional keyword arguments for the query response generating prompty. + :paramtype _options_query_response_generating_prompty: Dict[str, Any] :keyword prompty_model_config: The configuration for the prompty model. 
:paramtype prompty_model_config: Any :return: A list of query-response dictionaries. @@ -466,7 +466,7 @@ async def _generate_query_responses( query_flow = self._load_query_generation_flow( query_response_generating_prompty=query_response_generating_prompty, # type: ignore prompty_model_config=prompty_model_config, - query_response_generating_prompty_options=query_response_generating_prompty_options, + _options_query_response_generating_prompty=_options_query_response_generating_prompty, ) try: query_responses = await query_flow(text=text, num_queries=num_queries) @@ -490,7 +490,7 @@ def _load_query_generation_flow( *, query_response_generating_prompty: Optional[Union[str, os.PathLike]], prompty_model_config: Dict[str, Any], - query_response_generating_prompty_options: Dict[str, Any], + _options_query_response_generating_prompty: Dict[str, Any], ) -> "AsyncPrompty": """ Loads the flow for generating query responses. @@ -499,8 +499,8 @@ def _load_query_generation_flow( :paramtype query_response_generating_prompty: Optional[Union[str, os.PathLike]] :keyword prompty_model_config: The configuration for the prompty model. :paramtype prompty_model_config: Dict[str, Any] - :keyword query_response_generating_prompty_options: Additional keyword arguments for the flow. - :paramtype query_response_generating_prompty_options: Dict[str, Any] + :keyword _options_query_response_generating_prompty: Additional keyword arguments for the flow. + :paramtype _options_query_response_generating_prompty: Dict[str, Any] :return: The loaded flow for generating query responses. :rtype: AsyncPrompty """ @@ -533,7 +533,7 @@ def _load_query_generation_flow( return AsyncPrompty.load( source=query_response_generating_prompty, model=prompty_model_config, - **query_response_generating_prompty_options, + **_options_query_response_generating_prompty, ) # type: ignore async def _create_conversations_from_query_responses( @@ -543,7 +543,7 @@ async def _create_conversations_from_query_responses( max_conversation_turns: int, tasks: List[str], user_simulator_prompty: Optional[str], - user_simulator_prompty_options: Dict[str, Any], + _options_user_simulator_prompty: Dict[str, Any], target: Callable, api_call_delay_sec: float, text: str, @@ -559,8 +559,8 @@ async def _create_conversations_from_query_responses( :paramtype tasks: List[str] :keyword user_simulator_prompty: Path to the user simulator prompty file. :paramtype user_simulator_prompty: Optional[str] - :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty. - :paramtype user_simulator_prompty_options: Dict[str, Any] + :keyword _options_user_simulator_prompty: Additional keyword arguments for the user simulator prompty. + :paramtype _options_user_simulator_prompty: Dict[str, Any] :keyword target: The target function to call for responses. :paramtype target: Callable :keyword api_call_delay_sec: Delay in seconds between API calls. 
@@ -590,7 +590,7 @@ async def _create_conversations_from_query_responses( max_conversation_turns=max_conversation_turns, task=task, # type: ignore user_simulator_prompty=user_simulator_prompty, - user_simulator_prompty_options=user_simulator_prompty_options, + _options_user_simulator_prompty=_options_user_simulator_prompty, target=target, api_call_delay_sec=api_call_delay_sec, progress_bar=progress_bar, @@ -620,7 +620,7 @@ async def _complete_conversation( max_conversation_turns: int, task: str, user_simulator_prompty: Optional[str], - user_simulator_prompty_options: Dict[str, Any], + _options_user_simulator_prompty: Dict[str, Any], target: Callable, api_call_delay_sec: float, progress_bar: tqdm, @@ -636,8 +636,8 @@ async def _complete_conversation( :paramtype task: str :keyword user_simulator_prompty: Path to the user simulator prompty file. :paramtype user_simulator_prompty: Optional[str] - :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty. - :paramtype user_simulator_prompty_options: Dict[str, Any] + :keyword _options_user_simulator_prompty: Additional keyword arguments for the user simulator prompty. + :paramtype _options_user_simulator_prompty: Dict[str, Any] :keyword target: The target function to call for responses. :paramtype target: Callable :keyword api_call_delay_sec: Delay in seconds between API calls. @@ -653,7 +653,7 @@ async def _complete_conversation( user_flow = self._load_user_simulation_flow( user_simulator_prompty=user_simulator_prompty, # type: ignore prompty_model_config=self.model_config, # type: ignore - user_simulator_prompty_options=user_simulator_prompty_options, + _options_user_simulator_prompty=_options_user_simulator_prompty, ) if len(conversation_history) == 0: conversation_starter_from_simulated_user = await user_flow( diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py index 6d3e26717a17..a91c727a17c7 100644 --- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py @@ -136,7 +136,7 @@ async def test_generate_query_responses(self, mock_async_prompty_load, valid_azu text="Test text", num_queries=1, query_response_generating_prompty=None, - query_response_generating_prompty_options={}, + _options_query_response_generating_prompty={}, prompty_model_config={}, ) assert query_responses == [{"q": "query1", "r": "response1"}] @@ -148,7 +148,7 @@ def test_load_user_simulation_flow(self, mock_async_prompty_load, valid_azure_mo user_flow = simulator._load_user_simulation_flow( user_simulator_prompty=None, prompty_model_config={}, - user_simulator_prompty_options={}, + _options_user_simulator_prompty={}, ) assert user_flow is not None @@ -169,7 +169,7 @@ async def test_complete_conversation( max_conversation_turns=4, task="Test task", user_simulator_prompty=None, - user_simulator_prompty_options={}, + _options_user_simulator_prompty={}, target=AsyncMock(), api_call_delay_sec=0, progress_bar=AsyncMock(), @@ -329,7 +329,7 @@ async def test_simulate_with_predefined_turns( api_call_delay_sec=1, prompty_model_config={}, user_simulator_prompty=None, - user_simulator_prompty_options={}, + _options_user_simulator_prompty={}, concurrent_async_tasks=1, ) @@ -354,7 +354,7 @@ async def test_create_conversations_from_query_responses( target=AsyncMock(), api_call_delay_sec=1, user_simulator_prompty=None, - 
user_simulator_prompty_options={},
+            _options_user_simulator_prompty={},
             text="some text",
         )

From 059e767bc90909afc1bec4434d00d337ffc0e821 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Thu, 7 Nov 2024 09:48:35 -0800
Subject: [PATCH 41/51] update troubleshooting for simulator

---
 sdk/evaluation/azure-ai-evaluation/TROUBLESHOOTING.md | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/sdk/evaluation/azure-ai-evaluation/TROUBLESHOOTING.md b/sdk/evaluation/azure-ai-evaluation/TROUBLESHOOTING.md
index 53615797a528..6e8c489ca22e 100644
--- a/sdk/evaluation/azure-ai-evaluation/TROUBLESHOOTING.md
+++ b/sdk/evaluation/azure-ai-evaluation/TROUBLESHOOTING.md
@@ -42,6 +42,16 @@ This guide walks you through how to investigate failures, common errors in the `
 Adversarial simulators use Azure AI Studio safety evaluation backend service to generate an adversarial dataset against your application. For a list of supported regions, please refer to the documentation [here](https://aka.ms/azureaiadvsimulator-regionsupport).
 
+### Need to generate simulations for a specific harm type
+
+The adversarial simulator does not support selecting individual harms; instead, we recommend running the `AdversarialSimulator` with `max_simulation_results` set to 4x the number of results you need for the specific harm type.
+
+
+### Simulator is slow
+
+Identify the type of simulations being run (adversarial or non-adversarial).
+Adjust parameters such as `api_call_retry_sleep_sec`, `api_call_delay_sec`, and `concurrent_async_task`. Please note that rate limits for LLM calls can be expressed as both tokens per minute and requests per minute.
 
 ## Logging
 
 You can set logging level via environment variable `PF_LOGGING_LEVEL`, valid values includes `CRITICAL`, `ERROR`, `WARNING`, `INFO`, `DEBUG`, default to `INFO`.

From f91228f2b236383c625a1d29fb7126bc7a7834a2 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Thu, 7 Nov 2024 12:25:31 -0800
Subject: [PATCH 42/51] Rename according to suggestions

---
 .../azure-ai-evaluation/CHANGELOG.md          |  2 +-
 .../ai/evaluation/simulator/_simulator.py     | 76 +++++++++----------
 .../tests/unittests/test_non_adv_simulator.py | 10 +--
 3 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
index 5dfec60dc6a9..8b87209d9428 100644
--- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
+++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
@@ -6,6 +6,7 @@
 
 ### Breaking Changes
 - The `parallel` parameter has been removed from composite evaluators: `QAEvaluator`, `ContentSafetyChatEvaluator`, and `ContentSafetyMultimodalEvaluator`. To control evaluator parallelism, you can now use the `_parallel` keyword argument, though please note that this private parameter may change in the future.
-- Parameters `query_response_generating_prompty_kwargs` and `user_simulator_prompty_kwargs` have been renamed to `_options_query_response_generating_prompty` and `_options_user_simulator_prompty` in the Simulator's __call__ method.
+- Parameters `query_response_generating_prompty_kwargs` and `user_simulator_prompty_kwargs` have been renamed to `query_response_generating_prompty_options` and `user_simulator_prompty_options` in the Simulator's __call__ method.
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py index 2d998dc62dba..17ef893db8ad 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py @@ -96,8 +96,8 @@ async def __call__( query_response_generating_prompty: Optional[str] = None, user_simulator_prompty: Optional[str] = None, api_call_delay_sec: float = 1, - _options_query_response_generating_prompty: Dict[str, Any] = {}, - _options_user_simulator_prompty: Dict[str, Any] = {}, + query_response_generating_prompty_options: Dict[str, Any] = {}, + user_simulator_prompty_options: Dict[str, Any] = {}, conversation_turns: List[List[Union[str, Dict[str, Any]]]] = [], concurrent_async_tasks: int = 5, **kwargs, @@ -121,10 +121,10 @@ async def __call__( :paramtype user_simulator_prompty: Optional[str] :keyword api_call_delay_sec: Delay in seconds between API calls. :paramtype api_call_delay_sec: float - :keyword _options_query_response_generating_prompty: Additional keyword arguments for the query response generating prompty. - :paramtype _options_query_response_generating_prompty: Dict[str, Any] - :keyword _options_user_simulator_prompty: Additional keyword arguments for the user simulator prompty. - :paramtype _options_user_simulator_prompty: Dict[str, Any] + :keyword query_response_generating_prompty_options: Additional keyword arguments for the query response generating prompty. + :paramtype query_response_generating_prompty_options: Dict[str, Any] + :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty. + :paramtype user_simulator_prompty_options: Dict[str, Any] :keyword conversation_turns: Predefined conversation turns to simulate. :paramtype conversation_turns: List[List[Union[str, Dict[str, Any]]]] :keyword concurrent_async_tasks: The number of asynchronous tasks to run concurrently during the simulation. 
@@ -164,7 +164,7 @@ async def __call__( max_conversation_turns=max_conversation_turns, conversation_turns=conversation_turns, user_simulator_prompty=user_simulator_prompty, - _options_user_simulator_prompty=_options_user_simulator_prompty, + user_simulator_prompty_options=user_simulator_prompty_options, api_call_delay_sec=api_call_delay_sec, prompty_model_config=prompty_model_config, concurrent_async_tasks=concurrent_async_tasks, @@ -174,7 +174,7 @@ async def __call__( text=text, num_queries=num_queries, query_response_generating_prompty=query_response_generating_prompty, - _options_query_response_generating_prompty=_options_query_response_generating_prompty, + query_response_generating_prompty_options=query_response_generating_prompty_options, prompty_model_config=prompty_model_config, **kwargs, ) @@ -183,7 +183,7 @@ async def __call__( max_conversation_turns=max_conversation_turns, tasks=tasks, user_simulator_prompty=user_simulator_prompty, - _options_user_simulator_prompty=_options_user_simulator_prompty, + user_simulator_prompty_options=user_simulator_prompty_options, target=target, api_call_delay_sec=api_call_delay_sec, text=text, @@ -196,7 +196,7 @@ async def _simulate_with_predefined_turns( max_conversation_turns: int, conversation_turns: List[List[Union[str, Dict[str, Any]]]], user_simulator_prompty: Optional[str], - _options_user_simulator_prompty: Dict[str, Any], + user_simulator_prompty_options: Dict[str, Any], api_call_delay_sec: float, prompty_model_config: Any, concurrent_async_tasks: int, @@ -212,8 +212,8 @@ async def _simulate_with_predefined_turns( :paramtype conversation_turns: List[List[Union[str, Dict[str, Any]]]] :keyword user_simulator_prompty: Path to the user simulator prompty file. :paramtype user_simulator_prompty: Optional[str] - :keyword _options_user_simulator_prompty: Additional keyword arguments for the user simulator prompty. - :paramtype _options_user_simulator_prompty: Dict[str, Any] + :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty. + :paramtype user_simulator_prompty_options: Dict[str, Any] :keyword api_call_delay_sec: Delay in seconds between API calls. :paramtype api_call_delay_sec: float :keyword prompty_model_config: The configuration for the prompty model. @@ -264,7 +264,7 @@ async def run_simulation(simulation: List[Union[str, Dict[str, Any]]]) -> JsonLi current_simulation=current_simulation, max_conversation_turns=max_conversation_turns, user_simulator_prompty=user_simulator_prompty, - _options_user_simulator_prompty=_options_user_simulator_prompty, + user_simulator_prompty_options=user_simulator_prompty_options, api_call_delay_sec=api_call_delay_sec, prompty_model_config=prompty_model_config, target=target, @@ -291,7 +291,7 @@ async def _extend_conversation_with_simulator( current_simulation: ConversationHistory, max_conversation_turns: int, user_simulator_prompty: Optional[str], - _options_user_simulator_prompty: Dict[str, Any], + user_simulator_prompty_options: Dict[str, Any], api_call_delay_sec: float, prompty_model_config: Dict[str, Any], target: Callable, @@ -307,8 +307,8 @@ async def _extend_conversation_with_simulator( :paramtype max_conversation_turns: int, :keyword user_simulator_prompty: Path to the user simulator prompty file. :paramtype user_simulator_prompty: Optional[str], - :keyword _options_user_simulator_prompty: Additional keyword arguments for the user simulator prompty. 
- :paramtype _options_user_simulator_prompty: Dict[str, Any], + :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty. + :paramtype user_simulator_prompty_options: Dict[str, Any], :keyword api_call_delay_sec: Delay in seconds between API calls. :paramtype api_call_delay_sec: float, :keyword prompty_model_config: The configuration for the prompty model. @@ -323,14 +323,14 @@ async def _extend_conversation_with_simulator( user_flow = self._load_user_simulation_flow( user_simulator_prompty=user_simulator_prompty, # type: ignore prompty_model_config=prompty_model_config, - _options_user_simulator_prompty=_options_user_simulator_prompty, + user_simulator_prompty_options=user_simulator_prompty_options, ) while len(current_simulation) < max_conversation_turns: user_response_content = await user_flow( task="Continue the conversation", conversation_history=current_simulation.to_context_free_list(), - **_options_user_simulator_prompty, + **user_simulator_prompty_options, ) user_response = self._parse_prompty_response(response=user_response_content) user_turn = Turn(role=ConversationRole.USER, content=user_response["content"]) @@ -351,7 +351,7 @@ def _load_user_simulation_flow( *, user_simulator_prompty: Optional[Union[str, os.PathLike]], prompty_model_config: Dict[str, Any], - _options_user_simulator_prompty: Dict[str, Any], + user_simulator_prompty_options: Dict[str, Any], ) -> "AsyncPrompty": # type: ignore """ Loads the flow for simulating user interactions. @@ -360,8 +360,8 @@ def _load_user_simulation_flow( :paramtype user_simulator_prompty: Optional[Union[str, os.PathLike]] :keyword prompty_model_config: The configuration for the prompty model. :paramtype prompty_model_config: Dict[str, Any] - :keyword _options_user_simulator_prompty: Additional keyword arguments for the user simulator prompty. - :paramtype _options_user_simulator_prompty: Dict[str, Any] + :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty. + :paramtype user_simulator_prompty_options: Dict[str, Any] :return: The loaded flow for simulating user interactions. :rtype: AsyncPrompty """ @@ -394,7 +394,7 @@ def _load_user_simulation_flow( return AsyncPrompty.load( source=user_simulator_prompty, model=prompty_model_config, - **_options_user_simulator_prompty, + **user_simulator_prompty_options, ) # type: ignore def _parse_prompty_response(self, *, response: str) -> Dict[str, Any]: @@ -442,7 +442,7 @@ async def _generate_query_responses( text: str, num_queries: int, query_response_generating_prompty: Optional[str], - _options_query_response_generating_prompty: Dict[str, Any], + query_response_generating_prompty_options: Dict[str, Any], prompty_model_config: Any, **kwargs, ) -> List[Dict[str, str]]: @@ -455,8 +455,8 @@ async def _generate_query_responses( :paramtype num_queries: int :keyword query_response_generating_prompty: Path to the query response generating prompty file. :paramtype query_response_generating_prompty: Optional[str] - :keyword _options_query_response_generating_prompty: Additional keyword arguments for the query response generating prompty. - :paramtype _options_query_response_generating_prompty: Dict[str, Any] + :keyword query_response_generating_prompty_options: Additional keyword arguments for the query response generating prompty. + :paramtype query_response_generating_prompty_options: Dict[str, Any] :keyword prompty_model_config: The configuration for the prompty model. 
:paramtype prompty_model_config: Any :return: A list of query-response dictionaries. @@ -466,7 +466,7 @@ async def _generate_query_responses( query_flow = self._load_query_generation_flow( query_response_generating_prompty=query_response_generating_prompty, # type: ignore prompty_model_config=prompty_model_config, - _options_query_response_generating_prompty=_options_query_response_generating_prompty, + query_response_generating_prompty_options=query_response_generating_prompty_options, ) try: query_responses = await query_flow(text=text, num_queries=num_queries) @@ -490,7 +490,7 @@ def _load_query_generation_flow( *, query_response_generating_prompty: Optional[Union[str, os.PathLike]], prompty_model_config: Dict[str, Any], - _options_query_response_generating_prompty: Dict[str, Any], + query_response_generating_prompty_options: Dict[str, Any], ) -> "AsyncPrompty": """ Loads the flow for generating query responses. @@ -499,8 +499,8 @@ def _load_query_generation_flow( :paramtype query_response_generating_prompty: Optional[Union[str, os.PathLike]] :keyword prompty_model_config: The configuration for the prompty model. :paramtype prompty_model_config: Dict[str, Any] - :keyword _options_query_response_generating_prompty: Additional keyword arguments for the flow. - :paramtype _options_query_response_generating_prompty: Dict[str, Any] + :keyword query_response_generating_prompty_options: Additional keyword arguments for the flow. + :paramtype query_response_generating_prompty_options: Dict[str, Any] :return: The loaded flow for generating query responses. :rtype: AsyncPrompty """ @@ -533,7 +533,7 @@ def _load_query_generation_flow( return AsyncPrompty.load( source=query_response_generating_prompty, model=prompty_model_config, - **_options_query_response_generating_prompty, + **query_response_generating_prompty_options, ) # type: ignore async def _create_conversations_from_query_responses( @@ -543,7 +543,7 @@ async def _create_conversations_from_query_responses( max_conversation_turns: int, tasks: List[str], user_simulator_prompty: Optional[str], - _options_user_simulator_prompty: Dict[str, Any], + user_simulator_prompty_options: Dict[str, Any], target: Callable, api_call_delay_sec: float, text: str, @@ -559,8 +559,8 @@ async def _create_conversations_from_query_responses( :paramtype tasks: List[str] :keyword user_simulator_prompty: Path to the user simulator prompty file. :paramtype user_simulator_prompty: Optional[str] - :keyword _options_user_simulator_prompty: Additional keyword arguments for the user simulator prompty. - :paramtype _options_user_simulator_prompty: Dict[str, Any] + :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty. + :paramtype user_simulator_prompty_options: Dict[str, Any] :keyword target: The target function to call for responses. :paramtype target: Callable :keyword api_call_delay_sec: Delay in seconds between API calls. 
@@ -590,7 +590,7 @@ async def _create_conversations_from_query_responses( max_conversation_turns=max_conversation_turns, task=task, # type: ignore user_simulator_prompty=user_simulator_prompty, - _options_user_simulator_prompty=_options_user_simulator_prompty, + user_simulator_prompty_options=user_simulator_prompty_options, target=target, api_call_delay_sec=api_call_delay_sec, progress_bar=progress_bar, @@ -620,7 +620,7 @@ async def _complete_conversation( max_conversation_turns: int, task: str, user_simulator_prompty: Optional[str], - _options_user_simulator_prompty: Dict[str, Any], + user_simulator_prompty_options: Dict[str, Any], target: Callable, api_call_delay_sec: float, progress_bar: tqdm, @@ -636,8 +636,8 @@ async def _complete_conversation( :paramtype task: str :keyword user_simulator_prompty: Path to the user simulator prompty file. :paramtype user_simulator_prompty: Optional[str] - :keyword _options_user_simulator_prompty: Additional keyword arguments for the user simulator prompty. - :paramtype _options_user_simulator_prompty: Dict[str, Any] + :keyword user_simulator_prompty_options: Additional keyword arguments for the user simulator prompty. + :paramtype user_simulator_prompty_options: Dict[str, Any] :keyword target: The target function to call for responses. :paramtype target: Callable :keyword api_call_delay_sec: Delay in seconds between API calls. @@ -653,7 +653,7 @@ async def _complete_conversation( user_flow = self._load_user_simulation_flow( user_simulator_prompty=user_simulator_prompty, # type: ignore prompty_model_config=self.model_config, # type: ignore - _options_user_simulator_prompty=_options_user_simulator_prompty, + user_simulator_prompty_options=user_simulator_prompty_options, ) if len(conversation_history) == 0: conversation_starter_from_simulated_user = await user_flow( diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py index a91c727a17c7..6d3e26717a17 100644 --- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py @@ -136,7 +136,7 @@ async def test_generate_query_responses(self, mock_async_prompty_load, valid_azu text="Test text", num_queries=1, query_response_generating_prompty=None, - _options_query_response_generating_prompty={}, + query_response_generating_prompty_options={}, prompty_model_config={}, ) assert query_responses == [{"q": "query1", "r": "response1"}] @@ -148,7 +148,7 @@ def test_load_user_simulation_flow(self, mock_async_prompty_load, valid_azure_mo user_flow = simulator._load_user_simulation_flow( user_simulator_prompty=None, prompty_model_config={}, - _options_user_simulator_prompty={}, + user_simulator_prompty_options={}, ) assert user_flow is not None @@ -169,7 +169,7 @@ async def test_complete_conversation( max_conversation_turns=4, task="Test task", user_simulator_prompty=None, - _options_user_simulator_prompty={}, + user_simulator_prompty_options={}, target=AsyncMock(), api_call_delay_sec=0, progress_bar=AsyncMock(), @@ -329,7 +329,7 @@ async def test_simulate_with_predefined_turns( api_call_delay_sec=1, prompty_model_config={}, user_simulator_prompty=None, - _options_user_simulator_prompty={}, + user_simulator_prompty_options={}, concurrent_async_tasks=1, ) @@ -354,7 +354,7 @@ async def test_create_conversations_from_query_responses( target=AsyncMock(), api_call_delay_sec=1, user_simulator_prompty=None, - 
_options_user_simulator_prompty={},
+            user_simulator_prompty_options={},
             text="some text",
         )

From cde740c3627bc29ee0232e13635af7a1bd20e736 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Mon, 11 Nov 2024 11:27:05 -0800
Subject: [PATCH 43/51] Clean up readme

---
 sdk/evaluation/azure-ai-evaluation/README.md | 286 +++----------------
 1 file changed, 33 insertions(+), 253 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/README.md b/sdk/evaluation/azure-ai-evaluation/README.md
index 2459777162da..a04168625eb8 100644
--- a/sdk/evaluation/azure-ai-evaluation/README.md
+++ b/sdk/evaluation/azure-ai-evaluation/README.md
@@ -180,175 +180,14 @@ For more details refer to [Evaluate on a target][evaluate_target]
 
 ### Simulator
 
-Simulators allow users to generate synthentic data using their application. Simulator expects the user to have a callback method that invokes
-their AI application.
-
-#### Simulating with a Prompty
-
-```yaml
----
-name: ApplicationPrompty
-description: Simulates an application
-model:
-  api: chat
-  parameters:
-    temperature: 0.0
-    top_p: 1.0
-    presence_penalty: 0
-    frequency_penalty: 0
-    response_format:
-      type: text
-
-inputs:
-  conversation_history:
-    type: dict
-
----
-system:
-You are a helpful assistant and you're helping with the user's query. Keep the conversation engaging and interesting.
-
-Output with a string that continues the conversation, responding to the latest message from the user, given the conversation history:
-{{ conversation_history }}
-
+Simulators allow users to generate synthetic data using their application. The simulator expects the user to have a callback method that invokes their AI application. The integration between your AI application and the simulator happens at the callback method. Here's what a sample callback looks like:
-```
-
-Query Response generaing prompty for gpt-4o with `json_schema` support
-Use this file as an override.
-```yaml
----
-name: TaskSimulatorQueryResponseGPT4o
-description: Gets queries and responses from a blob of text
-model:
-  api: chat
-  parameters:
-    temperature: 0.0
-    top_p: 1.0
-    presence_penalty: 0
-    frequency_penalty: 0
-    response_format:
-      type: json_schema
-      json_schema:
-        name: QRJsonSchema
-        schema:
-          type: object
-          properties:
-            items:
-              type: array
-              items:
-                type: object
-                properties:
-                  q:
-                    type: string
-                  r:
-                    type: string
-                required:
-                  - q
-                  - r
-
-inputs:
-  text:
-    type: string
-  num_queries:
-    type: integer
-
-
----
-system:
-You're an AI that helps in preparing a Question/Answer quiz from Text for "Who wants to be a millionaire" tv show
-Both Questions and Answers MUST BE extracted from given Text
-Frame Question in a way so that Answer is RELEVANT SHORT BITE-SIZED info from Text
-RELEVANT info could be: NUMBER, DATE, STATISTIC, MONEY, NAME
-A sentence should contribute multiple QnAs if it has more info in it
-Answer must not be more than 5 words
-Answer must be picked from Text as is
-Question should be as descriptive as possible and must include as much context as possible from Text
-Output must always have the provided number of QnAs
-Output must be in JSON format.
-Output must have {{num_queries}} objects in the format specified below. Any other count is unacceptable.
-Text:
-<|text_start|>
-On January 24, 1984, former Apple CEO Steve Jobs introduced the first Macintosh. In late 2003, Apple had 2.06 percent of the desktop share in the United States.
-Some years later, research firms IDC and Gartner reported that Apple's market share in the U.S. had increased to about 6%.
-<|text_end|> -Output with 5 QnAs: -{ - "qna": [{ - "q": "When did the former Apple CEO Steve Jobs introduced the first Macintosh?", - "r": "January 24, 1984" - }, - { - "q": "Who was the former Apple CEO that introduced the first Macintosh on January 24, 1984?", - "r": "Steve Jobs" - }, - { - "q": "What percent of the desktop share did Apple have in the United States in late 2003?", - "r": "2.06 percent" - }, - { - "q": "What were the research firms that reported on Apple's market share in the U.S.?", - "r": "IDC and Gartner" - }, - { - "q": "What was the percentage increase of Apple's market share in the U.S., as reported by research firms IDC and Gartner?", - "r": "6%" - }] -} -Text: -<|text_start|> -{{ text }} -<|text_end|> -Output with {{ num_queries }} QnAs: -``` - -Application code: ```python -import json -import asyncio -from typing import Any, Dict, List, Optional -from azure.ai.evaluation.simulator import Simulator -from promptflow.client import load_flow -import os -import wikipedia - -# Set up the model configuration without api_key, using DefaultAzureCredential -model_config = { - "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"), - "azure_deployment": os.environ.get("AZURE_DEPLOYMENT"), - # not providing key would make the SDK pick up `DefaultAzureCredential` - # use "api_key": "" - "api_version": "2024-08-01-preview" # keep this for gpt-4o -} - -# Use Wikipedia to get some text for the simulation -wiki_search_term = "Leonardo da Vinci" -wiki_title = wikipedia.search(wiki_search_term)[0] -wiki_page = wikipedia.page(wiki_title) -text = wiki_page.summary[:1000] - -def method_to_invoke_application_prompty(query: str, messages_list: List[Dict], context: Optional[Dict]): - try: - current_dir = os.path.dirname(__file__) - prompty_path = os.path.join(current_dir, "application.prompty") - _flow = load_flow( - source=prompty_path, - model=model_config, - credential=DefaultAzureCredential() - ) - response = _flow( - query=query, - context=context, - conversation_history=messages_list - ) - return response - except Exception as e: - print(f"Something went wrong invoking the prompty: {e}") - return "something went wrong" - async def callback( messages: Dict[str, List[Dict]], stream: bool = False, - session_state: Any = None, # noqa: ANN401 + session_state: Any = None, context: Optional[Dict[str, Any]] = None, ) -> dict: messages_list = messages["messages"] @@ -356,8 +195,8 @@ async def callback( latest_message = messages_list[-1] query = latest_message["content"] # Call your endpoint or AI application here - response = method_to_invoke_application_prompty(query, messages_list, context) - # Format the response to follow the OpenAI chat protocol format + # response should be a string + response = call_to_your_application(query, messages_list, context) formatted_response = { "content": response, "role": "assistant", @@ -365,33 +204,32 @@ async def callback( } messages["messages"].append(formatted_response) return {"messages": messages["messages"], "stream": stream, "session_state": session_state, "context": context} +``` -async def main(): - simulator = Simulator(model_config=model_config) - current_dir = os.path.dirname(__file__) - query_response_override_for_latest_gpt_4o = os.path.join(current_dir, "TaskSimulatorQueryResponseGPT4o.prompty") - outputs = await simulator( - target=callback, - text=text, - query_response_generating_prompty=query_response_override_for_latest_gpt_4o, # use this only with latest gpt-4o - num_queries=2, - max_conversation_turns=1, - user_persona=[ - f"I 
am a student and I want to learn more about {wiki_search_term}",
-            f"I am a teacher and I want to teach my students about {wiki_search_term}"
-        ],
-    )
-    print(json.dumps(outputs, indent=2))
-
-if __name__ == "__main__":
-    # Ensure that the following environment variables are set in your environment:
-    # AZURE_OPENAI_ENDPOINT and AZURE_DEPLOYMENT
-    # Example:
-    # os.environ["AZURE_OPENAI_ENDPOINT"] = "https://your-endpoint.openai.azure.com/"
-    # os.environ["AZURE_DEPLOYMENT"] = "your-deployment-name"
-    asyncio.run(main())
-    print("done!")
-
+The simulator initialization and invocation look like this:
+```python
+import asyncio
+import os
+
+from azure.ai.evaluation.simulator import Simulator
+model_config = {
+    "azure_endpoint": os.environ.get("AZURE_ENDPOINT"),
+    "azure_deployment": os.environ.get("AZURE_DEPLOYMENT_NAME"),
+    "api_version": os.environ.get("AZURE_API_VERSION"),
+}
+custom_simulator = Simulator(model_config=model_config)
+outputs = asyncio.run(custom_simulator(
+    target=callback,
+    conversation_turns=[
+        [
+            "What should I know about the public gardens in the US?",
+        ],
+        [
+            "How do I simulate data against LLMs?",
+        ],
+    ],
+    max_conversation_turns=2,
+))
+with open("simulator_output.jsonl", "w") as f:
+    for output in outputs:
+        f.write(output.to_eval_qr_json_lines())
 ```
 
 #### Adversarial Simulator
 
 ```python
 from azure.ai.evaluation.simulator import AdversarialSimulator, AdversarialScenario
 from azure.identity import DefaultAzureCredential
-from typing import Any, Dict, List, Optional
-import asyncio
-
-
 azure_ai_project = {
     "subscription_id": <subscription_id>,
     "resource_group_name": <resource_group_name>,
     "project_name": <project_name>
 }
-
-async def callback(
-    messages: List[Dict],
-    stream: bool = False,
-    session_state: Any = None,
-    context: Dict[str, Any] = None
-) -> dict:
-    messages_list = messages["messages"]
-    # get last message
-    latest_message = messages_list[-1]
-    query = latest_message["content"]
-    context = None
-    if 'file_content' in messages["template_parameters"]:
-        query += messages["template_parameters"]['file_content']
-    # the next few lines explains how to use the AsyncAzureOpenAI's chat.completions
-    # to respond to the simulator. You should replace it with a call to your model/endpoint/application
-    # make sure you pass the `query` and format the response as we have shown below
-    from openai import AsyncAzureOpenAI
-    oai_client = AsyncAzureOpenAI(
-        api_key=<api_key>,
-        azure_endpoint=<azure_endpoint>,
-        api_version="2023-12-01-preview",
-    )
-    try:
-        response_from_oai_chat_completions = await oai_client.chat.completions.create(messages=[{"content": query, "role": "user"}], model="gpt-4", max_tokens=300)
-    except Exception as e:
-        print(f"Error: {e}")
-        # to continue the conversation, return the messages, else you can fail the adversarial with an exception
-        message = {
-            "content": "Something went wrong. 
Check the exception e for more details.", - "role": "assistant", - "context": None, - } - messages["messages"].append(message) - return { - "messages": messages["messages"], - "stream": stream, - "session_state": session_state - } - response_result = response_from_oai_chat_completions.choices[0].message.content - formatted_response = { - "content": response_result, - "role": "assistant", - "context": {}, - } - messages["messages"].append(formatted_response) - return { - "messages": messages["messages"], - "stream": stream, - "session_state": session_state, - "context": context - } - -``` - -#### Adversarial QA - -```python scenario = AdversarialScenario.ADVERSARIAL_QA simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential()) @@ -504,6 +280,8 @@ In following section you will find examples of: - [Evaluate an application][evaluate_app] - [Evaluate different models][evaluate_models] - [Custom Evaluators][custom_evaluators] +- [Adversarial Simulation][adversarial_simulation] +- [Simulate with conversation starter][simulate_with_conversation_starter] More examples can be found [here][evaluate_samples]. @@ -571,4 +349,6 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con [evaluation_metrics]: https://learn.microsoft.com/azure/ai-studio/concepts/evaluation-metrics-built-in [performance_and_quality_evaluators]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/evaluate-sdk#performance-and-quality-evaluators [risk_and_safety_evaluators]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/evaluate-sdk#risk-and-safety-evaluators -[composite_evaluators]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/evaluate-sdk#composite-evaluators \ No newline at end of file +[composite_evaluators]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/evaluate-sdk#composite-evaluators +[adversarial_simulation]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/simulate_adversarial +[simulate_with_conversation_starter]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/simulate_conversation_starter \ No newline at end of file From a90c788b9dc9aa2072de999a6cc03cb4254ea694 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Mon, 11 Nov 2024 12:23:25 -0800 Subject: [PATCH 44/51] more links --- sdk/evaluation/azure-ai-evaluation/README.md | 25 +++++++------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/README.md b/sdk/evaluation/azure-ai-evaluation/README.md index a04168625eb8..4b8cb52f9d7d 100644 --- a/sdk/evaluation/azure-ai-evaluation/README.md +++ b/sdk/evaluation/azure-ai-evaluation/README.md @@ -256,23 +256,11 @@ outputs = asyncio.run( print(outputs.to_eval_qr_json_lines()) ``` -#### Direct Attack Simulator -```python -scenario = AdversarialScenario.ADVERSARIAL_QA -simulator = DirectAttackSimulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential()) - -outputs = asyncio.run( - simulator( - scenario=scenario, - max_conversation_turns=1, - max_simulation_results=2, - target=callback - ) -) - -print(outputs) -``` +For more details about the simulator, visit the following links: +- [Adversarial Simulation docs][adversarial_simulation_docs] +- [Adversarial scenarios][adversarial_simulation_scenarios] +- [Simulating jailbreak attacks][adversarial_jailbreak] ## Examples @@ -350,5 +338,8 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con 
[performance_and_quality_evaluators]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/evaluate-sdk#performance-and-quality-evaluators [risk_and_safety_evaluators]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/evaluate-sdk#risk-and-safety-evaluators [composite_evaluators]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/evaluate-sdk#composite-evaluators +[adversarial_simulation_docs]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/simulator-interaction-data#generate-adversarial-simulations-for-safety-evaluation +[adversarial_simulation_scenarios]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/simulator-interaction-data#supported-adversarial-simulation-scenarios [adversarial_simulation]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/simulate_adversarial -[simulate_with_conversation_starter]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/simulate_conversation_starter \ No newline at end of file +[simulate_with_conversation_starter]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios/evaluate/simulate_conversation_starter +[adversarial_jailbreak]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/simulator-interaction-data#simulating-jailbreak-attacks \ No newline at end of file From 3ad53d523c51b85f5cf301380620fef400663ac9 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Mon, 2 Dec 2024 10:31:18 -0800 Subject: [PATCH 45/51] Bugfix: zip_longest created null parameters --- .../azure/ai/evaluation/simulator/_adversarial_simulator.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py index a64ccc5e0575..cd8e33fdbbd8 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py @@ -221,6 +221,11 @@ async def __call__( random.shuffle(templates) parameter_lists = [t.template_parameters for t in templates] zipped_parameters = list(zip_longest(*parameter_lists)) + filtered_parameters = [] + for params in zipped_parameters: + if None not in params: + filtered_parameters.append(params) + zipped_parameters = filtered_parameters for param_group in zipped_parameters: for template, parameter in zip(templates, param_group): if _jailbreak_type == "upia": From e9f3241eecfcdc49016d1ff71f4f4458ad3a3163 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Mon, 2 Dec 2024 10:37:51 -0800 Subject: [PATCH 46/51] Updated changelog --- sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index 591a5cd2954e..cc2d701563f7 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -5,6 +5,7 @@ ### Bugs Fixed - Fixed `[remote]` extra to be needed only when tracking results in Azure AI Studio. - Removing `azure-ai-inference` as dependency. 
+- Fixed `AttributeError: 'NoneType' object has no attribute 'get'` while running the simulator with 1000+ results.
 
 ## 1.0.0 (2024-11-13)

From 79c2f0d246378626d96901704e534b25ca2891e4 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Mon, 2 Dec 2024 15:30:28 -0800
Subject: [PATCH 47/51] zip does the job

---
 .../ai/evaluation/simulator/_adversarial_simulator.py       | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py
index cd8e33fdbbd8..a5ee35be0caf 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py
@@ -220,12 +220,7 @@ async def __call__(
             random.seed(randomization_seed)
             random.shuffle(templates)
         parameter_lists = [t.template_parameters for t in templates]
-        zipped_parameters = list(zip_longest(*parameter_lists))
-        filtered_parameters = []
-        for params in zipped_parameters:
-            if None not in params:
-                filtered_parameters.append(params)
-        zipped_parameters = filtered_parameters
+        zipped_parameters = list(zip(*parameter_lists))
         for param_group in zipped_parameters:
             for template, parameter in zip(templates, param_group):
                 if _jailbreak_type == "upia":

From e9f3241eecfcdc49016d1ff71f4f4458ad3a3163 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Tue, 3 Dec 2024 07:45:40 -0800
Subject: [PATCH 48/51] remove unused import

---
 .../azure/ai/evaluation/simulator/_adversarial_simulator.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py
index a5ee35be0caf..617d4406b689 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py
@@ -7,7 +7,6 @@
 import logging
 import random
 from typing import Any, Callable, Dict, List, Literal, Optional, Union, cast
-from itertools import zip_longest
 
 from tqdm import tqdm

From 74d85539b52aee7623a851623e25855f7ff07798 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Wed, 18 Dec 2024 13:37:31 -0800
Subject: [PATCH 49/51] Fix changelog merge

---
 sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
index 3614c102ef56..758e9c980987 100644
--- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
+++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
@@ -7,6 +7,9 @@
 ### Breaking Changes
 
 ### Bugs Fixed
+- Removed `[remote]` extra. This is no longer needed when tracking results in Azure AI Studio.
+- Fixed `AttributeError: 'NoneType' object has no attribute 'get'` while running the simulator with 1000+ results.
+- Fixed the non-adversarial simulator to run in task-free mode.
 
 ### Other Changes

From ede99b8a3b2ff0b81e5b1d3dcfec3880526d5d04 Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Thu, 9 Jan 2025 13:16:54 -0800
Subject: [PATCH 50/51] Remove print statements

---
 .../azure/ai/evaluation/simulator/_conversation/__init__.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_conversation/__init__.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_conversation/__init__.py
index 829476b1b2d8..a39ac1bfbb25 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_conversation/__init__.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_conversation/__init__.py
@@ -128,19 +128,15 @@ def __init__(
         self.conversation_starter: Optional[Union[str, jinja2.Template, Dict]] = None
         if role == ConversationRole.USER:
             if "conversation_starter" in self.persona_template_args:
-                print(self.persona_template_args)
                 conversation_starter_content = self.persona_template_args["conversation_starter"]
                 if isinstance(conversation_starter_content, dict):
                     self.conversation_starter = conversation_starter_content
-                    print(f"Conversation starter content: {conversation_starter_content}")
                 else:
                     try:
                         self.conversation_starter = jinja2.Template(
                             conversation_starter_content, undefined=jinja2.StrictUndefined
                         )
-                        print("Successfully created a Jinja2 template for the conversation starter.")
                     except jinja2.exceptions.TemplateSyntaxError as e:  # noqa: F841
-                        print(f"Template syntax error: {e}. Using raw content.")
                         self.conversation_starter = conversation_starter_content
             else:
                 self.logger.info(

From a557ecaac1a18769c7a6305c0d183ca60c1a4e9a Mon Sep 17 00:00:00 2001
From: Nagkumar Arkalgud
Date: Mon, 7 Apr 2025 08:48:10 -0700
Subject: [PATCH 51/51] some todos

---
 .../azure/ai/evaluation/red_team/_red_team.py | 22 ++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_red_team.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_red_team.py
index 9f9b01714b2a..f23ddf8b1c3b 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_red_team.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_red_team.py
@@ -674,6 +674,14 @@ async def _prompt_sending_orchestrator(
             batches = [all_prompts[i:i + batch_size] for i in range(0, len(all_prompts), batch_size)]
 
             for batch_idx, batch in enumerate(batches):
+                """
+                # TODO: Store the results from the orchestrator for each batch.
+                # Fetch the results even on errors/exceptions.
+                # Use the batch's memory label to extract its data: batch_memory_label (type: dict).
+                # Ensure each batch has a unique memory label.
+                # Ensure all batches share something in the memory label (like the file name where the output is stored by the _write_pyrit_outputs_to_file method).
+
+                """
                 self.logger.debug(f"Processing batch {batch_idx+1}/{len(batches)} with {len(batch)} prompts for {strategy_name}/{risk_category}")
 
                 batch_start_time = datetime.now()
 
                 # Send prompts in the batch with a timeout
                 try:
                     # Use wait_for to implement a timeout
                     await asyncio.wait_for(
-                        orchestrator.send_prompts_async(prompt_list=batch),
+                        orchestrator.send_prompts_async(
+                            prompt_list=batch,
+                            # memory_labels  # TODO: identify the right memory labels and create them for 
each batch before sending; ensure each label includes a uuid.
+                            #
+                        ),
                         timeout=timeout # Use provided timeout
                     )
                     batch_duration = (datetime.now() - batch_start_time).total_seconds()
                     self.logger.debug(f"Successfully processed batch {batch_idx+1} for {strategy_name}/{risk_category} in {batch_duration:.2f} seconds")
@@ -700,12 +712,14 @@ async def _prompt_sending_orchestrator(
                     self.task_statuses[batch_task_key] = TASK_STATUS["TIMEOUT"]
                     self.red_team_info[strategy_name][risk_category]["status"] = TASK_STATUS["INCOMPLETE"]
                     # Continue with partial results rather than failing completely
+                    # TODO: ensure the output is written to the file
                     continue
                 except Exception as e:
                     log_error(self.logger, f"Error processing batch {batch_idx+1}", e, f"{strategy_name}/{risk_category}")
                     self.logger.debug(f"ERROR: Strategy {strategy_name}, Risk {risk_category}, Batch {batch_idx+1}: {str(e)}")
                     self.red_team_info[strategy_name][risk_category]["status"] = TASK_STATUS["INCOMPLETE"]
                     # Continue with other batches even if one fails
+                    # TODO: ensure the output is written to the file
                     continue
             else:
                 # Small number of prompts, process all at once with a timeout
@@ -747,6 +761,12 @@ def _write_pyrit_outputs_to_file(self, orchestrator: Orchestrator) -> str:
         :return: Path to the output file
         :rtype: Union[str, os.PathLike]
         """
+        """
+        # TODO: This path needs to be generated earlier, when the batches are identified.
+        # Use the batch's memory label to extract data via orchestrator.get_memory().get_prompt_request_pieces(labels=batch_memory_label).
+        # If len(conversations) > the number of lines found in the JSON file at base_path (we know the file name because it is generated before the batches are created),
+        # replace the file contents with the new conversations.
+        """
         base_path = str(uuid.uuid4())
 
         # If scan output directory exists, place the file there
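
# A minimal sketch of the per-batch memory-label idea outlined in the TODOs
# above, not code from this patch: it assumes PyRIT's send_prompts_async
# accepts a memory_labels dict and that get_prompt_request_pieces (mentioned
# in the TODO) can filter on those labels; the label keys ("scan_file",
# "batch_id") and the helper name are illustrative only.
import asyncio
import uuid


async def send_batches_with_labels(orchestrator, batches, base_path, timeout):
    """Send prompt batches, tagging each with a recoverable memory label."""
    pieces_by_batch = []
    for batch in batches:
        # Unique label per batch, plus a shared label tying every batch of
        # this scan to the output file named by base_path.
        batch_memory_label = {"scan_file": base_path, "batch_id": str(uuid.uuid4())}
        try:
            await asyncio.wait_for(
                orchestrator.send_prompts_async(
                    prompt_list=batch,
                    memory_labels=batch_memory_label,
                ),
                timeout=timeout,
            )
        except Exception:
            # Even after a timeout or error, whatever this batch produced is
            # still in memory and can be fetched by its unique label below.
            pass
        pieces_by_batch.append(
            orchestrator.get_memory().get_prompt_request_pieces(labels=batch_memory_label)
        )
    return pieces_by_batch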