@@ -35,6 +35,47 @@ def _safe_getattr(obj: dict[str, Any] | Any, key: str, default: Any = None) -> A
3535 return getattr (obj , key , default )
3636
3737
38+ def _extract_text_from_content (content : Any ) -> str :
39+ """Extract plain text from message content, handling multi-part structures.
40+
41+ OpenAI ChatAPI supports content as either:
42+ - String: "hello world"
43+ - List of parts: [{"type": "text", "text": "hello"}, {"type": "image_url", ...}]
44+
45+ Args:
46+ content: Message content (string, list of parts, or other)
47+
48+ Returns:
49+ Extracted text as a plain string
50+ """
51+ # Content is already a string
52+ if isinstance (content , str ):
53+ return content
54+
55+ # Content is a list of parts (multi-modal message)
56+ if isinstance (content , list ):
57+ if not content :
58+ return ""
59+
60+ text_parts = []
61+ for part in content :
62+ if isinstance (part , dict ):
63+ # Extract text from various field names
64+ text = None
65+ for field in ["text" , "input_text" , "output_text" ]:
66+ if field in part :
67+ text = part [field ]
68+ break
69+
70+ if text is not None and isinstance (text , str ):
71+ text_parts .append (text )
72+
73+ return " " .join (text_parts ) if text_parts else ""
74+
75+ # Fallback: stringify other types
76+ return str (content ) if content is not None else ""
77+
78+
3879def _normalize_conversation_payload (payload : Any ) -> list [Any ] | None :
3980 """Normalize decoded sample payload into a conversation list if possible."""
4081 if isinstance (payload , list ):
@@ -68,13 +109,36 @@ def _parse_conversation_payload(data: str) -> list[Any]:
68109 return [{"role" : "user" , "content" : data }]
69110
70111
71- def _annotate_prompt_injection_result (
def _extract_latest_user_content(conversation_history: list[Any]) -> str:
    """Extract plain text from the most recent user message.

    Handles multi-part content structures (e.g., ChatAPI content parts) and
    normalizes to plain text for guardrails expecting text/plain.

    Args:
        conversation_history: List of message dictionaries

    Returns:
        Plain text string from latest user message, or empty string if none found
    """
    latest_user = next(
        (
            msg
            for msg in reversed(conversation_history)
            if _safe_getattr(msg, "role") == "user"
        ),
        None,
    )
    if latest_user is None:
        return ""
    return _extract_text_from_content(_safe_getattr(latest_user, "content", ""))
129+
130+
131+ def _annotate_incremental_result (
72132 result : Any ,
73133 turn_index : int ,
74134 message : dict [str , Any ] | Any ,
75135) -> None :
76136 """Annotate guardrail result with incremental evaluation metadata.
77137
138+ Adds turn-by-turn context to results from conversation-aware guardrails
139+ being evaluated incrementally. This includes the turn index, role, and
140+ message that triggered the guardrail (if applicable).
141+
78142 Args:
79143 result: GuardrailResult to annotate
80144 turn_index: Index of the conversation turn (0-based)
@@ -126,11 +190,10 @@ async def _run_incremental_guardrails(
126190
127191 latest_results = stage_results or latest_results
128192
193+ # Annotate all results with turn metadata for multi-turn evaluation
129194 triggered = False
130195 for result in stage_results :
131- guardrail_name = result .info .get ("guardrail_name" )
132- if guardrail_name == "Prompt Injection Detection" :
133- _annotate_prompt_injection_result (result , turn_index , current_history [- 1 ])
196+ _annotate_incremental_result (result , turn_index , current_history [- 1 ])
134197 if result .tripwire_triggered :
135198 triggered = True
136199
@@ -258,10 +321,10 @@ async def _evaluate_sample(self, context: Context, sample: Sample) -> SampleResu
258321 """
259322 try :
260323 # Detect if this sample requires conversation history by checking guardrail metadata
324+ # Check ALL guardrails, not just those in expected_triggers
261325 needs_conversation_history = any (
262326 guardrail .definition .metadata and guardrail .definition .metadata .uses_conversation_history
263327 for guardrail in self .guardrails
264- if guardrail .definition .name in sample .expected_triggers
265328 )
266329
267330 if needs_conversation_history :
@@ -270,42 +333,73 @@ async def _evaluate_sample(self, context: Context, sample: Sample) -> SampleResu
270333 # Handles JSON conversations, plain strings (wraps as user message), etc.
271334 conversation_history = _parse_conversation_payload (sample .data )
272335
273- # Create a minimal guardrails config for conversation-aware checks
274- minimal_config = {
275- "version" : 1 ,
276- "output" : {
277- "guardrails" : [
278- {
279- "name" : guardrail .definition .name ,
280- "config" : (guardrail .config .__dict__ if hasattr (guardrail .config , "__dict__" ) else guardrail .config ),
281- }
282- for guardrail in self .guardrails
283- if guardrail .definition .metadata and guardrail .definition .metadata .uses_conversation_history
284- ],
285- },
286- }
287-
288- # Create a temporary GuardrailsAsyncOpenAI client for conversation-aware guardrails
289- temp_client = GuardrailsAsyncOpenAI (
290- config = minimal_config ,
291- api_key = getattr (context .guardrail_llm , "api_key" , None ) or "fake-key-for-eval" ,
292- )
293-
294- # Normalize conversation history using the client's normalization
295- normalized_conversation = temp_client ._normalize_conversation (conversation_history )
296-
297- if self .multi_turn :
298- results = await _run_incremental_guardrails (
299- temp_client ,
300- normalized_conversation ,
336+ # Separate conversation-aware and non-conversation-aware guardrails
337+ # Evaluate ALL guardrails, not just those in expected_triggers
338+ # (expected_triggers is used for metrics calculation, not for filtering)
339+ conversation_aware_guardrails = [
340+ g for g in self .guardrails
341+ if g .definition .metadata
342+ and g .definition .metadata .uses_conversation_history
343+ ]
344+ non_conversation_aware_guardrails = [
345+ g for g in self .guardrails
346+ if not (g .definition .metadata and g .definition .metadata .uses_conversation_history )
347+ ]
348+
349+ # Evaluate conversation-aware guardrails with conversation history
350+ conversation_results = []
351+ if conversation_aware_guardrails :
352+ # Create a minimal guardrails config for conversation-aware checks
353+ minimal_config = {
354+ "version" : 1 ,
355+ "output" : {
356+ "guardrails" : [
357+ {
358+ "name" : guardrail .definition .name ,
359+ "config" : (guardrail .config .__dict__ if hasattr (guardrail .config , "__dict__" ) else guardrail .config ),
360+ }
361+ for guardrail in conversation_aware_guardrails
362+ ],
363+ },
364+ }
365+
366+ # Create a temporary GuardrailsAsyncOpenAI client for conversation-aware guardrails
367+ temp_client = GuardrailsAsyncOpenAI (
368+ config = minimal_config ,
369+ api_key = getattr (context .guardrail_llm , "api_key" , None ) or "fake-key-for-eval" ,
301370 )
302- else :
303- results = await temp_client ._run_stage_guardrails (
304- stage_name = "output" ,
305- text = "" ,
306- conversation_history = normalized_conversation ,
371+
372+ # Normalize conversation history using the client's normalization
373+ normalized_conversation = temp_client ._normalize_conversation (conversation_history )
374+
375+ if self .multi_turn :
376+ conversation_results = await _run_incremental_guardrails (
377+ temp_client ,
378+ normalized_conversation ,
379+ )
380+ else :
381+ conversation_results = await temp_client ._run_stage_guardrails (
382+ stage_name = "output" ,
383+ text = "" ,
384+ conversation_history = normalized_conversation ,
385+ suppress_tripwire = True ,
386+ )
387+
388+ # Evaluate non-conversation-aware guardrails (if any) on extracted text
389+ non_conversation_results = []
390+ if non_conversation_aware_guardrails :
391+ # Non-conversation-aware guardrails expect plain text, not JSON
392+ latest_user_content = _extract_latest_user_content (conversation_history )
393+ non_conversation_results = await run_guardrails (
394+ ctx = context ,
395+ data = latest_user_content ,
396+ media_type = "text/plain" ,
397+ guardrails = non_conversation_aware_guardrails ,
307398 suppress_tripwire = True ,
308399 )
400+
401+ # Combine results from both types of guardrails
402+ results = conversation_results + non_conversation_results
309403 except (json .JSONDecodeError , TypeError , ValueError ) as e :
310404 logger .error (
311405 "Failed to parse conversation history for conversation-aware guardrail sample %s: %s" ,
0 commit comments