Merged
4 changes: 2 additions & 2 deletions docs/my-website/docs/proxy/logging_spec.md
@@ -91,7 +91,7 @@ Inherits from `StandardLoggingUserAPIKeyMetadata` and adds:
| `applied_guardrails` | `Optional[List[str]]` | List of applied guardrail names |
| `usage_object` | `Optional[dict]` | Raw usage object from the LLM provider |
| `cold_storage_object_key` | `Optional[str]` | S3/GCS object key for cold storage retrieval |
| `guardrail_information` | `Optional[StandardLoggingGuardrailInformation]` | Guardrail information |
| `guardrail_information` | `Optional[list[StandardLoggingGuardrailInformation]]` | Guardrail information |


## StandardLoggingVectorStoreRequest
@@ -170,7 +170,7 @@ A literal type with two possible values:
| `guardrail_mode` | `Optional[Union[GuardrailEventHooks, List[GuardrailEventHooks]]]` | Guardrail mode |
| `guardrail_request` | `Optional[dict]` | Guardrail request |
| `guardrail_response` | `Optional[Union[dict, str, List[dict]]]` | Guardrail response |
| `guardrail_status` | `Literal["success", "failure", "blocked"]` | Guardrail execution status: `success` = no violations detected, `blocked` = content blocked/modified due to policy violations, `failure` = technical error or API failure |
| `guardrail_status` | `Literal["success", "guardrail_intervened", "guardrail_failed_to_respond"]` | Guardrail execution status: `success` = no violations detected, `blocked` = content blocked/modified due to policy violations, `failure` = technical error or API failure |
| `start_time` | `Optional[float]` | Start time of the guardrail |
| `end_time` | `Optional[float]` | End time of the guardrail |
| `duration` | `Optional[float]` | Duration of the guardrail in seconds |
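For readers of the spec, a minimal sketch of what the list-shaped field might carry after this change — guardrail names, timestamps, and durations below are invented for illustration, not taken from LiteLLM:

```python
# Hypothetical StandardLoggingPayload fragment illustrating the new list shape
# and the renamed status literals. All concrete values here are made up.
payload = {
    "guardrail_information": [
        {
            "guardrail_name": "pii-masking",             # hypothetical guardrail name
            "guardrail_mode": "pre_call",
            "guardrail_status": "success",               # no violations detected
            "start_time": 1700000000.0,
            "end_time": 1700000000.2,
            "duration": 0.2,                             # seconds
        },
        {
            "guardrail_name": "content-moderation",      # hypothetical guardrail name
            "guardrail_mode": "during_call",
            "guardrail_status": "guardrail_intervened",  # content blocked/modified
            "duration": 0.1,
        },
    ]
}
```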
55 changes: 34 additions & 21 deletions litellm/integrations/custom_guardrail.py
@@ -59,7 +59,6 @@ def __init__(
self.mask_response_content: bool = mask_response_content

if supported_event_hooks:

## validate event_hook is in supported_event_hooks
self._validate_event_hook(event_hook, supported_event_hooks)
super().__init__(**kwargs)
@@ -80,7 +79,6 @@ def _validate_event_hook(
],
supported_event_hooks: List[GuardrailEventHooks],
) -> None:

def _validate_event_hook_list_is_in_supported_event_hooks(
event_hook: Union[List[GuardrailEventHooks], List[str]],
supported_event_hooks: List[GuardrailEventHooks],
@@ -130,23 +128,19 @@ def _guardrail_is_in_requested_guardrails(
self,
requested_guardrails: Union[List[str], List[Dict[str, DynamicGuardrailParams]]],
) -> bool:

for _guardrail in requested_guardrails:
if isinstance(_guardrail, dict):
if self.guardrail_name in _guardrail:

return True
elif isinstance(_guardrail, str):
if self.guardrail_name == _guardrail:

return True

return False

async def async_pre_call_deployment_hook(
self, kwargs: Dict[str, Any], call_type: Optional[CallTypes]
) -> Optional[dict]:

from litellm.proxy._types import UserAPIKeyAuth

# should run guardrail
@@ -385,14 +379,24 @@ def add_standard_logging_guardrail_information_to_request_data(
duration=duration,
masked_entity_count=masked_entity_count,
)

def _append_guardrail_info(container: dict) -> None:
key = "standard_logging_guardrail_information"
existing = container.get(key)
if existing is None:
container[key] = [slg]
elif isinstance(existing, list):
existing.append(slg)
else:
# defensive: a legacy single-dict value may already be present; wrap it in a list together with the new entry
container[key] = [existing, slg]

if "metadata" in request_data:
if request_data["metadata"] is None:
request_data["metadata"] = {}
request_data["metadata"]["standard_logging_guardrail_information"] = slg
_append_guardrail_info(request_data["metadata"])
elif "litellm_metadata" in request_data:
request_data["litellm_metadata"][
"standard_logging_guardrail_information"
] = slg
_append_guardrail_info(request_data["litellm_metadata"])
else:
verbose_logger.warning(
"unable to log guardrail information. No metadata found in request_data"
@@ -497,37 +501,46 @@ def update_in_memory_litellm_params(self, litellm_params: LitellmParams) -> None
"""
for key, value in vars(litellm_params).items():
setattr(self, key, value)

def get_guardrails_messages_for_call_type(self, call_type: CallTypes, data: Optional[dict] = None) -> Optional[List[AllMessageValues]]:

def get_guardrails_messages_for_call_type(
self, call_type: CallTypes, data: Optional[dict] = None
) -> Optional[List[AllMessageValues]]:
"""
Returns the messages for the given call type and data
"""
if call_type is None or data is None:
return None

#########################################################
# /chat/completions
# /messages
# Both endpoints store the messages in the "messages" key
#########################################################
if call_type == CallTypes.completion.value or call_type == CallTypes.acompletion.value or call_type == CallTypes.anthropic_messages.value:
if (
call_type == CallTypes.completion.value
or call_type == CallTypes.acompletion.value
or call_type == CallTypes.anthropic_messages.value
):
return data.get("messages")

#########################################################
# /responses
# User/System messages are stored in the "input" key, use litellm transformation to get the messages
#########################################################
if call_type == CallTypes.responses.value or call_type == CallTypes.aresponses.value:
if (
call_type == CallTypes.responses.value
or call_type == CallTypes.aresponses.value
):
from typing import cast

from litellm.responses.litellm_completion_transformation.transformation import (
LiteLLMCompletionResponsesConfig,
)

input_data = data.get("input")
if input_data is None:
return None

messages = LiteLLMCompletionResponsesConfig.transform_responses_api_input_to_messages(
input=input_data,
responses_api_request=data,
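A hypothetical caller of this helper: the dispatch above returns the `messages` list directly for chat-style call types and transforms `input` for Responses API call types. This sketch assumes `CustomGuardrail` can be constructed with just a `guardrail_name`:

```python
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.types.utils import CallTypes

guardrail = CustomGuardrail(guardrail_name="demo-guardrail")

# Chat-style call types read straight from the "messages" key.
msgs = guardrail.get_guardrails_messages_for_call_type(
    call_type=CallTypes.acompletion.value,
    data={"messages": [{"role": "user", "content": "hello"}]},
)
assert msgs == [{"role": "user", "content": "hello"}]
```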
25 changes: 15 additions & 10 deletions litellm/integrations/datadog/datadog_llm_obs.py
@@ -498,7 +501,9 @@ def _get_dd_llm_obs_payload_metadata(
"guardrail_information": standard_logging_payload.get(
"guardrail_information", None
),
"is_streamed_request": self._get_stream_value_from_payload(standard_logging_payload),
"is_streamed_request": self._get_stream_value_from_payload(
standard_logging_payload
),
}

#########################################################
@@ -548,21 +550,24 @@ def _get_latency_metrics(

# Guardrail overhead latency
guardrail_info: Optional[
StandardLoggingGuardrailInformation
list[StandardLoggingGuardrailInformation]
] = standard_logging_payload.get("guardrail_information")
if guardrail_info is not None:
_guardrail_duration_seconds: Optional[float] = guardrail_info.get(
"duration"
)
if _guardrail_duration_seconds is not None:
total_duration = 0.0
for info in guardrail_info:
_guardrail_duration_seconds: Optional[float] = info.get("duration")
if _guardrail_duration_seconds is not None:
total_duration += float(_guardrail_duration_seconds)
Review comment:

Bug: Guardrail data misprocessed with single dict input

The Datadog and OpenTelemetry integrations expect guardrail_information to be a list. When a single dictionary is provided, the code iterates over its keys, leading to guardrail data being incorrectly processed or silently omitted from logs and traces. This impacts duration calculations and span creation.

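One way to address the shape mismatch the comment describes (a sketch, not necessarily what the PR shipped): normalize the value to a list before iterating, so legacy single-dict payloads are still counted:

```python
from typing import List, Union

def normalize_guardrail_info(
    info: Union[dict, List[dict], None]
) -> List[dict]:
    # Accept the legacy single-dict shape as well as the new list shape.
    if info is None:
        return []
    if isinstance(info, dict):
        return [info]
    return info

entries = normalize_guardrail_info({"duration": 0.25})  # legacy single dict
total_duration = sum(float(e.get("duration") or 0.0) for e in entries)
assert total_duration == 0.25
```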
if total_duration > 0:
# Convert from seconds to milliseconds for consistency
latency_metrics["guardrail_overhead_time_ms"] = (
_guardrail_duration_seconds * 1000
)
latency_metrics["guardrail_overhead_time_ms"] = total_duration * 1000

return latency_metrics

def _get_stream_value_from_payload(self, standard_logging_payload: StandardLoggingPayload) -> bool:
def _get_stream_value_from_payload(
self, standard_logging_payload: StandardLoggingPayload
) -> bool:
"""
Extract the stream value from standard logging payload.
69 changes: 45 additions & 24 deletions litellm/integrations/langfuse/langfuse.py
@@ -688,11 +688,17 @@ def _log_langfuse_v2( # noqa: PLR0915
"completion_tokens": _usage_obj.completion_tokens,
"total_cost": cost if self._supports_costs() else None,
}
usage_details = LangfuseUsageDetails(input=_usage_obj.prompt_tokens,
output=_usage_obj.completion_tokens,
total=_usage_obj.total_tokens,
cache_creation_input_tokens=_usage_obj.get('cache_creation_input_tokens', 0),
cache_read_input_tokens=_usage_obj.get('cache_read_input_tokens', 0))
usage_details = LangfuseUsageDetails(
input=_usage_obj.prompt_tokens,
output=_usage_obj.completion_tokens,
total=_usage_obj.total_tokens,
cache_creation_input_tokens=_usage_obj.get(
"cache_creation_input_tokens", 0
),
cache_read_input_tokens=_usage_obj.get(
"cache_read_input_tokens", 0
),
)

generation_name = clean_metadata.pop("generation_name", None)
if generation_name is None:
@@ -790,7 +796,7 @@ def _get_responses_api_content_for_langfuse(
"""
Get the responses API content for Langfuse logging
"""
if hasattr(response_obj, 'output') and response_obj.output:
if hasattr(response_obj, "output") and response_obj.output:
# ResponsesAPIResponse.output is a list of strings
return response_obj.output
else:
@@ -880,29 +886,44 @@ def _log_guardrail_information_as_span(
guardrail_information = standard_logging_object.get(
"guardrail_information", None
)
if guardrail_information is None:
if not guardrail_information:
verbose_logger.debug(
"Not logging guardrail information as span because guardrail_information is None"
"Not logging guardrail information as span because guardrail_information is empty"
)
return

span = trace.span(
name="guardrail",
input=guardrail_information.get("guardrail_request", None),
output=guardrail_information.get("guardrail_response", None),
metadata={
"guardrail_name": guardrail_information.get("guardrail_name", None),
"guardrail_mode": guardrail_information.get("guardrail_mode", None),
"guardrail_masked_entity_count": guardrail_information.get(
"masked_entity_count", None
),
},
start_time=guardrail_information.get("start_time", None), # type: ignore
end_time=guardrail_information.get("end_time", None), # type: ignore
)
if not isinstance(guardrail_information, list):
verbose_logger.debug(
"Not logging guardrail information as span because guardrail_information is not a list: %s",
type(guardrail_information),
)
return

for guardrail_entry in guardrail_information:
if not isinstance(guardrail_entry, dict):
verbose_logger.debug(
"Skipping guardrail entry with unexpected type: %s",
type(guardrail_entry),
)
continue

span = trace.span(
name="guardrail",
input=guardrail_entry.get("guardrail_request", None),
output=guardrail_entry.get("guardrail_response", None),
metadata={
"guardrail_name": guardrail_entry.get("guardrail_name", None),
"guardrail_mode": guardrail_entry.get("guardrail_mode", None),
"guardrail_masked_entity_count": guardrail_entry.get(
"masked_entity_count", None
),
},
start_time=guardrail_entry.get("start_time", None), # type: ignore
end_time=guardrail_entry.get("end_time", None), # type: ignore
)

verbose_logger.debug(f"Logged guardrail information as span: {span}")
span.end()
verbose_logger.debug(f"Logged guardrail information as span: {span}")
span.end()
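The loop now emits one Langfuse span per guardrail entry instead of a single span for the whole request. A stubbed sketch of that shape — fake trace objects for illustration, not the Langfuse SDK:

```python
class FakeSpan:
    def end(self) -> None:
        pass

class FakeTrace:
    def __init__(self) -> None:
        self.spans: list = []

    def span(self, **kwargs) -> FakeSpan:
        # Record the kwargs the way trace.span(...) receives them above.
        self.spans.append(kwargs)
        return FakeSpan()

trace = FakeTrace()
guardrail_information = [
    {"guardrail_name": "pii-masking", "masked_entity_count": {"EMAIL": 1}},
    {"guardrail_name": "moderation"},
]
for entry in guardrail_information:
    span = trace.span(
        name="guardrail",
        input=entry.get("guardrail_request"),
        output=entry.get("guardrail_response"),
        metadata={"guardrail_name": entry.get("guardrail_name")},
    )
    span.end()

assert len(trace.spans) == 2  # one span per guardrail entry
```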


def _add_prompt_to_generation_params(