Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions litellm/integrations/s3_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def __init__(
s3_batch_size: Optional[int] = DEFAULT_S3_BATCH_SIZE,
s3_config=None,
s3_use_team_prefix: bool = False,
s3_strip_base64_files: bool = False,
**kwargs,
):
try:
Expand Down Expand Up @@ -80,6 +81,7 @@ def __init__(
s3_config=s3_config,
s3_path=s3_path,
s3_use_team_prefix=s3_use_team_prefix,
s3_strip_base64_files=s3_strip_base64_files
)
verbose_logger.debug(f"s3 logger using endpoint url {s3_endpoint_url}")

Expand Down Expand Up @@ -124,6 +126,7 @@ def _init_s3_params(
s3_config=None,
s3_path: Optional[str] = None,
s3_use_team_prefix: bool = False,
s3_strip_base64_files: bool = False,
):
"""
Initialize the s3 params for this logging callback
Expand Down Expand Up @@ -194,6 +197,11 @@ def _init_s3_params(
or s3_use_team_prefix
)

self.s3_strip_base64_files = (
bool(litellm.s3_callback_params.get("s3_strip_base64_files", False))
or s3_strip_base64_files
)

return

async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
Expand Down Expand Up @@ -364,6 +372,10 @@ def create_s3_batch_logging_element(
if standard_logging_payload is None:
return None

if self.s3_strip_base64_files:
import asyncio
standard_logging_payload = asyncio.run(self._strip_base64_from_messages(standard_logging_payload))

team_alias = standard_logging_payload["metadata"].get("user_api_key_team_alias")

team_alias_prefix = ""
Expand Down
2 changes: 2 additions & 0 deletions litellm/integrations/sqs.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,8 @@ async def async_log_failure_event(self, kwargs, response_obj, start_time, end_ti
standard_logging_payload = kwargs.get("standard_logging_object")
if standard_logging_payload is None:
raise ValueError("standard_logging_payload is None")
if self.sqs_strip_base64_files:
standard_logging_payload = await self._strip_base64_from_messages(standard_logging_payload)

self.log_queue.append(standard_logging_payload)
verbose_logger.debug(
Expand Down
143 changes: 142 additions & 1 deletion tests/test_litellm/integrations/test_s3_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,4 +154,145 @@ def test_s3_v2_endpoint_url(self, mock_periodic_flush, mock_create_task):
expected_download_url = "https://download.s3.endpoint.com/download-bucket/2025-09-14/download-test-key.json"
assert url_download == expected_download_url, f"Expected download URL {expected_download_url}, got {url_download}"

assert result == {"downloaded": "data"}
assert result == {"downloaded": "data"}

@pytest.mark.asyncio
async def test_strip_base64_removes_file_and_nontext_entries():
    """Image, file and audio parts are dropped; only the text parts survive."""
    s3_logger = S3Logger(s3_strip_base64_files=True)

    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "Hello world"},
            {"type": "image", "file": {"file_data": ""}},
            {"type": "file", "file": {"file_data": "data:application/pdf;base64,BBBB"}},
        ],
    }
    assistant_message = {
        "role": "assistant",
        "content": [
            {"type": "text", "text": "Response"},
            {"type": "audio", "file": {"file_data": "data:audio/wav;base64,CCCC"}},
        ],
    }
    request_payload = {"messages": [user_message, assistant_message]}

    result = await s3_logger._strip_base64_from_messages(request_payload)

    # Each message should be reduced to its single text part.
    user_parts = result["messages"][0]["content"]
    assert len(user_parts) == 1
    assert user_parts[0]["text"] == "Hello world"

    assistant_parts = result["messages"][1]["content"]
    assert len(assistant_parts) == 1
    assert assistant_parts[0]["text"] == "Response"

    # No surviving part may carry a 'file' key or a non-text type.
    surviving_parts = (
        part for message in result["messages"] for part in message["content"]
    )
    for part in surviving_parts:
        assert "file" not in part
        assert part.get("type") == "text"


@pytest.mark.asyncio
async def test_strip_base64_keeps_non_file_content():
    """Text-only message content must come back from stripping unchanged."""
    logger = S3Logger(s3_strip_base64_files=True)

    payload = {
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Just text"},
                    {"type": "text", "text": "Another message"},
                ],
            }
        ]
    }
    # Independent expected value: comparing the result against `payload`
    # itself would pass vacuously if the helper mutates its argument in
    # place and returns the same object.
    expected_content = [
        {"type": "text", "text": "Just text"},
        {"type": "text", "text": "Another message"},
    ]

    stripped = await logger._strip_base64_from_messages(payload)

    # Pure text messages should not be modified.
    assert stripped["messages"][0]["content"] == expected_content


@pytest.mark.asyncio
async def test_strip_base64_handles_empty_or_missing_messages():
    """Payloads with no 'messages' key or an empty list come back unchanged."""
    logger = S3Logger(s3_strip_base64_files=True)

    # The expected values are fresh literals rather than the input objects:
    # if the helper returns the very object it was given, comparing against
    # that object could never fail, even if keys were added or removed.

    # Missing messages key
    stripped1 = await logger._strip_base64_from_messages({})
    assert stripped1 == {}

    # Empty messages list
    stripped2 = await logger._strip_base64_from_messages({"messages": []})
    assert stripped2 == {"messages": []}


@pytest.mark.asyncio
async def test_strip_base64_mixed_nested_objects():
    """
    Irregular content entries are filtered without disturbing sibling keys.

    The text entry and the untyped ``{"foo": "bar"}`` entry are expected to
    survive; the custom-typed entry and the entry holding a ``file`` key are
    expected to be dropped.
    """
    s3_logger = S3Logger(s3_strip_base64_files=True)

    system_message = {
        "role": "system",
        "content": [
            {"type": "text", "text": "Keep me"},
            {"type": "custom", "metadata": "ignore but non-text"},
            {"foo": "bar"},
            {"file": {"file_data": "data:application/pdf;base64,XXX"}},
        ],
        "extra": {"trace_id": "123"},
    }

    result = await s3_logger._strip_base64_from_messages(
        {"messages": [system_message]}
    )
    first_message = result["messages"][0]

    # Exactly two entries remain: the text part and the untyped part.
    remaining = first_message["content"]
    assert len(remaining) == 2
    assert {"type": "text", "text": "Keep me"} in remaining
    assert {"foo": "bar"} in remaining

    # Keys next to "content" are left alone.
    assert first_message["extra"]["trace_id"] == "123"


@pytest.mark.asyncio
async def test_strip_base64_recursive_redaction():
    """No 'file' entries and no raw ``base64,`` payloads remain after stripping."""
    import json

    s3_logger = S3Logger(s3_strip_base64_files=True)
    content_parts = [
        {"type": "text", "text": "normal text"},
        {"type": "text", "text": ""},
        {"type": "text", "text": "Nested: {'data': 'data:application/pdf;base64,AAA...'}"},
        {"file": {"file_data": "data:application/pdf;base64,AAAA"}},
        {"metadata": {"preview": "data:audio/mp3;base64,AAAAA=="}},
    ]
    payload = {"messages": [{"content": content_parts}]}

    result = await s3_logger._strip_base64_from_messages(payload)
    content = result["messages"][0]["content"]

    # Entries carrying a 'file' key must have been dropped.
    assert all("file" not in part for part in content)

    # Every remaining dict entry, serialized, must be free of inline base64.
    for part in content:
        if not isinstance(part, dict):
            continue
        serialized = json.dumps(part).lower()
        # A "[base64_redacted]" marker is acceptable; a raw "base64," blob is not.
        assert "base64," not in serialized, f"Found real base64 blob in: {serialized}"
Loading