Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 128 additions & 1 deletion litellm/proxy/common_utils/http_parsing_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
import re
from typing import Any, Collection, Dict, List, Optional
from typing import Any, Collection, Dict, FrozenSet, List, Optional, Tuple

import orjson
from fastapi import Request, UploadFile, status
Expand All @@ -12,6 +12,119 @@
)
from litellm.types.router import Deployment

# Fields the proxy populates itself during request processing. These are
# removed from the incoming request body so downstream code works with the
# expected shape — caller-supplied values here would collide with or shadow
# the proxy's own. disable_global_guardrails and opted_out_global_guardrails
# are intentionally excluded; they're handled by the auth layer's
# _guardrail_modification_check.
# Fields the proxy populates itself during request processing. These are
# removed from the incoming request body so downstream code works with the
# expected shape — caller-supplied values here would collide with or shadow
# the proxy's own. disable_global_guardrails and opted_out_global_guardrails
# are intentionally excluded; they're handled by the auth layer's
# _guardrail_modification_check.
_RESTRICTED_TOP_LEVEL_FIELDS: FrozenSet[str] = frozenset(
    {
        "proxy_server_request",
        "standard_logging_object",
        "secret_fields",
        "litellm_logging_obj",
    }
)

# mock_response / mock_tool_calls short-circuit the real LLM call and are
# meant for test/dev use. Removed from client payloads by default; set
# ``general_settings.allow_client_side_mock_response: true`` in config.yaml
# to accept client-supplied values. Internal code (guardrails, health
# checks) sets these server-side after ingress and is unaffected.
_MOCK_RESPONSE_FIELDS: FrozenSet[str] = frozenset({"mock_response", "mock_tool_calls"})

# Metadata keys the proxy itself writes during request processing
# (guardrail/policy bookkeeping, Pillar scanner results, semantic-cache
# marker). Stripped from client-supplied metadata so caller values can't
# collide with — or spoof — the proxy's own entries.
_RESTRICTED_METADATA_FIELDS: FrozenSet[str] = frozenset(
    {
        "applied_guardrails",
        "applied_policies",
        "policy_sources",
        "pillar_response_headers",
        "pillar_flagged",
        "pillar_scanners",
        "pillar_evidence",
        "pillar_evidence_truncated",
        "pillar_session_id_response",
        "semantic-similarity",  # hyphenated, matches redis_semantic_cache.py
        "_guardrail_pipelines",
        "_pipeline_managed_guardrails",
    }
)

# The proxy writes user_api_key_* fields into metadata during
# add_litellm_data_to_request; any incoming values of the same shape are
# removed to avoid collisions. Safe here because this strip only runs on
# raw ingress bodies, before the proxy populates its own values.
_RESTRICTED_METADATA_PREFIXES: Tuple[str, ...] = ("user_api_key_",)

# Request-body keys under which client metadata may arrive; both are
# checked because different endpoint families use different container names.
_METADATA_CONTAINER_KEYS: Tuple[str, ...] = ("metadata", "litellm_metadata")


def _client_mock_response_allowed() -> bool:
"""Check the proxy's ``general_settings.allow_client_side_mock_response``.

Lazy-imported because ``general_settings`` lives on ``proxy_server`` and
we don't want an import cycle (``proxy_server`` imports this module).
In non-proxy contexts (SDK, unit tests that don't start the proxy) the
import succeeds but the dict is empty, so the function returns False
and the fields are removed by default.
"""
try:
from litellm.proxy.proxy_server import general_settings
except ImportError:
return False
return bool(general_settings.get("allow_client_side_mock_response"))


def _strip_internal_metadata_keys(metadata: dict) -> bool:
    """Delete restricted keys from ``metadata`` in place.

    A key is restricted when it appears in ``_RESTRICTED_METADATA_FIELDS``
    or starts with any ``_RESTRICTED_METADATA_PREFIXES`` entry.

    Returns:
        True if at least one key was deleted, False otherwise.
    """
    # Collect first, then delete, so we never mutate the dict mid-scan.
    doomed = [
        key
        for key in metadata
        if key in _RESTRICTED_METADATA_FIELDS
        or key.startswith(_RESTRICTED_METADATA_PREFIXES)
    ]
    for key in doomed:
        del metadata[key]
    return bool(doomed)


def strip_internal_control_fields(data: dict) -> None:
    """Remove proxy-internal fields from a user-supplied request body in place.

    Metadata containers can arrive as either a dict or a JSON string (the
    latter happens with multipart/form-data and some extra_body paths);
    both shapes are handled. Idempotent — safe to call on an
    already-cleaned dict.

    Only runs at ingress (before the proxy enriches ``data`` with its own
    user_api_key_* / internal fields). Do not call on an enriched body.

    Args:
        data: parsed request body; mutated in place. Non-dict input is a
            no-op.
    """
    if not isinstance(data, dict):
        return

    for key in _RESTRICTED_TOP_LEVEL_FIELDS:
        data.pop(key, None)

    if not _client_mock_response_allowed():
        for key in _MOCK_RESPONSE_FIELDS:
            if key in data:
                # Never strip silently: clients that rely on mock_response
                # (common in staging/test setups pointing at a shared proxy)
                # would otherwise see unexplained live-model responses.
                # Lazy import + ImportError guard so non-proxy contexts
                # without the proxy logger still work.
                try:
                    from litellm._logging import verbose_proxy_logger

                    verbose_proxy_logger.warning(
                        "Stripping client-supplied '%s' from request body. "
                        "Set general_settings.allow_client_side_mock_response: true "
                        "in config.yaml to allow this field.",
                        key,
                    )
                except ImportError:
                    pass
                del data[key]

    for container_key in _METADATA_CONTAINER_KEYS:
        container = data.get(container_key)
        if isinstance(container, dict):
            _strip_internal_metadata_keys(container)
        elif isinstance(container, str):
            # JSON-encoded metadata: parse, strip, and re-serialize only
            # when something was actually removed, to avoid gratuitously
            # rewriting the caller's string.
            try:
                parsed = json.loads(container)
            except (json.JSONDecodeError, ValueError):
                continue
            if isinstance(parsed, dict) and _strip_internal_metadata_keys(parsed):
                # ensure_ascii=False keeps non-ASCII characters in their
                # original UTF-8 form rather than \uXXXX-escaping them, so
                # the re-serialized string stays as close to the original
                # representation as possible.
                data[container_key] = json.dumps(parsed, ensure_ascii=False)


async def _read_request_body(request: Optional[Request]) -> Dict:
"""
Expand All @@ -32,6 +145,11 @@ async def _read_request_body(request: Optional[Request]) -> Dict:
request=request
)
if _cached_request_body is not None:
# The cache was populated by this function after the strip
# ran, so it's already in the expected shape. Do NOT re-strip
# here — by this point the proxy may have enriched the body
# with its own user_api_key_* / internal fields, which would
# be incorrectly removed.
return _cached_request_body

_request_headers: dict = _safe_get_request_headers(request=request)
Expand Down Expand Up @@ -80,6 +198,10 @@ async def _read_request_body(request: Optional[Request]) -> Dict:
code=status.HTTP_400_BAD_REQUEST,
)

# Strip proxy-internal fields before anything downstream (including
# the cache) sees the body.
strip_internal_control_fields(parsed_body)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Medium: Incomplete coverage of ingress sanitization

This strip only runs when the body is parsed through _read_request_body or get_form_data. Five endpoints in proxy_server.py (moderations ~L7857, audio_speech ~L7983, create_assistant ~L8545, add_messages ~L8933, run_thread ~L9126) call orjson.loads(body) directly and then pass data to add_litellm_data_to_request without going through this path. Those endpoints remain vulnerable to injection of fields like applied_guardrails, mock_response, _guardrail_pipelines, etc. that add_litellm_data_to_request doesn't strip.

Consider either calling strip_internal_control_fields(data) inside add_litellm_data_to_request (which all endpoints already use), or refactoring those five endpoints to use _read_request_body.


# Cache the parsed result
_safe_set_request_parsed_body(request=request, parsed_body=parsed_body)
return parsed_body
Expand Down Expand Up @@ -253,6 +375,11 @@ async def get_form_data(request: Request) -> Dict[str, Any]:
parsed_form_data.setdefault(clean_key, []).append(value)
else:
parsed_form_data[key] = value
# Same ingress sanitization as _read_request_body — multipart endpoints
# (audio transcription, skills, container uploads, passthrough) route
# through here directly via get_request_body and would otherwise bypass
# the strip that JSON ingress applies.
strip_internal_control_fields(parsed_form_data)
return parsed_form_data


Expand Down
5 changes: 5 additions & 0 deletions litellm/proxy/example_config_yaml/otel_test_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ model_list:
api_base: https://exampleopenaiendpoint-production.up.railway.app/


general_settings:
# Test suite uses extra_body={"mock_response": ...} via the OpenAI client to
# short-circuit upstream calls. Test/dev only.
allow_client_side_mock_response: true

litellm_settings:
cache: true
callbacks: ["otel", "prometheus"]
Expand Down
Loading
Loading