diff --git a/changelog.d/19212.misc b/changelog.d/19212.misc new file mode 100644 index 00000000000..83158ce2d9c --- /dev/null +++ b/changelog.d/19212.misc @@ -0,0 +1 @@ +Respond with useful error codes with `Content-Length` header/s are invalid. diff --git a/synapse/api/constants.py b/synapse/api/constants.py index d41e44b1541..9b6a68e929c 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -29,6 +29,19 @@ # the max size of a (canonical-json-encoded) event MAX_PDU_SIZE = 65536 +# The maximum allowed size of an HTTP request. +# Other than media uploads, the biggest request we expect to see is a fully-loaded +# /federation/v1/send request. +# +# The main thing in such a request is up to 50 PDUs, and up to 100 EDUs. PDUs are +# limited to 65536 bytes (possibly slightly more if the sender didn't use canonical +# json encoding); there is no specced limit to EDUs (see +# https://github.com/matrix-org/matrix-doc/issues/3121). +# +# in short, we somewhat arbitrarily limit requests to 200 * 64K (about 12.5M) +# +MAX_REQUEST_SIZE = 200 * MAX_PDU_SIZE + # Max/min size of ints in canonical JSON CANONICALJSON_MAX_INT = (2**53) - 1 CANONICALJSON_MIN_INT = -CANONICALJSON_MAX_INT diff --git a/synapse/app/_base.py b/synapse/app/_base.py index d1ed1201e5a..98d051bf04d 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -59,7 +59,7 @@ from twisted.web.resource import Resource import synapse.util.caches -from synapse.api.constants import MAX_PDU_SIZE +from synapse.api.constants import MAX_REQUEST_SIZE from synapse.app import check_bind_error from synapse.config import ConfigError from synapse.config._base import format_config_error @@ -895,17 +895,8 @@ def sdnotify(state: bytes) -> None: def max_request_body_size(config: HomeServerConfig) -> int: """Get a suitable maximum size for incoming HTTP requests""" - # Other than media uploads, the biggest request we expect to see is a fully-loaded - # /federation/v1/send request. - # - # The main thing in such a request is up to 50 PDUs, and up to 100 EDUs. PDUs are - # limited to 65536 bytes (possibly slightly more if the sender didn't use canonical - # json encoding); there is no specced limit to EDUs (see - # https://github.com/matrix-org/matrix-doc/issues/3121). - # - # in short, we somewhat arbitrarily limit requests to 200 * 64K (about 12.5M) - # - max_request_size = 200 * MAX_PDU_SIZE + # Baseline default for any request that isn't configured in the homeserver config + max_request_size = MAX_REQUEST_SIZE # if we have a media repo enabled, we may need to allow larger uploads than that if config.media.can_load_media_repo: diff --git a/synapse/http/site.py b/synapse/http/site.py index a1b0b8d9c2f..f10380a9121 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -19,6 +19,7 @@ # # import contextlib +import json import logging import time from http import HTTPStatus @@ -36,6 +37,7 @@ from twisted.web.resource import IResource, Resource from twisted.web.server import Request +from synapse.api.errors import Codes from synapse.config.server import ListenerConfig from synapse.http import get_request_user_agent, redact_uri from synapse.http.proxy import ProxySite @@ -46,7 +48,7 @@ PreserveLoggingContext, ) from synapse.metrics import SERVER_NAME_LABEL -from synapse.types import ISynapseReactor, Requester +from synapse.types import ISynapseReactor, JsonDict, Requester if TYPE_CHECKING: import opentracing @@ -59,6 +61,16 @@ _next_request_seq = 0 +class ContentLengthError(Exception): + """Raised when content-length validation fails.""" + + def __init__(self, status: HTTPStatus, errcode: str, message: str): + self.status = status + self.errcode = errcode + self.message = message + super().__init__(message) + + class SynapseRequest(Request): """Class which encapsulates an HTTP request to synapse. @@ -144,36 +156,153 @@ def __repr__(self) -> str: self.synapse_site.site_tag, ) + def _respond_with_error(self, error_code: HTTPStatus, error_json: JsonDict) -> None: + """Send an error response and close the connection.""" + self.code = error_code.value + self.code_message = bytes(error_code.phrase, "ascii") + error_response_bytes = json.dumps(error_json).encode() + + self.responseHeaders.setRawHeaders(b"Content-Type", [b"application/json"]) + self.responseHeaders.setRawHeaders( + b"Content-Length", [f"{len(error_response_bytes)}"] + ) + self.write(error_response_bytes) + self.loseConnection() + + def _get_content_length_from_headers(self) -> int | None: + """Attempts to obtain the `Content-Length` value from the request's headers. + + Returns: + Content length as `int` if present. Otherwise `None`. + + Raises: + ContentLengthError: if multiple `Content-Length` headers are present or the + value is not an `int`. + """ + content_length_headers = self.requestHeaders.getRawHeaders(b"Content-Length") + if content_length_headers is None: + return None + + # If there are multiple `Content-Length` headers return an error. + # We don't want to even try to pick the right one if there are multiple + # as we could run into problems similar to request smuggling vulnerabilities + # which rely on the mismatch of how different systems interpret information. + if len(content_length_headers) != 1: + raise ContentLengthError( + HTTPStatus.BAD_REQUEST, + Codes.UNKNOWN, + "Multiple Content-Length headers received", + ) + + try: + return int(content_length_headers[0]) + except (ValueError, TypeError): + raise ContentLengthError( + HTTPStatus.BAD_REQUEST, + Codes.UNKNOWN, + "Content-Length header value is not a valid integer", + ) + + def _validate_content_length(self) -> None: + """Validate Content-Length header and actual content size. + + Raises: + ContentLengthError: If validation fails. + """ + # we should have a `content` by now. + assert self.content, "_validate_content_length() called before gotLength()" + content_length = self._get_content_length_from_headers() + + if content_length is None: + return + + actual_content_length = self.content.tell() + + if content_length > self._max_request_body_size: + logger.info( + "Rejecting request from %s because Content-Length %d exceeds maximum size %d: %s %s", + self.client, + content_length, + self._max_request_body_size, + self.get_method(), + self.get_redacted_uri(), + ) + raise ContentLengthError( + HTTPStatus.REQUEST_ENTITY_TOO_LARGE, + Codes.TOO_LARGE, + f"Request content is too large (>{self._max_request_body_size})", + ) + + if content_length != actual_content_length: + comparison = ( + "smaller" if content_length < actual_content_length else "larger" + ) + logger.info( + "Rejecting request from %s because Content-Length %d is %s than the request content size %d: %s %s", + self.client, + content_length, + comparison, + actual_content_length, + self.get_method(), + self.get_redacted_uri(), + ) + raise ContentLengthError( + HTTPStatus.BAD_REQUEST, + Codes.UNKNOWN, + f"Rejecting request as the Content-Length header value {content_length} " + f"is {comparison} than the actual request content size {actual_content_length}", + ) + # Twisted machinery: this method is called by the Channel once the full request has # been received, to dispatch the request to a resource. - # - # We're patching Twisted to bail/abort early when we see someone trying to upload - # `multipart/form-data` so we can avoid Twisted parsing the entire request body into - # in-memory (specific problem of this specific `Content-Type`). This protects us - # from an attacker uploading something bigger than the available RAM and crashing - # the server with a `MemoryError`, or carefully block just enough resources to cause - # all other requests to fail. - # - # FIXME: This can be removed once we Twisted releases a fix and we update to a - # version that is patched def requestReceived(self, command: bytes, path: bytes, version: bytes) -> None: + # In the case of a Content-Length header being present, and it's value being too + # large, throw a proper error to make debugging issues due to overly large requests much + # easier. Currently we handle such cases in `handleContentChunk` and abort the + # connection without providing a proper HTTP response. + # + # Attempting to write an HTTP response from within `handleContentChunk` does not + # work, so the code here has been added to at least provide a response in the + # case of the Content-Length header being present. + self.method, self.uri = command, path + self.clientproto = version + + try: + self._validate_content_length() + except ContentLengthError as e: + self._respond_with_error( + e.status, {"errcode": e.errcode, "error": e.message} + ) + return + + # We're patching Twisted to bail/abort early when we see someone trying to upload + # `multipart/form-data` so we can avoid Twisted parsing the entire request body into + # in-memory (specific problem of this specific `Content-Type`). This protects us + # from an attacker uploading something bigger than the available RAM and crashing + # the server with a `MemoryError`, or carefully block just enough resources to cause + # all other requests to fail. + # + # FIXME: This can be removed once Twisted releases a fix and we update to a + # version that is patched + # See: https://github.com/element-hq/synapse/security/advisories/GHSA-rfq8-j7rh-8hf2 if command == b"POST": ctype = self.requestHeaders.getRawHeaders(b"content-type") if ctype and b"multipart/form-data" in ctype[0]: - self.method, self.uri = command, path - self.clientproto = version + logger.warning( + "Aborting connection from %s because `content-type: multipart/form-data` is unsupported: %s %s", + self.client, + self.get_method(), + self.get_redacted_uri(), + ) + self.code = HTTPStatus.UNSUPPORTED_MEDIA_TYPE.value self.code_message = bytes( HTTPStatus.UNSUPPORTED_MEDIA_TYPE.phrase, "ascii" ) - self.responseHeaders.setRawHeaders(b"content-length", [b"0"]) - logger.warning( - "Aborting connection from %s because `content-type: multipart/form-data` is unsupported: %s %s", - self.client, - command, - path, - ) + # FIXME: Return a better error response here similar to the + # `error_response_json` returned in other code paths here. + self.responseHeaders.setRawHeaders(b"Content-Length", [b"0"]) self.write(b"") self.loseConnection() return diff --git a/tests/http/test_site.py b/tests/http/test_site.py index 9e6d929c9ef..654ec3190be 100644 --- a/tests/http/test_site.py +++ b/tests/http/test_site.py @@ -22,6 +22,7 @@ from twisted.internet.address import IPv6Address from twisted.internet.testing import MemoryReactor, StringTransport +from synapse.app._base import max_request_body_size from synapse.app.homeserver import SynapseHomeServer from synapse.server import HomeServer from synapse.util.clock import Clock @@ -143,3 +144,104 @@ def test_content_type_multipart(self) -> None: # we should get a 415 self.assertRegex(transport.value().decode(), r"^HTTP/1\.1 415 ") + + def test_content_length_too_large(self) -> None: + """HTTP requests with Content-Length exceeding max size should be rejected with 413""" + self.hs.start_listening() + + # find the HTTP server which is configured to listen on port 0 + (port, factory, _backlog, interface) = self.reactor.tcpServers[0] + self.assertEqual(interface, "::") + self.assertEqual(port, 0) + + # complete the connection and wire it up to a fake transport + client_address = IPv6Address("TCP", "::1", 2345) + protocol = factory.buildProtocol(client_address) + transport = StringTransport() + protocol.makeConnection(transport) + + # Send a request with Content-Length header that exceeds the limit. + # Default max is 50MB (from media max_upload_size), so send something larger. + oversized_length = 1 + max_request_body_size(self.hs.config) + protocol.dataReceived( + b"POST / HTTP/1.1\r\n" + b"Connection: close\r\n" + b"Content-Length: " + str(oversized_length).encode() + b"\r\n" + b"\r\n" + b"" + b"x" * oversized_length + b"\r\n" + b"\r\n" + ) + + # Advance the reactor to process the request + while not transport.disconnecting: + self.reactor.advance(1) + + # We should get a 413 Content Too Large + response = transport.value().decode() + self.assertRegex(response, r"^HTTP/1\.1 413 ") + self.assertSubstring("M_TOO_LARGE", response) + + def test_too_many_content_length_headers(self) -> None: + """HTTP requests with multiple Content-Length headers should be rejected with 400""" + self.hs.start_listening() + + # find the HTTP server which is configured to listen on port 0 + (port, factory, _backlog, interface) = self.reactor.tcpServers[0] + self.assertEqual(interface, "::") + self.assertEqual(port, 0) + + # complete the connection and wire it up to a fake transport + client_address = IPv6Address("TCP", "::1", 2345) + protocol = factory.buildProtocol(client_address) + transport = StringTransport() + protocol.makeConnection(transport) + + protocol.dataReceived( + b"POST / HTTP/1.1\r\n" + b"Connection: close\r\n" + b"Content-Length: " + str(5).encode() + b"\r\n" + b"Content-Length: " + str(5).encode() + b"\r\n" + b"\r\n" + b"" + b"xxxxx" + b"\r\n" + b"\r\n" + ) + + # Advance the reactor to process the request + while not transport.disconnecting: + self.reactor.advance(1) + + # We should get a 400 + response = transport.value().decode() + self.assertRegex(response, r"^HTTP/1\.1 400 ") + + def test_invalid_content_length_headers(self) -> None: + """HTTP requests with invalid Content-Length header should be rejected with 400""" + self.hs.start_listening() + + # find the HTTP server which is configured to listen on port 0 + (port, factory, _backlog, interface) = self.reactor.tcpServers[0] + self.assertEqual(interface, "::") + self.assertEqual(port, 0) + + # complete the connection and wire it up to a fake transport + client_address = IPv6Address("TCP", "::1", 2345) + protocol = factory.buildProtocol(client_address) + transport = StringTransport() + protocol.makeConnection(transport) + + protocol.dataReceived( + b"POST / HTTP/1.1\r\n" + b"Connection: close\r\n" + b"Content-Length: eight\r\n" + b"\r\n" + b"" + b"xxxxx" + b"\r\n" + b"\r\n" + ) + + # Advance the reactor to process the request + while not transport.disconnecting: + self.reactor.advance(1) + + # We should get a 400 + response = transport.value().decode() + self.assertRegex(response, r"^HTTP/1\.1 400 ") diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py index d599351df78..d83604a6961 100644 --- a/tests/rest/client/test_login.py +++ b/tests/rest/client/test_login.py @@ -1728,9 +1728,6 @@ def test_username_picker_use_displayname_avatar_and_email(self) -> None: content_is_form=True, custom_headers=[ ("Cookie", "username_mapping_session=" + session_id), - # old versions of twisted don't do form-parsing without a valid - # content-length header. - ("Content-Length", str(len(content))), ], ) self.assertEqual(chan.code, 302, chan.result) @@ -1818,9 +1815,6 @@ def test_username_picker_dont_use_displayname_avatar_or_email(self) -> None: content_is_form=True, custom_headers=[ ("Cookie", "username_mapping_session=" + session_id), - # old versions of twisted don't do form-parsing without a valid - # content-length header. - ("Content-Length", str(len(content))), ], ) self.assertEqual(chan.code, 302, chan.result) diff --git a/tests/rest/client/test_media.py b/tests/rest/client/test_media.py index 33172f930ed..ec81b1413c2 100644 --- a/tests/rest/client/test_media.py +++ b/tests/rest/client/test_media.py @@ -2590,7 +2590,6 @@ def test_authenticated_media(self) -> None: self.tok, shorthand=False, content_type=b"image/png", - custom_headers=[("Content-Length", str(67))], ) self.assertEqual(channel.code, 200) res = channel.json_body.get("content_uri") @@ -2750,7 +2749,6 @@ def test_authenticated_media_etag(self) -> None: self.tok, shorthand=False, content_type=b"image/png", - custom_headers=[("Content-Length", str(67))], ) self.assertEqual(channel.code, 200) res = channel.json_body.get("content_uri") @@ -2909,7 +2907,6 @@ def upload_media(self, size: int) -> FakeChannel: access_token=self.tok, shorthand=False, content_type=b"text/plain", - custom_headers=[("Content-Length", str(size))], ) def test_upload_under_limit(self) -> None: @@ -3074,7 +3071,6 @@ def upload_media(self, size: int, tok: str) -> FakeChannel: access_token=tok, shorthand=False, content_type=b"text/plain", - custom_headers=[("Content-Length", str(size))], ) def test_upload_under_limit(self) -> None: diff --git a/tests/rest/client/utils.py b/tests/rest/client/utils.py index 613c317b8a6..b3808d75bb9 100644 --- a/tests/rest/client/utils.py +++ b/tests/rest/client/utils.py @@ -612,7 +612,6 @@ def upload_media( filename: The filename of the media to be uploaded expect_code: The return code to expect from attempting to upload the media """ - image_length = len(image_data) path = "/_matrix/media/r0/upload?filename=%s" % (filename,) channel = make_request( self.reactor, @@ -621,7 +620,6 @@ def upload_media( path, content=image_data, access_token=tok, - custom_headers=[("Content-Length", str(image_length))], ) assert channel.code == expect_code, "Expected: %d, got: %d, resp: %r" % ( diff --git a/tests/server.py b/tests/server.py index 4fb7dea5ec0..ce31a4162ad 100644 --- a/tests/server.py +++ b/tests/server.py @@ -81,6 +81,7 @@ from twisted.web.resource import IResource from twisted.web.server import Request, Site +from synapse.api.constants import MAX_REQUEST_SIZE from synapse.config.database import DatabaseConnectionConfig from synapse.config.homeserver import HomeServerConfig from synapse.events.auto_accept_invites import InviteAutoAccepter @@ -241,7 +242,6 @@ def writeSequence(self, data: Iterable[bytes]) -> None: def loseConnection(self) -> None: self.unregisterProducer() - self.transport.loseConnection() # Type ignore: mypy doesn't like the fact that producer isn't an IProducer. def registerProducer(self, producer: IProducer, streaming: bool) -> None: @@ -428,18 +428,29 @@ def make_request( channel = FakeChannel(site, reactor, ip=client_ip) - req = request(channel, site, our_server_name="test_server") + req = request( + channel, + site, + our_server_name="test_server", + max_request_body_size=MAX_REQUEST_SIZE, + ) channel.request = req req.content = BytesIO(content) # Twisted expects to be at the end of the content when parsing the request. req.content.seek(0, SEEK_END) - # Old version of Twisted (<20.3.0) have issues with parsing x-www-form-urlencoded - # bodies if the Content-Length header is missing - req.requestHeaders.addRawHeader( - b"Content-Length", str(len(content)).encode("ascii") - ) + # If `Content-Length` was passed in as a custom header, don't automatically add it + # here. + if custom_headers is None or not any( + (k if isinstance(k, bytes) else k.encode("ascii")) == b"Content-Length" + for k, _ in custom_headers + ): + # Old version of Twisted (<20.3.0) have issues with parsing x-www-form-urlencoded + # bodies if the Content-Length header is missing + req.requestHeaders.addRawHeader( + b"Content-Length", str(len(content)).encode("ascii") + ) if access_token: req.requestHeaders.addRawHeader( diff --git a/tests/test_server.py b/tests/test_server.py index ec31b6cc5f6..3e3915b19ae 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -212,6 +212,66 @@ def _callback( self.assertEqual(channel.code, 200) self.assertNotIn("body", channel.result) + def test_content_larger_than_content_length(self) -> None: + """ + HTTP requests with content size exceeding Content-Length should be rejected with 400. + """ + + def _callback( + request: SynapseRequest, **kwargs: object + ) -> tuple[int, JsonDict]: + return 200, {} + + res = JsonResource(self.homeserver) + res.register_paths( + "POST", [re.compile("^/_matrix/foo$")], _callback, "test_servlet" + ) + + channel = make_request( + self.reactor, + FakeSite(res, self.reactor), + b"POST", + b"/_matrix/foo", + {}, + # Set the `Content-Length` value to be smaller than the actual content size + custom_headers=[("Content-Length", "1")], + # The request should disconnect early so don't await the result + await_result=False, + ) + + self.reactor.advance(0.1) + self.assertEqual(channel.code, 400) + + def test_content_length_larger_than_content(self) -> None: + """ + HTTP requests with content size smaller than Content-Length should be rejected with 400. + """ + + def _callback( + request: SynapseRequest, **kwargs: object + ) -> tuple[int, JsonDict]: + return 200, {} + + res = JsonResource(self.homeserver) + res.register_paths( + "POST", [re.compile("^/_matrix/foo$")], _callback, "test_servlet" + ) + + channel = make_request( + self.reactor, + FakeSite(res, self.reactor), + b"POST", + b"/_matrix/foo", + {}, + # Set the `Content-Length` value to be larger than the actual content size + custom_headers=[("Content-Length", "10")], + # The request should disconnect early so don't await the result + await_result=False, + ) + + self.reactor.advance(0.1) + self.assertEqual(channel.code, 400) + class OptionsResourceTests(unittest.TestCase): def setUp(self) -> None: