Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions packages/backend/app/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from .config import Settings
from .extensions import db, jwt
from .routes import register_routes
from .compression import init_compression
from .observability import (
Observability,
configure_logging,
Expand Down Expand Up @@ -52,6 +53,9 @@ def create_app(settings: Settings | None = None) -> Flask:
# Blueprint routes
register_routes(app)

# Response compression (gzip)
init_compression(app)

# Backward-compatible schema patch for existing databases.
with app.app_context():
_ensure_schema_compatibility(app)
Expand Down
99 changes: 99 additions & 0 deletions packages/backend/app/compression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
"""
API response compression & payload optimization (Issue #129).

Provides gzip compression for JSON/text responses via Flask after_request hook.
Uses Python's built-in gzip — no extra dependencies required.

Features:
- Gzip when client sends Accept-Encoding: gzip
- Skips compression for small responses (< MIN_SIZE bytes)
- Skips already-encoded responses
- Skips non-compressible content types (images, binary)
- Tracks compression ratio in X-Compression-Ratio header (dev/debug)
"""

from __future__ import annotations

import gzip
import logging

from flask import Flask, Request, Response, current_app

logger = logging.getLogger("finmind.compression")

# Minimum payload size (bytes) before we bother compressing
MIN_COMPRESS_SIZE = 512

# Content types that benefit from compression
_COMPRESSIBLE = {
"application/json",
"text/html",
"text/plain",
"text/csv",
"application/javascript",
"text/javascript",
"application/xml",
"text/xml",
}


def init_compression(app: Flask) -> None:
    """Install gzip response compression on *app*.

    Registers an ``after_request`` callback that hands every outgoing
    response to ``_maybe_compress``, then logs the configured size threshold.
    """

    def compress_response(response: Response) -> Response:
        # Thin wrapper so _maybe_compress is looked up on each call.
        return _maybe_compress(response)

    app.after_request(compress_response)

    logger.info(
        "Response compression enabled (min_size=%d bytes)",
        MIN_COMPRESS_SIZE,
    )


def _maybe_compress(response: Response) -> Response:
# Already compressed or explicitly opted out
if response.headers.get("Content-Encoding"):
return response

# Skip streaming responses: calling get_data() on a direct_passthrough /
# stream_with_context response would buffer the entire stream in memory,
# defeating the purpose of streaming and potentially exhausting RAM on large
# payloads. Streaming responses must be compressed at the WSGI/proxy layer
# (e.g. nginx gzip_proxied) instead.
if response.direct_passthrough:
return response

# Client must accept gzip
from flask import request as current_request
accept_encoding = current_request.headers.get("Accept-Encoding", "")
if "gzip" not in accept_encoding.lower():
return response

# Only compress compressible content types
content_type = response.content_type.split(";")[0].strip()
if content_type not in _COMPRESSIBLE:
return response

# Get response data (force evaluation of lazy responses)
data = response.get_data()

# Skip tiny payloads — compression overhead isn't worth it
if len(data) < MIN_COMPRESS_SIZE:
return response

compressed = gzip.compress(data, compresslevel=6)

# Only use compressed version if it's actually smaller
if len(compressed) >= len(data):
return response

ratio = round(1 - len(compressed) / len(data), 3)
response.set_data(compressed)
response.headers["Content-Encoding"] = "gzip"
response.headers["Content-Length"] = len(compressed)
response.headers["Vary"] = "Accept-Encoding"

# X-Compression-Ratio is a debug-only header: it reveals payload size
# information that could aid an attacker (e.g. BREACH-style attacks).
# Only emit it when the application is running in debug mode.
if current_app.debug:
response.headers["X-Compression-Ratio"] = str(ratio)

return response
206 changes: 206 additions & 0 deletions packages/backend/tests/test_compression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
"""
Tests for API response compression & payload optimization (Issue #129).

Covers:
- Responses are gzip-compressed when client sends Accept-Encoding: gzip
- Content-Encoding: gzip header is set
- Vary: Accept-Encoding header is set
- Compressed payload is actually smaller
- No compression when client does not send Accept-Encoding: gzip
- No compression for small payloads (< MIN_SIZE)
- No double-compression of already-encoded responses
- Decompressed content matches original JSON
- Streaming responses (direct_passthrough=True) are skipped
- X-Compression-Ratio header is only emitted in debug mode
"""

from __future__ import annotations

import gzip
import json

import pytest


# ─────────────────────────────────────────────────────────────────────────────
# Helpers
# ─────────────────────────────────────────────────────────────────────────────

# Request headers that advertise gzip support to the server.
_GZIP_HEADERS = {"Accept-Encoding": "gzip"}


def _register_login(client):
client.post("/auth/register", json={"email": "comp@test.com", "password": "pass1234"})
r = client.post("/auth/login", json={"email": "comp@test.com", "password": "pass1234"})
token = r.get_json()["access_token"]
return {"Authorization": f"Bearer {token}", "Accept-Encoding": "gzip"}


def _make_large_json_response(app, size_bytes: int = 2048):
    """Build a 200 ``application/json`` Flask ``Response`` padded with ``x``.

    The body is ``{"data": "x" * size_bytes}`` serialised as JSON. ``app`` is
    accepted but not used, and no database access takes place.
    """
    from flask import Response

    body = json.dumps({"data": "x" * size_bytes})
    return Response(body, content_type="application/json", status=200)


# ─────────────────────────────────────────────────────────────────────────────
# Compression behaviour
# ─────────────────────────────────────────────────────────────────────────────

class TestCompression:
    """Integration and unit checks for the gzip ``after_request`` hook."""

    @staticmethod
    def _add_json_route(app, rule, body):
        """Register ``rule`` on ``app`` so it returns ``body`` as JSON.

        The endpoint name is derived from ``rule`` so that every test
        registers a distinct endpoint. No database access is involved.
        """
        from flask import Response

        def _view():
            return Response(body, status=200, content_type="application/json")

        app.add_url_rule(rule, endpoint=rule.lstrip("/"), view_func=_view)

    def test_no_compression_without_accept_encoding(self, client, app_fixture):
        """Small /health endpoint — no gzip requested."""
        r = client.get("/health")
        assert r.status_code == 200
        assert r.headers.get("Content-Encoding") != "gzip"

    def test_health_endpoint_too_small_to_compress(self, client, app_fixture):
        """/health may only be gzipped if its body reached MIN_COMPRESS_SIZE."""
        from app.compression import MIN_COMPRESS_SIZE

        r = client.get("/health", headers=_GZIP_HEADERS)
        assert r.status_code == 200
        if r.headers.get("Content-Encoding") == "gzip":
            decompressed = gzip.decompress(r.data)
            # Bodies below the threshold must never be compressed.
            assert len(decompressed) >= MIN_COMPRESS_SIZE
            assert json.loads(decompressed)["status"] == "ok"

    def test_large_json_is_compressed(self, client, app_fixture):
        """A large JSON response should be gzip-compressed when gzip is accepted.

        Uses a route registered on the test app instead of seeding DB rows,
        making the test fast and DB-independent.
        """
        large_body = json.dumps(
            {"items": ["item_" + str(i) * 20 for i in range(200)]}
        )
        self._add_json_route(app_fixture, "/_test_large", large_body)

        r = client.get("/_test_large", headers=_GZIP_HEADERS)
        assert r.status_code == 200
        assert r.headers.get("Content-Encoding") == "gzip"
        assert "Accept-Encoding" in r.headers.get("Vary", "")
        # The payload on the wire must be smaller than the original body.
        assert len(r.data) < len(large_body.encode())

        data = json.loads(gzip.decompress(r.data))
        assert "items" in data
        assert len(data["items"]) == 200

    def test_no_compression_without_accept_encoding_on_list(self, client, app_fixture):
        """Client without Accept-Encoding: gzip must receive uncompressed response."""
        headers = _register_login(client)
        plain_headers = {k: v for k, v in headers.items() if k != "Accept-Encoding"}

        r = client.get("/expenses", headers=plain_headers)
        assert r.status_code == 200
        assert r.headers.get("Content-Encoding") != "gzip"
        data = r.get_json()
        assert isinstance(data, list)

    def test_vary_header_set_on_compressed_response(self, client, app_fixture):
        """Vary: Accept-Encoding must be present when compression is applied."""
        large_body = json.dumps({"pad": "y" * 2000})
        self._add_json_route(app_fixture, "/_test_vary", large_body)

        r = client.get("/_test_vary", headers=_GZIP_HEADERS)
        # Assert compression happened so the Vary check cannot pass vacuously.
        assert r.headers.get("Content-Encoding") == "gzip"
        assert "Accept-Encoding" in r.headers.get("Vary", "")

    def test_already_encoded_response_not_double_compressed(self, client, app_fixture):
        """A response that already has Content-Encoding must not be re-compressed."""
        from app.compression import _maybe_compress
        from unittest.mock import MagicMock

        mock_response = MagicMock()
        mock_response.headers = {"Content-Encoding": "gzip"}
        mock_response.content_type = "application/json"
        mock_response.direct_passthrough = False

        result = _maybe_compress(mock_response)
        assert result is mock_response
        mock_response.get_data.assert_not_called()

    # ── Fix 1: streaming responses must be skipped ────────────────────────────

    def test_streaming_response_skipped(self, client, app_fixture):
        """Responses with direct_passthrough=True must never have get_data() called.

        Calling get_data() on a streaming response buffers the whole stream in
        memory; the compression middleware must detect and skip such responses.
        The passthrough check runs before the request is consulted, so neither
        a request context nor a patched request object is needed here.
        """
        from app.compression import _maybe_compress
        from unittest.mock import MagicMock

        mock_response = MagicMock()
        mock_response.headers = {}  # no Content-Encoding yet
        mock_response.direct_passthrough = True
        mock_response.content_type = "application/json"

        result = _maybe_compress(mock_response)

        # Must return unchanged and must NOT have called get_data()
        assert result is mock_response
        mock_response.get_data.assert_not_called()

    # ── Fix 2: X-Compression-Ratio only in debug mode ─────────────────────────

    def test_x_compression_ratio_absent_in_production(self, client, app_fixture):
        """X-Compression-Ratio must NOT be sent when app.debug is False."""
        large_body = json.dumps({"pad": "z" * 2000})
        self._add_json_route(app_fixture, "/_test_ratio_prod", large_body)

        # Ensure debug is off, restoring the previous value afterwards.
        original_debug = app_fixture.debug
        app_fixture.debug = False
        try:
            r = client.get("/_test_ratio_prod", headers=_GZIP_HEADERS)
            assert r.headers.get("Content-Encoding") == "gzip"
            assert "X-Compression-Ratio" not in r.headers
        finally:
            app_fixture.debug = original_debug

    def test_x_compression_ratio_present_in_debug(self, client, app_fixture):
        """X-Compression-Ratio MUST be present when app.debug is True."""
        large_body = json.dumps({"pad": "w" * 2000})
        self._add_json_route(app_fixture, "/_test_ratio_debug", large_body)

        original_debug = app_fixture.debug
        app_fixture.debug = True
        try:
            r = client.get("/_test_ratio_debug", headers=_GZIP_HEADERS)
            assert r.headers.get("Content-Encoding") == "gzip"
            ratio = r.headers.get("X-Compression-Ratio")
            assert ratio is not None
            assert 0 < float(ratio) < 1
        finally:
            app_fixture.debug = original_debug