-
Notifications
You must be signed in to change notification settings - Fork 6.7k
fix: close response body on error paths to prevent connection leak #25824
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Conversation
🔴 Preview Environment stopped on BunnyshellSee: Environment Details | Pipeline Logs Available commands (reply to this comment):
|
When executeRequest returns early due to errors (non-200 HTTP status or non-OK gRPC status), the response body was not closed. This prevented the underlying TCP connection from being returned to the connection pool, causing connection leaks and potential OOM under high load with failed requests. This fix ensures resp.Body is properly closed in all error return paths, following Go's net/http best practices. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <[email protected]> Signed-off-by: chentiewen <[email protected]>
da5ef6a to
6697840
Compare
|
Hey @ctwww this looks reasonable to me. Is there any chance we can get some tests for this somehow? |
|
argo-cd/pkg/apiclient/grpcproxy.go Lines 148 to 153 in 2f9bea6
|
yes, function executeRequest() return err and this goroutine won't execute |
@blakepettersson Execute the scrip and execute Let me know if you need any adjustments or additional test cases! |
ppapapetrou76
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch @ctwww
|
@ctwww that script looks good, could that not be converted to an e2e test doing what you're doing in that script? |
|
LGTM would also like to see some testing where possible |
olivergondza
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice detective work chasing this down. LGTM.
|
How about adding this as a test? # grpcproxy_test.go
package apiclient
import (
"context"
"io"
"net/http"
"net/http/httptest"
"strconv"
"sync"
"sync/atomic"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/metadata"
)
func TestExecuteRequest_ClosesBodyOnHTTPError(t *testing.T) {
bodyClosed := &atomic.Bool{}
// Create a test server that returns HTTP 500 error
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusInternalServerError)
w.Write([]byte("internal server error"))
}))
defer server.Close()
// Create client with custom httpClient that tracks body closure
originalTransport := http.DefaultTransport
customTransport := &testTransport{
base: originalTransport,
bodyClosed: bodyClosed,
}
c := &client{
ServerAddr: server.URL[7:], // Remove "http://"
PlainText: true,
httpClient: &http.Client{
Transport: customTransport,
},
GRPCWebRootPath: "",
}
// Execute request that should fail with HTTP 500
ctx := context.Background()
md := metadata.New(map[string]string{})
_, err := c.executeRequest(ctx, "/test.Service/Method", []byte("test"), md)
// Verify error was returned
require.Error(t, err)
assert.Contains(t, err.Error(), "failed with status code 500")
// Give a small delay to ensure Close() was called
time.Sleep(10 * time.Millisecond)
// Verify body was closed to prevent connection leak
assert.True(t, bodyClosed.Load(), "response body should be closed on HTTP error to prevent connection leak")
}
func TestExecuteRequest_ClosesBodyOnGRPCError(t *testing.T) {
bodyClosed := &atomic.Bool{}
// Create a test server that returns HTTP 200 but with gRPC error status
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Grpc-Status", "3") // codes.InvalidArgument
w.Header().Set("Grpc-Message", "invalid argument")
w.WriteHeader(http.StatusOK)
w.Write([]byte("grpc error"))
}))
defer server.Close()
// Create client with custom httpClient that tracks body closure
originalTransport := http.DefaultTransport
customTransport := &testTransport{
base: originalTransport,
bodyClosed: bodyClosed,
}
c := &client{
ServerAddr: server.URL[7:], // Remove "http://"
PlainText: true,
httpClient: &http.Client{
Transport: customTransport,
},
GRPCWebRootPath: "",
}
// Execute request that should fail with gRPC error
ctx := context.Background()
md := metadata.New(map[string]string{})
_, err := c.executeRequest(ctx, "/test.Service/Method", []byte("test"), md)
// Verify gRPC error was returned
require.Error(t, err)
assert.Contains(t, err.Error(), "invalid argument")
// Give a small delay to ensure Close() was called
time.Sleep(10 * time.Millisecond)
// Verify body was closed to prevent connection leak
assert.True(t, bodyClosed.Load(), "response body should be closed on gRPC error to prevent connection leak")
}
func TestExecuteRequest_ConcurrentErrorRequests_NoConnectionLeak(t *testing.T) {
// This test simulates the scenario from the test script:
// Multiple concurrent requests that fail should all close their response bodies
var totalRequests atomic.Int32
var closedBodies atomic.Int32
// Create a test server that always returns errors
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
totalRequests.Add(1)
// Alternate between HTTP errors and gRPC errors
if totalRequests.Load()%2 == 0 {
w.WriteHeader(http.StatusBadRequest)
} else {
w.Header().Set("Grpc-Status", strconv.Itoa(int(codes.PermissionDenied)))
w.Header().Set("Grpc-Message", "permission denied")
w.WriteHeader(http.StatusOK)
}
w.Write([]byte("error response"))
}))
defer server.Close()
// Create client with custom transport that tracks closures
customTransport := &testTransport{
base: http.DefaultTransport,
bodyClosed: &atomic.Bool{},
onClose: func() {
closedBodies.Add(1)
},
}
c := &client{
ServerAddr: server.URL[7:],
PlainText: true,
httpClient: &http.Client{
Transport: customTransport,
},
GRPCWebRootPath: "",
}
// Simulate concurrent requests like in the test script
concurrency := 10
iterations := 5
var wg sync.WaitGroup
for iter := 0; iter < iterations; iter++ {
for i := 0; i < concurrency; i++ {
wg.Add(1)
go func() {
defer wg.Done()
ctx := context.Background()
md := metadata.New(map[string]string{})
_, err := c.executeRequest(ctx, "/application.ApplicationService/ManagedResources", []byte("test"), md)
// We expect errors
assert.Error(t, err)
}()
}
wg.Wait()
}
// Give time for all Close() calls to complete
time.Sleep(100 * time.Millisecond)
// Verify all response bodies were closed
expectedTotal := int32(concurrency * iterations)
assert.Equal(t, expectedTotal, totalRequests.Load(), "all requests should have been made")
assert.Equal(t, expectedTotal, closedBodies.Load(), "all response bodies should be closed to prevent connection leaks")
}
func TestExecuteRequest_SuccessDoesNotCloseBodyPrematurely(t *testing.T) {
// Verify that successful requests do NOT close the body in executeRequest
// (caller is responsible for closing in success case)
bodyClosed := &atomic.Bool{}
// Create a test server that returns success
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Grpc-Status", "0") // codes.OK
w.WriteHeader(http.StatusOK)
w.Write(toFrame([]byte("success")))
}))
defer server.Close()
customTransport := &testTransport{
base: http.DefaultTransport,
bodyClosed: bodyClosed,
}
c := &client{
ServerAddr: server.URL[7:],
PlainText: true,
httpClient: &http.Client{
Transport: customTransport,
},
GRPCWebRootPath: "",
}
// Execute successful request
ctx := context.Background()
md := metadata.New(map[string]string{})
resp, err := c.executeRequest(ctx, "/test.Service/Method", []byte("test"), md)
// Verify success
require.NoError(t, err)
require.NotNil(t, resp)
defer resp.Body.Close()
// Verify body was NOT closed by executeRequest (caller's responsibility)
time.Sleep(10 * time.Millisecond)
assert.False(t, bodyClosed.Load(), "response body should NOT be closed by executeRequest on success - caller is responsible")
}
// testTransport wraps http.RoundTripper to track body closures
type testTransport struct {
base http.RoundTripper
bodyClosed *atomic.Bool
onClose func() // Optional callback for each close
}
func (t *testTransport) RoundTrip(req *http.Request) (*http.Response, error) {
resp, err := t.base.RoundTrip(req)
if err != nil {
return nil, err
}
// Wrap the response body to track Close() calls
resp.Body = &closeTracker{
ReadCloser: resp.Body,
closed: t.bodyClosed,
onClose: t.onClose,
}
return resp, nil
}
type closeTracker struct {
io.ReadCloser
closed *atomic.Bool
onClose func()
}
func (c *closeTracker) Close() error {
c.closed.Store(true)
if c.onClose != nil {
c.onClose()
}
return c.ReadCloser.Close()
} |
Description
Fixes a resource leak in
pkg/apiclient/grpcproxy.gowhere TCP connectionswere not being released when gRPC requests failed.
Changes
resp.Bodywhen HTTP status code is not OKresp.Bodywhen gRPC status code is not OKProblem
When
executeRequestreturned early due to errors (non-200 HTTP status ornon-OK gRPC status), the response body was not closed. This prevented the
underlying TCP connection from being returned to the connection pool,
causing connection leaks and potential OOM under high load with failed
requests.
Solution
Add
utilio.Close(resp.Body)in both error return paths, following thesame pattern used in the success path. This ensures proper resource cleanup
regardless of the request outcome.
Testing
Related Issue
Fixes #25823
Checklist: