From 1647c86d361901c4fa32b628f7a0393fc7d386d6 Mon Sep 17 00:00:00 2001 From: Sigma711 <1979934715@qq.com> Date: Wed, 7 Jan 2026 04:22:27 +0800 Subject: [PATCH 1/9] [src] Fix io_uring stability and spin locks --- CMakeLists.txt | 1 + example/chat_room/chat_server.cc | 12 +++- example/http_server/http_server.cc | 11 ++- example/pingpong/pingpong_client.cc | 13 ++-- example/pingpong/pingpong_client.h | 8 +-- .../rpc_demo/time_service/sync_client_main.cc | 19 ++--- example/simple_discard/discard.cc | 11 ++- example/simple_echo/echo.cc | 11 ++- example/simple_time/time.cc | 11 ++- src/balancer.cc | 9 +++ src/client.cc | 3 +- src/client.h | 4 +- src/connecting.cc | 69 +++++++++++++++++-- src/connecting.h | 11 ++- src/connector.cc | 19 ++++- src/connector.h | 4 +- src/event_manager.cc | 55 +++++++++++++-- src/event_manager.h | 5 +- src/io_buffer.cc | 64 ++++++++++++++--- src/logger.cc | 22 +++--- src/logger.h | 7 +- src/poller.cc | 45 ++++++++---- src/poller.h | 6 +- src/reactor_manager.cc | 63 ++++++++++++++--- src/reactor_manager.h | 9 ++- src/rpc_codec.cc | 22 ++++-- src/spin_lock.h | 6 ++ src/thread_pool.cc | 6 +- src/thread_pool.h | 9 +-- 29 files changed, 417 insertions(+), 118 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c25d1ef0..e7f6c018 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,6 +49,7 @@ IF(TAOTU_ENABLE_CLANG_FORMAT) "${CMAKE_SOURCE_DIR}/example/*.c" "${CMAKE_SOURCE_DIR}/example/*.cc" "${CMAKE_SOURCE_DIR}/example/*.cpp") + LIST(FILTER TAOTU_CLANG_FORMAT_FILES EXCLUDE REGEX ".*\\.pb\\.(cc|h)$") ADD_CUSTOM_TARGET( clang-format COMMAND ${CLANG_FORMAT_EXE} --dry-run --Werror -style=file diff --git a/example/chat_room/chat_server.cc b/example/chat_room/chat_server.cc index 6b17dc10..50bc536b 100644 --- a/example/chat_room/chat_server.cc +++ b/example/chat_room/chat_server.cc @@ -14,13 +14,19 @@ ChatServer::ChatServer(const taotu::NetAddress& listen_address, bool should_reuse_port, size_t io_thread_amount) - : event_managers_(io_thread_amount, new taotu::EventManager), - server_(std::make_unique(&event_managers_, listen_address, - should_reuse_port)), + : event_managers_(), + server_(nullptr), codec_([this](taotu::Connecting& connection, const std::string& message, taotu::TimePoint time_point) { this->OnCodecMessage(connection, message, time_point); }) { + size_t thread_count = io_thread_amount > 0 ? io_thread_amount : 1; + event_managers_.reserve(thread_count); + for (size_t i = 0; i < thread_count; ++i) { + event_managers_.push_back(new taotu::EventManager); + } + server_ = std::make_unique(&event_managers_, listen_address, + should_reuse_port); server_->SetConnectionCallback([this](taotu::Connecting& connection) { this->OnConnectionCallback(connection); }); diff --git a/example/http_server/http_server.cc b/example/http_server/http_server.cc index 8c04f54c..d772ee8c 100644 --- a/example/http_server/http_server.cc +++ b/example/http_server/http_server.cc @@ -17,9 +17,14 @@ HttpServer::HttpServer(const taotu::NetAddress& listen_address, bool should_reuse_port, size_t io_thread_amount) - : event_managers_(io_thread_amount, new taotu::EventManager), - server_(std::make_unique(&event_managers_, listen_address, - should_reuse_port)) { + : event_managers_(), server_(nullptr) { + size_t thread_count = io_thread_amount > 0 ? io_thread_amount : 1; + event_managers_.reserve(thread_count); + for (size_t i = 0; i < thread_count; ++i) { + event_managers_.push_back(new taotu::EventManager); + } + server_ = std::make_unique(&event_managers_, listen_address, + should_reuse_port); server_->SetConnectionCallback([this](taotu::Connecting& connection) { this->OnConnectionCallback(connection); }); diff --git a/example/pingpong/pingpong_client.cc b/example/pingpong/pingpong_client.cc index 71cefb33..32c1413e 100644 --- a/example/pingpong/pingpong_client.cc +++ b/example/pingpong/pingpong_client.cc @@ -95,12 +95,10 @@ void PingpongClient::DoWithTimeout() { taotu::LOG_INFO("All stopped!"); // Print stats now, before attempting to disconnect (which may stall) ReportStatsOnce(); - // Stop sessions (best effort) for (auto& session : sessions_) { session->Stop(); } - // Force quit immediately - don't wait for disconnect callbacks RequestQuit(); } @@ -143,9 +141,7 @@ Session::Session(taotu::EventManager* event_manager, const taotu::NetAddress& server_address, const std::shared_ptr& master_client) : client_(event_manager, server_address, true), - master_client_(master_client), - bytes_read_(0), - messages_read_(0) { + master_client_(master_client) { client_.SetConnectionCallback([this](taotu::Connecting& connection) { this->OnConnectionCallback(connection); }); @@ -158,7 +154,7 @@ Session::Session(taotu::EventManager* event_manager, void Session::Start() { client_.Connect(); } -void Session::Stop() { client_.Stop(); } +void Session::Stop() { client_.StopWithoutQuit(); } void Session::OnConnectionCallback(taotu::Connecting& connection) { if (connection.IsConnected()) { @@ -179,7 +175,8 @@ void Session::OnConnectionCallback(taotu::Connecting& connection) { void Session::OnMessageCallback(taotu::Connecting& connection, taotu::IoBuffer* io_buffer, taotu::TimePoint) { - ++messages_read_; - bytes_read_ += static_cast(io_buffer->GetReadableBytes()); + messages_read_.fetch_add(1, std::memory_order_relaxed); + bytes_read_.fetch_add(static_cast(io_buffer->GetReadableBytes()), + std::memory_order_relaxed); connection.Send(io_buffer); } diff --git a/example/pingpong/pingpong_client.h b/example/pingpong/pingpong_client.h index d9e240dc..36792749 100644 --- a/example/pingpong/pingpong_client.h +++ b/example/pingpong/pingpong_client.h @@ -71,8 +71,8 @@ class Session : taotu::NonCopyableMovable { // Stop the session void Stop(); - int64_t GetBytesRead() const { return bytes_read_; } - int64_t GetMessagesRead() const { return messages_read_; } + int64_t GetBytesRead() const { return bytes_read_.load(); } + int64_t GetMessagesRead() const { return messages_read_.load(); } private: // Called after the connection creating and before the connection destroying @@ -84,8 +84,8 @@ class Session : taotu::NonCopyableMovable { taotu::Client client_; std::weak_ptr master_client_; - int64_t bytes_read_; - int64_t messages_read_; + std::atomic bytes_read_{0}; + std::atomic messages_read_{0}; }; #endif // !TAOTU_EXAMPLE_PINGPONG_PINGPONG_CLIENT_H_ diff --git a/example/rpc_demo/time_service/sync_client_main.cc b/example/rpc_demo/time_service/sync_client_main.cc index 75002540..21a63f5a 100644 --- a/example/rpc_demo/time_service/sync_client_main.cc +++ b/example/rpc_demo/time_service/sync_client_main.cc @@ -19,14 +19,17 @@ int main() { taotu::START_LOG("time_service_sync_client_log.txt"); - taotu::RpcSyncChannel rpc_sync_channel(taotu::NetAddress{"127.0.0.1", 4567}); - timeservice::TimeService::Stub stub(&rpc_sync_channel); - timeservice::TimeRequest request; - request.set_client_id("1234"); - timeservice::TimeResponse response; - stub.GetTime(nullptr, &request, &response, nullptr); - ::printf("TimeService RPC Server time: %s\n", - response.current_time().c_str()); + { + taotu::RpcSyncChannel rpc_sync_channel( + taotu::NetAddress{"127.0.0.1", 4567}); + timeservice::TimeService::Stub stub(&rpc_sync_channel); + timeservice::TimeRequest request; + request.set_client_id("1234"); + timeservice::TimeResponse response; + stub.GetTime(nullptr, &request, &response, nullptr); + ::printf("TimeService RPC Server time: %s\n", + response.current_time().c_str()); + } taotu::END_LOG(); return 0; } diff --git a/example/simple_discard/discard.cc b/example/simple_discard/discard.cc index dab9bc02..554a15cd 100644 --- a/example/simple_discard/discard.cc +++ b/example/simple_discard/discard.cc @@ -17,9 +17,14 @@ DiscardServer::DiscardServer(const taotu::NetAddress& listen_address, bool should_reuse_port, size_t io_thread_amount) - : event_managers_(io_thread_amount, new taotu::EventManager), - server_(std::make_unique(&event_managers_, listen_address, - should_reuse_port)) { + : event_managers_(), server_(nullptr) { + size_t thread_count = io_thread_amount > 0 ? io_thread_amount : 1; + event_managers_.reserve(thread_count); + for (size_t i = 0; i < thread_count; ++i) { + event_managers_.push_back(new taotu::EventManager); + } + server_ = std::make_unique(&event_managers_, listen_address, + should_reuse_port); server_->SetMessageCallback([this](taotu::Connecting& connection, taotu::IoBuffer* io_buffer, taotu::TimePoint time_point) { diff --git a/example/simple_echo/echo.cc b/example/simple_echo/echo.cc index 7b01fe81..1ebb37a4 100644 --- a/example/simple_echo/echo.cc +++ b/example/simple_echo/echo.cc @@ -15,9 +15,14 @@ EchoServer::EchoServer(const taotu::NetAddress& listen_address, bool should_reuse_port, size_t io_thread_amount) - : event_managers_(io_thread_amount, new taotu::EventManager), - server_(std::make_unique(&event_managers_, listen_address, - should_reuse_port)) { + : event_managers_(), server_(nullptr) { + size_t thread_count = io_thread_amount > 0 ? io_thread_amount : 1; + event_managers_.reserve(thread_count); + for (size_t i = 0; i < thread_count; ++i) { + event_managers_.push_back(new taotu::EventManager); + } + server_ = std::make_unique(&event_managers_, listen_address, + should_reuse_port); server_->SetMessageCallback([this](taotu::Connecting& connection, taotu::IoBuffer* io_buffer, taotu::TimePoint time_point) { diff --git a/example/simple_time/time.cc b/example/simple_time/time.cc index 320bb29c..78e70e8a 100644 --- a/example/simple_time/time.cc +++ b/example/simple_time/time.cc @@ -16,9 +16,14 @@ TimeServer::TimeServer(const taotu::NetAddress& listen_address, bool should_reuse_port, size_t io_thread_amount) - : event_managers_(io_thread_amount, new taotu::EventManager), - server_(std::make_unique(&event_managers_, listen_address, - should_reuse_port)) { + : event_managers_(), server_(nullptr) { + size_t thread_count = io_thread_amount > 0 ? io_thread_amount : 1; + event_managers_.reserve(thread_count); + for (size_t i = 0; i < thread_count; ++i) { + event_managers_.push_back(new taotu::EventManager); + } + server_ = std::make_unique(&event_managers_, listen_address, + should_reuse_port); server_->SetMessageCallback([this](taotu::Connecting& connection, taotu::IoBuffer* io_buffer, taotu::TimePoint time_point) { diff --git a/src/balancer.cc b/src/balancer.cc index b3e706d7..a463b019 100644 --- a/src/balancer.cc +++ b/src/balancer.cc @@ -12,6 +12,7 @@ #include "balancer.h" #include "event_manager.h" +#include "logger.h" #include "reactor_manager.h" namespace taotu { @@ -21,7 +22,15 @@ Balancer::Balancer(ServerReactorManager::EventManagers* event_managers, : event_managers_(event_managers), strategy_(strategy), cursor_(0) {} EventManager* Balancer::PickOneEventManager() { + if (!event_managers_ || event_managers_->empty()) { + LOG_ERROR("No EventManager available in balancer"); + return nullptr; + } auto evt_mng_num = event_managers_->size(); + if (evt_mng_num <= 1) { + cursor_ = 0; + return (*event_managers_)[0]; + } switch (strategy_) { // "Round Robin" case BalancerStrategy::kRoundRobin: diff --git a/src/client.cc b/src/client.cc index 3780566c..e6bc6834 100644 --- a/src/client.cc +++ b/src/client.cc @@ -15,7 +15,7 @@ namespace taotu { Client::Client(EventManager* event_manager, const NetAddress& server_address, bool should_retry_) - : reactor_manager_(std::make_unique(event_manager, + : reactor_manager_(std::make_shared(event_manager, server_address)) { reactor_manager_->SetRetryOn(should_retry_); } @@ -35,5 +35,6 @@ void Client::SetWriteCompleteCallback( void Client::Connect() { reactor_manager_->Connect(); } void Client::Disconnect() { reactor_manager_->Disconnect(); } void Client::Stop() { reactor_manager_->Stop(); } +void Client::StopWithoutQuit() { reactor_manager_->StopWithoutQuit(); } } // namespace taotu diff --git a/src/client.h b/src/client.h index 895d49f1..3f4b4937 100644 --- a/src/client.h +++ b/src/client.h @@ -45,9 +45,11 @@ class Client : NonCopyableMovable { // Stop the TCP connection (if because of acceptable exceptions in // hardware-level, just retry) void Stop(); + // Stop the TCP connection without stopping the shared event loop. + void StopWithoutQuit(); private: - typedef std::unique_ptr ClientReactorManagerPtr; + typedef std::shared_ptr ClientReactorManagerPtr; // Reactor manager (the "engine") ClientReactorManagerPtr reactor_manager_; diff --git a/src/connecting.cc b/src/connecting.cc index 48388b5d..fc5818d0 100644 --- a/src/connecting.cc +++ b/src/connecting.cc @@ -75,6 +75,11 @@ void Connecting::DoReading(TimePoint receive_time) { void Connecting::OnReadComplete(struct io_uring_cqe* cqe, Poller::IoUringOp* op) { auto* ctx = static_cast(op->context); + if (!ctx || ctx->self == nullptr) { + delete ctx; + op->context = nullptr; + return; + } auto* connecting = ctx->self; ssize_t res = cqe->res; int err = res < 0 ? -res : 0; @@ -87,6 +92,10 @@ void Connecting::OnReadComplete(struct io_uring_cqe* cqe, } if (ctx->multishot && !more && !has_buffer) { connecting->read_in_flight_ = false; + connecting->CompletePendingIo(); + if (connecting->read_ctx_ == ctx) { + connecting->read_ctx_ = nullptr; + } delete ctx; op->context = nullptr; return; @@ -146,6 +155,10 @@ void Connecting::OnReadComplete(struct io_uring_cqe* cqe, } } if (!more) { + connecting->CompletePendingIo(); + if (connecting->read_ctx_ == ctx) { + connecting->read_ctx_ = nullptr; + } delete ctx; op->context = nullptr; } @@ -154,6 +167,11 @@ void Connecting::OnReadComplete(struct io_uring_cqe* cqe, void Connecting::OnWriteComplete(struct io_uring_cqe* cqe, Poller::IoUringOp* op) { auto* ctx = static_cast(op->context); + if (!ctx || ctx->self == nullptr) { + delete ctx; + op->context = nullptr; + return; + } auto* connecting = ctx->self; connecting->write_in_flight_ = false; ssize_t res = cqe->res; @@ -168,6 +186,10 @@ void Connecting::OnWriteComplete(struct io_uring_cqe* cqe, if (connecting->pending_output_buffer_.GetReadableBytes() > 0) { connecting->output_buffer_.Swap(connecting->pending_output_buffer_); connecting->SubmitWriteOnce(); + connecting->CompletePendingIo(); + if (connecting->write_ctx_ == ctx) { + connecting->write_ctx_ = nullptr; + } delete ctx; return; } @@ -190,6 +212,10 @@ void Connecting::OnWriteComplete(struct io_uring_cqe* cqe, connecting->DoWithError(err); } } + connecting->CompletePendingIo(); + if (connecting->write_ctx_ == ctx) { + connecting->write_ctx_ = nullptr; + } delete ctx; op->context = nullptr; } @@ -200,6 +226,7 @@ void Connecting::SubmitReadOnce() { } auto* ctx = new ReadContext(); ctx->self = this; + read_ctx_ = ctx; ctx->writable = input_buffer_.GetWritableBytes(); ctx->iov[0].iov_base = const_cast(input_buffer_.GetWritablePosition()); ctx->iov[0].iov_len = ctx->writable; @@ -220,7 +247,13 @@ void Connecting::SubmitReadOnce() { ctx->multishot = true; uint64_t key = event_manager_->GetPoller()->SubmitReadMultishot( &eventer_, Poller::kBufferGroupId, &Connecting::OnReadComplete, ctx, 0, - [](void* ptr) { delete static_cast(ptr); }); + [](void* ptr) { + auto* ctx = static_cast(ptr); + if (ctx && ctx->self) { + ctx->self->CompletePendingIo(); + } + delete ctx; + }); if (key == 0) { read_in_flight_ = false; read_cancel_key_ = 0; @@ -229,13 +262,20 @@ void Connecting::SubmitReadOnce() { } ctx->key = key; read_cancel_key_ = key; + BumpPendingIo(); return; } #endif ctx->multishot = false; uint64_t key = event_manager_->GetPoller()->SubmitRead( &eventer_, ctx->iov.data(), iovcnt, &Connecting::OnReadComplete, ctx, 0, - [](void* ptr) { delete static_cast(ptr); }); + [](void* ptr) { + auto* ctx = static_cast(ptr); + if (ctx && ctx->self) { + ctx->self->CompletePendingIo(); + } + delete ctx; + }); if (key == 0) { read_in_flight_ = false; read_cancel_key_ = 0; @@ -244,6 +284,7 @@ void Connecting::SubmitReadOnce() { } ctx->key = key; read_cancel_key_ = key; + BumpPendingIo(); } void Connecting::DoWriting() { if (!write_in_flight_ && output_buffer_.GetReadableBytes() > 0) { @@ -256,6 +297,7 @@ void Connecting::SubmitWriteOnce() { } auto* ctx = new WriteContext(); ctx->self = this; + write_ctx_ = ctx; ctx->to_send = output_buffer_.GetReadableBytes(); ctx->iov.iov_base = const_cast(output_buffer_.GetReadablePosition()); ctx->iov.iov_len = ctx->to_send; @@ -264,7 +306,13 @@ void Connecting::SubmitWriteOnce() { write_in_flight_ = true; uint64_t key = event_manager_->GetPoller()->SubmitWrite( &eventer_, &ctx->iov, 1, &Connecting::OnWriteComplete, ctx, 0, - [](void* ptr) { delete static_cast(ptr); }); + [](void* ptr) { + auto* ctx = static_cast(ptr); + if (ctx && ctx->self) { + ctx->self->CompletePendingIo(); + } + delete ctx; + }); if (key == 0) { write_in_flight_ = false; write_cancel_key_ = 0; @@ -273,6 +321,7 @@ void Connecting::SubmitWriteOnce() { } ctx->key = key; write_cancel_key_ = key; + BumpPendingIo(); } void Connecting::DoClosing() { if (state_.load() != ConnectionState::kDisconnected) { @@ -390,16 +439,26 @@ void Connecting::ForceCloseAfter(int64_t delay_microseconds) { void Connecting::CancelPendingIo() { if (read_in_flight_) { if (read_cancel_key_ != 0) { - event_manager_->GetPoller()->CancelOp(read_cancel_key_); + (void)event_manager_->GetPoller()->CancelOp(read_cancel_key_); read_cancel_key_ = 0; } + if (read_ctx_) { + read_ctx_->self = nullptr; + read_ctx_ = nullptr; + } + CompletePendingIo(); read_in_flight_ = false; } if (write_in_flight_) { if (write_cancel_key_ != 0) { - event_manager_->GetPoller()->CancelOp(write_cancel_key_); + (void)event_manager_->GetPoller()->CancelOp(write_cancel_key_); write_cancel_key_ = 0; } + if (write_ctx_) { + write_ctx_->self = nullptr; + write_ctx_ = nullptr; + } + CompletePendingIo(); write_in_flight_ = false; } } diff --git a/src/connecting.h b/src/connecting.h index b20f98b2..e4ae2622 100644 --- a/src/connecting.h +++ b/src/connecting.h @@ -126,7 +126,9 @@ class Connecting : NonCopyableMovable { bool IsDisconnected() const { return ConnectionState::kDisconnected == state_.load(); } - bool HasPendingIo() const { return read_in_flight_ || write_in_flight_; } + bool HasPendingIo() const { + return pending_io_.load(std::memory_order_relaxed) > 0; + } int GetPendingIoWaitMs() const { return pending_io_wait_ms_; } int GetPendingIoRetries() const { return pending_io_retries_; } void BumpPendingIoWait(int delta_ms = 1) { @@ -164,6 +166,10 @@ class Connecting : NonCopyableMovable { static void OnReadComplete(struct io_uring_cqe* cqe, Poller::IoUringOp* op); static void OnWriteComplete(struct io_uring_cqe* cqe, Poller::IoUringOp* op); void CancelPendingIo(); + void BumpPendingIo() { pending_io_.fetch_add(1, std::memory_order_relaxed); } + void CompletePendingIo() { + pending_io_.fetch_sub(1, std::memory_order_relaxed); + } enum class ConnectionState { kDisconnected, @@ -227,8 +233,11 @@ class Connecting : NonCopyableMovable { // Connection state (atomic) std::atomic state_; + std::atomic pending_io_{0}; bool read_in_flight_{false}; bool write_in_flight_{false}; + ReadContext* read_ctx_{nullptr}; + WriteContext* write_ctx_{nullptr}; uint64_t next_io_key_{1}; uint64_t read_cancel_key_{0}; uint64_t write_cancel_key_{0}; diff --git a/src/connector.cc b/src/connector.cc index ef5aba64..4012e966 100644 --- a/src/connector.cc +++ b/src/connector.cc @@ -93,7 +93,14 @@ Connector::Connector(EventManager* event_manager, void Connector::Start() { can_connect_ = true; - event_manager_->RunSoon([this]() { this->Connect(); }); + std::weak_ptr weak_self = shared_from_this(); + event_manager_->RunSoon([weak_self]() { + if (auto self = weak_self.lock()) { + if (self->can_connect_) { + self->Connect(); + } + } + }); } void Connector::Restart() { SetState(ConnectState::kDisconnected); @@ -183,8 +190,14 @@ void Connector::DoRetrying(int conn_fd) { SetState(ConnectState::kDisconnected); if (can_connect_) { LOG_DEBUG("Connector fd(%d) is retrying to connect.", conn_fd); - event_manager_->RunAfter(retry_delay_microseconds_, - [this]() { this->Start(); }); + std::weak_ptr weak_self = shared_from_this(); + event_manager_->RunAfter(retry_delay_microseconds_, [weak_self]() { + if (auto self = weak_self.lock()) { + if (self->can_connect_) { + self->Start(); + } + } + }); retry_delay_microseconds_ = std::min(retry_delay_microseconds_ * 2, static_cast(kMaxRetryDelayMicroseconds)); diff --git a/src/connector.h b/src/connector.h index a20f9ce2..734a244c 100644 --- a/src/connector.h +++ b/src/connector.h @@ -13,6 +13,7 @@ #define TAOTU_SRC_CONNECTOR_H_ #include +#include #include "connecting.h" #include "event_manager.h" @@ -28,7 +29,8 @@ namespace taotu { * threads). * */ -class Connector : NonCopyableMovable { +class Connector : public std::enable_shared_from_this, + public NonCopyableMovable { public: typedef std::function NewConnectionCallback; diff --git a/src/event_manager.cc b/src/event_manager.cc index dad7623f..c78e266b 100644 --- a/src/event_manager.cc +++ b/src/event_manager.cc @@ -61,6 +61,7 @@ EventManager::EventManager() wake_up_eventer_.EnableReadEvents(); } EventManager::~EventManager() { + is_destroying_.store(true, std::memory_order_release); Quit(); if (thread_ && thread_->joinable()) { thread_->join(); @@ -91,6 +92,13 @@ Connecting* EventManager::InsertNewConnection(int socket_fd, Connecting* ref_conn = nullptr; { LockGuard lock_guard(connection_map_mutex_lock_); + auto existing = connection_map_.find(socket_fd); + if (existing != connection_map_.end()) { + LOG_WARN("Connection fd(%d) already tracked, drop new connection.", + socket_fd); + ::close(socket_fd); + return nullptr; + } if (CreateConnectionCallback_) { // CreateConnectionCallback_ returns raw pointer, wrap it in unique_ptr connection_map_[socket_fd].reset(CreateConnectionCallback_( @@ -113,6 +121,9 @@ Connecting* EventManager::InsertNewConnection(int socket_fd, void EventManager::RunAt(const TimePoint& time_point, Timer::TimeCallback TimeTask) { + if (is_destroying_.load(std::memory_order_acquire)) { + return; + } const TimePoint& tmp_time_point = time_point; timer_.AddTimeTask(time_point, std::move(TimeTask)); if (timer_.GetMinTimeDuration() >= @@ -123,6 +134,9 @@ void EventManager::RunAt(const TimePoint& time_point, } void EventManager::RunAfter(int64_t delay_microseconds, Timer::TimeCallback TimeTask) { + if (is_destroying_.load(std::memory_order_acquire)) { + return; + } TimePoint tmp_time_point{delay_microseconds}; timer_.AddTimeTask(tmp_time_point, std::move(TimeTask)); if (timer_.GetMinTimeDuration() >= @@ -135,6 +149,9 @@ void EventManager::RunEveryUntil(int64_t interval_microseconds, Timer::TimeCallback TimeTask, const TimePoint& start_time_point, std::function IsContinue) { + if (is_destroying_.load(std::memory_order_acquire)) { + return; + } TimePoint time_point{interval_microseconds, start_time_point, true}; TimePoint tmp_time_point = time_point; // Check if the function which decides whether to continue the cycle should be @@ -151,6 +168,9 @@ void EventManager::RunEveryUntil(int64_t interval_microseconds, } void EventManager::RunSoon(Timer::TimeCallback TimeTask) { + if (is_destroying_.load(std::memory_order_acquire)) { + return; + } timer_.AddTimeTask(TimePoint{}, std::move(TimeTask)); WakeUp(); } @@ -262,30 +282,51 @@ void EventManager::DoExpiredTimeTasks(const TimePoint& return_time) { void EventManager::DestroyClosedConnections() { LockGuard lock_guard_cf(closed_fds_lock_); Fds remaining_fds; - static constexpr int kMaxPendingIoRetries = 1000; - static constexpr int kMaxPendingIoTimeoutMs = 2000; + static constexpr int kPendingIoWarnRetries = 1000; + static constexpr int kPendingIoWarnTimeoutMs = 2000; for (auto fd : closed_fds_) { std::unique_ptr connection_ptr; + bool detach_connection = false; { LockGuard lock_guard_cm(connection_map_mutex_lock_); auto it = connection_map_.find(fd); if (it != connection_map_.end() && it->second && it->second->IsDisconnected()) { if (it->second->HasPendingIo()) { + it->second->BumpPendingIoWait(); int retries = it->second->GetPendingIoRetries(); int64_t waited_ms = it->second->GetPendingIoWaitMs(); - if (retries < kMaxPendingIoRetries && - waited_ms < kMaxPendingIoTimeoutMs) { - it->second->BumpPendingIoWait(); + if (retries == kPendingIoWarnRetries || + waited_ms == kPendingIoWarnTimeoutMs) { + LOG_WARN("Waiting for pending IO to finish on fd(%d)", fd); + } + if (should_quit_.load(std::memory_order_acquire) && + (retries >= kPendingIoWarnRetries || + waited_ms >= kPendingIoWarnTimeoutMs)) { + connection_ptr = std::move(it->second); + connection_map_.erase(it); + detach_connection = true; + } else { remaining_fds.insert(fd); - continue; } - LOG_WARN("Force destroy connection fd(%d) after pending IO wait", fd); + continue; } connection_ptr = std::move(it->second); connection_map_.erase(it); } } + if (detach_connection && connection_ptr) { + connection_ptr->RegisterOnConnectionCallback( + Connecting::NormalCallback{}); + connection_ptr->RegisterOnMessageCallback( + Connecting::OnMessageCallback{}); + connection_ptr->RegisterWriteCallback(Connecting::NormalCallback{}); + connection_ptr->RegisterHighWaterMarkCallback( + Connecting::HighWaterMarkCallback{}, 0); + connection_ptr->RegisterCloseCallback(Connecting::NormalCallback{}); + connection_ptr.release(); + continue; + } if (connection_ptr) { if (DestroyConnectionCallback_) { // Release ownership to callback (callback takes ownership) diff --git a/src/event_manager.h b/src/event_manager.h index e01aec59..58d4f15f 100644 --- a/src/event_manager.h +++ b/src/event_manager.h @@ -128,7 +128,10 @@ class EventManager : NonCopyableMovable { mutable MutexLock connection_map_mutex_lock_; // The flag for deciding whether the event loop should quit - std::atomic_bool should_quit_; + std::atomic_bool should_quit_{false}; + + // The flag for deciding whether the event loop is destroying + std::atomic_bool is_destroying_{false}; // List for active events returned from the I/O multiplexing waiting each loop Poller::EventerList active_events_; diff --git a/src/io_buffer.cc b/src/io_buffer.cc index 1c02b9ab..7fa8242f 100644 --- a/src/io_buffer.cc +++ b/src/io_buffer.cc @@ -232,6 +232,9 @@ void IoBuffer::ShrinkWritableSpace(size_t len) { } ssize_t IoBuffer::ReadFromFd(int fd, int* tmp_errno) { + if (tmp_errno) { + *tmp_errno = 0; + } char extra_buffer[64 * 1024]; // 64k bytes struct iovec discrete_buffers[2]; int writable_bytes = GetWritableBytes(); @@ -251,19 +254,28 @@ ssize_t IoBuffer::ReadFromFd(int fd, int* tmp_errno) { message.msg_iovlen = iov_seq; ssize_t n = ::recvmsg(fd, &message, MSG_NOSIGNAL); if (n < 0) { - *tmp_errno = errno; + int err = errno; + if (tmp_errno) { + *tmp_errno = err; + } // EAGAIN/EWOULDBLOCK/EINTR are ignorable, non-fatal conditions. - if (*tmp_errno != EAGAIN && *tmp_errno != EWOULDBLOCK && - *tmp_errno != EINTR) { + if (err != EAGAIN && err != EWOULDBLOCK && err != EINTR) { char errbuf[128]; errbuf[0] = '\0'; - (void)::strerror_r(*tmp_errno, errbuf, sizeof(errbuf)); - LOG_ERROR("Discrete reading in Fd(%d) failed!!! errno(%d): %s", fd, - *tmp_errno, errbuf); + char* msg = ::strerror_r(err, errbuf, sizeof(errbuf)); + (void)msg; + LOG_ERROR("Discrete reading in Fd(%d) failed!!! errno(%d): %s", fd, err, + errbuf); } } else if (static_cast(n) <= static_cast(writable_bytes)) { + if (tmp_errno) { + *tmp_errno = 0; + } writing_index_ += n; } else { + if (tmp_errno) { + *tmp_errno = 0; + } writing_index_ = buffer_.size(); Append(static_cast(extra_buffer), static_cast(n - writable_bytes)); @@ -273,13 +285,47 @@ ssize_t IoBuffer::ReadFromFd(int fd, int* tmp_errno) { ssize_t IoBuffer::ReadFromFd(int fd, size_t read_len, int* tmp_errno) { EnsureWritableSpace(read_len); ssize_t res = 0; + if (tmp_errno) { + *tmp_errno = 0; + } while (read_len > 0) { auto bytes_read = ::recv(fd, static_cast(const_cast(GetWritablePosition())), read_len, MSG_NOSIGNAL); - read_len -= bytes_read; - writing_index_ += bytes_read; - res += bytes_read; + if (bytes_read > 0) { + read_len -= static_cast(bytes_read); + writing_index_ += static_cast(bytes_read); + res += bytes_read; + if (tmp_errno) { + *tmp_errno = 0; + } + continue; + } + if (bytes_read == 0) { + if (tmp_errno) { + *tmp_errno = 0; + } + break; + } + int err = errno; + if (err == EINTR) { + if (tmp_errno) { + *tmp_errno = 0; + } + continue; + } + if (tmp_errno) { + *tmp_errno = err; + } + if (err == EAGAIN || err == EWOULDBLOCK) { + break; + } + char errbuf[128]; + errbuf[0] = '\0'; + char* msg = ::strerror_r(err, errbuf, sizeof(errbuf)); + (void)msg; + LOG_ERROR("ReadFromFd(%d) failed!!! errno(%d): %s", fd, err, errbuf); + break; } return res; } diff --git a/src/logger.cc b/src/logger.cc index fdb3a733..9df2c661 100644 --- a/src/logger.cc +++ b/src/logger.cc @@ -43,7 +43,7 @@ void Logger::StartLogger(const std::string& log_file_name) { } void Logger::StartLogger(std::string&& log_file_name) { if (!is_initialized.load(std::memory_order_acquire)) { - std::lock_guard lock(log_mutex_); + LockGuard lock(log_mutex_); if (!is_initialized.load(std::memory_order_acquire)) { is_stopping_.store(false, std::memory_order_release); write_index_.store(0, std::memory_order_relaxed); @@ -85,7 +85,7 @@ void Logger::RecordLogs(LogLevel log_type, std::string&& log_info) { std::string Logger::UpdateLoggerTime() { time_t tmp_time; ::time(&tmp_time); - std::lock_guard lock(time_mutex_); + LockGuard lock(time_mutex_); if (tmp_time > time_now_sec_) { time_now_sec_ = tmp_time; // In consideration of time zone @@ -130,7 +130,7 @@ void Logger::WriteDownLogs() { ::fflush(log_file_); // Block when the buffer is empty if (pending_.load(std::memory_order_acquire) == 0) { - std::unique_lock lock(log_mutex_); + std::unique_lock lock(log_mutex_); if (!is_stopping_.load(std::memory_order_acquire) && pending_.load(std::memory_order_acquire) == 0) { log_cond_var_.wait(lock); @@ -150,11 +150,11 @@ void Logger::RecordLogs(std::string&& log_info) { // Splice this log record std::string time_now_str{UpdateLoggerTime()}; std::string log_data(time_now_str.size() + log_info.size() + 2, ' '); - ::memcpy(reinterpret_cast(const_cast(log_data.c_str())), - time_now_str.c_str(), time_now_str.size()); - ::memcpy(reinterpret_cast(const_cast(log_data.c_str()) + - time_now_str.size() + 1), - log_info.c_str(), log_info.size()); + char* buffer = log_data.data(); + ::memcpy(reinterpret_cast(buffer), time_now_str.data(), + time_now_str.size()); + ::memcpy(reinterpret_cast(buffer + time_now_str.size() + 1), + log_info.data(), log_info.size()); log_data.back() = '\n'; // Put this log record into ring buffer (drop if full) (void)Enqueue(std::move(log_data)); @@ -173,7 +173,7 @@ bool Logger::Enqueue(std::string&& log_data) { slot.seq.store(pos + 1, std::memory_order_release); size_t prev = pending_.fetch_add(1, std::memory_order_release); if (prev == 0) { - std::lock_guard lock(log_mutex_); + LockGuard lock(log_mutex_); log_cond_var_.notify_one(); } return true; @@ -228,6 +228,10 @@ Logger::Logger() } Logger::~Logger() { + if (is_initialized.load(std::memory_order_acquire)) { + EndLogger(); + return; + } if (thread_.joinable()) { thread_.join(); } diff --git a/src/logger.h b/src/logger.h index ea4efe63..3b4e9361 100644 --- a/src/logger.h +++ b/src/logger.h @@ -26,6 +26,7 @@ #include #include "non_copyable_movable.h" +#include "spin_lock.h" namespace taotu { @@ -161,8 +162,8 @@ class Logger : NonCopyableMovable { alignas(256) std::atomic is_stopping_; alignas(256) char filler1_; // Only for solving "False Sharing" - std::mutex log_mutex_; - std::condition_variable log_cond_var_; + MutexLock log_mutex_; + std::condition_variable_any log_cond_var_; int64_t cur_log_file_byte_; int64_t cur_log_file_seq_; @@ -172,7 +173,7 @@ class Logger : NonCopyableMovable { std::thread thread_; - std::mutex time_mutex_; + MutexLock time_mutex_; std::string time_now_str_; time_t time_now_sec_; diff --git a/src/poller.cc b/src/poller.cc index c1a65a1b..69533817 100644 --- a/src/poller.cc +++ b/src/poller.cc @@ -50,7 +50,19 @@ uint32_t GetIoUringEntries() { Poller::Poller() { ::memset(static_cast(&ring_), 0, sizeof(ring_)); struct io_uring_params params {}; - params.flags = IORING_SETUP_SQPOLL; + bool want_sqpoll = false; + const char* enable_sqpoll = ::getenv("TAOTU_ENABLE_SQPOLL"); + if (enable_sqpoll && *enable_sqpoll != '\0' && *enable_sqpoll != '0') { + want_sqpoll = true; + } + const char* disable_sqpoll = ::getenv("TAOTU_DISABLE_SQPOLL"); + if (disable_sqpoll && *disable_sqpoll != '\0' && *disable_sqpoll != '0') { + want_sqpoll = false; + } + const bool requested_sqpoll = want_sqpoll; + if (requested_sqpoll) { + params.flags = IORING_SETUP_SQPOLL; + } uint32_t entries = GetIoUringEntries(); int ret = -ENOMEM; while (entries >= kMinEntries) { @@ -61,7 +73,7 @@ Poller::Poller() { } break; } - if (ret == -EPERM || ret == -EINVAL) { + if (requested_sqpoll && (ret == -EPERM || ret == -EINVAL)) { LOG_WARN("io_uring SQPOLL unavailable, fallback to default: %s", ::strerror(-ret)); ::memset(static_cast(&ring_), 0, sizeof(ring_)); @@ -76,7 +88,7 @@ Poller::Poller() { } break; } - } else { + } else if (requested_sqpoll) { use_sqpoll_ = true; } if (ret < 0) { @@ -101,7 +113,7 @@ Poller::~Poller() { // Clean up user-space ops still in the queue to avoid leaks on early exit. LOG_DEBUG("Destroying Poller, pending ops: %zu", ops_.size()); { - std::lock_guard lock(ops_mutex_); + LockGuard lock(ops_mutex_); for (auto& item : ops_) { CleanupOpContext(item.second.get()); } @@ -125,7 +137,7 @@ std::unique_ptr Poller::LookupOp(uint64_t key) { if (key == 0) { return nullptr; } - std::lock_guard lock(ops_mutex_); + LockGuard lock(ops_mutex_); auto it = ops_.find(key); if (it == ops_.end()) { return nullptr; @@ -174,7 +186,7 @@ uint64_t Poller::SubmitRead(Eventer* eventer, struct iovec* iov, int iovcnt, eventer->Fd(), completion, key, context_deleter}); { - std::lock_guard lock(ops_mutex_); + LockGuard lock(ops_mutex_); ops_[key] = std::move(op); } ::io_uring_prep_readv(sqe, eventer->Fd(), iov, iovcnt, 0); @@ -199,7 +211,7 @@ uint64_t Poller::SubmitReadMultishot(Eventer* eventer, int buf_group, eventer->Fd(), completion, key, context_deleter}); { - std::lock_guard lock(ops_mutex_); + LockGuard lock(ops_mutex_); ops_[key] = std::move(op); } ::io_uring_prep_recv_multishot(sqe, eventer->Fd(), nullptr, 0, 0); @@ -233,7 +245,7 @@ uint64_t Poller::SubmitWrite(Eventer* eventer, struct iovec* iov, int iovcnt, eventer->Fd(), completion, key, context_deleter}); { - std::lock_guard lock(ops_mutex_); + LockGuard lock(ops_mutex_); ops_[key] = std::move(op); } ::io_uring_prep_writev(sqe, eventer->Fd(), iov, iovcnt, 0); @@ -254,7 +266,7 @@ uint64_t Poller::SubmitAccept(int fd, struct sockaddr* addr, socklen_t* addrlen, auto op = std::make_unique(IoUringOp{ OpType::kAccept, nullptr, ctx, fd, completion, key, context_deleter}); { - std::lock_guard lock(ops_mutex_); + LockGuard lock(ops_mutex_); ops_[key] = std::move(op); } if (multishot && use_multishot_accept_) { @@ -274,28 +286,31 @@ uint64_t Poller::SubmitAccept(int fd, struct sockaddr* addr, socklen_t* addrlen, return key; } -void Poller::CancelOp(uint64_t user_data_key) { +bool Poller::CancelOp(uint64_t user_data_key) { if (user_data_key == 0) { - return; + return false; } + bool found = false; { - std::lock_guard lock(ops_mutex_); + LockGuard lock(ops_mutex_); auto it = ops_.find(user_data_key); if (it != ops_.end()) { // Mark canceled: keep op until its CQE arrives, so the kernel won't // touch freed context/iov memory. it->second->eventer = nullptr; it->second->completion = nullptr; + found = true; } } struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); if (!sqe) { LOG_ERROR("io_uring_get_sqe failed when cancel op"); - return; + return found; } ::io_uring_prep_cancel64(sqe, user_data_key, 0); ::io_uring_sqe_set_data64(sqe, 0); // Cancellation CQE needs no handling. SubmitPending(); + return found; } TimePoint Poller::Poll(int timeout, EventerList* active_eventers) { struct __kernel_timespec ts {}; @@ -375,7 +390,7 @@ void Poller::HandleCqe(struct io_uring_cqe* cqe, EventerList* active_eventers) { op_ptr->completion(cqe, op_ptr); ReleaseBufferFromCqe(cqe); if (keep_op && op && op->context != nullptr) { - std::lock_guard lock(ops_mutex_); + LockGuard lock(ops_mutex_); ops_[key] = std::move(op); // keep for the next CQE } else { op.reset(); @@ -468,7 +483,7 @@ void Poller::SubmitPoll(Eventer* eventer) { auto op = std::make_unique( IoUringOp{OpType::kPoll, eventer, nullptr, eventer->Fd(), nullptr, key}); { - std::lock_guard lock(ops_mutex_); + LockGuard lock(ops_mutex_); ops_[key] = std::move(op); } ::io_uring_prep_poll_add(sqe, eventer->Fd(), diff --git a/src/poller.h b/src/poller.h index e9807398..b39bcac5 100644 --- a/src/poller.h +++ b/src/poller.h @@ -18,11 +18,11 @@ #include #include #include -#include #include #include #include "non_copyable_movable.h" +#include "spin_lock.h" #include "time_point.h" #ifndef __linux__ @@ -81,7 +81,7 @@ class Poller : NonCopyableMovable { uint64_t key = 0, bool multishot = false, ContextDeleter context_deleter = nullptr); - void CancelOp(uint64_t user_data_key); + bool CancelOp(uint64_t user_data_key); // Limit CQE handling per poll to avoid starving timers. void SetCqeBatchLimit(size_t limit) { cqe_batch_limit_ = limit; } @@ -122,7 +122,7 @@ class Poller : NonCopyableMovable { std::unordered_map states_; std::unordered_map> ops_; std::atomic_uint64_t next_key_{1}; - mutable std::mutex ops_mutex_; + mutable MutexLock ops_mutex_; bool use_sqpoll_{false}; bool use_multishot_accept_{true}; bool buffers_registered_{false}; diff --git a/src/reactor_manager.cc b/src/reactor_manager.cc index fe09fedf..32fafff0 100644 --- a/src/reactor_manager.cc +++ b/src/reactor_manager.cc @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -100,12 +101,20 @@ void ServerReactorManager::Loop() { void ServerReactorManager::AcceptNewConnectionCallback( int socket_fd, const NetAddress& peer_address) { auto* event_manager = balancer_->PickOneEventManager(); + if (!event_manager) { + LOG_ERROR("No EventManager available, drop connection fd(%d)", socket_fd); + ::close(socket_fd); + return; + } NetAddress local_address = GetLocalAddress(socket_fd); event_manager->RunSoon( [this, event_manager, socket_fd, local_address, peer_address]() { auto new_connection = event_manager->InsertNewConnection( socket_fd, local_address, peer_address); // Insert the new connection in its own I/O thread + if (!new_connection) { + return; + } new_connection->RegisterOnConnectionCallback(ConnectionCallback_); new_connection->RegisterOnMessageCallback(MessageCallback_); new_connection->RegisterWriteCallback(WriteCompleteCallback_); @@ -118,11 +127,11 @@ void ServerReactorManager::AcceptNewConnectionCallback( ClientReactorManager::ClientReactorManager(EventManager* event_manager, const NetAddress& server_address) : event_manager_(event_manager), - connector_(event_manager_, server_address), + connector_(std::make_shared(event_manager_, server_address)), connection_(nullptr), should_retry_(false), can_connect_(true) { - connector_.RegisterNewConnectionCallback( + connector_->RegisterNewConnectionCallback( [this](int socket_fd) { this->LaunchNewConnectionCallback(socket_fd); }); } ClientReactorManager::~ClientReactorManager() { @@ -131,27 +140,37 @@ ClientReactorManager::~ClientReactorManager() { LockGuard lock_guard(connection_mutex_); connection_ = nullptr; } - connector_.Stop(); + if (connector_) { + connector_->Stop(); + } } void ClientReactorManager::Connect() { LOG_DEBUG("Connect to [ IP(%s) Port(%u) ].", - connector_.GetServerAddress().GetIp().c_str(), - connector_.GetServerAddress().GetPort()); + connector_->GetServerAddress().GetIp().c_str(), + connector_->GetServerAddress().GetPort()); should_retry_ = false; // No auto-reconnect for client mode to avoid loops. can_connect_ = true; - connector_.Start(); + connector_->Start(); } void ClientReactorManager::Disconnect() { - event_manager_->RunSoon([this]() { this->DisconnectInLoop(); }); + auto self = shared_from_this(); + event_manager_->RunSoon([self]() { self->DisconnectInLoop(); }); } void ClientReactorManager::Stop() { - event_manager_->RunSoon([this]() { this->StopInLoop(); }); + auto self = shared_from_this(); + event_manager_->RunSoon([self]() { self->StopInLoop(); }); +} +void ClientReactorManager::StopWithoutQuit() { + auto self = shared_from_this(); + event_manager_->RunSoon([self]() { self->StopInLoopWithoutQuit(); }); } void ClientReactorManager::DisconnectInLoop() { should_retry_ = false; can_connect_ = false; - connector_.Stop(); + if (connector_) { + connector_->Stop(); + } Connecting* connection_to_close = nullptr; { LockGuard lock_guard(connection_mutex_); @@ -169,7 +188,9 @@ void ClientReactorManager::DisconnectInLoop() { void ClientReactorManager::StopInLoop() { should_retry_ = false; can_connect_ = false; - connector_.Stop(); + if (connector_) { + connector_->Stop(); + } Connecting* connection_to_close = nullptr; { LockGuard lock_guard(connection_mutex_); @@ -185,11 +206,33 @@ void ClientReactorManager::StopInLoop() { event_manager_->WakeUp(); } +void ClientReactorManager::StopInLoopWithoutQuit() { + should_retry_ = false; + can_connect_ = false; + if (connector_) { + connector_->Stop(); + } + Connecting* connection_to_close = nullptr; + { + LockGuard lock_guard(connection_mutex_); + if (connection_ != nullptr) { + connection_to_close = connection_; + connection_ = nullptr; + } + } + if (connection_to_close) { + connection_to_close->ForceClose(); + } +} + void ClientReactorManager::LaunchNewConnectionCallback(int socket_fd) { NetAddress peer_address(GetPeerAddress(socket_fd)); NetAddress local_address(GetLocalAddress(socket_fd)); auto new_connection = event_manager_->InsertNewConnection( socket_fd, local_address, peer_address); + if (!new_connection) { + return; + } new_connection->RegisterOnConnectionCallback(ConnectionCallback_); new_connection->RegisterOnMessageCallback(MessageCallback_); new_connection->RegisterWriteCallback(WriteCompleteCallback_); diff --git a/src/reactor_manager.h b/src/reactor_manager.h index d9236acd..5957b6dc 100644 --- a/src/reactor_manager.h +++ b/src/reactor_manager.h @@ -122,7 +122,9 @@ class ServerReactorManager : NonCopyableMovable { * do by some flags and different callback functions. * */ -class ClientReactorManager : NonCopyableMovable { +class ClientReactorManager + : public NonCopyableMovable, + public std::enable_shared_from_this { public: typedef Connecting::NormalCallback NormalCallback; typedef Connecting::OnMessageCallback MessageCallback; @@ -140,6 +142,8 @@ class ClientReactorManager : NonCopyableMovable { // Stop the TCP connection (if because of acceptable exceptions in // hardware-level, just retry) void Stop(); + // Stop the TCP connection but keep the event loop running (shared loops). + void StopWithoutQuit(); void SetConnectionCallback(const NormalCallback& cb) { ConnectionCallback_ = cb; @@ -154,6 +158,7 @@ class ClientReactorManager : NonCopyableMovable { private: void DisconnectInLoop(); void StopInLoop(); + void StopInLoopWithoutQuit(); // Build a new TCP connection and insert it into the corresponding I/O thread void LaunchNewConnectionCallback(int socket_fd); @@ -162,7 +167,7 @@ class ClientReactorManager : NonCopyableMovable { EventManager* event_manager_; // Connector for creating a new TCP connection in main thread - Connector connector_; + std::shared_ptr connector_; // Connection created Connecting* connection_; diff --git a/src/rpc_codec.cc b/src/rpc_codec.cc index 03f480ec..a1fc12a4 100644 --- a/src/rpc_codec.cc +++ b/src/rpc_codec.cc @@ -122,11 +122,15 @@ void RpcCodec::OnMessage(int sock_fd, IoBuffer* io_buffer, const ssize_t min_header_len = static_cast(kMinMessageLength + kHeaderLength); while (static_cast(min_header_len) > io_buffer->GetReadableBytes()) { - io_buffer->ReadFromFd( + ssize_t n = io_buffer->ReadFromFd( sock_fd, min_header_len - io_buffer->GetReadableBytes(), &saved_errno); + if (n <= 0) { + return; + } if (saved_errno != 0) { LOG_ERROR("RpcCodec::OnMessage() - Fd(%d) with errno(%d)", sock_fd, saved_errno); + return; } } const int32_t len = io_buffer->GetReadableInt32(); @@ -137,10 +141,18 @@ void RpcCodec::OnMessage(int sock_fd, IoBuffer* io_buffer, } while (static_cast(kHeaderLength + len) > io_buffer->GetReadableBytes()) { - io_buffer->ReadFromFd(sock_fd, - static_cast(kHeaderLength + len) - - io_buffer->GetReadableBytes(), - &saved_errno); + ssize_t n = io_buffer->ReadFromFd(sock_fd, + static_cast(kHeaderLength + len) - + io_buffer->GetReadableBytes(), + &saved_errno); + if (n <= 0) { + return; + } + if (saved_errno != 0) { + LOG_ERROR("RpcCodec::OnMessage() - Fd(%d) with errno(%d)", sock_fd, + saved_errno); + return; + } } if (AsyncRawCallback_ && !SyncRawCallback_(sock_fd, diff --git a/src/spin_lock.h b/src/spin_lock.h index ed35de27..03d1a481 100644 --- a/src/spin_lock.h +++ b/src/spin_lock.h @@ -34,6 +34,12 @@ class MutexLock : NonCopyableMovable { } } void Unlock() { value_.store(true); } + void lock() { Lock(); } + void unlock() { Unlock(); } + bool try_lock() { + bool exp = true; + return value_.compare_exchange_strong(exp, false); + } private: std::atomic_bool value_; diff --git a/src/thread_pool.cc b/src/thread_pool.cc index a198284c..9fe56f6b 100644 --- a/src/thread_pool.cc +++ b/src/thread_pool.cc @@ -24,7 +24,7 @@ ThreadPool::ThreadPool(size_t thread_amount) while (true) { std::function CurTask; { - std::unique_lock lock(this->pdt_csm_mutex_); + std::unique_lock lock(this->pdt_csm_mutex_); this->pdt_csm_cond_var_.wait(lock, [this]() { return this->should_stop_ || !(this->task_queues_[this->que_pdt_idx_].empty()) || @@ -43,7 +43,7 @@ ThreadPool::ThreadPool(size_t thread_amount) .empty()) { // If there is no task for consuming, make the // index of the task queue for producers as the // index of the task queue for consumers - std::lock_guard csm_lock(this->que_csm_mutex_); + LockGuard csm_lock(this->que_csm_mutex_); this->que_pdt_idx_ = que_csm_idx; } que_csm_idx = @@ -64,7 +64,7 @@ ThreadPool::ThreadPool(size_t thread_amount) } ThreadPool::~ThreadPool() { { - std::lock_guard lock(pdt_csm_mutex_); + LockGuard lock(pdt_csm_mutex_); should_stop_ = true; pdt_csm_cond_var_.notify_all(); } diff --git a/src/thread_pool.h b/src/thread_pool.h index 9d9079a9..d2838e10 100644 --- a/src/thread_pool.h +++ b/src/thread_pool.h @@ -28,6 +28,7 @@ #include "logger.h" #include "non_copyable_movable.h" +#include "spin_lock.h" namespace taotu { @@ -53,7 +54,7 @@ class ThreadPool : NonCopyableMovable { auto task_future_pkg = std::make_shared>( std::forward>(task)); auto result_future = task_future_pkg->get_future(); - std::lock_guard lock(que_csm_mutex_); + LockGuard lock(que_csm_mutex_); task_queues_[que_pdt_idx_].emplace( [task_future_pkg]() { (*task_future_pkg)(); }); pdt_csm_cond_var_ @@ -72,14 +73,14 @@ class ThreadPool : NonCopyableMovable { size_t que_pdt_idx_; // Mutex lock protecting the 2 task queues - std::mutex pdt_csm_mutex_; + MutexLock pdt_csm_mutex_; // Condition Variable for blocking the current leisure thread (and release the // lock temporarily) and awaking a ready thread - std::condition_variable pdt_csm_cond_var_; + std::condition_variable_any pdt_csm_cond_var_; // Mutex lock protecting the task queue for consumers - std::mutex que_csm_mutex_; + MutexLock que_csm_mutex_; bool should_stop_; }; From ea2f643cb7fd2a54881a217cde1e5d3266d1e547 Mon Sep 17 00:00:00 2001 From: Sigma711 <1979934715@qq.com> Date: Thu, 29 Jan 2026 14:03:41 +0800 Subject: [PATCH 2/9] [src] Optimize io_uring read path and enable multishot --- src/connecting.cc | 126 ++++++++++++++++++++++++++++++---------------- src/connecting.h | 7 ++- src/poller.cc | 17 +++++-- 3 files changed, 101 insertions(+), 49 deletions(-) diff --git a/src/connecting.cc b/src/connecting.cc index fc5818d0..ca2a463b 100644 --- a/src/connecting.cc +++ b/src/connecting.cc @@ -76,7 +76,6 @@ void Connecting::OnReadComplete(struct io_uring_cqe* cqe, Poller::IoUringOp* op) { auto* ctx = static_cast(op->context); if (!ctx || ctx->self == nullptr) { - delete ctx; op->context = nullptr; return; } @@ -96,7 +95,7 @@ void Connecting::OnReadComplete(struct io_uring_cqe* cqe, if (connecting->read_ctx_ == ctx) { connecting->read_ctx_ = nullptr; } - delete ctx; + ctx->self = nullptr; op->context = nullptr; return; } @@ -104,6 +103,7 @@ void Connecting::OnReadComplete(struct io_uring_cqe* cqe, connecting->read_in_flight_ = false; } if (res > 0) { + bool rearmed = false; // Update the input buffer. if (ctx->multishot && has_buffer) { auto* buf = @@ -131,13 +131,44 @@ void Connecting::OnReadComplete(struct io_uring_cqe* cqe, // when multishot completes). if (!more) { connecting->SubmitReadOnce(); + rearmed = true; + } + if (!more) { + connecting->CompletePendingIo(); + if (!rearmed) { + if (connecting->read_ctx_ == ctx) { + connecting->read_ctx_ = nullptr; + } + ctx->self = nullptr; + } + op->context = nullptr; } } else if (res == 0) { // Peer closed. connecting->DoClosing(); + if (!more) { + connecting->CompletePendingIo(); + if (connecting->read_ctx_ == ctx) { + connecting->read_ctx_ = nullptr; + } + ctx->self = nullptr; + op->context = nullptr; + } } else { // res < 0 if (err == EAGAIN || err == EWOULDBLOCK || err == EINTR) { + bool rearmed = false; if (!more) { connecting->SubmitReadOnce(); + rearmed = true; + } + if (!more) { + connecting->CompletePendingIo(); + if (!rearmed) { + if (connecting->read_ctx_ == ctx) { + connecting->read_ctx_ = nullptr; + } + ctx->self = nullptr; + } + op->context = nullptr; } } else if (err == ECONNRESET || err == ECONNABORTED || err == EPIPE) { char errbuf[128]; @@ -148,27 +179,34 @@ void Connecting::OnReadComplete(struct io_uring_cqe* cqe, LOG_INFO("Peer closed/reset the connection fd(%d) err(%d - %s)", connecting->Fd(), err, err_str); connecting->DoClosing(); + if (!more) { + connecting->CompletePendingIo(); + if (connecting->read_ctx_ == ctx) { + connecting->read_ctx_ = nullptr; + } + ctx->self = nullptr; + op->context = nullptr; + } } else { LOG_ERROR("OnReadComplete error: fd(%d) res(%zd) err(%d)", connecting->Fd(), res, err); connecting->DoWithError(err); + if (!more) { + connecting->CompletePendingIo(); + if (connecting->read_ctx_ == ctx) { + connecting->read_ctx_ = nullptr; + } + ctx->self = nullptr; + op->context = nullptr; + } } } - if (!more) { - connecting->CompletePendingIo(); - if (connecting->read_ctx_ == ctx) { - connecting->read_ctx_ = nullptr; - } - delete ctx; - op->context = nullptr; - } } void Connecting::OnWriteComplete(struct io_uring_cqe* cqe, Poller::IoUringOp* op) { auto* ctx = static_cast(op->context); if (!ctx || ctx->self == nullptr) { - delete ctx; op->context = nullptr; return; } @@ -190,7 +228,6 @@ void Connecting::OnWriteComplete(struct io_uring_cqe* cqe, if (connecting->write_ctx_ == ctx) { connecting->write_ctx_ = nullptr; } - delete ctx; return; } if (connecting->WriteCompleteCallback_) { @@ -216,7 +253,7 @@ void Connecting::OnWriteComplete(struct io_uring_cqe* cqe, if (connecting->write_ctx_ == ctx) { connecting->write_ctx_ = nullptr; } - delete ctx; + ctx->self = nullptr; op->context = nullptr; } @@ -224,40 +261,49 @@ void Connecting::SubmitReadOnce() { if (read_in_flight_) { return; } - auto* ctx = new ReadContext(); + auto* ctx = &read_ctx_storage_; ctx->self = this; read_ctx_ = ctx; + ctx->extra_buffer = nullptr; + ctx->extra_len = 0; + ctx->key = 0; + ctx->multishot = false; + ctx->buf_id = 0; ctx->writable = input_buffer_.GetWritableBytes(); ctx->iov[0].iov_base = const_cast(input_buffer_.GetWritablePosition()); ctx->iov[0].iov_len = ctx->writable; + if (!event_manager_->GetPoller()->BuffersRegistered()) { + if (!extra_read_buffer_) { + extra_read_buffer_.reset(new char[64 * 1024]); + } + ctx->extra_buffer = extra_read_buffer_.get(); + ctx->extra_len = 64 * 1024; + } ctx->iov[1].iov_base = ctx->extra_buffer; - ctx->iov[1].iov_len = sizeof(ctx->extra_buffer); + ctx->iov[1].iov_len = ctx->extra_len; int iovcnt; if (ctx->writable == 0) { iovcnt = 1; ctx->iov[0] = ctx->iov[1]; } else { - iovcnt = ctx->writable < sizeof(ctx->extra_buffer) ? 2 : 1; + iovcnt = (ctx->extra_len > 0 && ctx->writable < ctx->extra_len) ? 2 : 1; } // ctx->key = next_io_key_++; // Deprecated: let Poller generate key // read_cancel_key_ = ctx->key; // Do not set yet read_in_flight_ = true; -#ifdef IORING_OP_RECV_MULTISHOT +#ifdef IORING_RECV_MULTISHOT if (event_manager_->GetPoller()->BuffersRegistered()) { ctx->multishot = true; uint64_t key = event_manager_->GetPoller()->SubmitReadMultishot( &eventer_, Poller::kBufferGroupId, &Connecting::OnReadComplete, ctx, 0, - [](void* ptr) { - auto* ctx = static_cast(ptr); - if (ctx && ctx->self) { - ctx->self->CompletePendingIo(); - } - delete ctx; - }); + nullptr); if (key == 0) { read_in_flight_ = false; read_cancel_key_ = 0; - delete ctx; + if (read_ctx_ == ctx) { + read_ctx_ = nullptr; + } + ctx->self = nullptr; return; } ctx->key = key; @@ -269,17 +315,14 @@ void Connecting::SubmitReadOnce() { ctx->multishot = false; uint64_t key = event_manager_->GetPoller()->SubmitRead( &eventer_, ctx->iov.data(), iovcnt, &Connecting::OnReadComplete, ctx, 0, - [](void* ptr) { - auto* ctx = static_cast(ptr); - if (ctx && ctx->self) { - ctx->self->CompletePendingIo(); - } - delete ctx; - }); + nullptr); if (key == 0) { read_in_flight_ = false; read_cancel_key_ = 0; - delete ctx; + if (read_ctx_ == ctx) { + read_ctx_ = nullptr; + } + ctx->self = nullptr; return; } ctx->key = key; @@ -295,9 +338,10 @@ void Connecting::SubmitWriteOnce() { if (write_in_flight_ || output_buffer_.GetReadableBytes() == 0) { return; } - auto* ctx = new WriteContext(); + auto* ctx = &write_ctx_storage_; ctx->self = this; write_ctx_ = ctx; + ctx->key = 0; ctx->to_send = output_buffer_.GetReadableBytes(); ctx->iov.iov_base = const_cast(output_buffer_.GetReadablePosition()); ctx->iov.iov_len = ctx->to_send; @@ -305,18 +349,14 @@ void Connecting::SubmitWriteOnce() { // write_cancel_key_ = ctx->key; write_in_flight_ = true; uint64_t key = event_manager_->GetPoller()->SubmitWrite( - &eventer_, &ctx->iov, 1, &Connecting::OnWriteComplete, ctx, 0, - [](void* ptr) { - auto* ctx = static_cast(ptr); - if (ctx && ctx->self) { - ctx->self->CompletePendingIo(); - } - delete ctx; - }); + &eventer_, &ctx->iov, 1, &Connecting::OnWriteComplete, ctx, 0, nullptr); if (key == 0) { write_in_flight_ = false; write_cancel_key_ = 0; - delete ctx; + if (write_ctx_ == ctx) { + write_ctx_ = nullptr; + } + ctx->self = nullptr; return; } ctx->key = key; diff --git a/src/connecting.h b/src/connecting.h index e4ae2622..7a1d3aba 100644 --- a/src/connecting.h +++ b/src/connecting.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -148,7 +149,8 @@ class Connecting : NonCopyableMovable { Connecting* self{nullptr}; std::array iov{}; size_t writable{0}; - char extra_buffer[64 * 1024]; + char* extra_buffer{nullptr}; + size_t extra_len{0}; uint64_t key{0}; bool multishot{false}; uint16_t buf_id{0}; @@ -238,6 +240,9 @@ class Connecting : NonCopyableMovable { bool write_in_flight_{false}; ReadContext* read_ctx_{nullptr}; WriteContext* write_ctx_{nullptr}; + ReadContext read_ctx_storage_{}; + WriteContext write_ctx_storage_{}; + std::unique_ptr extra_read_buffer_{}; uint64_t next_io_key_{1}; uint64_t read_cancel_key_{0}; uint64_t write_cancel_key_{0}; diff --git a/src/poller.cc b/src/poller.cc index 69533817..16a85b35 100644 --- a/src/poller.cc +++ b/src/poller.cc @@ -199,7 +199,7 @@ uint64_t Poller::SubmitReadMultishot(Eventer* eventer, int buf_group, CompletionFn completion, void* ctx, uint64_t key, ContextDeleter context_deleter) { -#ifdef IORING_OP_RECV_MULTISHOT +#ifdef IORING_RECV_MULTISHOT key = NormalizeKey(key); struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); if (!sqe) { @@ -214,7 +214,8 @@ uint64_t Poller::SubmitReadMultishot(Eventer* eventer, int buf_group, LockGuard lock(ops_mutex_); ops_[key] = std::move(op); } - ::io_uring_prep_recv_multishot(sqe, eventer->Fd(), nullptr, 0, 0); + ::io_uring_prep_recv(sqe, eventer->Fd(), nullptr, 0, 0); + sqe->ioprio |= IORING_RECV_MULTISHOT; ::io_uring_sqe_set_flags(sqe, IOSQE_BUFFER_SELECT); sqe->buf_group = static_cast<__u16>(buf_group); ::io_uring_sqe_set_data64(sqe, key); @@ -517,7 +518,13 @@ void Poller::SubmitPending() { } void Poller::RegisterBuffers() { -#ifdef IORING_OP_RECV_MULTISHOT +#ifdef IORING_RECV_MULTISHOT + const char* disable_multishot = ::getenv("TAOTU_DISABLE_RECV_MULTISHOT"); + if (disable_multishot && *disable_multishot != '\0' && + *disable_multishot != '0') { + LOG_DEBUG("recv-multishot disabled by TAOTU_DISABLE_RECV_MULTISHOT."); + return; + } struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); if (!sqe) { LOG_WARN("io_uring_get_sqe failed when registering buffers, skip."); @@ -545,7 +552,7 @@ void Poller::RegisterBuffers() { } void Poller::UnregisterBuffers() { -#ifdef IORING_OP_RECV_MULTISHOT +#ifdef IORING_RECV_MULTISHOT if (!buffers_registered_) { return; } @@ -561,7 +568,7 @@ void Poller::UnregisterBuffers() { } void Poller::ReleaseBufferFromCqe(struct io_uring_cqe* cqe) { -#ifdef IORING_OP_RECV_MULTISHOT +#ifdef IORING_RECV_MULTISHOT if (!buffers_registered_) { return; } From b08baff2ff14538b5f874465dd42e8fc1bfd9545 Mon Sep 17 00:00:00 2001 From: Sigma711 <1979934715@qq.com> Date: Tue, 3 Feb 2026 13:33:43 +0800 Subject: [PATCH 3/9] [src] Reduce poll-loop clock calls and simplify io_uring event state --- src/eventer.cc | 5 +- src/eventer.h | 7 ++ src/poller.cc | 285 ++++++++++++++++++++-------------------------- src/poller.h | 18 +-- src/time_point.cc | 32 ++++++ src/time_point.h | 18 +++ 6 files changed, 187 insertions(+), 178 deletions(-) diff --git a/src/eventer.cc b/src/eventer.cc index e3934713..69946659 100644 --- a/src/eventer.cc +++ b/src/eventer.cc @@ -26,7 +26,10 @@ Eventer::Eventer(Poller* poller, int fd) fd_(fd), in_events_(0x0000), out_events_(0x0000), - is_handling_(false) { + is_handling_(false), + poll_mask_(0), + poll_armed_(false), + poll_token_(0) { poller_->AddEventer(this); } Eventer::~Eventer() { diff --git a/src/eventer.h b/src/eventer.h index 330c4d90..d7433457 100644 --- a/src/eventer.h +++ b/src/eventer.h @@ -31,6 +31,8 @@ class Poller; * */ class Eventer : NonCopyableMovable { + friend class Poller; + public: typedef std::function NormalCallback; typedef std::function ReadCallback; @@ -112,6 +114,11 @@ class Eventer : NonCopyableMovable { bool is_handling_; + // Poll state stored directly on Eventer to avoid Poller-side hash lookup. + uint32_t poll_mask_{0}; + bool poll_armed_{false}; + uint64_t poll_token_{0}; + // Callback function which will be called after each reading ReadCallback ReadCallback_; diff --git a/src/poller.cc b/src/poller.cc index 16a85b35..58f79463 100644 --- a/src/poller.cc +++ b/src/poller.cc @@ -110,57 +110,32 @@ Poller::Poller() { } Poller::~Poller() { - // Clean up user-space ops still in the queue to avoid leaks on early exit. - LOG_DEBUG("Destroying Poller, pending ops: %zu", ops_.size()); - { - LockGuard lock(ops_mutex_); - for (auto& item : ops_) { - CleanupOpContext(item.second.get()); - } - ops_.clear(); // unique_ptr releases automatically. - } UnregisterBuffers(); ::io_uring_queue_exit(&ring_); } -uint64_t Poller::NormalizeKey(uint64_t key) { - if (key == 0) { - key = next_key_.fetch_add(1, std::memory_order_relaxed); - if (key == 0) { // overflow protection: skip 0 - key = next_key_.fetch_add(1, std::memory_order_relaxed); - } - } - return key; +uint64_t Poller::EncodeOp(IoUringOp* op) { + return static_cast(reinterpret_cast(op)); } -std::unique_ptr Poller::LookupOp(uint64_t key) { - if (key == 0) { +Poller::IoUringOp* Poller::DecodeOp(uint64_t token) { + if (token == 0) { return nullptr; } - LockGuard lock(ops_mutex_); - auto it = ops_.find(key); - if (it == ops_.end()) { - return nullptr; - } - auto op = std::move(it->second); - ops_.erase(it); - return op; + return reinterpret_cast(static_cast(token)); } void Poller::AddEventer(Eventer* eventer) { - states_[eventer] = EventerState{eventer->Events(), false}; + eventer->poll_mask_ = eventer->Events(); + eventer->poll_armed_ = false; + eventer->poll_token_ = 0; SubmitPoll(eventer); SubmitPending(); } void Poller::ModifyEventer(Eventer* eventer) { - auto itr = states_.find(eventer); - if (itr == states_.end()) { - AddEventer(eventer); - return; - } - itr->second.mask = eventer->Events(); - if (itr->second.mask == 0) { + eventer->poll_mask_ = eventer->Events(); + if (eventer->poll_mask_ == 0) { CancelPoll(eventer); } else { SubmitPoll(eventer); @@ -170,29 +145,28 @@ void Poller::ModifyEventer(Eventer* eventer) { void Poller::RemoveEventer(Eventer* eventer) { CancelPoll(eventer); - states_.erase(eventer); + eventer->poll_mask_ = 0; + eventer->poll_armed_ = false; + eventer->poll_token_ = 0; } uint64_t Poller::SubmitRead(Eventer* eventer, struct iovec* iov, int iovcnt, CompletionFn completion, void* ctx, uint64_t key, ContextDeleter context_deleter) { - key = NormalizeKey(key); + (void)key; struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); if (!sqe) { LOG_ERROR("io_uring_get_sqe failed when submit read fd(%d)", eventer->Fd()); return 0; } - auto op = std::make_unique(IoUringOp{OpType::kRead, eventer, ctx, - eventer->Fd(), completion, - key, context_deleter}); - { - LockGuard lock(ops_mutex_); - ops_[key] = std::move(op); - } + auto* op = new IoUringOp{OpType::kRead, eventer, ctx, + eventer->Fd(), completion, context_deleter}; + op->state.store(IoUringOp::State::kInflight, std::memory_order_relaxed); + uint64_t token = EncodeOp(op); ::io_uring_prep_readv(sqe, eventer->Fd(), iov, iovcnt, 0); - ::io_uring_sqe_set_data64(sqe, key); + ::io_uring_sqe_set_data64(sqe, token); SubmitPending(); - return key; + return token; } uint64_t Poller::SubmitReadMultishot(Eventer* eventer, int buf_group, @@ -200,27 +174,24 @@ uint64_t Poller::SubmitReadMultishot(Eventer* eventer, int buf_group, uint64_t key, ContextDeleter context_deleter) { #ifdef IORING_RECV_MULTISHOT - key = NormalizeKey(key); + (void)key; struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); if (!sqe) { LOG_ERROR("io_uring_get_sqe failed when submit recv-multishot fd(%d)", eventer->Fd()); return 0; } - auto op = std::make_unique(IoUringOp{OpType::kRead, eventer, ctx, - eventer->Fd(), completion, - key, context_deleter}); - { - LockGuard lock(ops_mutex_); - ops_[key] = std::move(op); - } + auto* op = new IoUringOp{OpType::kRead, eventer, ctx, + eventer->Fd(), completion, context_deleter}; + op->state.store(IoUringOp::State::kInflight, std::memory_order_relaxed); + uint64_t token = EncodeOp(op); ::io_uring_prep_recv(sqe, eventer->Fd(), nullptr, 0, 0); sqe->ioprio |= IORING_RECV_MULTISHOT; ::io_uring_sqe_set_flags(sqe, IOSQE_BUFFER_SELECT); sqe->buf_group = static_cast<__u16>(buf_group); - ::io_uring_sqe_set_data64(sqe, key); + ::io_uring_sqe_set_data64(sqe, token); SubmitPending(); - return key; + return token; #else (void)eventer; (void)buf_group; @@ -235,41 +206,36 @@ uint64_t Poller::SubmitReadMultishot(Eventer* eventer, int buf_group, uint64_t Poller::SubmitWrite(Eventer* eventer, struct iovec* iov, int iovcnt, CompletionFn completion, void* ctx, uint64_t key, ContextDeleter context_deleter) { - key = NormalizeKey(key); + (void)key; struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); if (!sqe) { LOG_ERROR("io_uring_get_sqe failed when submit write fd(%d)", eventer->Fd()); return 0; } - auto op = std::make_unique(IoUringOp{OpType::kWrite, eventer, ctx, - eventer->Fd(), completion, - key, context_deleter}); - { - LockGuard lock(ops_mutex_); - ops_[key] = std::move(op); - } + auto* op = new IoUringOp{OpType::kWrite, eventer, ctx, + eventer->Fd(), completion, context_deleter}; + op->state.store(IoUringOp::State::kInflight, std::memory_order_relaxed); + uint64_t token = EncodeOp(op); ::io_uring_prep_writev(sqe, eventer->Fd(), iov, iovcnt, 0); - ::io_uring_sqe_set_data64(sqe, key); + ::io_uring_sqe_set_data64(sqe, token); SubmitPending(); - return key; + return token; } uint64_t Poller::SubmitAccept(int fd, struct sockaddr* addr, socklen_t* addrlen, void* ctx, CompletionFn completion, uint64_t key, bool multishot, ContextDeleter context_deleter) { - key = NormalizeKey(key); + (void)key; struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); if (!sqe) { LOG_ERROR("io_uring_get_sqe failed when submit accept fd(%d)", fd); return 0; } - auto op = std::make_unique(IoUringOp{ - OpType::kAccept, nullptr, ctx, fd, completion, key, context_deleter}); - { - LockGuard lock(ops_mutex_); - ops_[key] = std::move(op); - } + auto* op = new IoUringOp{OpType::kAccept, nullptr, ctx, fd, + completion, context_deleter}; + op->state.store(IoUringOp::State::kInflight, std::memory_order_relaxed); + uint64_t token = EncodeOp(op); if (multishot && use_multishot_accept_) { #ifdef IORING_ACCEPT_MULTISHOT ::io_uring_prep_multishot_accept(sqe, fd, addr, addrlen, @@ -282,36 +248,33 @@ uint64_t Poller::SubmitAccept(int fd, struct sockaddr* addr, socklen_t* addrlen, ::io_uring_prep_accept(sqe, fd, addr, addrlen, SOCK_NONBLOCK | SOCK_CLOEXEC); } - ::io_uring_sqe_set_data64(sqe, key); + ::io_uring_sqe_set_data64(sqe, token); SubmitPending(); - return key; + return token; } bool Poller::CancelOp(uint64_t user_data_key) { if (user_data_key == 0) { return false; } - bool found = false; - { - LockGuard lock(ops_mutex_); - auto it = ops_.find(user_data_key); - if (it != ops_.end()) { - // Mark canceled: keep op until its CQE arrives, so the kernel won't - // touch freed context/iov memory. - it->second->eventer = nullptr; - it->second->completion = nullptr; - found = true; - } + auto* op = DecodeOp(user_data_key); + if (!op) { + return false; } + // Mark canceled: keep op until its CQE arrives, so the kernel won't + // touch freed context/iov memory. + op->state.store(IoUringOp::State::kCanceled, std::memory_order_relaxed); + op->eventer = nullptr; + op->completion = nullptr; struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); if (!sqe) { LOG_ERROR("io_uring_get_sqe failed when cancel op"); - return found; + return true; } ::io_uring_prep_cancel64(sqe, user_data_key, 0); ::io_uring_sqe_set_data64(sqe, 0); // Cancellation CQE needs no handling. SubmitPending(); - return found; + return true; } TimePoint Poller::Poll(int timeout, EventerList* active_eventers) { struct __kernel_timespec ts {}; @@ -326,25 +289,34 @@ TimePoint Poller::Poll(int timeout, EventerList* active_eventers) { int ret = ::io_uring_wait_cqe_timeout(&ring_, &cqe, tsp); if (ret == -ETIME) { SubmitPending(); - return TimePoint{}; + return TimePoint::FromMicroseconds(TimePoint::FNowRaw()); } if (ret < 0) { LOG_ERROR("io_uring_wait_cqe_timeout failed: %s", ::strerror(-ret)); SubmitPending(); - return TimePoint{}; + return TimePoint::FromMicroseconds(TimePoint::FNowRaw()); } - const int64_t start_us = TimePoint::FNow(); + int64_t now_us = TimePoint::FNowRaw(); + const int64_t start_us = now_us; + TimePoint::NowCacheGuard now_cache(now_us); HandleCqe(cqe, active_eventers); ::io_uring_cqe_seen(&ring_, cqe); // Continue draining all completed CQEs. const size_t limit = cqe_batch_limit_; const int64_t budget_us = cqe_time_budget_us_; + size_t since_last_clock_check = 0; size_t handled = 1; while (limit == 0 || handled < limit) { - if (budget_us > 0 && (TimePoint::FNow() - start_us) >= budget_us) { - break; + // Avoid querying time for every CQE. We only refresh cached "now" + // periodically, which keeps timer precision within one CQE batch chunk. + if (budget_us > 0 && (++since_last_clock_check & 31U) == 0U) { + now_us = TimePoint::FNowRaw(); + now_cache.Update(now_us); + if ((now_us - start_us) >= budget_us) { + break; + } } ret = ::io_uring_peek_cqe(&ring_, &cqe); if (ret == -EAGAIN) { @@ -358,73 +330,72 @@ TimePoint Poller::Poll(int timeout, EventerList* active_eventers) { ++handled; } + now_us = TimePoint::FNowRaw(); + now_cache.Update(now_us); SubmitPending(); - return TimePoint{}; + return TimePoint::FromMicroseconds(now_us); } void Poller::HandleCqe(struct io_uring_cqe* cqe, EventerList* active_eventers) { - uint64_t key = cqe->user_data; - if (key == 0) { + uint64_t token = cqe->user_data; + if (token == 0) { ReleaseBufferFromCqe(cqe); return; // cancellation or ignored CQE } - auto op = LookupOp(key); + IoUringOp* op = DecodeOp(token); if (!op) { ReleaseBufferFromCqe(cqe); return; } - auto* op_ptr = op.get(); LOG_DEBUG("CQE type(%d) res(%d) user_data(%llu) completion(%p)", - static_cast(op_ptr->type), cqe->res, - static_cast(key), - reinterpret_cast(op_ptr->completion)); + static_cast(op->type), cqe->res, + static_cast(token), + reinterpret_cast(op->completion)); bool keep_op = (cqe->flags & IORING_CQE_F_MORE) != 0; - if (op_ptr->completion) { - if ((op_ptr->type == OpType::kRead || op_ptr->type == OpType::kWrite) && - (op_ptr->eventer == nullptr || - states_.find(op_ptr->eventer) == states_.end())) { - CleanupOpContext(op_ptr); + if (op->completion) { + if ((op->type == OpType::kRead || op->type == OpType::kWrite) && + op->eventer == nullptr) { + CleanupOpContext(op); ReleaseBufferFromCqe(cqe); + op->state.store(IoUringOp::State::kDone, std::memory_order_relaxed); + if (!keep_op) { + delete op; + } return; } - LOG_DEBUG("Call completion for type(%d)", static_cast(op_ptr->type)); - op_ptr->completion(cqe, op_ptr); + LOG_DEBUG("Call completion for type(%d)", static_cast(op->type)); + op->completion(cqe, op); ReleaseBufferFromCqe(cqe); - if (keep_op && op && op->context != nullptr) { - LockGuard lock(ops_mutex_); - ops_[key] = std::move(op); // keep for the next CQE - } else { - op.reset(); + if (!keep_op) { + op->state.store(IoUringOp::State::kDone, std::memory_order_relaxed); + delete op; } return; } - if (op_ptr->context != nullptr) { - CleanupOpContext(op_ptr); + if (op->context != nullptr) { + CleanupOpContext(op); } - switch (op_ptr->type) { + switch (op->type) { case OpType::kPoll: { - auto* eventer = op_ptr->eventer; + auto* eventer = op->eventer; if (eventer == nullptr) { break; // Eventer was removed, ignore this CQE } - auto itr = states_.find(eventer); - if (itr != states_.end()) { - itr->second.armed = false; - if (cqe->res >= 0) { - eventer->ReceiveEvents(static_cast(cqe->res)); - active_eventers->push_back(eventer); - } else { - LOG_ERROR("io_uring poll on fd(%d) failed: %s", eventer->Fd(), - ::strerror(-cqe->res)); - } - SubmitPoll(eventer); + eventer->poll_armed_ = false; + eventer->poll_token_ = 0; + if (cqe->res >= 0) { + eventer->ReceiveEvents(static_cast(cqe->res)); + active_eventers->push_back(eventer); + } else { + LOG_ERROR("io_uring poll on fd(%d) failed: %s", eventer->Fd(), + ::strerror(-cqe->res)); } - // If not in states_, eventer was removed - ignore this late CQE + SubmitPoll(eventer); break; } case OpType::kRead: { - auto* eventer = op_ptr->eventer; - if (eventer == nullptr || states_.find(eventer) == states_.end()) { + auto* eventer = op->eventer; + if (eventer == nullptr) { ReleaseBufferFromCqe(cqe); break; // Eventer was removed, ignore } @@ -435,8 +406,8 @@ void Poller::HandleCqe(struct io_uring_cqe* cqe, EventerList* active_eventers) { break; } case OpType::kWrite: { - auto* eventer = op_ptr->eventer; - if (eventer == nullptr || states_.find(eventer) == states_.end()) { + auto* eventer = op->eventer; + if (eventer == nullptr) { break; // Eventer was removed, ignore } Eventer::WriteResult wr{.bytes = cqe->res, @@ -445,7 +416,7 @@ void Poller::HandleCqe(struct io_uring_cqe* cqe, EventerList* active_eventers) { break; } case OpType::kAccept: { - auto* eventer = op_ptr->eventer; + auto* eventer = op->eventer; if (eventer) { eventer->OnAcceptDone(static_cast(cqe->res), nullptr, 0); } @@ -455,7 +426,10 @@ void Poller::HandleCqe(struct io_uring_cqe* cqe, EventerList* active_eventers) { case OpType::kNone: break; } - op.reset(); + op->state.store(IoUringOp::State::kDone, std::memory_order_relaxed); + if (!keep_op) { + delete op; + } } void Poller::CleanupOpContext(IoUringOp* op) { @@ -467,47 +441,32 @@ void Poller::CleanupOpContext(IoUringOp* op) { } void Poller::SubmitPoll(Eventer* eventer) { - auto itr = states_.find(eventer); - if (itr == states_.end()) { - return; - } - auto& state = itr->second; - if (state.mask == 0 || state.armed) { + if (eventer->poll_mask_ == 0 || eventer->poll_armed_) { return; } - uint64_t key = NormalizeKey(0); struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); if (!sqe) { LOG_ERROR("io_uring_get_sqe failed when arming fd(%d)", eventer->Fd()); return; } - auto op = std::make_unique( - IoUringOp{OpType::kPoll, eventer, nullptr, eventer->Fd(), nullptr, key}); - { - LockGuard lock(ops_mutex_); - ops_[key] = std::move(op); - } + auto* op = new IoUringOp{OpType::kPoll, eventer, nullptr, + eventer->Fd(), nullptr, nullptr}; + op->state.store(IoUringOp::State::kInflight, std::memory_order_relaxed); + uint64_t token = EncodeOp(op); ::io_uring_prep_poll_add(sqe, eventer->Fd(), - static_cast(state.mask)); - ::io_uring_sqe_set_data64(sqe, key); - state.armed = true; - state.poll_key = key; + static_cast(eventer->poll_mask_)); + ::io_uring_sqe_set_data64(sqe, token); + eventer->poll_armed_ = true; + eventer->poll_token_ = token; } void Poller::CancelPoll(Eventer* eventer) { - auto itr = states_.find(eventer); - if (itr == states_.end() || !itr->second.armed) { - return; - } - struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); - if (!sqe) { - LOG_ERROR("io_uring_get_sqe failed when canceling fd(%d)", eventer->Fd()); + if (!eventer->poll_armed_ || eventer->poll_token_ == 0) { return; } - ::io_uring_prep_poll_remove(sqe, itr->second.poll_key); - ::io_uring_sqe_set_data(sqe, nullptr); // Ignore cancellation result. - itr->second.armed = false; - itr->second.poll_key = 0; + (void)CancelOp(eventer->poll_token_); + eventer->poll_armed_ = false; + eventer->poll_token_ = 0; } void Poller::SubmitPending() { diff --git a/src/poller.h b/src/poller.h index b39bcac5..ef8e4c6f 100644 --- a/src/poller.h +++ b/src/poller.h @@ -18,7 +18,6 @@ #include #include #include -#include #include #include "non_copyable_movable.h" @@ -44,13 +43,14 @@ class Poller : NonCopyableMovable { typedef void (*ContextDeleter)(void* context); struct IoUringOp { + enum class State { kInit, kInflight, kCanceled, kDone }; OpType type{OpType::kNone}; Eventer* eventer{nullptr}; void* context{nullptr}; int fd{-1}; CompletionFn completion{nullptr}; - uint64_t key{0}; ContextDeleter context_deleter{nullptr}; + std::atomic state{State::kInit}; }; Poller(); @@ -100,14 +100,8 @@ class Poller : NonCopyableMovable { static constexpr size_t kBufCount = 64; private: - struct EventerState { - uint32_t mask{0}; // Event mask of interest (POLLIN/POLLOUT). - bool armed{false}; // Whether a poll request is already pending. - uint64_t poll_key{0}; - }; - - uint64_t NormalizeKey(uint64_t key); - std::unique_ptr LookupOp(uint64_t key); + static uint64_t EncodeOp(IoUringOp* op); + static IoUringOp* DecodeOp(uint64_t token); void CleanupOpContext(IoUringOp* op); void SubmitPoll(Eventer* eventer); @@ -119,10 +113,6 @@ class Poller : NonCopyableMovable { void ReleaseBufferFromCqe(struct io_uring_cqe* cqe); struct io_uring ring_; - std::unordered_map states_; - std::unordered_map> ops_; - std::atomic_uint64_t next_key_{1}; - mutable MutexLock ops_mutex_; bool use_sqpoll_{false}; bool use_multishot_accept_{true}; bool buffers_registered_{false}; diff --git a/src/time_point.cc b/src/time_point.cc index 5e5e0848..6ad3f13b 100644 --- a/src/time_point.cc +++ b/src/time_point.cc @@ -16,6 +16,25 @@ #include namespace taotu { +namespace { +thread_local bool g_now_cache_enabled = false; +thread_local int64_t g_now_cache_us = 0; +} // namespace + +TimePoint::NowCacheGuard::NowCacheGuard(int64_t now_microseconds) + : old_enabled_(g_now_cache_enabled), old_now_microseconds_(g_now_cache_us) { + g_now_cache_enabled = true; + g_now_cache_us = now_microseconds; +} + +TimePoint::NowCacheGuard::~NowCacheGuard() { + g_now_cache_enabled = old_enabled_; + g_now_cache_us = old_now_microseconds_; +} + +void TimePoint::NowCacheGuard::Update(int64_t now_microseconds) { + g_now_cache_us = now_microseconds; +} TimePoint::TimePoint() : time_point_microseconds_(FNow()), context_(0) {} TimePoint::TimePoint(int64_t duration_microseconds, bool repeated) @@ -26,6 +45,8 @@ TimePoint::TimePoint(int64_t duration_microseconds, : time_point_microseconds_(start_time_point.GetMicroseconds() + duration_microseconds), context_(repeated ? duration_microseconds : 0) {} +TimePoint::TimePoint(int64_t absolute_microseconds, int) + : time_point_microseconds_(absolute_microseconds), context_(0) {} int64_t TimePoint::GetMicroseconds() const { return time_point_microseconds_; } @@ -46,9 +67,20 @@ std::function TimePoint::GetTaskContinueCallback() const { } int64_t TimePoint::FNow() { + if (g_now_cache_enabled) { + return g_now_cache_us; + } + return FNowRaw(); +} + +int64_t TimePoint::FNowRaw() { struct timeval tv; ::gettimeofday(&tv, NULL); return static_cast(tv.tv_sec * 1000 * 1000 + tv.tv_usec); } +TimePoint TimePoint::FromMicroseconds(int64_t absolute_microseconds) { + return TimePoint(absolute_microseconds, 0); +} + } // namespace taotu diff --git a/src/time_point.h b/src/time_point.h index 07083707..aead40c0 100644 --- a/src/time_point.h +++ b/src/time_point.h @@ -26,6 +26,18 @@ namespace taotu { */ class TimePoint { public: + class NowCacheGuard { + public: + explicit NowCacheGuard(int64_t now_microseconds); + ~NowCacheGuard(); + + void Update(int64_t now_microseconds); + + private: + bool old_enabled_; + int64_t old_now_microseconds_; + }; + // Current time point TimePoint(); @@ -60,8 +72,14 @@ class TimePoint { // Get current time point static int64_t FNow(); + // Get current time point without thread-local cache. + static int64_t FNowRaw(); + // Construct one time point directly from absolute microseconds. + static TimePoint FromMicroseconds(int64_t absolute_microseconds); private: + explicit TimePoint(int64_t absolute_microseconds, int); + // The time point in microsecond saved int64_t time_point_microseconds_; From d23150a81288fc300dfcb800e81ad456a8a480f7 Mon Sep 17 00:00:00 2001 From: Sigma711 <1979934715@qq.com> Date: Wed, 4 Feb 2026 09:58:39 +0800 Subject: [PATCH 4/9] [src] Improve send-buffer reuse and reduce io_buffer relocation --- src/connecting.cc | 38 +++++++++++++++++++++++++++++++++++++- src/io_buffer.cc | 38 ++++++++++++++++++++++++++------------ 2 files changed, 63 insertions(+), 13 deletions(-) diff --git a/src/connecting.cc b/src/connecting.cc index ca2a463b..0dfeff18 100644 --- a/src/connecting.cc +++ b/src/connecting.cc @@ -445,8 +445,44 @@ void Connecting::Send(const std::string& message) { Send(static_cast(message.c_str()), message.size()); } void Connecting::Send(IoBuffer* io_buffer) { - Send(io_buffer->GetReadablePosition(), io_buffer->GetReadableBytes()); + if (io_buffer == nullptr) { + return; + } + size_t msg_len = io_buffer->GetReadableBytes(); + if (msg_len == 0) { + return; + } + if (ConnectionState::kDisconnected == state_.load()) { + LOG_ERROR("Fd(%d) is disconnected, so give up sending the message!!!", + Fd()); + return; + } + if (ConnectionState::kConnected != state_.load()) { + return; + } + size_t queued_len = output_buffer_.GetReadableBytes() + + pending_output_buffer_.GetReadableBytes(); + if (HighWaterMarkCallback_ && queued_len + msg_len >= high_water_mark_ && + queued_len < high_water_mark_) { + HighWaterMarkCallback_(*this, queued_len + msg_len); + } + if (write_in_flight_) { + if (pending_output_buffer_.GetReadableBytes() == 0) { + pending_output_buffer_.Swap(*io_buffer); + } else { + pending_output_buffer_.Append(io_buffer->GetReadablePosition(), msg_len); + io_buffer->RefreshRW(); + } + return; + } + if (output_buffer_.GetReadableBytes() == 0) { + output_buffer_.Swap(*io_buffer); + SubmitWriteOnce(); + return; + } + output_buffer_.Append(io_buffer->GetReadablePosition(), msg_len); io_buffer->RefreshRW(); + SubmitWriteOnce(); } void Connecting::ShutDownWrite() { diff --git a/src/io_buffer.cc b/src/io_buffer.cc index 7fa8242f..49d1845b 100644 --- a/src/io_buffer.cc +++ b/src/io_buffer.cc @@ -24,6 +24,18 @@ namespace taotu { namespace { constexpr char kCrlf[] = "\r\n"; +constexpr size_t kBufferGrowChunk = 64 * 1024; + +size_t AlignUp(size_t value, size_t align) { + if (align == 0) { + return value; + } + size_t rem = value % align; + if (rem == 0) { + return value; + } + return value + (align - rem); +} } // namespace IoBuffer::IoBuffer(size_t initial_capacity) @@ -341,19 +353,21 @@ ssize_t IoBuffer::WriteToFd(int fd) { } void IoBuffer::ReserveWritableSpace(size_t len) { - if (GetWritableBytes() + GetReservedBytes() - kReservedCapacity < len) { - buffer_.resize(writing_index_ + len); - } else { - // Move forward to-read contents if too much space are reserved in the - // front of the buffer, and then the writable space will be enough without - // dilatation - ::memmove(static_cast( - const_cast(GetBufferBegin() + kReservedCapacity)), - static_cast(GetBufferBegin() + reading_index_), - GetReadableBytes()); - reading_index_ = kReservedCapacity; - writing_index_ = reading_index_ + GetReadableBytes(); + // Grow in fixed-size chunks and avoid memmove in hot paths. + // This keeps the implementation simple and removes large data relocation. + if (GetWritableBytes() >= len) { + return; + } + const size_t required = writing_index_ + len; + const size_t chunked_required = AlignUp(required, kBufferGrowChunk); + size_t next_size = buffer_.size(); + if (next_size == 0) { + next_size = kReservedCapacity + kInitialCapacity; + } + while (next_size < chunked_required) { + next_size = AlignUp(next_size * 2, kBufferGrowChunk); } + buffer_.resize(next_size); } } // namespace taotu From 24083a4a95e6a076c872bb4d65d6dbaa472299ac Mon Sep 17 00:00:00 2001 From: Sigma711 <1979934715@qq.com> Date: Fri, 6 Feb 2026 01:18:04 +0800 Subject: [PATCH 5/9] [src && build] Reduce poller allocations, fast-path timers, and document configuration --- CMakeLists.txt | 2 +- README.md | 66 ++++++++++++++++- README_zh-Hans.md | 66 ++++++++++++++++- src/event_manager.cc | 3 + src/poller.cc | 171 ++++++++++++++++++++++++++++++++++++------- src/poller.h | 9 ++- src/timer.cc | 8 ++ src/timer.h | 7 ++ 8 files changed, 301 insertions(+), 31 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e7f6c018..f059c64b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ SET(CMAKE_EXPORT_COMPILE_COMMANDS ON) OPTION(TAOTU_ENABLE_CLANG_FORMAT "Enable clang-format checks." OFF) OPTION(TAOTU_ENABLE_CLANG_TIDY "Enable clang-tidy checks." OFF) -SET(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG") +SET(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG -march=native -flto") SET(CMAKE_CXX_FLAGS_DEBUG "-g -O0 -fsanitize=address -DTAOTU_DEBUG") diff --git a/README.md b/README.md index 4c069192..51469b90 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,50 @@ cmake --build build_release -j Notes: - Requires a C++17 compiler, CMake, and liburing. - RPC demo uses protobuf. -- You can tune io_uring entries with `TAOTU_IORING_ENTRIES` if memory is tight. + +## Configuration + +### CMake options + +You can configure build behavior with: + +- `-DCMAKE_BUILD_TYPE=Release|Debug` (or other CMake build types) +- `-DTAOTU_ENABLE_CLANG_TIDY=ON|OFF` (default `OFF`) +- `-DTAOTU_ENABLE_CLANG_FORMAT=ON|OFF` (default `OFF`) + +### Runtime environment variables (core io_uring backend) + +These are read at process startup by `Poller`: + +- `TAOTU_IORING_ENTRIES` + - Default: `32768` + - Clamp range: `[1024, 32768]` + - Effect: io_uring queue depth (can reduce memory footprint when lowered). +- `TAOTU_ENABLE_SQPOLL` + - Default: disabled + - Effect: request io_uring SQPOLL mode (`non-empty` and not `'0'` enables). +- `TAOTU_DISABLE_SQPOLL` + - Default: disabled + - Effect: force-disable SQPOLL (`non-empty` and not `'0'` disables), takes precedence over `TAOTU_ENABLE_SQPOLL`. +- `TAOTU_DISABLE_RECV_MULTISHOT` + - Default: disabled + - Effect: disable recv-multishot + provided-buffer registration path. +- `TAOTU_IORING_SUBMIT_BATCH` + - Default: `1` + - Max clamp: `256` + - Effect: submit SQEs when pending count reaches this threshold (or when forced by the loop). +- `TAOTU_IORING_OP_POOL_LIMIT` + - Default: `65536` + - Max clamp: `1048576` + - Effect: max cached io_uring operation objects per `Poller` for allocation reuse. + +Example: + +```bash +TAOTU_IORING_ENTRIES=16384 \ +TAOTU_IORING_SUBMIT_BATCH=16 \ +./pingpong_server 4567 8 +``` ## Run demos @@ -36,6 +79,27 @@ cd build/output/bin ./simple_echo 4567 4 ``` +### Demo command-line options + +Server demos: + +- `simple_echo [port [io_threads]]` +- `simple_discard [port [io_threads]]` +- `simple_time [port [io_threads]]` +- `http_server [port [io_threads]]` +- `chat_server [port [io_threads]]` +- `pingpong_server [port [io_threads]]` + +Client demos: + +- `chat_client [host_ip] ` (if only `host_ip` is provided, port defaults to `4567`) +- `pingpong_client ` +- `time_service_sync_client` (no CLI args) + +RPC server demo: + +- `time_service_server` (no CLI args) + ## Basic usage High-level flow: diff --git a/README_zh-Hans.md b/README_zh-Hans.md index 473337f0..4fab5639 100644 --- a/README_zh-Hans.md +++ b/README_zh-Hans.md @@ -23,7 +23,50 @@ cmake --build build_release -j 说明: - 需要 C++17 编译器、CMake 和 liburing。 - RPC 示例需要 protobuf。 -- 如果内存吃紧,可以通过 `TAOTU_IORING_ENTRIES` 调小 io_uring 队列大小。 + +## 配置项 + +### CMake 选项 + +可通过以下参数配置构建行为: + +- `-DCMAKE_BUILD_TYPE=Release|Debug`(或其他 CMake 构建类型) +- `-DTAOTU_ENABLE_CLANG_TIDY=ON|OFF`(默认 `OFF`) +- `-DTAOTU_ENABLE_CLANG_FORMAT=ON|OFF`(默认 `OFF`) + +### 运行时环境变量(核心 io_uring 后端) + +以下变量会在进程启动时由 `Poller` 读取: + +- `TAOTU_IORING_ENTRIES` + - 默认值:`32768` + - 限制范围:`[1024, 32768]` + - 作用:设置 io_uring 队列深度(调小可降低内存占用)。 +- `TAOTU_ENABLE_SQPOLL` + - 默认:关闭 + - 作用:请求启用 io_uring 的 SQPOLL 模式(值非空且不为 `'0'` 时启用)。 +- `TAOTU_DISABLE_SQPOLL` + - 默认:关闭 + - 作用:强制关闭 SQPOLL(值非空且不为 `'0'` 时关闭),优先级高于 `TAOTU_ENABLE_SQPOLL`。 +- `TAOTU_DISABLE_RECV_MULTISHOT` + - 默认:关闭 + - 作用:关闭 recv-multishot + provided-buffer 注册路径。 +- `TAOTU_IORING_SUBMIT_BATCH` + - 默认值:`1` + - 最大限制:`256` + - 作用:当待提交 SQE 数达到该阈值时触发提交(事件循环中也可能被强制提交)。 +- `TAOTU_IORING_OP_POOL_LIMIT` + - 默认值:`65536` + - 最大限制:`1048576` + - 作用:每个 `Poller` 可缓存复用的 io_uring 操作对象上限。 + +示例: + +```bash +TAOTU_IORING_ENTRIES=16384 \ +TAOTU_IORING_SUBMIT_BATCH=16 \ +./pingpong_server 4567 8 +``` ## 运行示例 @@ -36,6 +79,27 @@ cd build/output/bin ./simple_echo 4567 4 ``` +### Demo 命令行参数 + +服务端示例: + +- `simple_echo [port [io_threads]]` +- `simple_discard [port [io_threads]]` +- `simple_time [port [io_threads]]` +- `http_server [port [io_threads]]` +- `chat_server [port [io_threads]]` +- `pingpong_server [port [io_threads]]` + +客户端示例: + +- `chat_client [host_ip] `(仅提供 `host_ip` 时,端口默认为 `4567`) +- `pingpong_client ` +- `time_service_sync_client`(无命令行参数) + +RPC 服务端示例: + +- `time_service_server`(无命令行参数) + ## 基本使用 一般流程: diff --git a/src/event_manager.cc b/src/event_manager.cc index c78e266b..a9c5b75d 100644 --- a/src/event_manager.cc +++ b/src/event_manager.cc @@ -255,6 +255,9 @@ void EventManager::DoWithActiveTasks(const TimePoint& return_time) { active_events_.clear(); } void EventManager::DoExpiredTimeTasks(const TimePoint& return_time) { + if (!timer_.HasTasks()) { + return; + } Timer::ExpiredTimeTasks expired_time_tasks = timer_.GetExpiredTimeTasks(); for (auto& expired_time_task : expired_time_tasks) { auto ExpiredTimeCallback = expired_time_task.second; diff --git a/src/poller.cc b/src/poller.cc index 58f79463..93746efe 100644 --- a/src/poller.cc +++ b/src/poller.cc @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -26,6 +27,10 @@ namespace taotu { namespace { constexpr uint32_t kDefaultEntries = 32768; constexpr uint32_t kMinEntries = 1024; +constexpr size_t kDefaultSubmitBatch = 1; +constexpr size_t kMaxSubmitBatch = 256; +constexpr size_t kDefaultOpPoolLimit = 1U << 16; +constexpr size_t kMaxOpPoolLimit = 1U << 20; uint32_t GetIoUringEntries() { const char* env = ::getenv("TAOTU_IORING_ENTRIES"); @@ -45,6 +50,38 @@ uint32_t GetIoUringEntries() { } return static_cast(val); } + +size_t GetSubmitBatch() { + const char* env = ::getenv("TAOTU_IORING_SUBMIT_BATCH"); + if (!env || *env == '\0') { + return kDefaultSubmitBatch; + } + char* end = nullptr; + uint64_t val = ::strtoull(env, &end, 10); + if (end == env || val == 0) { + return kDefaultSubmitBatch; + } + if (val > kMaxSubmitBatch) { + return kMaxSubmitBatch; + } + return static_cast(val); +} + +size_t GetOpPoolLimit() { + const char* env = ::getenv("TAOTU_IORING_OP_POOL_LIMIT"); + if (!env || *env == '\0') { + return kDefaultOpPoolLimit; + } + char* end = nullptr; + uint64_t val = ::strtoull(env, &end, 10); + if (end == env) { + return kDefaultOpPoolLimit; + } + if (val > kMaxOpPoolLimit) { + return kMaxOpPoolLimit; + } + return static_cast(val); +} } // namespace Poller::Poller() { @@ -98,6 +135,9 @@ Poller::Poller() { if (use_sqpoll_) { LOG_DEBUG("io_uring initialized with SQPOLL."); } + submit_batch_ = GetSubmitBatch(); + op_pool_limit_ = GetOpPoolLimit(); + op_pool_.reserve(std::min(op_pool_limit_, static_cast(2048))); struct io_uring_probe* probe = ::io_uring_get_probe_ring(&ring_); if (probe) { if (!::io_uring_opcode_supported(probe, IORING_OP_ACCEPT)) { @@ -110,6 +150,10 @@ Poller::Poller() { } Poller::~Poller() { + for (auto* op : op_pool_) { + delete op; + } + op_pool_.clear(); UnregisterBuffers(); ::io_uring_queue_exit(&ring_); } @@ -125,12 +169,50 @@ Poller::IoUringOp* Poller::DecodeOp(uint64_t token) { return reinterpret_cast(static_cast(token)); } +Poller::IoUringOp* Poller::AcquireOp(OpType type, Eventer* eventer, void* ctx, + int fd, CompletionFn completion, + ContextDeleter context_deleter) { + IoUringOp* op = nullptr; + if (!op_pool_.empty()) { + op = op_pool_.back(); + op_pool_.pop_back(); + } else { + op = new IoUringOp; + } + op->type = type; + op->eventer = eventer; + op->context = ctx; + op->fd = fd; + op->completion = completion; + op->context_deleter = context_deleter; + op->state.store(IoUringOp::State::kInflight, std::memory_order_relaxed); + return op; +} + +void Poller::RecycleOp(IoUringOp* op) { + if (!op) { + return; + } + op->type = OpType::kNone; + op->eventer = nullptr; + op->context = nullptr; + op->fd = -1; + op->completion = nullptr; + op->context_deleter = nullptr; + op->state.store(IoUringOp::State::kInit, std::memory_order_relaxed); + if (op_pool_.size() < op_pool_limit_) { + op_pool_.push_back(op); + } else { + delete op; + } +} + void Poller::AddEventer(Eventer* eventer) { eventer->poll_mask_ = eventer->Events(); eventer->poll_armed_ = false; eventer->poll_token_ = 0; SubmitPoll(eventer); - SubmitPending(); + SubmitPending(true); } void Poller::ModifyEventer(Eventer* eventer) { @@ -140,7 +222,7 @@ void Poller::ModifyEventer(Eventer* eventer) { } else { SubmitPoll(eventer); } - SubmitPending(); + SubmitPending(true); } void Poller::RemoveEventer(Eventer* eventer) { @@ -155,13 +237,17 @@ uint64_t Poller::SubmitRead(Eventer* eventer, struct iovec* iov, int iovcnt, ContextDeleter context_deleter) { (void)key; struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); + if (!sqe) { + SubmitPending(true); + sqe = ::io_uring_get_sqe(&ring_); + } if (!sqe) { LOG_ERROR("io_uring_get_sqe failed when submit read fd(%d)", eventer->Fd()); return 0; } - auto* op = new IoUringOp{OpType::kRead, eventer, ctx, - eventer->Fd(), completion, context_deleter}; - op->state.store(IoUringOp::State::kInflight, std::memory_order_relaxed); + auto* op = + AcquireOp(OpType::kRead, eventer, ctx, eventer->Fd(), completion, + context_deleter); uint64_t token = EncodeOp(op); ::io_uring_prep_readv(sqe, eventer->Fd(), iov, iovcnt, 0); ::io_uring_sqe_set_data64(sqe, token); @@ -176,14 +262,18 @@ uint64_t Poller::SubmitReadMultishot(Eventer* eventer, int buf_group, #ifdef IORING_RECV_MULTISHOT (void)key; struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); + if (!sqe) { + SubmitPending(true); + sqe = ::io_uring_get_sqe(&ring_); + } if (!sqe) { LOG_ERROR("io_uring_get_sqe failed when submit recv-multishot fd(%d)", eventer->Fd()); return 0; } - auto* op = new IoUringOp{OpType::kRead, eventer, ctx, - eventer->Fd(), completion, context_deleter}; - op->state.store(IoUringOp::State::kInflight, std::memory_order_relaxed); + auto* op = + AcquireOp(OpType::kRead, eventer, ctx, eventer->Fd(), completion, + context_deleter); uint64_t token = EncodeOp(op); ::io_uring_prep_recv(sqe, eventer->Fd(), nullptr, 0, 0); sqe->ioprio |= IORING_RECV_MULTISHOT; @@ -208,14 +298,18 @@ uint64_t Poller::SubmitWrite(Eventer* eventer, struct iovec* iov, int iovcnt, ContextDeleter context_deleter) { (void)key; struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); + if (!sqe) { + SubmitPending(true); + sqe = ::io_uring_get_sqe(&ring_); + } if (!sqe) { LOG_ERROR("io_uring_get_sqe failed when submit write fd(%d)", eventer->Fd()); return 0; } - auto* op = new IoUringOp{OpType::kWrite, eventer, ctx, - eventer->Fd(), completion, context_deleter}; - op->state.store(IoUringOp::State::kInflight, std::memory_order_relaxed); + auto* op = + AcquireOp(OpType::kWrite, eventer, ctx, eventer->Fd(), completion, + context_deleter); uint64_t token = EncodeOp(op); ::io_uring_prep_writev(sqe, eventer->Fd(), iov, iovcnt, 0); ::io_uring_sqe_set_data64(sqe, token); @@ -228,13 +322,16 @@ uint64_t Poller::SubmitAccept(int fd, struct sockaddr* addr, socklen_t* addrlen, bool multishot, ContextDeleter context_deleter) { (void)key; struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); + if (!sqe) { + SubmitPending(true); + sqe = ::io_uring_get_sqe(&ring_); + } if (!sqe) { LOG_ERROR("io_uring_get_sqe failed when submit accept fd(%d)", fd); return 0; } - auto* op = new IoUringOp{OpType::kAccept, nullptr, ctx, fd, - completion, context_deleter}; - op->state.store(IoUringOp::State::kInflight, std::memory_order_relaxed); + auto* op = AcquireOp(OpType::kAccept, nullptr, ctx, fd, completion, + context_deleter); uint64_t token = EncodeOp(op); if (multishot && use_multishot_accept_) { #ifdef IORING_ACCEPT_MULTISHOT @@ -267,6 +364,10 @@ bool Poller::CancelOp(uint64_t user_data_key) { op->eventer = nullptr; op->completion = nullptr; struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); + if (!sqe) { + SubmitPending(true); + sqe = ::io_uring_get_sqe(&ring_); + } if (!sqe) { LOG_ERROR("io_uring_get_sqe failed when cancel op"); return true; @@ -277,6 +378,7 @@ bool Poller::CancelOp(uint64_t user_data_key) { return true; } TimePoint Poller::Poll(int timeout, EventerList* active_eventers) { + SubmitPending(true); struct __kernel_timespec ts {}; struct __kernel_timespec* tsp = nullptr; if (timeout >= 0) { @@ -288,12 +390,12 @@ TimePoint Poller::Poll(int timeout, EventerList* active_eventers) { struct io_uring_cqe* cqe = nullptr; int ret = ::io_uring_wait_cqe_timeout(&ring_, &cqe, tsp); if (ret == -ETIME) { - SubmitPending(); + SubmitPending(true); return TimePoint::FromMicroseconds(TimePoint::FNowRaw()); } if (ret < 0) { LOG_ERROR("io_uring_wait_cqe_timeout failed: %s", ::strerror(-ret)); - SubmitPending(); + SubmitPending(true); return TimePoint::FromMicroseconds(TimePoint::FNowRaw()); } @@ -332,7 +434,7 @@ TimePoint Poller::Poll(int timeout, EventerList* active_eventers) { now_us = TimePoint::FNowRaw(); now_cache.Update(now_us); - SubmitPending(); + SubmitPending(true); return TimePoint::FromMicroseconds(now_us); } @@ -359,7 +461,7 @@ void Poller::HandleCqe(struct io_uring_cqe* cqe, EventerList* active_eventers) { ReleaseBufferFromCqe(cqe); op->state.store(IoUringOp::State::kDone, std::memory_order_relaxed); if (!keep_op) { - delete op; + RecycleOp(op); } return; } @@ -368,7 +470,7 @@ void Poller::HandleCqe(struct io_uring_cqe* cqe, EventerList* active_eventers) { ReleaseBufferFromCqe(cqe); if (!keep_op) { op->state.store(IoUringOp::State::kDone, std::memory_order_relaxed); - delete op; + RecycleOp(op); } return; } @@ -428,7 +530,7 @@ void Poller::HandleCqe(struct io_uring_cqe* cqe, EventerList* active_eventers) { } op->state.store(IoUringOp::State::kDone, std::memory_order_relaxed); if (!keep_op) { - delete op; + RecycleOp(op); } } @@ -445,13 +547,17 @@ void Poller::SubmitPoll(Eventer* eventer) { return; } struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); + if (!sqe) { + SubmitPending(true); + sqe = ::io_uring_get_sqe(&ring_); + } if (!sqe) { LOG_ERROR("io_uring_get_sqe failed when arming fd(%d)", eventer->Fd()); return; } - auto* op = new IoUringOp{OpType::kPoll, eventer, nullptr, - eventer->Fd(), nullptr, nullptr}; - op->state.store(IoUringOp::State::kInflight, std::memory_order_relaxed); + auto* op = + AcquireOp(OpType::kPoll, eventer, nullptr, eventer->Fd(), nullptr, + nullptr); uint64_t token = EncodeOp(op); ::io_uring_prep_poll_add(sqe, eventer->Fd(), static_cast(eventer->poll_mask_)); @@ -469,7 +575,14 @@ void Poller::CancelPoll(Eventer* eventer) { eventer->poll_token_ = 0; } -void Poller::SubmitPending() { +void Poller::SubmitPending(bool force) { + const unsigned ready = ::io_uring_sq_ready(&ring_); + if (ready == 0) { + return; + } + if (!force && ready < submit_batch_) { + return; + } int ret = ::io_uring_submit(&ring_); if (ret < 0) { LOG_ERROR("io_uring_submit failed: %s", ::strerror(-ret)); @@ -521,7 +634,7 @@ void Poller::UnregisterBuffers() { } ::io_uring_prep_remove_buffers(sqe, kBufCount, kBufferGroupId); ::io_uring_sqe_set_data64(sqe, 0); - ::io_uring_submit(&ring_); + SubmitPending(true); buffers_registered_ = false; #endif } @@ -541,8 +654,12 @@ void Poller::ReleaseBufferFromCqe(struct io_uring_cqe* cqe) { } struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); if (!sqe) { - LOG_WARN("io_uring_get_sqe failed when release buffer"); - return; + SubmitPending(true); + sqe = ::io_uring_get_sqe(&ring_); + if (!sqe) { + LOG_WARN("io_uring_get_sqe failed when release buffer"); + return; + } } ::io_uring_prep_provide_buffers(sqe, buffers_[bid], kBufSize, 1, kBufferGroupId, bid); diff --git a/src/poller.h b/src/poller.h index ef8e4c6f..b7c711ce 100644 --- a/src/poller.h +++ b/src/poller.h @@ -102,12 +102,16 @@ class Poller : NonCopyableMovable { private: static uint64_t EncodeOp(IoUringOp* op); static IoUringOp* DecodeOp(uint64_t token); + IoUringOp* AcquireOp(OpType type, Eventer* eventer, void* ctx, int fd, + CompletionFn completion, + ContextDeleter context_deleter); + void RecycleOp(IoUringOp* op); void CleanupOpContext(IoUringOp* op); void SubmitPoll(Eventer* eventer); void CancelPoll(Eventer* eventer); void HandleCqe(struct io_uring_cqe* cqe, EventerList* active_eventers); - void SubmitPending(); + void SubmitPending(bool force = false); void RegisterBuffers(); void UnregisterBuffers(); void ReleaseBufferFromCqe(struct io_uring_cqe* cqe); @@ -117,6 +121,9 @@ class Poller : NonCopyableMovable { bool use_multishot_accept_{true}; bool buffers_registered_{false}; std::array buffers_{}; + std::vector op_pool_; + size_t op_pool_limit_{1U << 16}; + size_t submit_batch_{1}; size_t cqe_batch_limit_{1024}; int64_t cqe_time_budget_us_{1000}; }; diff --git a/src/timer.cc b/src/timer.cc index 55f76248..de98c55f 100644 --- a/src/timer.cc +++ b/src/timer.cc @@ -18,9 +18,13 @@ namespace taotu { void Timer::AddTimeTask(const TimePoint& time_point, TimeCallback TimeTask) { LockGuard lock_guard(mutex_lock_); time_points_.insert({time_point, std::move(TimeTask)}); + task_count_.store(time_points_.size(), std::memory_order_relaxed); } int Timer::GetMinTimeDuration() const { + if (!HasTasks()) { + return 10000; + } LockGuard lock_guard(mutex_lock_); if (time_points_.empty()) { return 10000; @@ -33,6 +37,9 @@ int Timer::GetMinTimeDuration() const { Timer::ExpiredTimeTasks Timer::GetExpiredTimeTasks() { ExpiredTimeTasks expired_time_tasks; + if (!HasTasks()) { + return expired_time_tasks; + } { LockGuard lock_guard(mutex_lock_); TimePoints::iterator itr; @@ -42,6 +49,7 @@ Timer::ExpiredTimeTasks Timer::GetExpiredTimeTasks() { expired_time_tasks.emplace_back(itr->first, itr->second); } time_points_.erase(time_points_.begin(), itr); + task_count_.store(time_points_.size(), std::memory_order_relaxed); } return expired_time_tasks; } diff --git a/src/timer.h b/src/timer.h index 395b0abd..6118cce2 100644 --- a/src/timer.h +++ b/src/timer.h @@ -14,6 +14,7 @@ #include #include +#include #include #include "non_copyable_movable.h" @@ -44,12 +45,18 @@ class Timer : NonCopyableMovable { // Get a set of expired time tasks ExpiredTimeTasks GetExpiredTimeTasks(); + bool HasTasks() const { + return task_count_.load(std::memory_order_relaxed) > 0; + } + private: // List of all time tasks (the time points and the corresponding tasks) TimePoints time_points_; // Spin lock protecting the list of all time tasks mutable MutexLock mutex_lock_; + + std::atomic task_count_{0}; }; } // namespace taotu From 46eddb0d2052341daeae6adac9dca9f8a22a48a6 Mon Sep 17 00:00:00 2001 From: Sigma711 <1979934715@qq.com> Date: Fri, 6 Feb 2026 03:14:53 +0800 Subject: [PATCH 6/9] [src && example] Fix recv-multishot end-of-stream handling --- README.md | 6 + README_zh-Hans.md | 6 + example/pingpong/pingpong_client.cc | 11 ++ example/pingpong/pingpong_server.cc | 3 + src/connecting.cc | 297 +++++++++++++++++++++++++++- src/connecting.h | 31 +++ src/poller.cc | 261 ++++++++++++++++++++++-- src/poller.h | 45 ++++- src/timer.h | 2 +- 9 files changed, 627 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 51469b90..b16c55a9 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,12 @@ These are read at process startup by `Poller`: - Default: `65536` - Max clamp: `1048576` - Effect: max cached io_uring operation objects per `Poller` for allocation reuse. +- `TAOTU_IORING_BORROWED_BUFFER_LIMIT` + - Default: `kBufCount/2` (currently `128`) + - Clamp range: `[0, kBufCount]` + - Effect: max number of recv-multishot provided buffers that can be leased + (held past the read CQE) for the borrowed-send fast path; `0` disables + leasing (borrowed send falls back to copy). Example: diff --git a/README_zh-Hans.md b/README_zh-Hans.md index 4fab5639..36c70e13 100644 --- a/README_zh-Hans.md +++ b/README_zh-Hans.md @@ -59,6 +59,12 @@ cmake --build build_release -j - 默认值:`65536` - 最大限制:`1048576` - 作用:每个 `Poller` 可缓存复用的 io_uring 操作对象上限。 +- `TAOTU_IORING_BORROWED_BUFFER_LIMIT` + - 默认值:`kBufCount/2`(当前为 `128`) + - 限制范围:`[0, kBufCount]` + - 作用:recv-multishot 的 provided-buffer 最多允许被“借用/持有”的数量上限 + (读取 CQE 回调结束后暂不归还,用于借用直发);设为 `0` 可禁用借用 + (借用发送会自动回退为拷贝发送)。 示例: diff --git a/example/pingpong/pingpong_client.cc b/example/pingpong/pingpong_client.cc index 32c1413e..d675cc49 100644 --- a/example/pingpong/pingpong_client.cc +++ b/example/pingpong/pingpong_client.cc @@ -159,6 +159,17 @@ void Session::Stop() { client_.StopWithoutQuit(); } void Session::OnConnectionCallback(taotu::Connecting& connection) { if (connection.IsConnected()) { connection.SetTcpNoDelay(true); + connection.RegisterOnBorrowedMessageCallback( + [this](taotu::Connecting& conn, const char*, size_t len, + uint16_t buf_id, taotu::TimePoint) { + if (!conn.SendBorrowed(buf_id, len)) { + return false; + } + messages_read_.fetch_add(1, std::memory_order_relaxed); + bytes_read_.fetch_add(static_cast(len), + std::memory_order_relaxed); + return true; + }); std::shared_ptr master_client(master_client_.lock()); if (master_client) { const auto& message = master_client->GetMessage(); diff --git a/example/pingpong/pingpong_server.cc b/example/pingpong/pingpong_server.cc index 96f352e9..0b375521 100644 --- a/example/pingpong/pingpong_server.cc +++ b/example/pingpong/pingpong_server.cc @@ -41,6 +41,9 @@ void PingpongServer::Start() { server_->Start(); } void PingpongServer::OnConnectionCallback(taotu::Connecting& connection) { if (connection.IsConnected()) { connection.SetTcpNoDelay(true); + connection.RegisterOnBorrowedMessageCallback( + [](taotu::Connecting& conn, const char*, size_t len, uint16_t buf_id, + taotu::TimePoint) { return conn.SendBorrowed(buf_id, len); }); } } void PingpongServer::OnMessageCallback(taotu::Connecting& connection, diff --git a/src/connecting.cc b/src/connecting.cc index 0dfeff18..2a53803b 100644 --- a/src/connecting.cc +++ b/src/connecting.cc @@ -90,7 +90,36 @@ void Connecting::OnReadComplete(struct io_uring_cqe* cqe, ctx->buf_id = static_cast(cqe->flags >> IORING_CQE_BUFFER_SHIFT); } if (ctx->multishot && !more && !has_buffer) { + // recv-multishot ended without a selected buffer. This can happen on EOF, + // transient ENOBUFS (buffer pool empty), or other errors. We must not + // swallow EOF here; otherwise peer shutdown may hang in CLOSE-WAIT. connecting->read_in_flight_ = false; + if (res == 0) { + connecting->DoClosing(); + connecting->CompletePendingIo(); + if (connecting->read_ctx_ == ctx) { + connecting->read_ctx_ = nullptr; + } + ctx->self = nullptr; + op->context = nullptr; + return; + } + if (res < 0 && err == ENOBUFS) { + // For provided-buffer multishot recv, ENOBUFS means the buffer pool is + // temporarily empty. Rearm and continue. + connecting->SubmitReadOnce(); + connecting->CompletePendingIo(); + // SubmitReadOnce reuses ctx storage; keep ctx->self for the new in-flight + // op. + op->context = nullptr; + return; + } + // Other errors: report and stop reading. + if (res < 0 && err != ECANCELED) { + LOG_ERROR("OnReadComplete(multishot-end) error: fd(%d) res(%zd) err(%d)", + connecting->Fd(), res, err); + connecting->DoWithError(err); + } connecting->CompletePendingIo(); if (connecting->read_ctx_ == ctx) { connecting->read_ctx_ = nullptr; @@ -104,12 +133,24 @@ void Connecting::OnReadComplete(struct io_uring_cqe* cqe, } if (res > 0) { bool rearmed = false; + bool consumed = false; // Update the input buffer. if (ctx->multishot && has_buffer) { auto* buf = connecting->event_manager_->GetPoller()->GetBuffer(ctx->buf_id); if (buf) { - connecting->input_buffer_.Append(buf, static_cast(res)); + if (connecting->OnBorrowedMessageCallback_) { + consumed = connecting->OnBorrowedMessageCallback_( + *connecting, buf, static_cast(res), ctx->buf_id, + TimePoint{}); + if (consumed) { + op->skip_buf_release = true; + } else { + connecting->input_buffer_.Append(buf, static_cast(res)); + } + } else { + connecting->input_buffer_.Append(buf, static_cast(res)); + } } else { LOG_WARN("buffer id out of range(%u)", ctx->buf_id); } @@ -123,7 +164,7 @@ void Connecting::OnReadComplete(struct io_uring_cqe* cqe, static_cast(res) - writable); } } - if (connecting->OnMessageCallback_) { + if (!consumed && connecting->OnMessageCallback_) { connecting->OnMessageCallback_(*connecting, &connecting->input_buffer_, TimePoint{}); } @@ -216,6 +257,118 @@ void Connecting::OnWriteComplete(struct io_uring_cqe* cqe, int err = res < 0 ? -res : 0; LOG_DEBUG("Write complete fd(%d) res(%zd) err(%d)", connecting->Fd(), res, err); + if (ctx->borrowed) { + // Borrowed fixed buffer write (may contain multiple iovecs). + auto* poller = connecting->event_manager_->GetPoller(); + if (res > 0) { + size_t sent = static_cast(res); + if (connecting->borrowed_size_ == 0) { + // Queue was cleared unexpectedly; return buffers best-effort. + for (size_t i = 0; i < ctx->borrowed_iovcnt; ++i) { + poller->ReturnBuffer(ctx->borrowed_ids[i]); + } + } else { + for (size_t i = 0; + i < ctx->borrowed_iovcnt && connecting->borrowed_size_ > 0; ++i) { + BorrowedChunk* chunk = + &connecting->borrowed_queue_[connecting->borrowed_head_]; + const size_t seg_len = ctx->borrowed_lens[i]; + if (sent >= seg_len) { + sent -= seg_len; + poller->ReturnBuffer(chunk->buf_id); + connecting->borrowed_head_ = + (connecting->borrowed_head_ + 1) % kBorrowedQueueCap; + --connecting->borrowed_size_; + continue; + } + // Partial within current buffer. + chunk->off += static_cast(sent); + break; + } + } + // Prefer owned pending output (if any), then borrowed queue. + if (connecting->pending_output_buffer_.GetReadableBytes() > 0) { + connecting->output_buffer_.Swap(connecting->pending_output_buffer_); + connecting->SubmitWriteOnce(); + connecting->CompletePendingIo(); + if (connecting->write_ctx_ == ctx) { + connecting->write_ctx_ = nullptr; + } + return; + } + if (connecting->borrowed_size_ > 0) { + connecting->SubmitWriteOnce(); + connecting->CompletePendingIo(); + if (connecting->write_ctx_ == ctx) { + connecting->write_ctx_ = nullptr; + } + return; + } + if (connecting->WriteCompleteCallback_) { + connecting->WriteCompleteCallback_(*connecting); + } + if (Connecting::ConnectionState::kDisconnecting == + connecting->state_.load() && + connecting->output_buffer_.GetReadableBytes() == 0 && + connecting->pending_output_buffer_.GetReadableBytes() == 0 && + connecting->borrowed_size_ == 0) { + connecting->socketer_.ShutdownWrite(); + } + } else { + if (err == EAGAIN || err == EWOULDBLOCK || err == EINTR) { + connecting->SubmitWriteOnce(); + connecting->CompletePendingIo(); + if (connecting->write_ctx_ == ctx) { + connecting->write_ctx_ = nullptr; + } + return; + } + // Fatal error: drop the borrowed buffers in this write attempt and + // continue. + if (connecting->borrowed_size_ == 0) { + for (size_t i = 0; i < ctx->borrowed_iovcnt; ++i) { + poller->ReturnBuffer(ctx->borrowed_ids[i]); + } + } else { + for (size_t i = 0; + i < ctx->borrowed_iovcnt && connecting->borrowed_size_ > 0; ++i) { + BorrowedChunk* chunk = + &connecting->borrowed_queue_[connecting->borrowed_head_]; + poller->ReturnBuffer(chunk->buf_id); + connecting->borrowed_head_ = + (connecting->borrowed_head_ + 1) % kBorrowedQueueCap; + --connecting->borrowed_size_; + } + } + LOG_ERROR("OnWriteComplete(borrowed) error: fd(%d) res(%zd) err(%d)", + connecting->Fd(), res, err); + connecting->DoWithError(err); + if (connecting->pending_output_buffer_.GetReadableBytes() > 0) { + connecting->output_buffer_.Swap(connecting->pending_output_buffer_); + connecting->SubmitWriteOnce(); + connecting->CompletePendingIo(); + if (connecting->write_ctx_ == ctx) { + connecting->write_ctx_ = nullptr; + } + return; + } + if (connecting->borrowed_size_ > 0) { + connecting->SubmitWriteOnce(); + connecting->CompletePendingIo(); + if (connecting->write_ctx_ == ctx) { + connecting->write_ctx_ = nullptr; + } + return; + } + } + connecting->CompletePendingIo(); + if (connecting->write_ctx_ == ctx) { + connecting->write_ctx_ = nullptr; + } + ctx->self = nullptr; + op->context = nullptr; + return; + } if (res > 0) { connecting->output_buffer_.Refresh(static_cast(res)); if (connecting->output_buffer_.GetReadableBytes() > 0) { @@ -230,13 +383,22 @@ void Connecting::OnWriteComplete(struct io_uring_cqe* cqe, } return; } + if (connecting->borrowed_size_ > 0) { + connecting->SubmitWriteOnce(); + connecting->CompletePendingIo(); + if (connecting->write_ctx_ == ctx) { + connecting->write_ctx_ = nullptr; + } + return; + } if (connecting->WriteCompleteCallback_) { connecting->WriteCompleteCallback_(*connecting); } if (Connecting::ConnectionState::kDisconnecting == connecting->state_.load() && connecting->output_buffer_.GetReadableBytes() == 0 && - connecting->pending_output_buffer_.GetReadableBytes() == 0) { + connecting->pending_output_buffer_.GetReadableBytes() == 0 && + connecting->borrowed_size_ == 0) { connecting->socketer_.ShutdownWrite(); } } @@ -335,27 +497,82 @@ void Connecting::DoWriting() { } } void Connecting::SubmitWriteOnce() { - if (write_in_flight_ || output_buffer_.GetReadableBytes() == 0) { + if (write_in_flight_) { + return; + } + const size_t readable = output_buffer_.GetReadableBytes(); + if (readable == 0 && borrowed_size_ == 0) { return; } auto* ctx = &write_ctx_storage_; ctx->self = this; write_ctx_ = ctx; ctx->key = 0; - ctx->to_send = output_buffer_.GetReadableBytes(); - ctx->iov.iov_base = const_cast(output_buffer_.GetReadablePosition()); - ctx->iov.iov_len = ctx->to_send; + ctx->borrowed = false; + ctx->borrowed_iovcnt = 0; + auto* poller = event_manager_->GetPoller(); + // ctx->key = next_io_key_++; // Deprecated // write_cancel_key_ = ctx->key; write_in_flight_ = true; - uint64_t key = event_manager_->GetPoller()->SubmitWrite( - &eventer_, &ctx->iov, 1, &Connecting::OnWriteComplete, ctx, 0, nullptr); + uint64_t key = 0; + if (readable > 0) { + ctx->to_send = readable; + ctx->iov.iov_base = const_cast(output_buffer_.GetReadablePosition()); + ctx->iov.iov_len = ctx->to_send; + key = poller->SubmitWrite(&eventer_, &ctx->iov, 1, + &Connecting::OnWriteComplete, ctx, 0, nullptr); + } else { + ctx->borrowed = true; + const size_t iov_max = Poller::IoUringOp::kProvidedIovMax; + const size_t cnt = borrowed_size_ < iov_max ? borrowed_size_ : iov_max; + std::array ids{}; + std::array offs{}; + std::array lens{}; + size_t total = 0; + for (size_t i = 0; i < cnt; ++i) { + const size_t idx = (borrowed_head_ + i) % kBorrowedQueueCap; + const BorrowedChunk& chunk = borrowed_queue_[idx]; + ids[i] = chunk.buf_id; + offs[i] = static_cast(chunk.off); + lens[i] = static_cast(chunk.len - chunk.off); + ctx->borrowed_ids[i] = ids[i]; + ctx->borrowed_lens[i] = lens[i]; + total += lens[i]; + } + ctx->borrowed_iovcnt = cnt; + ctx->to_send = total; + ctx->iov.iov_base = nullptr; + ctx->iov.iov_len = 0; + key = poller->SubmitWriteProvidedBuffers( + &eventer_, ids.data(), offs.data(), lens.data(), cnt, + &Connecting::OnWriteComplete, ctx, 0, nullptr); + } if (key == 0) { write_in_flight_ = false; write_cancel_key_ = 0; if (write_ctx_ == ctx) { write_ctx_ = nullptr; } + if (ctx->borrowed && borrowed_size_ > 0 && ctx->borrowed_iovcnt > 0) { + // Fallback: keep correctness by copying and returning the buffers. + for (size_t i = 0; i < ctx->borrowed_iovcnt && borrowed_size_ > 0; ++i) { + BorrowedChunk* chunk = &borrowed_queue_[borrowed_head_]; + char* buf = poller->GetBuffer(chunk->buf_id); + if (buf && chunk->off < chunk->len) { + output_buffer_.Append(buf + chunk->off, + static_cast(chunk->len - chunk->off)); + } + poller->ReturnBuffer(chunk->buf_id); + borrowed_head_ = (borrowed_head_ + 1) % kBorrowedQueueCap; + --borrowed_size_; + } + if (output_buffer_.GetReadableBytes() > 0 && !write_in_flight_) { + SubmitWriteOnce(); + // SubmitWriteOnce reuses ctx; keep ctx->self for the new in-flight op. + return; + } + } ctx->self = nullptr; return; } @@ -485,6 +702,40 @@ void Connecting::Send(IoBuffer* io_buffer) { SubmitWriteOnce(); } +bool Connecting::SendBorrowed(uint16_t buf_id, size_t len) { + if (len == 0) { + return false; + } + if (ConnectionState::kDisconnected == state_.load()) { + return false; + } + if (ConnectionState::kConnected != state_.load()) { + return false; + } + auto* poller = event_manager_->GetPoller(); + if (!poller->BuffersRegistered()) { + return false; + } + if (poller->GetBuffer(buf_id) == nullptr) { + return false; + } + if (borrowed_size_ >= kBorrowedQueueCap) { + return false; + } + if (!poller->TryLeaseBuffer(buf_id)) { + return false; + } + const size_t tail = (borrowed_head_ + borrowed_size_) % kBorrowedQueueCap; + borrowed_queue_[tail].buf_id = buf_id; + borrowed_queue_[tail].len = static_cast(len); + borrowed_queue_[tail].off = 0; + ++borrowed_size_; + if (!write_in_flight_) { + SubmitWriteOnce(); + } + return true; +} + void Connecting::ShutDownWrite() { if (ConnectionState::kConnected == state_.load()) { SetState(ConnectionState::kDisconnecting); @@ -513,6 +764,15 @@ void Connecting::ForceCloseAfter(int64_t delay_microseconds) { } void Connecting::CancelPendingIo() { + std::array + inflight_borrowed_ids{}; + size_t inflight_borrowed_cnt = 0; + if (write_in_flight_ && write_ctx_ && write_ctx_->borrowed) { + inflight_borrowed_cnt = write_ctx_->borrowed_iovcnt; + for (size_t i = 0; i < inflight_borrowed_cnt; ++i) { + inflight_borrowed_ids[i] = write_ctx_->borrowed_ids[i]; + } + } if (read_in_flight_) { if (read_cancel_key_ != 0) { (void)event_manager_->GetPoller()->CancelOp(read_cancel_key_); @@ -537,6 +797,25 @@ void Connecting::CancelPendingIo() { CompletePendingIo(); write_in_flight_ = false; } + if (borrowed_size_ > 0) { + auto* poller = event_manager_->GetPoller(); + for (size_t i = 0; i < borrowed_size_; ++i) { + const size_t idx = (borrowed_head_ + i) % kBorrowedQueueCap; + bool skip = false; + for (size_t j = 0; j < inflight_borrowed_cnt; ++j) { + if (borrowed_queue_[idx].buf_id == inflight_borrowed_ids[j]) { + skip = true; + break; + } + } + if (skip) { + continue; + } + poller->ReturnBuffer(borrowed_queue_[idx].buf_id); + } + borrowed_head_ = 0; + borrowed_size_ = 0; + } } std::string Connecting::GetConnectionStateInfo(ConnectionState state) { diff --git a/src/connecting.h b/src/connecting.h index 7a1d3aba..01c914b4 100644 --- a/src/connecting.h +++ b/src/connecting.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,12 @@ class Connecting : NonCopyableMovable { typedef std::function NormalCallback; typedef std::function OnMessageCallback; + // For recv-multishot provided buffers. Return true to "consume/lease" the + // buffer (Poller will NOT auto-return it); the user must ensure the buffer is + // eventually returned (e.g., by calling SendBorrowed()). + typedef std::function + OnBorrowedMessageCallback; typedef std::function HighWaterMarkCallback; Connecting(EventManager* event_manager, int socket_fd, @@ -74,6 +81,9 @@ class Connecting : NonCopyableMovable { void RegisterOnMessageCallback(const OnMessageCallback& cb) { OnMessageCallback_ = cb; } + void RegisterOnBorrowedMessageCallback(const OnBorrowedMessageCallback& cb) { + OnBorrowedMessageCallback_ = cb; + } void RegisterWriteCallback(const NormalCallback& cb) { WriteCompleteCallback_ = cb; } @@ -118,6 +128,11 @@ class Connecting : NonCopyableMovable { // Send the message (asynchronously at most time) void Send(IoBuffer* io_buffer); + // Send a Poller-provided recv buffer (buf_id from IORING_CQE_F_BUFFER) + // without copying into IoBuffer. Returns false if it can't be queued and + // caller should fallback to copying. + bool SendBorrowed(uint16_t buf_id, size_t len); + // Shut down the writing end (close half == stop writing indeed) void ShutDownWrite(); @@ -161,6 +176,16 @@ class Connecting : NonCopyableMovable { struct iovec iov {}; size_t to_send{0}; uint64_t key{0}; + bool borrowed{false}; + size_t borrowed_iovcnt{0}; + std::array borrowed_ids{}; + std::array borrowed_lens{}; + }; + + struct BorrowedChunk { + uint16_t buf_id{0}; + uint32_t len{0}; + uint32_t off{0}; }; void SubmitReadOnce(); @@ -207,6 +232,7 @@ class Connecting : NonCopyableMovable { // Callback function which will be called after each reading OnMessageCallback OnMessageCallback_; + OnBorrowedMessageCallback OnBorrowedMessageCallback_; // Callback function which will be called after each real writing NormalCallback WriteCompleteCallback_; @@ -249,6 +275,11 @@ class Connecting : NonCopyableMovable { int pending_io_wait_ms_{0}; int pending_io_retries_{0}; + static constexpr size_t kBorrowedQueueCap = 8; + std::array borrowed_queue_{}; + size_t borrowed_head_{0}; + size_t borrowed_size_{0}; + // Context for any object bound std::any context_; }; diff --git a/src/poller.cc b/src/poller.cc index 93746efe..0eec8303 100644 --- a/src/poller.cc +++ b/src/poller.cc @@ -31,6 +31,7 @@ constexpr size_t kDefaultSubmitBatch = 1; constexpr size_t kMaxSubmitBatch = 256; constexpr size_t kDefaultOpPoolLimit = 1U << 16; constexpr size_t kMaxOpPoolLimit = 1U << 20; +constexpr size_t kDefaultBorrowedBufLimit = Poller::kBufCount / 2; uint32_t GetIoUringEntries() { const char* env = ::getenv("TAOTU_IORING_ENTRIES"); @@ -82,6 +83,22 @@ size_t GetOpPoolLimit() { } return static_cast(val); } + +size_t GetBorrowedBufLimit() { + const char* env = ::getenv("TAOTU_IORING_BORROWED_BUFFER_LIMIT"); + if (!env || *env == '\0') { + return kDefaultBorrowedBufLimit; + } + char* end = nullptr; + uint64_t val = ::strtoull(env, &end, 10); + if (end == env) { + return kDefaultBorrowedBufLimit; + } + if (val > Poller::kBufCount) { + return Poller::kBufCount; + } + return static_cast(val); +} } // namespace Poller::Poller() { @@ -137,6 +154,7 @@ Poller::Poller() { } submit_batch_ = GetSubmitBatch(); op_pool_limit_ = GetOpPoolLimit(); + leased_buffer_limit_ = GetBorrowedBufLimit(); op_pool_.reserve(std::min(op_pool_limit_, static_cast(2048))); struct io_uring_probe* probe = ::io_uring_get_probe_ring(&ring_); if (probe) { @@ -185,6 +203,11 @@ Poller::IoUringOp* Poller::AcquireOp(OpType type, Eventer* eventer, void* ctx, op->fd = fd; op->completion = completion; op->context_deleter = context_deleter; + op->skip_buf_release = false; + op->uses_provided_buffer = false; + op->provided_buf_count = 0; + op->provided_iovs[0].iov_base = nullptr; + op->provided_iovs[0].iov_len = 0; op->state.store(IoUringOp::State::kInflight, std::memory_order_relaxed); return op; } @@ -199,6 +222,11 @@ void Poller::RecycleOp(IoUringOp* op) { op->fd = -1; op->completion = nullptr; op->context_deleter = nullptr; + op->skip_buf_release = false; + op->uses_provided_buffer = false; + op->provided_buf_count = 0; + op->provided_iovs[0].iov_base = nullptr; + op->provided_iovs[0].iov_len = 0; op->state.store(IoUringOp::State::kInit, std::memory_order_relaxed); if (op_pool_.size() < op_pool_limit_) { op_pool_.push_back(op); @@ -245,9 +273,8 @@ uint64_t Poller::SubmitRead(Eventer* eventer, struct iovec* iov, int iovcnt, LOG_ERROR("io_uring_get_sqe failed when submit read fd(%d)", eventer->Fd()); return 0; } - auto* op = - AcquireOp(OpType::kRead, eventer, ctx, eventer->Fd(), completion, - context_deleter); + auto* op = AcquireOp(OpType::kRead, eventer, ctx, eventer->Fd(), completion, + context_deleter); uint64_t token = EncodeOp(op); ::io_uring_prep_readv(sqe, eventer->Fd(), iov, iovcnt, 0); ::io_uring_sqe_set_data64(sqe, token); @@ -271,9 +298,8 @@ uint64_t Poller::SubmitReadMultishot(Eventer* eventer, int buf_group, eventer->Fd()); return 0; } - auto* op = - AcquireOp(OpType::kRead, eventer, ctx, eventer->Fd(), completion, - context_deleter); + auto* op = AcquireOp(OpType::kRead, eventer, ctx, eventer->Fd(), completion, + context_deleter); uint64_t token = EncodeOp(op); ::io_uring_prep_recv(sqe, eventer->Fd(), nullptr, 0, 0); sqe->ioprio |= IORING_RECV_MULTISHOT; @@ -307,9 +333,8 @@ uint64_t Poller::SubmitWrite(Eventer* eventer, struct iovec* iov, int iovcnt, eventer->Fd()); return 0; } - auto* op = - AcquireOp(OpType::kWrite, eventer, ctx, eventer->Fd(), completion, - context_deleter); + auto* op = AcquireOp(OpType::kWrite, eventer, ctx, eventer->Fd(), completion, + context_deleter); uint64_t token = EncodeOp(op); ::io_uring_prep_writev(sqe, eventer->Fd(), iov, iovcnt, 0); ::io_uring_sqe_set_data64(sqe, token); @@ -317,6 +342,65 @@ uint64_t Poller::SubmitWrite(Eventer* eventer, struct iovec* iov, int iovcnt, return token; } +uint64_t Poller::SubmitWriteProvidedBuffer(Eventer* eventer, uint16_t buf_id, + size_t offset, size_t len, + CompletionFn completion, void* ctx, + uint64_t key, + ContextDeleter context_deleter) { +#ifdef IORING_RECV_MULTISHOT + (void)key; + if (!buffers_registered_) { + return 0; + } + if (!buffers_) { + return 0; + } + if (buf_id >= kBufCount) { + LOG_WARN("SubmitWriteProvidedBuffer: buffer id out of range: %u", buf_id); + return 0; + } + if (len == 0 || offset >= kBufSize || (offset + len) > kBufSize) { + LOG_WARN("SubmitWriteProvidedBuffer: invalid range (id=%u off=%zu len=%zu)", + buf_id, offset, len); + return 0; + } + struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); + if (!sqe) { + SubmitPending(true); + sqe = ::io_uring_get_sqe(&ring_); + } + if (!sqe) { + LOG_ERROR("io_uring_get_sqe failed when submit provided write fd(%d)", + eventer->Fd()); + return 0; + } + auto* op = AcquireOp(OpType::kWrite, eventer, ctx, eventer->Fd(), completion, + context_deleter); + op->uses_provided_buffer = true; + op->provided_buf_count = 1; + op->provided_buf_ids[0] = buf_id; + op->provided_iovs[0].iov_base = static_cast( + buffers_.get() + (static_cast(buf_id) * kBufSize) + + static_cast(offset)); + op->provided_iovs[0].iov_len = len; + uint64_t token = EncodeOp(op); + ::io_uring_prep_writev(sqe, eventer->Fd(), op->provided_iovs.data(), 1, 0); + ::io_uring_sqe_set_data64(sqe, token); + SubmitPending(); + return token; +#else + (void)eventer; + (void)buf_id; + (void)offset; + (void)len; + (void)completion; + (void)ctx; + (void)key; + (void)context_deleter; + return 0; +#endif +} + uint64_t Poller::SubmitAccept(int fd, struct sockaddr* addr, socklen_t* addrlen, void* ctx, CompletionFn completion, uint64_t key, bool multishot, ContextDeleter context_deleter) { @@ -330,8 +414,8 @@ uint64_t Poller::SubmitAccept(int fd, struct sockaddr* addr, socklen_t* addrlen, LOG_ERROR("io_uring_get_sqe failed when submit accept fd(%d)", fd); return 0; } - auto* op = AcquireOp(OpType::kAccept, nullptr, ctx, fd, completion, - context_deleter); + auto* op = + AcquireOp(OpType::kAccept, nullptr, ctx, fd, completion, context_deleter); uint64_t token = EncodeOp(op); if (multishot && use_multishot_accept_) { #ifdef IORING_ACCEPT_MULTISHOT @@ -438,6 +522,84 @@ TimePoint Poller::Poll(int timeout, EventerList* active_eventers) { return TimePoint::FromMicroseconds(now_us); } +uint64_t Poller::SubmitWriteProvidedBuffers( + Eventer* eventer, const uint16_t* buf_ids, const size_t* offsets, + const size_t* lens, size_t count, CompletionFn completion, void* ctx, + uint64_t key, ContextDeleter context_deleter) { +#ifdef IORING_RECV_MULTISHOT + (void)key; + if (!buffers_registered_) { + return 0; + } + if (!buffers_) { + return 0; + } + if (!buf_ids || !offsets || !lens || count == 0) { + return 0; + } + if (count > IoUringOp::kProvidedIovMax) { + LOG_WARN("SubmitWriteProvidedBuffers: too many iovecs: %zu", count); + return 0; + } + for (size_t i = 0; i < count; ++i) { + const uint16_t bid = buf_ids[i]; + const size_t off = offsets[i]; + const size_t len = lens[i]; + if (bid >= kBufCount) { + LOG_WARN("SubmitWriteProvidedBuffers: buffer id out of range: %u", bid); + return 0; + } + if (len == 0 || off >= kBufSize || (off + len) > kBufSize) { + LOG_WARN( + "SubmitWriteProvidedBuffers: invalid range (id=%u off=%zu len=%zu)", + bid, off, len); + return 0; + } + } + struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); + if (!sqe) { + SubmitPending(true); + sqe = ::io_uring_get_sqe(&ring_); + } + if (!sqe) { + LOG_ERROR("io_uring_get_sqe failed when submit provided write fd(%d)", + eventer->Fd()); + return 0; + } + auto* op = AcquireOp(OpType::kWrite, eventer, ctx, eventer->Fd(), completion, + context_deleter); + op->uses_provided_buffer = true; + op->provided_buf_count = static_cast(count); + for (size_t i = 0; i < count; ++i) { + const uint16_t bid = buf_ids[i]; + const size_t off = offsets[i]; + const size_t len = lens[i]; + op->provided_buf_ids[i] = bid; + op->provided_iovs[i].iov_base = static_cast( + buffers_.get() + (static_cast(bid) * kBufSize) + + static_cast(off)); + op->provided_iovs[i].iov_len = len; + } + uint64_t token = EncodeOp(op); + ::io_uring_prep_writev(sqe, eventer->Fd(), op->provided_iovs.data(), + static_cast(count), 0); + ::io_uring_sqe_set_data64(sqe, token); + SubmitPending(); + return token; +#else + (void)eventer; + (void)buf_ids; + (void)offsets; + (void)lens; + (void)count; + (void)completion; + (void)ctx; + (void)key; + (void)context_deleter; + return 0; +#endif +} + void Poller::HandleCqe(struct io_uring_cqe* cqe, EventerList* active_eventers) { uint64_t token = cqe->user_data; if (token == 0) { @@ -455,6 +617,7 @@ void Poller::HandleCqe(struct io_uring_cqe* cqe, EventerList* active_eventers) { reinterpret_cast(op->completion)); bool keep_op = (cqe->flags & IORING_CQE_F_MORE) != 0; if (op->completion) { + op->skip_buf_release = false; if ((op->type == OpType::kRead || op->type == OpType::kWrite) && op->eventer == nullptr) { CleanupOpContext(op); @@ -467,7 +630,9 @@ void Poller::HandleCqe(struct io_uring_cqe* cqe, EventerList* active_eventers) { } LOG_DEBUG("Call completion for type(%d)", static_cast(op->type)); op->completion(cqe, op); - ReleaseBufferFromCqe(cqe); + if (!op->skip_buf_release) { + ReleaseBufferFromCqe(cqe); + } if (!keep_op) { op->state.store(IoUringOp::State::kDone, std::memory_order_relaxed); RecycleOp(op); @@ -528,6 +693,11 @@ void Poller::HandleCqe(struct io_uring_cqe* cqe, EventerList* active_eventers) { case OpType::kNone: break; } + if (op->uses_provided_buffer && op->type == OpType::kWrite) { + for (size_t i = 0; i < op->provided_buf_count; ++i) { + ReturnBuffer(op->provided_buf_ids[i]); + } + } op->state.store(IoUringOp::State::kDone, std::memory_order_relaxed); if (!keep_op) { RecycleOp(op); @@ -555,9 +725,8 @@ void Poller::SubmitPoll(Eventer* eventer) { LOG_ERROR("io_uring_get_sqe failed when arming fd(%d)", eventer->Fd()); return; } - auto* op = - AcquireOp(OpType::kPoll, eventer, nullptr, eventer->Fd(), nullptr, - nullptr); + auto* op = AcquireOp(OpType::kPoll, eventer, nullptr, eventer->Fd(), nullptr, + nullptr); uint64_t token = EncodeOp(op); ::io_uring_prep_poll_add(sqe, eventer->Fd(), static_cast(eventer->poll_mask_)); @@ -597,6 +766,9 @@ void Poller::RegisterBuffers() { LOG_DEBUG("recv-multishot disabled by TAOTU_DISABLE_RECV_MULTISHOT."); return; } + if (!buffers_) { + buffers_.reset(new char[kBufCount * kBufSize]); + } struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); if (!sqe) { LOG_WARN("io_uring_get_sqe failed when registering buffers, skip."); @@ -604,7 +776,7 @@ void Poller::RegisterBuffers() { } struct io_uring_recvmsg_out out; // dummy to silence potential warnings (void)out; - ::io_uring_prep_provide_buffers(sqe, buffers_.data(), kBufSize, kBufCount, + ::io_uring_prep_provide_buffers(sqe, buffers_.get(), kBufSize, kBufCount, kBufferGroupId, 0); ::io_uring_sqe_set_data64(sqe, 0); // ignored CQE int ret = ::io_uring_submit(&ring_); @@ -652,6 +824,28 @@ void Poller::ReleaseBufferFromCqe(struct io_uring_cqe* cqe) { LOG_WARN("buffer id out of range: %u", bid); return; } + ReturnBuffer(bid); +#endif +} + +void Poller::ReturnBuffer(uint16_t bid) { +#ifdef IORING_RECV_MULTISHOT + if (!buffers_registered_) { + return; + } + if (!buffers_) { + return; + } + if (bid >= kBufCount) { + LOG_WARN("buffer id out of range: %u", bid); + return; + } + if (leased_buffers_[bid]) { + leased_buffers_[bid] = 0; + if (leased_buffer_count_ > 0) { + --leased_buffer_count_; + } + } struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); if (!sqe) { SubmitPending(true); @@ -661,17 +855,44 @@ void Poller::ReleaseBufferFromCqe(struct io_uring_cqe* cqe) { return; } } - ::io_uring_prep_provide_buffers(sqe, buffers_[bid], kBufSize, 1, - kBufferGroupId, bid); + ::io_uring_prep_provide_buffers( + sqe, buffers_.get() + (static_cast(bid) * kBufSize), kBufSize, 1, + kBufferGroupId, bid); ::io_uring_sqe_set_data64(sqe, 0); #endif } char* Poller::GetBuffer(uint16_t id) { - if (!buffers_registered_ || id >= kBufCount) { + if (!buffers_registered_ || !buffers_ || id >= kBufCount) { return nullptr; } - return buffers_[id]; + return buffers_.get() + (static_cast(id) * kBufSize); +} + +bool Poller::TryLeaseBuffer(uint16_t id) { +#ifdef IORING_RECV_MULTISHOT + if (!buffers_registered_) { + return false; + } + if (id >= kBufCount) { + return false; + } + if (leased_buffer_limit_ == 0) { + return false; + } + if (leased_buffers_[id]) { + return false; + } + if (leased_buffer_count_ >= leased_buffer_limit_) { + return false; + } + leased_buffers_[id] = 1; + ++leased_buffer_count_; + return true; +#else + (void)id; + return false; +#endif } } // namespace taotu diff --git a/src/poller.h b/src/poller.h index b7c711ce..53e416a0 100644 --- a/src/poller.h +++ b/src/poller.h @@ -51,6 +51,19 @@ class Poller : NonCopyableMovable { CompletionFn completion{nullptr}; ContextDeleter context_deleter{nullptr}; std::atomic state{State::kInit}; + + // Per-CQE flag: completion may set this to keep the provided recv buffer + // (IORING_CQE_F_BUFFER) until it is explicitly returned later. + bool skip_buf_release{false}; + + // For write ops that use Poller's provided buffers (see + // SubmitWriteProvidedBuffer). If the op gets canceled (completion cleared), + // Poller will return the buffer using this metadata. + static constexpr size_t kProvidedIovMax = 8; + bool uses_provided_buffer{false}; + uint8_t provided_buf_count{0}; + std::array provided_buf_ids{}; + std::array provided_iovs{}; }; Poller(); @@ -76,6 +89,17 @@ class Poller : NonCopyableMovable { CompletionFn completion = nullptr, void* ctx = nullptr, uint64_t key = 0, ContextDeleter context_deleter = nullptr); + uint64_t SubmitWriteProvidedBuffer(Eventer* eventer, uint16_t buf_id, + size_t offset, size_t len, + CompletionFn completion = nullptr, + void* ctx = nullptr, uint64_t key = 0, + ContextDeleter context_deleter = nullptr); + uint64_t SubmitWriteProvidedBuffers(Eventer* eventer, const uint16_t* buf_ids, + const size_t* offsets, const size_t* lens, + size_t count, + CompletionFn completion = nullptr, + void* ctx = nullptr, uint64_t key = 0, + ContextDeleter context_deleter = nullptr); uint64_t SubmitAccept(int fd, struct sockaddr* addr, socklen_t* addrlen, void* ctx, CompletionFn completion = nullptr, uint64_t key = 0, bool multishot = false, @@ -93,18 +117,22 @@ class Poller : NonCopyableMovable { bool UseMultishotAccept() const { return use_multishot_accept_; } bool BuffersRegistered() const { return buffers_registered_; } size_t BufferCount() const { return kBufCount; } - // The buffer pointer is only valid during the completion callback. + // The buffer pointer is valid while Poller's provided-buffer pool is + // registered. For recv-multishot buffers, it must be returned via + // ReturnBuffer() when no longer needed (Poller auto-returns it unless + // completion sets IoUringOp::skip_buf_release=true). char* GetBuffer(uint16_t id); + void ReturnBuffer(uint16_t id); + bool TryLeaseBuffer(uint16_t id); static constexpr int kBufferGroupId = 1; static constexpr size_t kBufSize = 64 * 1024; - static constexpr size_t kBufCount = 64; + static constexpr size_t kBufCount = 256; private: static uint64_t EncodeOp(IoUringOp* op); static IoUringOp* DecodeOp(uint64_t token); IoUringOp* AcquireOp(OpType type, Eventer* eventer, void* ctx, int fd, - CompletionFn completion, - ContextDeleter context_deleter); + CompletionFn completion, ContextDeleter context_deleter); void RecycleOp(IoUringOp* op); void CleanupOpContext(IoUringOp* op); @@ -120,7 +148,14 @@ class Poller : NonCopyableMovable { bool use_sqpoll_{false}; bool use_multishot_accept_{true}; bool buffers_registered_{false}; - std::array buffers_{}; + // Large provided-buffer pool for recv-multishot. Stored on heap to avoid + // inflating Poller size (EventManager is often stack-allocated in tests). + // Not value-initialized to avoid touching (zeroing) large memory on Poller + // construction. + std::unique_ptr buffers_{}; + std::array leased_buffers_{}; // 0/1 + size_t leased_buffer_count_{0}; + size_t leased_buffer_limit_{0}; std::vector op_pool_; size_t op_pool_limit_{1U << 16}; size_t submit_batch_{1}; diff --git a/src/timer.h b/src/timer.h index 6118cce2..d235cc7a 100644 --- a/src/timer.h +++ b/src/timer.h @@ -12,9 +12,9 @@ #ifndef TAOTU_SRC_TIMER_H_ #define TAOTU_SRC_TIMER_H_ +#include #include #include -#include #include #include "non_copyable_movable.h" From 51662d18f142f1dc83626701ed7d2904d6dea4d5 Mon Sep 17 00:00:00 2001 From: Sigma711 <1979934715@qq.com> Date: Sat, 7 Feb 2026 17:33:40 +0800 Subject: [PATCH 7/9] [src && build] Enable buf_ring and tune io_uring poll loop --- README.md | 13 +++- README_zh-Hans.md | 11 +++- src/event_manager.cc | 17 +++-- src/poller.cc | 148 +++++++++++++++++++++++++++++++++++-------- src/poller.h | 8 ++- src/timer.cc | 11 +++- src/timer.h | 2 + 7 files changed, 171 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index b16c55a9..eb44f52c 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,17 @@ You can configure build behavior with: These are read at process startup by `Poller`: +- `TAOTU_ENABLE_BUF_RING` + - Default: enabled (best effort) + - Effect: enable io_uring buffer ring (`buf_ring`) for provided buffers + (values non-empty and not `'0'` enable). When enabled and supported by the + running kernel/liburing, returning a recv-multishot provided buffer becomes + a user-space operation (no extra `IORING_OP_PROVIDE_BUFFERS` SQE). +- `TAOTU_DISABLE_BUF_RING` + - Default: disabled + - Effect: force-disable `buf_ring` provided-buffer path and fallback to the + legacy `IORING_OP_PROVIDE_BUFFERS` path (`non-empty` and not `'0'` disables), + takes precedence over `TAOTU_ENABLE_BUF_RING`. - `TAOTU_IORING_ENTRIES` - Default: `32768` - Clamp range: `[1024, 32768]` @@ -52,7 +63,7 @@ These are read at process startup by `Poller`: - Default: disabled - Effect: disable recv-multishot + provided-buffer registration path. - `TAOTU_IORING_SUBMIT_BATCH` - - Default: `1` + - Default: `16` - Max clamp: `256` - Effect: submit SQEs when pending count reaches this threshold (or when forced by the loop). - `TAOTU_IORING_OP_POOL_LIMIT` diff --git a/README_zh-Hans.md b/README_zh-Hans.md index 36c70e13..8a29424f 100644 --- a/README_zh-Hans.md +++ b/README_zh-Hans.md @@ -38,6 +38,15 @@ cmake --build build_release -j 以下变量会在进程启动时由 `Poller` 读取: +- `TAOTU_ENABLE_BUF_RING` + - 默认:开启(尽力启用) + - 作用:启用 io_uring 的 buffer ring(`buf_ring`)作为 provided-buffer 的回收机制 + (值非空且不为 `'0'` 时启用)。当开启且系统内核/liburing 支持时,recv-multishot + 的 buffer 归还会变成纯用户态操作(不再需要额外提交 `IORING_OP_PROVIDE_BUFFERS` SQE)。 +- `TAOTU_DISABLE_BUF_RING` + - 默认:关闭 + - 作用:强制关闭 `buf_ring` 的 provided-buffer 路径,回退到旧的 + `IORING_OP_PROVIDE_BUFFERS` 机制(值非空且不为 `'0'` 时禁用),优先级高于 `TAOTU_ENABLE_BUF_RING`。 - `TAOTU_IORING_ENTRIES` - 默认值:`32768` - 限制范围:`[1024, 32768]` @@ -52,7 +61,7 @@ cmake --build build_release -j - 默认:关闭 - 作用:关闭 recv-multishot + provided-buffer 注册路径。 - `TAOTU_IORING_SUBMIT_BATCH` - - 默认值:`1` + - 默认值:`16` - 最大限制:`256` - 作用:当待提交 SQE 数达到该阈值时触发提交(事件循环中也可能被强制提交)。 - `TAOTU_IORING_OP_POOL_LIMIT` diff --git a/src/event_manager.cc b/src/event_manager.cc index a9c5b75d..cd2fc2c9 100644 --- a/src/event_manager.cc +++ b/src/event_manager.cc @@ -200,11 +200,15 @@ void EventManager::Quit() { void EventManager::Start() { should_quit_.store(false); LOG_DEBUG("The event loop in thread(%lu) is starting.", ::pthread_self()); + // Cache "now" across loop body to avoid repeated gettimeofday() in Timer. + TimePoint loop_now; while (!should_quit_.load()) { + int timeout_ms = timer_.GetMinTimeDuration(loop_now); auto return_time = - poller_.Poll(timer_.GetMinTimeDuration(), - &active_events_); // Return time is the time point of - // the end of this polling + poller_.Poll(timeout_ms, &active_events_); // Return time is the time + // point of the end of this + // polling + loop_now = return_time; DoWithActiveTasks(return_time); DoExpiredTimeTasks(return_time); DestroyClosedConnections(); @@ -238,8 +242,8 @@ void EventManager::Start() { if (!has_connections && !has_closed) { break; } - poller_.Poll(1, &active_events_); - DoWithActiveTasks(TimePoint{}); + TimePoint return_time = poller_.Poll(1, &active_events_); + DoWithActiveTasks(return_time); DestroyClosedConnections(); } { @@ -258,7 +262,8 @@ void EventManager::DoExpiredTimeTasks(const TimePoint& return_time) { if (!timer_.HasTasks()) { return; } - Timer::ExpiredTimeTasks expired_time_tasks = timer_.GetExpiredTimeTasks(); + Timer::ExpiredTimeTasks expired_time_tasks = + timer_.GetExpiredTimeTasks(return_time); for (auto& expired_time_task : expired_time_tasks) { auto ExpiredTimeCallback = expired_time_task.second; if (ExpiredTimeCallback) { diff --git a/src/poller.cc b/src/poller.cc index 0eec8303..2f82b5e0 100644 --- a/src/poller.cc +++ b/src/poller.cc @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -27,12 +28,25 @@ namespace taotu { namespace { constexpr uint32_t kDefaultEntries = 32768; constexpr uint32_t kMinEntries = 1024; -constexpr size_t kDefaultSubmitBatch = 1; +constexpr size_t kDefaultSubmitBatch = 16; constexpr size_t kMaxSubmitBatch = 256; constexpr size_t kDefaultOpPoolLimit = 1U << 16; constexpr size_t kMaxOpPoolLimit = 1U << 20; constexpr size_t kDefaultBorrowedBufLimit = Poller::kBufCount / 2; +bool WantBufRing() { + const char* disable = ::getenv("TAOTU_DISABLE_BUF_RING"); + if (disable && *disable != '\0' && *disable != '0') { + return false; + } + const char* enable = ::getenv("TAOTU_ENABLE_BUF_RING"); + if (enable && *enable != '\0' && *enable != '0') { + return true; + } + // Default on: buf_ring avoids per-buffer PROVIDE_BUFFERS SQEs. + return true; +} + uint32_t GetIoUringEntries() { const char* env = ::getenv("TAOTU_IORING_ENTRIES"); if (!env || *env == '\0') { @@ -462,7 +476,6 @@ bool Poller::CancelOp(uint64_t user_data_key) { return true; } TimePoint Poller::Poll(int timeout, EventerList* active_eventers) { - SubmitPending(true); struct __kernel_timespec ts {}; struct __kernel_timespec* tsp = nullptr; if (timeout >= 0) { @@ -472,54 +485,69 @@ TimePoint Poller::Poll(int timeout, EventerList* active_eventers) { } struct io_uring_cqe* cqe = nullptr; - int ret = ::io_uring_wait_cqe_timeout(&ring_, &cqe, tsp); + int ret = ::io_uring_peek_cqe(&ring_, &cqe); + if (ret == -EAGAIN) { + // Submit pending SQEs and wait for at least one CQE in a single syscall. + ret = ::io_uring_submit_and_wait_timeout(&ring_, &cqe, 1, tsp, nullptr); + } if (ret == -ETIME) { - SubmitPending(true); return TimePoint::FromMicroseconds(TimePoint::FNowRaw()); } if (ret < 0) { - LOG_ERROR("io_uring_wait_cqe_timeout failed: %s", ::strerror(-ret)); - SubmitPending(true); + LOG_ERROR("io_uring wait failed: %s", ::strerror(-ret)); + return TimePoint::FromMicroseconds(TimePoint::FNowRaw()); + } + if (cqe == nullptr) { + // Defensive: should not happen, but avoid returning an uninitialized time. return TimePoint::FromMicroseconds(TimePoint::FNowRaw()); } int64_t now_us = TimePoint::FNowRaw(); - const int64_t start_us = now_us; + TimePoint now = TimePoint::FromMicroseconds(now_us); TimePoint::NowCacheGuard now_cache(now_us); - HandleCqe(cqe, active_eventers); + HandleCqe(cqe, now, active_eventers); ::io_uring_cqe_seen(&ring_, cqe); // Continue draining all completed CQEs. const size_t limit = cqe_batch_limit_; const int64_t budget_us = cqe_time_budget_us_; - size_t since_last_clock_check = 0; + const int64_t start_us = now_us; size_t handled = 1; while (limit == 0 || handled < limit) { - // Avoid querying time for every CQE. We only refresh cached "now" - // periodically, which keeps timer precision within one CQE batch chunk. - if (budget_us > 0 && (++since_last_clock_check & 31U) == 0U) { + struct io_uring_cqe* cqes[64]; + unsigned want = static_cast(sizeof(cqes) / sizeof(cqes[0])); + if (limit != 0) { + size_t remaining = limit - handled; + if (remaining < want) { + want = static_cast(remaining); + } + } + unsigned got = ::io_uring_peek_batch_cqe(&ring_, cqes, want); + if (got == 0) { + break; + } + for (unsigned i = 0; i < got; ++i) { + HandleCqe(cqes[i], now, active_eventers); + ::io_uring_cqe_seen(&ring_, cqes[i]); + ++handled; + if (limit != 0 && handled >= limit) { + break; + } + } + if (budget_us > 0) { + // Don't query time per CQE. Only check once per drained chunk. now_us = TimePoint::FNowRaw(); now_cache.Update(now_us); + now = TimePoint::FromMicroseconds(now_us); if ((now_us - start_us) >= budget_us) { break; } } - ret = ::io_uring_peek_cqe(&ring_, &cqe); - if (ret == -EAGAIN) { - break; - } else if (ret < 0) { - LOG_ERROR("io_uring_peek_cqe failed: %s", ::strerror(-ret)); - break; - } - HandleCqe(cqe, active_eventers); - ::io_uring_cqe_seen(&ring_, cqe); - ++handled; } - now_us = TimePoint::FNowRaw(); - now_cache.Update(now_us); - SubmitPending(true); - return TimePoint::FromMicroseconds(now_us); + // Best-effort flush: allow batching via submit_batch_. + SubmitPending(false); + return now; } uint64_t Poller::SubmitWriteProvidedBuffers( @@ -600,7 +628,8 @@ uint64_t Poller::SubmitWriteProvidedBuffers( #endif } -void Poller::HandleCqe(struct io_uring_cqe* cqe, EventerList* active_eventers) { +void Poller::HandleCqe(struct io_uring_cqe* cqe, const TimePoint& now, + EventerList* active_eventers) { uint64_t token = cqe->user_data; if (token == 0) { ReleaseBufferFromCqe(cqe); @@ -669,7 +698,7 @@ void Poller::HandleCqe(struct io_uring_cqe* cqe, EventerList* active_eventers) { Eventer::ReadResult rr{.bytes = cqe->res, .err = cqe->res < 0 ? -cqe->res : 0}; ReleaseBufferFromCqe(cqe); - eventer->OnReadDone(rr, TimePoint{}); + eventer->OnReadDone(rr, now); break; } case OpType::kWrite: { @@ -769,6 +798,52 @@ void Poller::RegisterBuffers() { if (!buffers_) { buffers_.reset(new char[kBufCount * kBufSize]); } + + // Prefer buf_ring if available. It makes buffer return a pure user-space op, + // instead of emitting an IORING_OP_PROVIDE_BUFFERS SQE per CQE. + if (WantBufRing()) { + int ret = 0; + unsigned entries = static_cast(kBufCount); + // Must be power-of-two for mask helpers. + if ((entries & (entries - 1U)) != 0) { + // Next pow2. + unsigned p = 1; + while (p < entries) { + p <<= 1U; + } + entries = p; + } + struct io_uring_buf_ring* br = + ::io_uring_setup_buf_ring(&ring_, entries, kBufferGroupId, 0, &ret); + if (ret == 0 && br != nullptr) { + use_buf_ring_ = true; + buf_ring_ = br; + buf_ring_entries_ = entries; + buf_ring_mask_ = ::io_uring_buf_ring_mask(entries); + ::io_uring_buf_ring_init(buf_ring_); + for (unsigned i = 0; i < static_cast(kBufCount); ++i) { + ::io_uring_buf_ring_add( + buf_ring_, buffers_.get() + (static_cast(i) * kBufSize), + kBufSize, i, buf_ring_mask_, i); + } + ::io_uring_buf_ring_advance(buf_ring_, static_cast(kBufCount)); + buffers_registered_ = true; + LOG_DEBUG("buf_ring enabled for provided buffers (entries=%u, bgid=%d).", + buf_ring_entries_, kBufferGroupId); + return; + } + if (ret != 0) { + LOG_WARN("buf_ring unavailable, fallback to PROVIDE_BUFFERS: %s", + ::strerror(-ret)); + } else { + LOG_WARN("buf_ring setup returned null, fallback to PROVIDE_BUFFERS."); + } + use_buf_ring_ = false; + buf_ring_ = nullptr; + buf_ring_entries_ = 0; + buf_ring_mask_ = 0; + } + struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); if (!sqe) { LOG_WARN("io_uring_get_sqe failed when registering buffers, skip."); @@ -800,6 +875,16 @@ void Poller::UnregisterBuffers() { if (!buffers_registered_) { return; } + if (use_buf_ring_ && buf_ring_ != nullptr) { + ::io_uring_free_buf_ring(&ring_, buf_ring_, buf_ring_entries_, + kBufferGroupId); + buf_ring_ = nullptr; + buf_ring_entries_ = 0; + buf_ring_mask_ = 0; + use_buf_ring_ = false; + buffers_registered_ = false; + return; + } struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); if (!sqe) { return; @@ -846,6 +931,13 @@ void Poller::ReturnBuffer(uint16_t bid) { --leased_buffer_count_; } } + if (use_buf_ring_ && buf_ring_ != nullptr) { + ::io_uring_buf_ring_add( + buf_ring_, buffers_.get() + (static_cast(bid) * kBufSize), + kBufSize, bid, buf_ring_mask_, 0); + ::io_uring_buf_ring_advance(buf_ring_, 1); + return; + } struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); if (!sqe) { SubmitPending(true); diff --git a/src/poller.h b/src/poller.h index 53e416a0..35d9bb3b 100644 --- a/src/poller.h +++ b/src/poller.h @@ -116,6 +116,7 @@ class Poller : NonCopyableMovable { bool UseSqpoll() const { return use_sqpoll_; } bool UseMultishotAccept() const { return use_multishot_accept_; } bool BuffersRegistered() const { return buffers_registered_; } + bool UseBufRing() const { return use_buf_ring_; } size_t BufferCount() const { return kBufCount; } // The buffer pointer is valid while Poller's provided-buffer pool is // registered. For recv-multishot buffers, it must be returned via @@ -138,7 +139,8 @@ class Poller : NonCopyableMovable { void SubmitPoll(Eventer* eventer); void CancelPoll(Eventer* eventer); - void HandleCqe(struct io_uring_cqe* cqe, EventerList* active_eventers); + void HandleCqe(struct io_uring_cqe* cqe, const TimePoint& now, + EventerList* active_eventers); void SubmitPending(bool force = false); void RegisterBuffers(); void UnregisterBuffers(); @@ -148,6 +150,10 @@ class Poller : NonCopyableMovable { bool use_sqpoll_{false}; bool use_multishot_accept_{true}; bool buffers_registered_{false}; + bool use_buf_ring_{false}; + struct io_uring_buf_ring* buf_ring_{nullptr}; + unsigned buf_ring_entries_{0}; + unsigned buf_ring_mask_{0}; // Large provided-buffer pool for recv-multishot. Stored on heap to avoid // inflating Poller size (EventManager is often stack-allocated in tests). // Not value-initialized to avoid touching (zeroing) large memory on Poller diff --git a/src/timer.cc b/src/timer.cc index de98c55f..aea34036 100644 --- a/src/timer.cc +++ b/src/timer.cc @@ -22,6 +22,10 @@ void Timer::AddTimeTask(const TimePoint& time_point, TimeCallback TimeTask) { } int Timer::GetMinTimeDuration() const { + return GetMinTimeDuration(TimePoint{}); +} + +int Timer::GetMinTimeDuration(const TimePoint& now) const { if (!HasTasks()) { return 10000; } @@ -30,12 +34,16 @@ int Timer::GetMinTimeDuration() const { return 10000; } int duration = static_cast(time_points_.begin()->first.GetMillisecond() - - TimePoint().GetMillisecond()); + now.GetMillisecond()); return duration > 0 ? duration : 0; // Could not give a negative value of the duration } Timer::ExpiredTimeTasks Timer::GetExpiredTimeTasks() { + return GetExpiredTimeTasks(TimePoint{}); +} + +Timer::ExpiredTimeTasks Timer::GetExpiredTimeTasks(const TimePoint& now) { ExpiredTimeTasks expired_time_tasks; if (!HasTasks()) { return expired_time_tasks; @@ -43,7 +51,6 @@ Timer::ExpiredTimeTasks Timer::GetExpiredTimeTasks() { { LockGuard lock_guard(mutex_lock_); TimePoints::iterator itr; - TimePoint now; for (itr = time_points_.begin(); itr != time_points_.end() && itr->first <= now; ++itr) { expired_time_tasks.emplace_back(itr->first, itr->second); diff --git a/src/timer.h b/src/timer.h index d235cc7a..d982afd7 100644 --- a/src/timer.h +++ b/src/timer.h @@ -41,9 +41,11 @@ class Timer : NonCopyableMovable { // Get minimum time duration for next io_uring wait int GetMinTimeDuration() const; + int GetMinTimeDuration(const TimePoint& now) const; // Get a set of expired time tasks ExpiredTimeTasks GetExpiredTimeTasks(); + ExpiredTimeTasks GetExpiredTimeTasks(const TimePoint& now); bool HasTasks() const { return task_count_.load(std::memory_order_relaxed) > 0; From 4aedc0e026bdac1be3afda36dfbcb36e7d857096 Mon Sep 17 00:00:00 2001 From: Sigma711 <1979934715@qq.com> Date: Mon, 9 Mar 2026 14:04:05 +0800 Subject: [PATCH 8/9] [src] Reduce multishot read fast-path overhead --- src/connecting.cc | 65 +++++++++++++++++++++++++---------------------- src/connecting.h | 13 ++++------ 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/src/connecting.cc b/src/connecting.cc index 2a53803b..160686bd 100644 --- a/src/connecting.cc +++ b/src/connecting.cc @@ -426,39 +426,21 @@ void Connecting::SubmitReadOnce() { auto* ctx = &read_ctx_storage_; ctx->self = this; read_ctx_ = ctx; - ctx->extra_buffer = nullptr; - ctx->extra_len = 0; ctx->key = 0; - ctx->multishot = false; ctx->buf_id = 0; - ctx->writable = input_buffer_.GetWritableBytes(); - ctx->iov[0].iov_base = const_cast(input_buffer_.GetWritablePosition()); - ctx->iov[0].iov_len = ctx->writable; - if (!event_manager_->GetPoller()->BuffersRegistered()) { - if (!extra_read_buffer_) { - extra_read_buffer_.reset(new char[64 * 1024]); - } - ctx->extra_buffer = extra_read_buffer_.get(); - ctx->extra_len = 64 * 1024; - } - ctx->iov[1].iov_base = ctx->extra_buffer; - ctx->iov[1].iov_len = ctx->extra_len; - int iovcnt; - if (ctx->writable == 0) { - iovcnt = 1; - ctx->iov[0] = ctx->iov[1]; - } else { - iovcnt = (ctx->extra_len > 0 && ctx->writable < ctx->extra_len) ? 2 : 1; - } - // ctx->key = next_io_key_++; // Deprecated: let Poller generate key - // read_cancel_key_ = ctx->key; // Do not set yet + auto* poller = event_manager_->GetPoller(); read_in_flight_ = true; #ifdef IORING_RECV_MULTISHOT - if (event_manager_->GetPoller()->BuffersRegistered()) { + if (poller->BuffersRegistered()) { + // recv-multishot uses poller-owned provided buffers directly, so it does + // not need the per-submit iovec/extra-buffer setup used by one-shot reads. ctx->multishot = true; - uint64_t key = event_manager_->GetPoller()->SubmitReadMultishot( - &eventer_, Poller::kBufferGroupId, &Connecting::OnReadComplete, ctx, 0, - nullptr); + ctx->writable = 0; + ctx->extra_buffer = nullptr; + ctx->extra_len = 0; + uint64_t key = poller->SubmitReadMultishot(&eventer_, Poller::kBufferGroupId, + &Connecting::OnReadComplete, ctx, + 0, nullptr); if (key == 0) { read_in_flight_ = false; read_cancel_key_ = 0; @@ -475,9 +457,30 @@ void Connecting::SubmitReadOnce() { } #endif ctx->multishot = false; - uint64_t key = event_manager_->GetPoller()->SubmitRead( - &eventer_, ctx->iov.data(), iovcnt, &Connecting::OnReadComplete, ctx, 0, - nullptr); + ctx->extra_buffer = nullptr; + ctx->extra_len = 0; + ctx->writable = input_buffer_.GetWritableBytes(); + ctx->iov[0].iov_base = const_cast(input_buffer_.GetWritablePosition()); + ctx->iov[0].iov_len = ctx->writable; + if (!poller->BuffersRegistered()) { + if (!extra_read_buffer_) { + extra_read_buffer_.reset(new char[64 * 1024]); + } + ctx->extra_buffer = extra_read_buffer_.get(); + ctx->extra_len = 64 * 1024; + } + ctx->iov[1].iov_base = ctx->extra_buffer; + ctx->iov[1].iov_len = ctx->extra_len; + int iovcnt; + if (ctx->writable == 0) { + iovcnt = 1; + ctx->iov[0] = ctx->iov[1]; + } else { + iovcnt = (ctx->extra_len > 0 && ctx->writable < ctx->extra_len) ? 2 : 1; + } + uint64_t key = poller->SubmitRead(&eventer_, ctx->iov.data(), iovcnt, + &Connecting::OnReadComplete, ctx, 0, + nullptr); if (key == 0) { read_in_flight_ = false; read_cancel_key_ = 0; diff --git a/src/connecting.h b/src/connecting.h index 01c914b4..ccda7d2f 100644 --- a/src/connecting.h +++ b/src/connecting.h @@ -142,9 +142,7 @@ class Connecting : NonCopyableMovable { bool IsDisconnected() const { return ConnectionState::kDisconnected == state_.load(); } - bool HasPendingIo() const { - return pending_io_.load(std::memory_order_relaxed) > 0; - } + bool HasPendingIo() const { return pending_io_ > 0; } int GetPendingIoWaitMs() const { return pending_io_wait_ms_; } int GetPendingIoRetries() const { return pending_io_retries_; } void BumpPendingIoWait(int delta_ms = 1) { @@ -193,10 +191,9 @@ class Connecting : NonCopyableMovable { static void OnReadComplete(struct io_uring_cqe* cqe, Poller::IoUringOp* op); static void OnWriteComplete(struct io_uring_cqe* cqe, Poller::IoUringOp* op); void CancelPendingIo(); - void BumpPendingIo() { pending_io_.fetch_add(1, std::memory_order_relaxed); } - void CompletePendingIo() { - pending_io_.fetch_sub(1, std::memory_order_relaxed); - } + // Pending I/O bookkeeping is confined to the owning EventManager thread. + void BumpPendingIo() { ++pending_io_; } + void CompletePendingIo() { --pending_io_; } enum class ConnectionState { kDisconnected, @@ -261,7 +258,7 @@ class Connecting : NonCopyableMovable { // Connection state (atomic) std::atomic state_; - std::atomic pending_io_{0}; + int pending_io_{0}; bool read_in_flight_{false}; bool write_in_flight_{false}; ReadContext* read_ctx_{nullptr}; From 989948022d953dc880ab3375c3e7d547bce1b255 Mon Sep 17 00:00:00 2001 From: Sigma711 <1979934715@qq.com> Date: Mon, 9 Mar 2026 18:36:45 +0800 Subject: [PATCH 9/9] [src && example] Simplify provided-buffer path and trim retry logs --- example/pingpong/pingpong_client.cc | 11 -- example/pingpong/pingpong_server.cc | 3 - src/connecting.cc | 274 ++-------------------------- src/connecting.h | 30 --- src/connector.cc | 42 +++-- src/poller.cc | 206 +-------------------- src/poller.h | 35 +--- 7 files changed, 50 insertions(+), 551 deletions(-) diff --git a/example/pingpong/pingpong_client.cc b/example/pingpong/pingpong_client.cc index d675cc49..32c1413e 100644 --- a/example/pingpong/pingpong_client.cc +++ b/example/pingpong/pingpong_client.cc @@ -159,17 +159,6 @@ void Session::Stop() { client_.StopWithoutQuit(); } void Session::OnConnectionCallback(taotu::Connecting& connection) { if (connection.IsConnected()) { connection.SetTcpNoDelay(true); - connection.RegisterOnBorrowedMessageCallback( - [this](taotu::Connecting& conn, const char*, size_t len, - uint16_t buf_id, taotu::TimePoint) { - if (!conn.SendBorrowed(buf_id, len)) { - return false; - } - messages_read_.fetch_add(1, std::memory_order_relaxed); - bytes_read_.fetch_add(static_cast(len), - std::memory_order_relaxed); - return true; - }); std::shared_ptr master_client(master_client_.lock()); if (master_client) { const auto& message = master_client->GetMessage(); diff --git a/example/pingpong/pingpong_server.cc b/example/pingpong/pingpong_server.cc index 0b375521..96f352e9 100644 --- a/example/pingpong/pingpong_server.cc +++ b/example/pingpong/pingpong_server.cc @@ -41,9 +41,6 @@ void PingpongServer::Start() { server_->Start(); } void PingpongServer::OnConnectionCallback(taotu::Connecting& connection) { if (connection.IsConnected()) { connection.SetTcpNoDelay(true); - connection.RegisterOnBorrowedMessageCallback( - [](taotu::Connecting& conn, const char*, size_t len, uint16_t buf_id, - taotu::TimePoint) { return conn.SendBorrowed(buf_id, len); }); } } void PingpongServer::OnMessageCallback(taotu::Connecting& connection, diff --git a/src/connecting.cc b/src/connecting.cc index 160686bd..28a0410e 100644 --- a/src/connecting.cc +++ b/src/connecting.cc @@ -133,24 +133,12 @@ void Connecting::OnReadComplete(struct io_uring_cqe* cqe, } if (res > 0) { bool rearmed = false; - bool consumed = false; // Update the input buffer. if (ctx->multishot && has_buffer) { auto* buf = connecting->event_manager_->GetPoller()->GetBuffer(ctx->buf_id); if (buf) { - if (connecting->OnBorrowedMessageCallback_) { - consumed = connecting->OnBorrowedMessageCallback_( - *connecting, buf, static_cast(res), ctx->buf_id, - TimePoint{}); - if (consumed) { - op->skip_buf_release = true; - } else { - connecting->input_buffer_.Append(buf, static_cast(res)); - } - } else { - connecting->input_buffer_.Append(buf, static_cast(res)); - } + connecting->input_buffer_.Append(buf, static_cast(res)); } else { LOG_WARN("buffer id out of range(%u)", ctx->buf_id); } @@ -164,7 +152,7 @@ void Connecting::OnReadComplete(struct io_uring_cqe* cqe, static_cast(res) - writable); } } - if (!consumed && connecting->OnMessageCallback_) { + if (connecting->OnMessageCallback_) { connecting->OnMessageCallback_(*connecting, &connecting->input_buffer_, TimePoint{}); } @@ -257,118 +245,6 @@ void Connecting::OnWriteComplete(struct io_uring_cqe* cqe, int err = res < 0 ? -res : 0; LOG_DEBUG("Write complete fd(%d) res(%zd) err(%d)", connecting->Fd(), res, err); - if (ctx->borrowed) { - // Borrowed fixed buffer write (may contain multiple iovecs). - auto* poller = connecting->event_manager_->GetPoller(); - if (res > 0) { - size_t sent = static_cast(res); - if (connecting->borrowed_size_ == 0) { - // Queue was cleared unexpectedly; return buffers best-effort. - for (size_t i = 0; i < ctx->borrowed_iovcnt; ++i) { - poller->ReturnBuffer(ctx->borrowed_ids[i]); - } - } else { - for (size_t i = 0; - i < ctx->borrowed_iovcnt && connecting->borrowed_size_ > 0; ++i) { - BorrowedChunk* chunk = - &connecting->borrowed_queue_[connecting->borrowed_head_]; - const size_t seg_len = ctx->borrowed_lens[i]; - if (sent >= seg_len) { - sent -= seg_len; - poller->ReturnBuffer(chunk->buf_id); - connecting->borrowed_head_ = - (connecting->borrowed_head_ + 1) % kBorrowedQueueCap; - --connecting->borrowed_size_; - continue; - } - // Partial within current buffer. - chunk->off += static_cast(sent); - break; - } - } - // Prefer owned pending output (if any), then borrowed queue. - if (connecting->pending_output_buffer_.GetReadableBytes() > 0) { - connecting->output_buffer_.Swap(connecting->pending_output_buffer_); - connecting->SubmitWriteOnce(); - connecting->CompletePendingIo(); - if (connecting->write_ctx_ == ctx) { - connecting->write_ctx_ = nullptr; - } - return; - } - if (connecting->borrowed_size_ > 0) { - connecting->SubmitWriteOnce(); - connecting->CompletePendingIo(); - if (connecting->write_ctx_ == ctx) { - connecting->write_ctx_ = nullptr; - } - return; - } - if (connecting->WriteCompleteCallback_) { - connecting->WriteCompleteCallback_(*connecting); - } - if (Connecting::ConnectionState::kDisconnecting == - connecting->state_.load() && - connecting->output_buffer_.GetReadableBytes() == 0 && - connecting->pending_output_buffer_.GetReadableBytes() == 0 && - connecting->borrowed_size_ == 0) { - connecting->socketer_.ShutdownWrite(); - } - } else { - if (err == EAGAIN || err == EWOULDBLOCK || err == EINTR) { - connecting->SubmitWriteOnce(); - connecting->CompletePendingIo(); - if (connecting->write_ctx_ == ctx) { - connecting->write_ctx_ = nullptr; - } - return; - } - // Fatal error: drop the borrowed buffers in this write attempt and - // continue. - if (connecting->borrowed_size_ == 0) { - for (size_t i = 0; i < ctx->borrowed_iovcnt; ++i) { - poller->ReturnBuffer(ctx->borrowed_ids[i]); - } - } else { - for (size_t i = 0; - i < ctx->borrowed_iovcnt && connecting->borrowed_size_ > 0; ++i) { - BorrowedChunk* chunk = - &connecting->borrowed_queue_[connecting->borrowed_head_]; - poller->ReturnBuffer(chunk->buf_id); - connecting->borrowed_head_ = - (connecting->borrowed_head_ + 1) % kBorrowedQueueCap; - --connecting->borrowed_size_; - } - } - LOG_ERROR("OnWriteComplete(borrowed) error: fd(%d) res(%zd) err(%d)", - connecting->Fd(), res, err); - connecting->DoWithError(err); - if (connecting->pending_output_buffer_.GetReadableBytes() > 0) { - connecting->output_buffer_.Swap(connecting->pending_output_buffer_); - connecting->SubmitWriteOnce(); - connecting->CompletePendingIo(); - if (connecting->write_ctx_ == ctx) { - connecting->write_ctx_ = nullptr; - } - return; - } - if (connecting->borrowed_size_ > 0) { - connecting->SubmitWriteOnce(); - connecting->CompletePendingIo(); - if (connecting->write_ctx_ == ctx) { - connecting->write_ctx_ = nullptr; - } - return; - } - } - connecting->CompletePendingIo(); - if (connecting->write_ctx_ == ctx) { - connecting->write_ctx_ = nullptr; - } - ctx->self = nullptr; - op->context = nullptr; - return; - } if (res > 0) { connecting->output_buffer_.Refresh(static_cast(res)); if (connecting->output_buffer_.GetReadableBytes() > 0) { @@ -383,22 +259,13 @@ void Connecting::OnWriteComplete(struct io_uring_cqe* cqe, } return; } - if (connecting->borrowed_size_ > 0) { - connecting->SubmitWriteOnce(); - connecting->CompletePendingIo(); - if (connecting->write_ctx_ == ctx) { - connecting->write_ctx_ = nullptr; - } - return; - } if (connecting->WriteCompleteCallback_) { connecting->WriteCompleteCallback_(*connecting); } if (Connecting::ConnectionState::kDisconnecting == connecting->state_.load() && connecting->output_buffer_.GetReadableBytes() == 0 && - connecting->pending_output_buffer_.GetReadableBytes() == 0 && - connecting->borrowed_size_ == 0) { + connecting->pending_output_buffer_.GetReadableBytes() == 0) { connecting->socketer_.ShutdownWrite(); } } @@ -438,9 +305,9 @@ void Connecting::SubmitReadOnce() { ctx->writable = 0; ctx->extra_buffer = nullptr; ctx->extra_len = 0; - uint64_t key = poller->SubmitReadMultishot(&eventer_, Poller::kBufferGroupId, - &Connecting::OnReadComplete, ctx, - 0, nullptr); + uint64_t key = poller->SubmitReadMultishot( + &eventer_, Poller::kBufferGroupId, &Connecting::OnReadComplete, ctx, 0, + nullptr); if (key == 0) { read_in_flight_ = false; read_cancel_key_ = 0; @@ -478,9 +345,9 @@ void Connecting::SubmitReadOnce() { } else { iovcnt = (ctx->extra_len > 0 && ctx->writable < ctx->extra_len) ? 2 : 1; } - uint64_t key = poller->SubmitRead(&eventer_, ctx->iov.data(), iovcnt, - &Connecting::OnReadComplete, ctx, 0, - nullptr); + uint64_t key = + poller->SubmitRead(&eventer_, ctx->iov.data(), iovcnt, + &Connecting::OnReadComplete, ctx, 0, nullptr); if (key == 0) { read_in_flight_ = false; read_cancel_key_ = 0; @@ -504,78 +371,29 @@ void Connecting::SubmitWriteOnce() { return; } const size_t readable = output_buffer_.GetReadableBytes(); - if (readable == 0 && borrowed_size_ == 0) { + if (readable == 0) { return; } auto* ctx = &write_ctx_storage_; ctx->self = this; write_ctx_ = ctx; ctx->key = 0; - ctx->borrowed = false; - ctx->borrowed_iovcnt = 0; auto* poller = event_manager_->GetPoller(); // ctx->key = next_io_key_++; // Deprecated // write_cancel_key_ = ctx->key; write_in_flight_ = true; - uint64_t key = 0; - if (readable > 0) { - ctx->to_send = readable; - ctx->iov.iov_base = const_cast(output_buffer_.GetReadablePosition()); - ctx->iov.iov_len = ctx->to_send; - key = poller->SubmitWrite(&eventer_, &ctx->iov, 1, - &Connecting::OnWriteComplete, ctx, 0, nullptr); - } else { - ctx->borrowed = true; - const size_t iov_max = Poller::IoUringOp::kProvidedIovMax; - const size_t cnt = borrowed_size_ < iov_max ? borrowed_size_ : iov_max; - std::array ids{}; - std::array offs{}; - std::array lens{}; - size_t total = 0; - for (size_t i = 0; i < cnt; ++i) { - const size_t idx = (borrowed_head_ + i) % kBorrowedQueueCap; - const BorrowedChunk& chunk = borrowed_queue_[idx]; - ids[i] = chunk.buf_id; - offs[i] = static_cast(chunk.off); - lens[i] = static_cast(chunk.len - chunk.off); - ctx->borrowed_ids[i] = ids[i]; - ctx->borrowed_lens[i] = lens[i]; - total += lens[i]; - } - ctx->borrowed_iovcnt = cnt; - ctx->to_send = total; - ctx->iov.iov_base = nullptr; - ctx->iov.iov_len = 0; - key = poller->SubmitWriteProvidedBuffers( - &eventer_, ids.data(), offs.data(), lens.data(), cnt, - &Connecting::OnWriteComplete, ctx, 0, nullptr); - } + ctx->to_send = readable; + ctx->iov.iov_base = const_cast(output_buffer_.GetReadablePosition()); + ctx->iov.iov_len = ctx->to_send; + uint64_t key = poller->SubmitWrite( + &eventer_, &ctx->iov, 1, &Connecting::OnWriteComplete, ctx, 0, nullptr); if (key == 0) { write_in_flight_ = false; write_cancel_key_ = 0; if (write_ctx_ == ctx) { write_ctx_ = nullptr; } - if (ctx->borrowed && borrowed_size_ > 0 && ctx->borrowed_iovcnt > 0) { - // Fallback: keep correctness by copying and returning the buffers. - for (size_t i = 0; i < ctx->borrowed_iovcnt && borrowed_size_ > 0; ++i) { - BorrowedChunk* chunk = &borrowed_queue_[borrowed_head_]; - char* buf = poller->GetBuffer(chunk->buf_id); - if (buf && chunk->off < chunk->len) { - output_buffer_.Append(buf + chunk->off, - static_cast(chunk->len - chunk->off)); - } - poller->ReturnBuffer(chunk->buf_id); - borrowed_head_ = (borrowed_head_ + 1) % kBorrowedQueueCap; - --borrowed_size_; - } - if (output_buffer_.GetReadableBytes() > 0 && !write_in_flight_) { - SubmitWriteOnce(); - // SubmitWriteOnce reuses ctx; keep ctx->self for the new in-flight op. - return; - } - } ctx->self = nullptr; return; } @@ -705,40 +523,6 @@ void Connecting::Send(IoBuffer* io_buffer) { SubmitWriteOnce(); } -bool Connecting::SendBorrowed(uint16_t buf_id, size_t len) { - if (len == 0) { - return false; - } - if (ConnectionState::kDisconnected == state_.load()) { - return false; - } - if (ConnectionState::kConnected != state_.load()) { - return false; - } - auto* poller = event_manager_->GetPoller(); - if (!poller->BuffersRegistered()) { - return false; - } - if (poller->GetBuffer(buf_id) == nullptr) { - return false; - } - if (borrowed_size_ >= kBorrowedQueueCap) { - return false; - } - if (!poller->TryLeaseBuffer(buf_id)) { - return false; - } - const size_t tail = (borrowed_head_ + borrowed_size_) % kBorrowedQueueCap; - borrowed_queue_[tail].buf_id = buf_id; - borrowed_queue_[tail].len = static_cast(len); - borrowed_queue_[tail].off = 0; - ++borrowed_size_; - if (!write_in_flight_) { - SubmitWriteOnce(); - } - return true; -} - void Connecting::ShutDownWrite() { if (ConnectionState::kConnected == state_.load()) { SetState(ConnectionState::kDisconnecting); @@ -767,15 +551,6 @@ void Connecting::ForceCloseAfter(int64_t delay_microseconds) { } void Connecting::CancelPendingIo() { - std::array - inflight_borrowed_ids{}; - size_t inflight_borrowed_cnt = 0; - if (write_in_flight_ && write_ctx_ && write_ctx_->borrowed) { - inflight_borrowed_cnt = write_ctx_->borrowed_iovcnt; - for (size_t i = 0; i < inflight_borrowed_cnt; ++i) { - inflight_borrowed_ids[i] = write_ctx_->borrowed_ids[i]; - } - } if (read_in_flight_) { if (read_cancel_key_ != 0) { (void)event_manager_->GetPoller()->CancelOp(read_cancel_key_); @@ -800,25 +575,6 @@ void Connecting::CancelPendingIo() { CompletePendingIo(); write_in_flight_ = false; } - if (borrowed_size_ > 0) { - auto* poller = event_manager_->GetPoller(); - for (size_t i = 0; i < borrowed_size_; ++i) { - const size_t idx = (borrowed_head_ + i) % kBorrowedQueueCap; - bool skip = false; - for (size_t j = 0; j < inflight_borrowed_cnt; ++j) { - if (borrowed_queue_[idx].buf_id == inflight_borrowed_ids[j]) { - skip = true; - break; - } - } - if (skip) { - continue; - } - poller->ReturnBuffer(borrowed_queue_[idx].buf_id); - } - borrowed_head_ = 0; - borrowed_size_ = 0; - } } std::string Connecting::GetConnectionStateInfo(ConnectionState state) { diff --git a/src/connecting.h b/src/connecting.h index ccda7d2f..18339726 100644 --- a/src/connecting.h +++ b/src/connecting.h @@ -49,12 +49,6 @@ class Connecting : NonCopyableMovable { typedef std::function NormalCallback; typedef std::function OnMessageCallback; - // For recv-multishot provided buffers. Return true to "consume/lease" the - // buffer (Poller will NOT auto-return it); the user must ensure the buffer is - // eventually returned (e.g., by calling SendBorrowed()). - typedef std::function - OnBorrowedMessageCallback; typedef std::function HighWaterMarkCallback; Connecting(EventManager* event_manager, int socket_fd, @@ -81,9 +75,6 @@ class Connecting : NonCopyableMovable { void RegisterOnMessageCallback(const OnMessageCallback& cb) { OnMessageCallback_ = cb; } - void RegisterOnBorrowedMessageCallback(const OnBorrowedMessageCallback& cb) { - OnBorrowedMessageCallback_ = cb; - } void RegisterWriteCallback(const NormalCallback& cb) { WriteCompleteCallback_ = cb; } @@ -128,11 +119,6 @@ class Connecting : NonCopyableMovable { // Send the message (asynchronously at most time) void Send(IoBuffer* io_buffer); - // Send a Poller-provided recv buffer (buf_id from IORING_CQE_F_BUFFER) - // without copying into IoBuffer. Returns false if it can't be queued and - // caller should fallback to copying. - bool SendBorrowed(uint16_t buf_id, size_t len); - // Shut down the writing end (close half == stop writing indeed) void ShutDownWrite(); @@ -174,16 +160,6 @@ class Connecting : NonCopyableMovable { struct iovec iov {}; size_t to_send{0}; uint64_t key{0}; - bool borrowed{false}; - size_t borrowed_iovcnt{0}; - std::array borrowed_ids{}; - std::array borrowed_lens{}; - }; - - struct BorrowedChunk { - uint16_t buf_id{0}; - uint32_t len{0}; - uint32_t off{0}; }; void SubmitReadOnce(); @@ -229,7 +205,6 @@ class Connecting : NonCopyableMovable { // Callback function which will be called after each reading OnMessageCallback OnMessageCallback_; - OnBorrowedMessageCallback OnBorrowedMessageCallback_; // Callback function which will be called after each real writing NormalCallback WriteCompleteCallback_; @@ -272,11 +247,6 @@ class Connecting : NonCopyableMovable { int pending_io_wait_ms_{0}; int pending_io_retries_{0}; - static constexpr size_t kBorrowedQueueCap = 8; - std::array borrowed_queue_{}; - size_t borrowed_head_{0}; - size_t borrowed_size_{0}; - // Context for any object bound std::any context_; }; diff --git a/src/connector.cc b/src/connector.cc index 4012e966..aa46412b 100644 --- a/src/connector.cc +++ b/src/connector.cc @@ -30,6 +30,19 @@ namespace { constexpr int kMaxRetryDelayMicroseconds = 30 * 1000 * 1000; constexpr int kInitRetryDelayMicroseconds = 500 * 1000; +bool IsRetryableConnectError(int err) { + switch (err) { + case EAGAIN: + case EADDRINUSE: + case EADDRNOTAVAIL: + case ECONNREFUSED: + case ENETUNREACH: + return true; + default: + return false; + } +} + const char* StrError(int err, char* buf, size_t len) { #if defined(_GNU_SOURCE) char* msg = ::strerror_r(err, buf, len); @@ -185,7 +198,7 @@ void Connector::DoConnecting(int conn_fd) { eventer_->EnableWriteEvents(); } void Connector::DoRetrying(int conn_fd) { - LOG_WARN("Connector fd(%d) is closing for retrying!", conn_fd); + LOG_DEBUG("Connector fd(%d) is closing for retrying!", conn_fd); ::close(conn_fd); SetState(ConnectState::kDisconnected); if (can_connect_) { @@ -214,10 +227,12 @@ void Connector::DoWriting() { int conn_fd = RemoveAndReset(); int error = GetSocketError(conn_fd); if (error) { - char errno_info[512]; - const char* err_str = StrError(error, errno_info, sizeof(errno_info)); - LOG_WARN("Connector fd(%d) has the error(%s)!", conn_fd, - err_str ? err_str : errno_info); + if (!IsRetryableConnectError(error)) { + char errno_info[512]; + const char* err_str = StrError(error, errno_info, sizeof(errno_info)); + LOG_WARN("Connector fd(%d) has the error(%s)!", conn_fd, + err_str ? err_str : errno_info); + } DoRetrying(conn_fd); } else if ([](int conn_fd) -> bool { struct sockaddr_in6 local_address = @@ -256,17 +271,22 @@ void Connector::DoWithError() { if (!eventer_) { return; } - LOG_ERROR("Connector fd(%d) has the error with the state(%d).", - eventer_->Fd(), state_); if (ConnectState::kConnecting == state_) { int conn_fd = RemoveAndReset(); int error = GetSocketError(conn_fd); - char errno_info[512]; - const char* err_str = StrError(error, errno_info, sizeof(errno_info)); - LOG_WARN("Connector fd(%d) has the error(%s)!", conn_fd, - err_str ? err_str : errno_info); + if (!IsRetryableConnectError(error)) { + char errno_info[512]; + const char* err_str = StrError(error, errno_info, sizeof(errno_info)); + LOG_ERROR("Connector fd(%d) has the error with the state(%d).", conn_fd, + state_); + LOG_WARN("Connector fd(%d) has the error(%s)!", conn_fd, + err_str ? err_str : errno_info); + } DoRetrying(conn_fd); + return; } + LOG_ERROR("Connector fd(%d) has the error with the state(%d).", + eventer_->Fd(), state_); } int Connector::RemoveAndReset() { if (!eventer_) { diff --git a/src/poller.cc b/src/poller.cc index 2f82b5e0..a7a47b51 100644 --- a/src/poller.cc +++ b/src/poller.cc @@ -32,7 +32,6 @@ constexpr size_t kDefaultSubmitBatch = 16; constexpr size_t kMaxSubmitBatch = 256; constexpr size_t kDefaultOpPoolLimit = 1U << 16; constexpr size_t kMaxOpPoolLimit = 1U << 20; -constexpr size_t kDefaultBorrowedBufLimit = Poller::kBufCount / 2; bool WantBufRing() { const char* disable = ::getenv("TAOTU_DISABLE_BUF_RING"); @@ -98,21 +97,6 @@ size_t GetOpPoolLimit() { return static_cast(val); } -size_t GetBorrowedBufLimit() { - const char* env = ::getenv("TAOTU_IORING_BORROWED_BUFFER_LIMIT"); - if (!env || *env == '\0') { - return kDefaultBorrowedBufLimit; - } - char* end = nullptr; - uint64_t val = ::strtoull(env, &end, 10); - if (end == env) { - return kDefaultBorrowedBufLimit; - } - if (val > Poller::kBufCount) { - return Poller::kBufCount; - } - return static_cast(val); -} } // namespace Poller::Poller() { @@ -168,7 +152,6 @@ Poller::Poller() { } submit_batch_ = GetSubmitBatch(); op_pool_limit_ = GetOpPoolLimit(); - leased_buffer_limit_ = GetBorrowedBufLimit(); op_pool_.reserve(std::min(op_pool_limit_, static_cast(2048))); struct io_uring_probe* probe = ::io_uring_get_probe_ring(&ring_); if (probe) { @@ -217,11 +200,6 @@ Poller::IoUringOp* Poller::AcquireOp(OpType type, Eventer* eventer, void* ctx, op->fd = fd; op->completion = completion; op->context_deleter = context_deleter; - op->skip_buf_release = false; - op->uses_provided_buffer = false; - op->provided_buf_count = 0; - op->provided_iovs[0].iov_base = nullptr; - op->provided_iovs[0].iov_len = 0; op->state.store(IoUringOp::State::kInflight, std::memory_order_relaxed); return op; } @@ -236,11 +214,6 @@ void Poller::RecycleOp(IoUringOp* op) { op->fd = -1; op->completion = nullptr; op->context_deleter = nullptr; - op->skip_buf_release = false; - op->uses_provided_buffer = false; - op->provided_buf_count = 0; - op->provided_iovs[0].iov_base = nullptr; - op->provided_iovs[0].iov_len = 0; op->state.store(IoUringOp::State::kInit, std::memory_order_relaxed); if (op_pool_.size() < op_pool_limit_) { op_pool_.push_back(op); @@ -356,65 +329,6 @@ uint64_t Poller::SubmitWrite(Eventer* eventer, struct iovec* iov, int iovcnt, return token; } -uint64_t Poller::SubmitWriteProvidedBuffer(Eventer* eventer, uint16_t buf_id, - size_t offset, size_t len, - CompletionFn completion, void* ctx, - uint64_t key, - ContextDeleter context_deleter) { -#ifdef IORING_RECV_MULTISHOT - (void)key; - if (!buffers_registered_) { - return 0; - } - if (!buffers_) { - return 0; - } - if (buf_id >= kBufCount) { - LOG_WARN("SubmitWriteProvidedBuffer: buffer id out of range: %u", buf_id); - return 0; - } - if (len == 0 || offset >= kBufSize || (offset + len) > kBufSize) { - LOG_WARN("SubmitWriteProvidedBuffer: invalid range (id=%u off=%zu len=%zu)", - buf_id, offset, len); - return 0; - } - struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); - if (!sqe) { - SubmitPending(true); - sqe = ::io_uring_get_sqe(&ring_); - } - if (!sqe) { - LOG_ERROR("io_uring_get_sqe failed when submit provided write fd(%d)", - eventer->Fd()); - return 0; - } - auto* op = AcquireOp(OpType::kWrite, eventer, ctx, eventer->Fd(), completion, - context_deleter); - op->uses_provided_buffer = true; - op->provided_buf_count = 1; - op->provided_buf_ids[0] = buf_id; - op->provided_iovs[0].iov_base = static_cast( - buffers_.get() + (static_cast(buf_id) * kBufSize) + - static_cast(offset)); - op->provided_iovs[0].iov_len = len; - uint64_t token = EncodeOp(op); - ::io_uring_prep_writev(sqe, eventer->Fd(), op->provided_iovs.data(), 1, 0); - ::io_uring_sqe_set_data64(sqe, token); - SubmitPending(); - return token; -#else - (void)eventer; - (void)buf_id; - (void)offset; - (void)len; - (void)completion; - (void)ctx; - (void)key; - (void)context_deleter; - return 0; -#endif -} - uint64_t Poller::SubmitAccept(int fd, struct sockaddr* addr, socklen_t* addrlen, void* ctx, CompletionFn completion, uint64_t key, bool multishot, ContextDeleter context_deleter) { @@ -550,84 +464,6 @@ TimePoint Poller::Poll(int timeout, EventerList* active_eventers) { return now; } -uint64_t Poller::SubmitWriteProvidedBuffers( - Eventer* eventer, const uint16_t* buf_ids, const size_t* offsets, - const size_t* lens, size_t count, CompletionFn completion, void* ctx, - uint64_t key, ContextDeleter context_deleter) { -#ifdef IORING_RECV_MULTISHOT - (void)key; - if (!buffers_registered_) { - return 0; - } - if (!buffers_) { - return 0; - } - if (!buf_ids || !offsets || !lens || count == 0) { - return 0; - } - if (count > IoUringOp::kProvidedIovMax) { - LOG_WARN("SubmitWriteProvidedBuffers: too many iovecs: %zu", count); - return 0; - } - for (size_t i = 0; i < count; ++i) { - const uint16_t bid = buf_ids[i]; - const size_t off = offsets[i]; - const size_t len = lens[i]; - if (bid >= kBufCount) { - LOG_WARN("SubmitWriteProvidedBuffers: buffer id out of range: %u", bid); - return 0; - } - if (len == 0 || off >= kBufSize || (off + len) > kBufSize) { - LOG_WARN( - "SubmitWriteProvidedBuffers: invalid range (id=%u off=%zu len=%zu)", - bid, off, len); - return 0; - } - } - struct io_uring_sqe* sqe = ::io_uring_get_sqe(&ring_); - if (!sqe) { - SubmitPending(true); - sqe = ::io_uring_get_sqe(&ring_); - } - if (!sqe) { - LOG_ERROR("io_uring_get_sqe failed when submit provided write fd(%d)", - eventer->Fd()); - return 0; - } - auto* op = AcquireOp(OpType::kWrite, eventer, ctx, eventer->Fd(), completion, - context_deleter); - op->uses_provided_buffer = true; - op->provided_buf_count = static_cast(count); - for (size_t i = 0; i < count; ++i) { - const uint16_t bid = buf_ids[i]; - const size_t off = offsets[i]; - const size_t len = lens[i]; - op->provided_buf_ids[i] = bid; - op->provided_iovs[i].iov_base = static_cast( - buffers_.get() + (static_cast(bid) * kBufSize) + - static_cast(off)); - op->provided_iovs[i].iov_len = len; - } - uint64_t token = EncodeOp(op); - ::io_uring_prep_writev(sqe, eventer->Fd(), op->provided_iovs.data(), - static_cast(count), 0); - ::io_uring_sqe_set_data64(sqe, token); - SubmitPending(); - return token; -#else - (void)eventer; - (void)buf_ids; - (void)offsets; - (void)lens; - (void)count; - (void)completion; - (void)ctx; - (void)key; - (void)context_deleter; - return 0; -#endif -} - void Poller::HandleCqe(struct io_uring_cqe* cqe, const TimePoint& now, EventerList* active_eventers) { uint64_t token = cqe->user_data; @@ -646,7 +482,6 @@ void Poller::HandleCqe(struct io_uring_cqe* cqe, const TimePoint& now, reinterpret_cast(op->completion)); bool keep_op = (cqe->flags & IORING_CQE_F_MORE) != 0; if (op->completion) { - op->skip_buf_release = false; if ((op->type == OpType::kRead || op->type == OpType::kWrite) && op->eventer == nullptr) { CleanupOpContext(op); @@ -659,9 +494,7 @@ void Poller::HandleCqe(struct io_uring_cqe* cqe, const TimePoint& now, } LOG_DEBUG("Call completion for type(%d)", static_cast(op->type)); op->completion(cqe, op); - if (!op->skip_buf_release) { - ReleaseBufferFromCqe(cqe); - } + ReleaseBufferFromCqe(cqe); if (!keep_op) { op->state.store(IoUringOp::State::kDone, std::memory_order_relaxed); RecycleOp(op); @@ -722,11 +555,6 @@ void Poller::HandleCqe(struct io_uring_cqe* cqe, const TimePoint& now, case OpType::kNone: break; } - if (op->uses_provided_buffer && op->type == OpType::kWrite) { - for (size_t i = 0; i < op->provided_buf_count; ++i) { - ReturnBuffer(op->provided_buf_ids[i]); - } - } op->state.store(IoUringOp::State::kDone, std::memory_order_relaxed); if (!keep_op) { RecycleOp(op); @@ -925,12 +753,6 @@ void Poller::ReturnBuffer(uint16_t bid) { LOG_WARN("buffer id out of range: %u", bid); return; } - if (leased_buffers_[bid]) { - leased_buffers_[bid] = 0; - if (leased_buffer_count_ > 0) { - --leased_buffer_count_; - } - } if (use_buf_ring_ && buf_ring_ != nullptr) { ::io_uring_buf_ring_add( buf_ring_, buffers_.get() + (static_cast(bid) * kBufSize), @@ -961,30 +783,4 @@ char* Poller::GetBuffer(uint16_t id) { return buffers_.get() + (static_cast(id) * kBufSize); } -bool Poller::TryLeaseBuffer(uint16_t id) { -#ifdef IORING_RECV_MULTISHOT - if (!buffers_registered_) { - return false; - } - if (id >= kBufCount) { - return false; - } - if (leased_buffer_limit_ == 0) { - return false; - } - if (leased_buffers_[id]) { - return false; - } - if (leased_buffer_count_ >= leased_buffer_limit_) { - return false; - } - leased_buffers_[id] = 1; - ++leased_buffer_count_; - return true; -#else - (void)id; - return false; -#endif -} - } // namespace taotu diff --git a/src/poller.h b/src/poller.h index 35d9bb3b..9ef2c7ce 100644 --- a/src/poller.h +++ b/src/poller.h @@ -51,19 +51,6 @@ class Poller : NonCopyableMovable { CompletionFn completion{nullptr}; ContextDeleter context_deleter{nullptr}; std::atomic state{State::kInit}; - - // Per-CQE flag: completion may set this to keep the provided recv buffer - // (IORING_CQE_F_BUFFER) until it is explicitly returned later. - bool skip_buf_release{false}; - - // For write ops that use Poller's provided buffers (see - // SubmitWriteProvidedBuffer). If the op gets canceled (completion cleared), - // Poller will return the buffer using this metadata. - static constexpr size_t kProvidedIovMax = 8; - bool uses_provided_buffer{false}; - uint8_t provided_buf_count{0}; - std::array provided_buf_ids{}; - std::array provided_iovs{}; }; Poller(); @@ -89,17 +76,6 @@ class Poller : NonCopyableMovable { CompletionFn completion = nullptr, void* ctx = nullptr, uint64_t key = 0, ContextDeleter context_deleter = nullptr); - uint64_t SubmitWriteProvidedBuffer(Eventer* eventer, uint16_t buf_id, - size_t offset, size_t len, - CompletionFn completion = nullptr, - void* ctx = nullptr, uint64_t key = 0, - ContextDeleter context_deleter = nullptr); - uint64_t SubmitWriteProvidedBuffers(Eventer* eventer, const uint16_t* buf_ids, - const size_t* offsets, const size_t* lens, - size_t count, - CompletionFn completion = nullptr, - void* ctx = nullptr, uint64_t key = 0, - ContextDeleter context_deleter = nullptr); uint64_t SubmitAccept(int fd, struct sockaddr* addr, socklen_t* addrlen, void* ctx, CompletionFn completion = nullptr, uint64_t key = 0, bool multishot = false, @@ -119,15 +95,13 @@ class Poller : NonCopyableMovable { bool UseBufRing() const { return use_buf_ring_; } size_t BufferCount() const { return kBufCount; } // The buffer pointer is valid while Poller's provided-buffer pool is - // registered. For recv-multishot buffers, it must be returned via - // ReturnBuffer() when no longer needed (Poller auto-returns it unless - // completion sets IoUringOp::skip_buf_release=true). + // registered. For recv-multishot buffers, Poller returns them automatically + // after the read CQE is handled. char* GetBuffer(uint16_t id); void ReturnBuffer(uint16_t id); - bool TryLeaseBuffer(uint16_t id); static constexpr int kBufferGroupId = 1; static constexpr size_t kBufSize = 64 * 1024; - static constexpr size_t kBufCount = 256; + static constexpr size_t kBufCount = 1024; private: static uint64_t EncodeOp(IoUringOp* op); @@ -159,9 +133,6 @@ class Poller : NonCopyableMovable { // Not value-initialized to avoid touching (zeroing) large memory on Poller // construction. std::unique_ptr buffers_{}; - std::array leased_buffers_{}; // 0/1 - size_t leased_buffer_count_{0}; - size_t leased_buffer_limit_{0}; std::vector op_pool_; size_t op_pool_limit_{1U << 16}; size_t submit_batch_{1};