-
Notifications
You must be signed in to change notification settings - Fork 75
refactor: optimize with shared_lock/unique_lock for InMemoryCatalog #405
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,7 +21,6 @@ | |
|
|
||
| #include <algorithm> | ||
| #include <iterator> | ||
| #include <mutex> | ||
|
|
||
| #include "iceberg/table.h" | ||
| #include "iceberg/table_metadata.h" | ||
|
|
@@ -337,42 +336,42 @@ std::string_view InMemoryCatalog::name() const { return catalog_name_; } | |
|
|
||
| Status InMemoryCatalog::CreateNamespace( | ||
| const Namespace& ns, const std::unordered_map<std::string, std::string>& properties) { | ||
| std::lock_guard guard(mutex_); | ||
| std::unique_lock lock(mutex_); | ||
| return root_namespace_->CreateNamespace(ns, properties); | ||
| } | ||
|
|
||
| Result<std::unordered_map<std::string, std::string>> | ||
| InMemoryCatalog::GetNamespaceProperties(const Namespace& ns) const { | ||
| std::lock_guard guard(mutex_); | ||
| std::shared_lock lock(mutex_); | ||
| return root_namespace_->GetProperties(ns); | ||
| } | ||
|
|
||
| Result<std::vector<Namespace>> InMemoryCatalog::ListNamespaces( | ||
| const Namespace& ns) const { | ||
| std::lock_guard guard(mutex_); | ||
| std::shared_lock lock(mutex_); | ||
| return root_namespace_->ListNamespaces(ns); | ||
| } | ||
|
|
||
| Status InMemoryCatalog::DropNamespace(const Namespace& ns) { | ||
| std::lock_guard guard(mutex_); | ||
| std::unique_lock lock(mutex_); | ||
| return root_namespace_->DropNamespace(ns); | ||
| } | ||
|
|
||
| Result<bool> InMemoryCatalog::NamespaceExists(const Namespace& ns) const { | ||
| std::lock_guard guard(mutex_); | ||
| std::shared_lock lock(mutex_); | ||
| return root_namespace_->NamespaceExists(ns); | ||
| } | ||
|
|
||
| Status InMemoryCatalog::UpdateNamespaceProperties( | ||
| const Namespace& ns, const std::unordered_map<std::string, std::string>& updates, | ||
| const std::unordered_set<std::string>& removals) { | ||
| std::lock_guard guard(mutex_); | ||
| std::unique_lock lock(mutex_); | ||
| return root_namespace_->UpdateNamespaceProperties(ns, updates, removals); | ||
| } | ||
|
|
||
| Result<std::vector<TableIdentifier>> InMemoryCatalog::ListTables( | ||
| const Namespace& ns) const { | ||
| std::lock_guard guard(mutex_); | ||
| std::shared_lock lock(mutex_); | ||
| const auto& table_names = root_namespace_->ListTables(ns); | ||
| ICEBERG_RETURN_UNEXPECTED(table_names); | ||
| std::vector<TableIdentifier> table_idents; | ||
|
|
@@ -387,36 +386,40 @@ Result<std::unique_ptr<Table>> InMemoryCatalog::CreateTable( | |
| const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, | ||
| const std::string& location, | ||
| const std::unordered_map<std::string, std::string>& properties) { | ||
| std::unique_lock lock(mutex_); | ||
| return NotImplemented("create table"); | ||
| } | ||
|
|
||
| Result<std::unique_ptr<Table>> InMemoryCatalog::UpdateTable( | ||
| const TableIdentifier& identifier, | ||
| const std::vector<std::unique_ptr<TableRequirement>>& requirements, | ||
| const std::vector<std::unique_ptr<TableUpdate>>& updates) { | ||
| std::unique_lock lock(mutex_); | ||
| return NotImplemented("update table"); | ||
| } | ||
|
|
||
| Result<std::shared_ptr<Transaction>> InMemoryCatalog::StageCreateTable( | ||
| const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, | ||
| const std::string& location, | ||
| const std::unordered_map<std::string, std::string>& properties) { | ||
| std::unique_lock lock(mutex_); | ||
| return NotImplemented("stage create table"); | ||
| } | ||
|
|
||
| Result<bool> InMemoryCatalog::TableExists(const TableIdentifier& identifier) const { | ||
| std::lock_guard guard(mutex_); | ||
| std::shared_lock lock(mutex_); | ||
| return root_namespace_->TableExists(identifier); | ||
| } | ||
|
|
||
| Status InMemoryCatalog::DropTable(const TableIdentifier& identifier, bool purge) { | ||
| std::lock_guard guard(mutex_); | ||
| std::unique_lock lock(mutex_); | ||
| // TODO(Guotao): Delete all metadata files if purge is true. | ||
| return root_namespace_->UnregisterTable(identifier); | ||
| } | ||
|
|
||
| Status InMemoryCatalog::RenameTable(const TableIdentifier& from, | ||
| const TableIdentifier& to) { | ||
| std::unique_lock lock(mutex_); | ||
| return NotImplemented("rename table"); | ||
| } | ||
|
|
||
|
|
@@ -426,31 +429,40 @@ Result<std::unique_ptr<Table>> InMemoryCatalog::LoadTable( | |
| return InvalidArgument("file_io is not set for catalog {}", catalog_name_); | ||
| } | ||
|
|
||
| Result<std::string> metadata_location; | ||
| std::string metadata_location; | ||
| { | ||
| std::lock_guard guard(mutex_); | ||
| std::shared_lock lock(mutex_); | ||
| ICEBERG_ASSIGN_OR_RAISE(metadata_location, | ||
| root_namespace_->GetTableMetadataLocation(identifier)); | ||
| } | ||
|
|
||
| ICEBERG_ASSIGN_OR_RAISE(auto metadata, | ||
| TableMetadataUtil::Read(*file_io_, metadata_location.value())); | ||
| TableMetadataUtil::Read(*file_io_, metadata_location)); | ||
|
|
||
| return std::make_unique<Table>(identifier, std::move(metadata), | ||
| metadata_location.value(), file_io_, | ||
| return std::make_unique<Table>(identifier, std::move(metadata), metadata_location, | ||
| file_io_, | ||
| std::static_pointer_cast<Catalog>(shared_from_this())); | ||
| } | ||
|
|
||
| Result<std::shared_ptr<Table>> InMemoryCatalog::RegisterTable( | ||
| const TableIdentifier& identifier, const std::string& metadata_file_location) { | ||
| std::lock_guard guard(mutex_); | ||
| if (!file_io_) [[unlikely]] { | ||
| return InvalidArgument("file_io is not set for catalog {}", catalog_name_); | ||
| } | ||
|
|
||
| ICEBERG_ASSIGN_OR_RAISE(auto metadata, | ||
| TableMetadataUtil::Read(*file_io_, metadata_file_location)); | ||
|
Comment on lines
+453
to
+454
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It might be delayed until register is successful. Or we can add a new lockless
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here's an issue: if a file is invalid, register returns failure, but the map may already contain an entry. Subsequent calls to register will then return "already exists". |
||
|
|
||
| std::unique_lock lock(mutex_); | ||
| if (!root_namespace_->NamespaceExists(identifier.ns)) { | ||
| return NoSuchNamespace("table namespace does not exist."); | ||
| } | ||
| if (!root_namespace_->RegisterTable(identifier, metadata_file_location)) { | ||
| return UnknownError("The registry failed."); | ||
| } | ||
| return LoadTable(identifier); | ||
| return std::make_unique<Table>(identifier, std::move(metadata), metadata_file_location, | ||
| file_io_, | ||
| std::static_pointer_cast<Catalog>(shared_from_this())); | ||
| } | ||
|
|
||
| } // namespace iceberg | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Keep
std::lock_guardif you only need write guard here.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
std::lock_guard cannot be used to acquire a write lock on a std::shared_mutex
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
std::lock_guardis a template wrapper (cppreference link). Any lock which meets BasicLockable can be used instd::lock_guard, includingstd::shared_mutex.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was wrong — lock_guard can indeed lock a shared_mutex, but it is still recommended to use unique_lock and shared_lock for read-write locking on a shared_mutex for better code readability.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think there are some differences about code readability here.
std::unique_lockhas additional overhead butstd::scoped_lockandstd::lock_guarddon't. It should not be compared withstd::scoped_lockorstd::lock_guard.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using unique_lock and shared_lock with shared_mutex makes it clear that this is a read-write lock, as seeing unique_lock immediately indicates that exclusive (write) access is being acquired. And, in other mutex scenarios, use lock_guard.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1 on
std::lock_guardif we care the performance here.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Compiler can optimize the simple scene (godbolt). But I still suggest to keep the code from doing unnecessary things. The simpler the better.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think it's unnecessary, and I recommend use the unique_lock/shared_lock pair for locking shared_mutex