From 766cd9ef803a9ec194c57f96adb768efe1e2d04a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 26 Sep 2025 16:29:30 +0200 Subject: [PATCH 01/36] wip first commit --- CMakeLists.txt | 3 +- .../openPMD/toolkit/ExternalBlockStorage.hpp | 49 ++++++ .../toolkit/ExternalBlockStorage_internal.hpp | 19 +++ src/toolkit/ExternalBlockStorage.cpp | 139 ++++++++++++++++++ 4 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 include/openPMD/toolkit/ExternalBlockStorage.hpp create mode 100644 include/openPMD/toolkit/ExternalBlockStorage_internal.hpp create mode 100644 src/toolkit/ExternalBlockStorage.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index bd888f84b5..9fbe26b380 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -434,7 +434,8 @@ set(CORE_SOURCE src/snapshots/IteratorTraits.cpp src/snapshots/RandomAccessIterator.cpp src/snapshots/Snapshots.cpp - src/snapshots/StatefulIterator.cpp) + src/snapshots/StatefulIterator.cpp + src/toolkit/ExternalBlockStorage.cpp) set(IO_SOURCE src/IO/AbstractIOHandler.cpp src/IO/AbstractIOHandlerImpl.cpp diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp new file mode 100644 index 0000000000..d540e8d7cc --- /dev/null +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include +#include + +namespace openPMD +{ +class ExternalBlockStorage; +} + +namespace openPMD::internal +{ +struct ExternalBlockStorageBackend +{ + virtual void + put(std::string const &identifier, void const *data, size_t len) = 0; + virtual ~ExternalBlockStorageBackend(); +}; + +struct StdioBuilder +{ + std::string m_directory; + std::optional m_openMode = std::nullopt; + + auto setDirectory(std::string directory) -> StdioBuilder &; + auto setOpenMode(std::string openMode) -> StdioBuilder &; + + operator ExternalBlockStorage(); +}; +} // namespace openPMD::internal + +namespace openPMD +{ +class 
ExternalBlockStorage +{ +private: + std::unique_ptr m_worker; + ExternalBlockStorage( + std::unique_ptr); + + friend struct internal::StdioBuilder; + +public: + static auto makeStdioSession(std::string directory) + -> internal::StdioBuilder; +}; +} // namespace openPMD diff --git a/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp b/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp new file mode 100644 index 0000000000..2adaaa05f6 --- /dev/null +++ b/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp @@ -0,0 +1,19 @@ +#pragma once + +#include "openPMD/toolkit/ExternalBlockStorage.hpp" + +namespace openPMD::internal +{ +struct ExternalBlockStorageStdio : ExternalBlockStorageBackend +{ +private: + std::string m_directory; + std::string m_openMode; + +public: + ExternalBlockStorageStdio(std::string directory, std::string openMode); + void + put(std::string const &identifier, void const *data, size_t len) override; + ~ExternalBlockStorageStdio() override; +}; +} // namespace openPMD::internal diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp new file mode 100644 index 0000000000..5b5a7dc2ac --- /dev/null +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -0,0 +1,139 @@ + +#include "openPMD/toolkit/ExternalBlockStorage.hpp" + +#include "openPMD/auxiliary/Filesystem.hpp" +#include "openPMD/toolkit/ExternalBlockStorage_internal.hpp" + +#include +#include +#include + +namespace +{ +auto concat_filepath(std::string const &s1, std::string const &s2) + -> std::string +{ + if (s1.empty()) + { + return s2; + } + if (s2.empty()) + { + return s1; + } + bool ends_with_slash = + *s1.crbegin() == openPMD::auxiliary::directory_separator; + bool starts_with_slash = + *s2.cbegin() == openPMD::auxiliary::directory_separator; + + if (ends_with_slash ^ starts_with_slash) + { + return s1 + s2; + } + else if (ends_with_slash && starts_with_slash) + { + return s1 + (s2.c_str() + 1); + } + else + { + return s1 + 
openPMD::auxiliary::directory_separator + s2; + } +} +} // namespace + +namespace openPMD::internal +{ +ExternalBlockStorageBackend::~ExternalBlockStorageBackend() = default; + +ExternalBlockStorageStdio::ExternalBlockStorageStdio( + std::string directory, std::string openMode) + : m_directory(std::move(directory)), m_openMode(std::move(openMode)) +{ + if (m_directory.empty()) + { + throw std::invalid_argument( + "ExternalBlockStorageStdio: directory cannot be empty"); + } + + // Ensure the directory exists and is writable + if (!auxiliary::create_directories(m_directory)) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to create or access " + "directory: " + + m_directory); + } +} + +ExternalBlockStorageStdio::~ExternalBlockStorageStdio() = default; + +void ExternalBlockStorageStdio::put( + std::string const &identifier, void const *data, size_t len) +{ + if (len == 0) + { + return; + } + + // Generate a unique filename using a simple counter approach (can be + // extended) + static size_t counter = 0; + std::string filename = m_directory + "/block_" + std::to_string(counter++); + std::string filepath = concat_filepath(m_directory, identifier); + + FILE *file = std::fopen(filepath.c_str(), "wb"); + if (!file) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to open file for writing: " + + filepath); + } + + size_t written = std::fwrite(data, 1, len, file); + if (written != len) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to write full data to file: " + + filepath); + } + + if (std::fclose(file) != 0) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to close file after writing: " + + filepath); + } +} +} // namespace openPMD::internal +namespace openPMD +{ +auto ExternalBlockStorage::makeStdioSession(std::string directory) + -> internal::StdioBuilder +{ + return internal::StdioBuilder{std::move(directory)}; +} + +auto internal::StdioBuilder::setDirectory(std::string directory) + 
-> StdioBuilder & +{ + m_directory = std::move(directory); + return *this; +} +auto internal::StdioBuilder::setOpenMode(std::string openMode) -> StdioBuilder & +{ + m_openMode = std::move(openMode); + return *this; +} + +internal::StdioBuilder::operator ExternalBlockStorage() +{ + return ExternalBlockStorage{ + std::make_unique( + std::move(m_directory), std::move(m_openMode).value_or("wb"))}; +} + +ExternalBlockStorage::ExternalBlockStorage( + std::unique_ptr worker) + : m_worker(std::move(worker)) +{} +} // namespace openPMD From c2e53d562edb3dd163f02384f7fb3e1ca67d8770 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 26 Sep 2025 18:48:45 +0200 Subject: [PATCH 02/36] Some cleaning --- .../openPMD/toolkit/ExternalBlockStorage.hpp | 6 ++- .../toolkit/ExternalBlockStorage_internal.hpp | 4 +- src/toolkit/ExternalBlockStorage.cpp | 49 ++++++++++--------- 3 files changed, 31 insertions(+), 28 deletions(-) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index d540e8d7cc..9b8cd1da44 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -14,8 +14,9 @@ namespace openPMD::internal { struct ExternalBlockStorageBackend { - virtual void - put(std::string const &identifier, void const *data, size_t len) = 0; + virtual auto + put(std::string const &identifier, void const *data, size_t len) + -> std::string = 0; virtual ~ExternalBlockStorageBackend(); }; @@ -28,6 +29,7 @@ struct StdioBuilder auto setOpenMode(std::string openMode) -> StdioBuilder &; operator ExternalBlockStorage(); + auto build() -> ExternalBlockStorage; }; } // namespace openPMD::internal diff --git a/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp b/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp index 2adaaa05f6..10a3e724be 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp +++ 
b/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp @@ -12,8 +12,8 @@ struct ExternalBlockStorageStdio : ExternalBlockStorageBackend public: ExternalBlockStorageStdio(std::string directory, std::string openMode); - void - put(std::string const &identifier, void const *data, size_t len) override; + auto put(std::string const &identifier, void const *data, size_t len) + -> std::string override; ~ExternalBlockStorageStdio() override; }; } // namespace openPMD::internal diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 5b5a7dc2ac..6344330ee3 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -67,20 +67,15 @@ ExternalBlockStorageStdio::ExternalBlockStorageStdio( ExternalBlockStorageStdio::~ExternalBlockStorageStdio() = default; -void ExternalBlockStorageStdio::put( - std::string const &identifier, void const *data, size_t len) +auto ExternalBlockStorageStdio::put( + std::string const &identifier, void const *data, size_t len) -> std::string { + std::string filepath = concat_filepath(m_directory, identifier); if (len == 0) { - return; + return filepath; } - // Generate a unique filename using a simple counter approach (can be - // extended) - static size_t counter = 0; - std::string filename = m_directory + "/block_" + std::to_string(counter++); - std::string filepath = concat_filepath(m_directory, identifier); - FILE *file = std::fopen(filepath.c_str(), "wb"); if (!file) { @@ -103,33 +98,39 @@ void ExternalBlockStorageStdio::put( "ExternalBlockStorageStdio: failed to close file after writing: " + filepath); } -} -} // namespace openPMD::internal -namespace openPMD -{ -auto ExternalBlockStorage::makeStdioSession(std::string directory) - -> internal::StdioBuilder -{ - return internal::StdioBuilder{std::move(directory)}; + + return filepath; } -auto internal::StdioBuilder::setDirectory(std::string directory) - -> StdioBuilder & +auto StdioBuilder::setDirectory(std::string 
directory) -> StdioBuilder & { m_directory = std::move(directory); return *this; } -auto internal::StdioBuilder::setOpenMode(std::string openMode) -> StdioBuilder & +auto StdioBuilder::setOpenMode(std::string openMode) -> StdioBuilder & { m_openMode = std::move(openMode); return *this; } -internal::StdioBuilder::operator ExternalBlockStorage() +StdioBuilder::operator ExternalBlockStorage() { - return ExternalBlockStorage{ - std::make_unique( - std::move(m_directory), std::move(m_openMode).value_or("wb"))}; + return ExternalBlockStorage{std::make_unique( + std::move(m_directory), std::move(m_openMode).value_or("wb"))}; +} + +auto StdioBuilder::build() -> ExternalBlockStorage +{ + return *this; +} +} // namespace openPMD::internal + +namespace openPMD +{ +auto ExternalBlockStorage::makeStdioSession(std::string directory) + -> internal::StdioBuilder +{ + return internal::StdioBuilder{std::move(directory)}; } ExternalBlockStorage::ExternalBlockStorage( From 6a6330facc14b18de20cfe4859a2eab22cb89902 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 26 Sep 2025 20:15:48 +0200 Subject: [PATCH 03/36] Hmm maybe usable --- .../openPMD/toolkit/ExternalBlockStorage.hpp | 18 +++ src/toolkit/ExternalBlockStorage.cpp | 143 +++++++++++++++++- 2 files changed, 156 insertions(+), 5 deletions(-) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 9b8cd1da44..318cfbdc87 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -1,5 +1,9 @@ #pragma once +#include "openPMD/Dataset.hpp" + +#include + #include #include #include @@ -45,7 +49,21 @@ class ExternalBlockStorage friend struct internal::StdioBuilder; public: + explicit ExternalBlockStorage(); + static auto makeStdioSession(std::string directory) -> internal::StdioBuilder; + + // returns created JSON key + template + auto store( + Extent globalExtent, + Offset blockOffset, + 
Extent blockExtent, + nlohmann::json &fullJsonDataset, + nlohmann::json::json_pointer const &path, + T const *data) -> std::string; + + static void sanitizeString(std::string &s); }; } // namespace openPMD diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 6344330ee3..27404259ff 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -1,12 +1,17 @@ #include "openPMD/toolkit/ExternalBlockStorage.hpp" +#include "openPMD/DatatypeMacros.hpp" #include "openPMD/auxiliary/Filesystem.hpp" #include "openPMD/toolkit/ExternalBlockStorage_internal.hpp" #include #include +#include +#include #include +#include +#include namespace { @@ -70,7 +75,10 @@ ExternalBlockStorageStdio::~ExternalBlockStorageStdio() = default; auto ExternalBlockStorageStdio::put( std::string const &identifier, void const *data, size_t len) -> std::string { - std::string filepath = concat_filepath(m_directory, identifier); + auto sanitized = identifier; + ExternalBlockStorage::sanitizeString(sanitized); + std::string filepath = concat_filepath(m_directory, sanitized); + if (len == 0) { return filepath; @@ -127,14 +135,139 @@ auto StdioBuilder::build() -> ExternalBlockStorage namespace openPMD { +ExternalBlockStorage::ExternalBlockStorage() = default; +ExternalBlockStorage::ExternalBlockStorage( + std::unique_ptr worker) + : m_worker(std::move(worker)) +{} + auto ExternalBlockStorage::makeStdioSession(std::string directory) -> internal::StdioBuilder { return internal::StdioBuilder{std::move(directory)}; } -ExternalBlockStorage::ExternalBlockStorage( - std::unique_ptr worker) - : m_worker(std::move(worker)) -{} +template +auto ExternalBlockStorage::store( + Extent globalExtent, + Offset blockOffset, + Extent blockExtent, + nlohmann::json &fullJsonDataset, + nlohmann::json::json_pointer const &path, + T const *data) -> std::string +{ + // JSON Identifier: running counter. 
+ // Do not use an array to avoid reindexing upon deletion. + + // Filesystem Identifier: JSON path + running counter. + + // For each externally handled data block, store: + // 1. Filesystem identifier + // 2. Offset, Extent + auto &dataset = fullJsonDataset[path]; + + // running_index denotes the last *used* block index in the dataset + using running_index_t = uint64_t; + running_index_t running_index = [&]() -> running_index_t { + if (auto it = dataset.find("_running_index"); it != dataset.end()) + { + auto res = it->get(); + ++res; + *it = res; + return res; + } + else + { + dataset["_running_index"] = 0; + return 0; + } + }(); + + constexpr size_t padding = 6; + std::string index_as_str = [running_index]() { + auto res = std::to_string(running_index); + auto size = res.size(); + if (size >= padding) + { + return res; + } + std::stringstream padded; + for (size_t i = 0; i < padding - size; ++i) + { + padded << '0'; + } + padded << res; + return padded.str(); + }(); + + if (dataset.contains(index_as_str)) + { + throw std::runtime_error( + "Inconsistent state: Index " + index_as_str + " already in use."); + } + + auto check_metadata = [&dataset](char const *key, auto const &value) { + using value_t = + std::remove_reference_t>; + if (auto it = dataset.find(key); it != dataset.end()) + { + auto const &stored_value = it->get(); + if (stored_value != value) + { + throw std::runtime_error( + "Inconsistent chunk storage in key " + std::string(key) + + "."); + } + } + else + { + dataset[key] = value; + } + }; + std::string type = typeid(T).name(); // TODO use sth more portable + check_metadata("_type", type); + check_metadata("_byte_width", sizeof(T)); + check_metadata("_extent", globalExtent); + + auto &block = dataset[index_as_str]; + block["offset"] = blockOffset; + block["extent"] = blockExtent; + std::stringstream filesystem_identifier; + filesystem_identifier << path.to_string() << "--" << index_as_str << ".dat"; + auto escaped_filesystem_identifier = 
m_worker->put( + filesystem_identifier.str(), + data, + std::accumulate( + blockExtent.begin(), + blockExtent.end(), + sizeof(T), + [](size_t left, size_t right) { return left * right; })); + block["external_ref"] = escaped_filesystem_identifier; + return index_as_str; +} + +void ExternalBlockStorage::sanitizeString(std::string &s) +{ + // Replace invalid characters with underscore + for (char &c : s) + { + if (c == '/' || c == '\\' || c == ':' || c == '*' || c == '?' || + c == '"' || c == '<' || c == '>' || c == '|' || c == '\n' || + c == '\r' || c == '\t' || c == '\0' || c == ' ') + { + c = '_'; + } + } +} + +#define OPENPMD_INSTANTIATE(type) \ + template auto ExternalBlockStorage::store( \ + Extent globalExtent, \ + Offset blockOffset, \ + Extent blockExtent, \ + nlohmann::json & fullJsonDataset, \ + nlohmann::json::json_pointer const &path, \ + type const *data) -> std::string; +OPENPMD_FOREACH_DATASET_DATATYPE(OPENPMD_INSTANTIATE) +#undef OPENPMD_INSTANTIATE } // namespace openPMD From 4f41e600b9659b78a3d7d66ca976c6dd50947317 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 26 Sep 2025 20:39:19 +0200 Subject: [PATCH 04/36] Quickly use this in the JSON backend bin/openpmd-pipe --infile samples/git-sample/data%T.bp --outfile data.json --outconfig "$(echo -e 'json.dataset.mode = "template"\njson.attribute.mode = "short"')" --- include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 3 ++ src/IO/JSON/JSONIOHandlerImpl.cpp | 49 +++++++++++++++---- 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index 7c02f38ddd..82d417038c 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -29,6 +29,7 @@ #include "openPMD/auxiliary/JSON_internal.hpp" #include "openPMD/backend/Variant_internal.hpp" #include "openPMD/config.hpp" +#include "openPMD/toolkit/ExternalBlockStorage.hpp" #include #include 
@@ -177,6 +178,8 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl std::string originalExtension); #endif + ExternalBlockStorage externalBlockStorage; + void init(openPMD::json::TracingJSON config); ~JSONIOHandlerImpl() override; diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 88e221bea5..7e778718fa 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -33,6 +33,7 @@ #include "openPMD/auxiliary/TypeTraits.hpp" #include "openPMD/backend/Attribute.hpp" #include "openPMD/backend/Writable.hpp" +#include "openPMD/toolkit/ExternalBlockStorage.hpp" #include #include @@ -440,6 +441,9 @@ void JSONIOHandlerImpl::init(openPMD::json::TracingJSON config) (void)_; warnUnusedJson(backendConfig.value()); } + + externalBlockStorage = + ExternalBlockStorage::makeStdioSession("./external_blocks/"); } JSONIOHandlerImpl::~JSONIOHandlerImpl() = default; @@ -1139,6 +1143,22 @@ void JSONIOHandlerImpl::deleteAttribute( j.erase(parameters.name); } +namespace +{ + struct StoreExternally + { + template + static void call( + ExternalBlockStorage &blockStorage, void const *ptr, Args &&...args) + { + blockStorage.store( + std::forward(args)..., static_cast(ptr)); + } + + static constexpr char const *errorMsg = "StoreExternally"; + }; +} // namespace + void JSONIOHandlerImpl::writeDataset( Writable *writable, Parameter ¶meters) { @@ -1148,21 +1168,32 @@ void JSONIOHandlerImpl::writeDataset( auto pos = setAndGetFilePosition(writable); auto file = refreshFileFromParent(writable); - auto &j = obtainJsonContents(writable); + auto filePosition = setAndGetFilePosition(writable, false); + auto &jsonRoot = *obtainJsonContents(file); + auto &j = jsonRoot[filePosition->id]; switch (verifyDataset(parameters, j)) { case DatasetMode::Dataset: break; case DatasetMode::Template: - if (!m_datasetMode.m_skipWarnings) - { - std::cerr - << "[JSON/TOML backend: Warning] Trying to write data to a " - "template dataset. Will skip." 
- << '\n'; - m_datasetMode.m_skipWarnings = true; - } + switchDatasetType( + parameters.dtype, + externalBlockStorage, + parameters.data.get(), + j.at("extent").get(), + parameters.offset, + parameters.extent, + jsonRoot, + filePosition->id); + // if (!m_datasetMode.m_skipWarnings) + // { + // std::cerr + // << "[JSON/TOML backend: Warning] Trying to write data to a " + // "template dataset. Will skip." + // << '\n'; + // m_datasetMode.m_skipWarnings = true; + // } return; } From 7301e94e93d7a8933e5a65976ff6431d7b0c6039 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 29 Sep 2025 08:42:39 +0200 Subject: [PATCH 05/36] Better and generalized handling for datatypes --- include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 40 +++++++++++++++++++ .../openPMD/toolkit/ExternalBlockStorage.hpp | 13 +++++- src/IO/JSON/JSONIOHandlerImpl.cpp | 13 +++--- src/toolkit/ExternalBlockStorage.cpp | 15 ++++--- 4 files changed, 70 insertions(+), 11 deletions(-) diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index 82d417038c..f8f3cc49c0 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -154,6 +154,46 @@ void from_json(const nlohmann::json &j, std::complex &p) } } // namespace std +namespace openPMD::internal +{ +auto jsonDatatypeToString(Datatype dt) -> std::string; + +struct JsonDatatypeHandling +{ + template + static auto encodeDatatype(nlohmann::json &j) -> bool + { + auto const &needed_datatype = + jsonDatatypeToString(determineDatatype()); + if (auto it = j.find("datatype"); it != j.end()) + { + return it.value().get() == needed_datatype; + } + else + { + j["datatype"] = needed_datatype; + return true; + } + } + + template + static auto decodeDatatype(nlohmann::json const &j, Args &&...args) -> bool + { + if (auto it = j.find("datatype"); it != j.end()) + { + switchDatasetType( + stringToDatatype(it.value().get()), + 
std::forward(args)...); + return true; + } + else + { + return false; + } + } +}; +} // namespace openPMD::internal + namespace openPMD { class JSONIOHandlerImpl : public AbstractIOHandlerImpl diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 318cfbdc87..5bbb87091e 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -39,6 +39,17 @@ struct StdioBuilder namespace openPMD { +// used nowhere, just shows the signatures +// TODO: replace this with a concept upon switching to C++20 +struct DatatypeHandling_Interface +{ + template + static auto encodeDatatype(nlohmann::json &) -> bool; + + template + static auto decodeDatatype(nlohmann::json const &j, Args &&...args) -> bool; +}; + class ExternalBlockStorage { private: @@ -55,7 +66,7 @@ class ExternalBlockStorage -> internal::StdioBuilder; // returns created JSON key - template + template auto store( Extent globalExtent, Offset blockOffset, diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 7e778718fa..10e09add1b 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -163,7 +163,10 @@ namespace } } } +} // namespace +namespace internal +{ // Does the same as datatypeToString(), but this makes sure that we don't // accidentally change the JSON schema by modifying datatypeToString() std::string jsonDatatypeToString(Datatype dt) @@ -252,7 +255,7 @@ namespace } return "Unreachable!"; } -} // namespace +} // namespace internal auto JSONIOHandlerImpl::retrieveDatasetMode( openPMD::json::TracingJSON &config) const -> DatasetMode_s @@ -637,7 +640,7 @@ void JSONIOHandlerImpl::createDataset( } setAndGetFilePosition(writable, name); auto &dset = jsonVal[name]; - dset["datatype"] = jsonDatatypeToString(parameter.dtype); + dset["datatype"] = internal::jsonDatatypeToString(parameter.dtype); switch (localMode) { @@ -1151,7 +1154,7 
@@ namespace static void call( ExternalBlockStorage &blockStorage, void const *ptr, Args &&...args) { - blockStorage.store( + blockStorage.store( std::forward(args)..., static_cast(ptr)); } @@ -1235,7 +1238,7 @@ void JSONIOHandlerImpl::writeAttribute( { case AttributeMode::Long: (*jsonVal)[filePosition->id]["attributes"][name] = { - {"datatype", jsonDatatypeToString(parameter.dtype)}, + {"datatype", internal::jsonDatatypeToString(parameter.dtype)}, {"value", value}}; break; case AttributeMode::Short: @@ -2397,7 +2400,7 @@ nlohmann::json JSONIOHandlerImpl::platformSpecifics() Datatype::BOOL}; for (auto &datatype : datatypes) { - res[jsonDatatypeToString(datatype)] = toBytes(datatype); + res[internal::jsonDatatypeToString(datatype)] = toBytes(datatype); } return res; } diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 27404259ff..a33039df86 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -2,6 +2,7 @@ #include "openPMD/toolkit/ExternalBlockStorage.hpp" #include "openPMD/DatatypeMacros.hpp" +#include "openPMD/IO/JSON/JSONIOHandlerImpl.hpp" #include "openPMD/auxiliary/Filesystem.hpp" #include "openPMD/toolkit/ExternalBlockStorage_internal.hpp" @@ -147,7 +148,7 @@ auto ExternalBlockStorage::makeStdioSession(std::string directory) return internal::StdioBuilder{std::move(directory)}; } -template +template auto ExternalBlockStorage::store( Extent globalExtent, Offset blockOffset, @@ -224,8 +225,10 @@ auto ExternalBlockStorage::store( dataset[key] = value; } }; - std::string type = typeid(T).name(); // TODO use sth more portable - check_metadata("_type", type); + if (!DatatypeHandling::template encodeDatatype(dataset)) + { + throw std::runtime_error("Inconsistent chunk storage in datatype."); + } check_metadata("_byte_width", sizeof(T)); check_metadata("_extent", globalExtent); @@ -260,14 +263,16 @@ void ExternalBlockStorage::sanitizeString(std::string &s) } } -#define 
OPENPMD_INSTANTIATE(type) \ - template auto ExternalBlockStorage::store( \ +#define OPENPMD_INSTANTIATE_DATATYPEHANDLING(datatypehandling, type) \ + template auto ExternalBlockStorage::store( \ Extent globalExtent, \ Offset blockOffset, \ Extent blockExtent, \ nlohmann::json & fullJsonDataset, \ nlohmann::json::json_pointer const &path, \ type const *data) -> std::string; +#define OPENPMD_INSTANTIATE(type) \ + OPENPMD_INSTANTIATE_DATATYPEHANDLING(internal::JsonDatatypeHandling, type) OPENPMD_FOREACH_DATASET_DATATYPE(OPENPMD_INSTANTIATE) #undef OPENPMD_INSTANTIATE } // namespace openPMD From ec47d7d08d5b40b5e6bfd549650bd81378d076e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 29 Sep 2025 09:38:54 +0200 Subject: [PATCH 06/36] structure for aws --- CMakeLists.txt | 4 + .../openPMD/toolkit/ExternalBlockStorage.hpp | 45 ++++++ .../toolkit/ExternalBlockStorage_internal.hpp | 15 ++ src/toolkit/ExternalBlockStorage.cpp | 128 +++++++++++++++++- 4 files changed, 190 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9fbe26b380..a0de1cfa29 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -385,6 +385,8 @@ else() endif() unset(openPMD_REQUIRED_ADIOS2_COMPONENTS) +find_package(AWSSDK REQUIRED COMPONENTS s3) + # external library: pybind11 (optional) include(${openPMD_SOURCE_DIR}/cmake/dependencies/pybind11.cmake) @@ -563,6 +565,8 @@ if(openPMD_HAVE_ADIOS2) endif() endif() +target_link_libraries(openPMD PUBLIC ${AWSSDK_LIBRARIES}) + # Runtime parameter and API status checks ("asserts") if(openPMD_USE_VERIFY) target_compile_definitions(openPMD PRIVATE openPMD_USE_VERIFY=1) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 5bbb87091e..d69db5a16b 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -2,9 +2,11 @@ #include "openPMD/Dataset.hpp" +#include #include #include +#include #include 
#include #include @@ -35,6 +37,41 @@ struct StdioBuilder operator ExternalBlockStorage(); auto build() -> ExternalBlockStorage; }; + +struct AwsBuilder +{ + struct init_credentials_tag_t + {}; + static constexpr init_credentials_tag_t init_credentials_tag = {}; + + AwsBuilder( + std::string bucketName, std::string accessKeyId, std::string secretKey); + + enum class Scheme : uint8_t + { + HTTP, + HTTPS + }; + std::string m_bucketName; + std::string m_accessKeyId; + std::string m_secretKey; + std::optional m_sessionToken; + std::initializer_list m_credentials; + std::optional m_endpointOverride; + std::optional m_region; + std::optional m_scheme; + + auto setBucketName(std::string bucketName) -> AwsBuilder &; + auto setCredentials(std::string accessKeyId, std::string secretKey) + -> AwsBuilder &; + auto setSessionToken(std::string sessionToken) -> AwsBuilder &; + auto setEndpointOverride(std::string endpoint) -> AwsBuilder &; + auto setRegion(std::string regionName) -> AwsBuilder &; + auto setScheme(Scheme s) -> AwsBuilder &; + + operator ExternalBlockStorage(); + auto build() -> ExternalBlockStorage; +}; } // namespace openPMD::internal namespace openPMD @@ -58,12 +95,17 @@ class ExternalBlockStorage std::unique_ptr); friend struct internal::StdioBuilder; + friend struct internal::AwsBuilder; public: explicit ExternalBlockStorage(); static auto makeStdioSession(std::string directory) -> internal::StdioBuilder; + template + static auto makeAwsSession( + std::string bucketName, std::string accessKeyId, std::string secretKey) + -> internal::AwsBuilder; // returns created JSON key template @@ -77,4 +119,7 @@ class ExternalBlockStorage static void sanitizeString(std::string &s); }; + +// Implementations + } // namespace openPMD diff --git a/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp b/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp index 10a3e724be..2ad77e7965 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp +++ 
b/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp @@ -2,6 +2,8 @@ #include "openPMD/toolkit/ExternalBlockStorage.hpp" +#include + namespace openPMD::internal { struct ExternalBlockStorageStdio : ExternalBlockStorageBackend @@ -16,4 +18,17 @@ struct ExternalBlockStorageStdio : ExternalBlockStorageBackend -> std::string override; ~ExternalBlockStorageStdio() override; }; + +struct ExternalBlockStorageAws : ExternalBlockStorageBackend +{ +private: + Aws::S3::S3Client m_client; + std::string m_bucketName; + +public: + ExternalBlockStorageAws(Aws::S3::S3Client, std::string bucketName); + auto put(std::string const &identifier, void const *data, size_t len) + -> std::string override; + ~ExternalBlockStorageAws() override; +}; } // namespace openPMD::internal diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index a33039df86..7672b1016e 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -1,10 +1,15 @@ #include "openPMD/toolkit/ExternalBlockStorage.hpp" +#include "openPMD/toolkit/ExternalBlockStorage_internal.hpp" + #include "openPMD/DatatypeMacros.hpp" #include "openPMD/IO/JSON/JSONIOHandlerImpl.hpp" #include "openPMD/auxiliary/Filesystem.hpp" -#include "openPMD/toolkit/ExternalBlockStorage_internal.hpp" + +#include +#include +#include #include #include @@ -12,7 +17,8 @@ #include #include #include -#include +#include +#include namespace { @@ -122,6 +128,58 @@ auto StdioBuilder::setOpenMode(std::string openMode) -> StdioBuilder & return *this; } +ExternalBlockStorageAws::ExternalBlockStorageAws( + Aws::S3::S3Client client, std::string bucketName) + : m_client{std::move(client)}, m_bucketName(std::move(bucketName)) +{} +ExternalBlockStorageAws::~ExternalBlockStorageAws() = default; + +AwsBuilder::AwsBuilder( + std::string bucketName, std::string accessKeyId, std::string secretKey) + : m_bucketName(std::move(bucketName)) + , m_accessKeyId(std::move(accessKeyId)) + , 
m_secretKey(std::move(secretKey)) +{} + +auto AwsBuilder::setBucketName(std::string bucketName) -> AwsBuilder & +{ + m_bucketName = std::move(bucketName); + return *this; +} + +auto internal::AwsBuilder::setCredentials( + std::string accessKeyId, std::string secretKey) -> AwsBuilder & +{ + m_accessKeyId = std::move(accessKeyId); + m_secretKey = std::move(secretKey); + return *this; +} + +auto AwsBuilder::setEndpointOverride(std::string endpoint) -> AwsBuilder & +{ + m_endpointOverride = std::move(endpoint); + return *this; +} + +auto AwsBuilder::setRegion(std::string regionName) -> AwsBuilder & +{ + m_region = std::move(regionName); + return *this; +} + +auto AwsBuilder::setScheme(Scheme s) -> AwsBuilder & +{ + m_scheme = s; + return *this; +} + +auto internal::AwsBuilder::setSessionToken(std::string sessionToken) + -> AwsBuilder & +{ + m_sessionToken = std::move(sessionToken); + return *this; +} + StdioBuilder::operator ExternalBlockStorage() { return ExternalBlockStorage{std::make_unique( @@ -132,6 +190,63 @@ auto StdioBuilder::build() -> ExternalBlockStorage { return *this; } + +AwsBuilder::operator ExternalBlockStorage() +{ + Aws::Client::ClientConfiguration config; + + if (m_endpointOverride.has_value()) + { + config.endpointOverride = *m_endpointOverride; + } + if (m_region.has_value()) + { + config.region = *m_region; + } + else + { + config.region = "us-east-1"; + } + if (m_scheme.has_value()) + { + switch (*m_scheme) + { + case Scheme::HTTP: + config.scheme = Aws::Http::Scheme::HTTP; + break; + case Scheme::HTTPS: + config.scheme = Aws::Http::Scheme::HTTPS; + break; + break; + } + } + + // default timeout + config.connectTimeoutMs = 5000; + config.requestTimeoutMs = 15000; + + auto aws_credentials = [&]() -> Aws::Auth::AWSCredentials { + if (m_sessionToken.has_value()) + { + return {m_accessKeyId, m_secretKey, *m_sessionToken}; + } + else + { + return {m_accessKeyId, m_secretKey}; + } + }(); + + // Create the S3 client + Aws::S3::S3Client s3_client( + 
aws_credentials, + config, + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, + false); + + // Create the AWS storage backend + return ExternalBlockStorage{std::make_unique( + std::move(s3_client), std::move(m_bucketName))}; +} } // namespace openPMD::internal namespace openPMD @@ -148,6 +263,15 @@ auto ExternalBlockStorage::makeStdioSession(std::string directory) return internal::StdioBuilder{std::move(directory)}; } +template +auto ExternalBlockStorage::makeAwsSession( + std::string bucketName, std::string accessKeyId, std::string secretKey) + -> internal::AwsBuilder +{ + return internal::AwsBuilder( + std::move(bucketName), std::move(accessKeyId), std::move(secretKey)); +} + template auto ExternalBlockStorage::store( Extent globalExtent, From c17553ccd345b5a5a2a78028bd421a2a544e6b87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 29 Sep 2025 10:59:58 +0200 Subject: [PATCH 07/36] first untested implementation for S3 --- .../openPMD/toolkit/ExternalBlockStorage.hpp | 6 -- src/toolkit/ExternalBlockStorage.cpp | 78 +++++++++++++++++-- 2 files changed, 73 insertions(+), 11 deletions(-) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index d69db5a16b..53cf0d36ea 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -2,7 +2,6 @@ #include "openPMD/Dataset.hpp" -#include #include #include @@ -40,10 +39,6 @@ struct StdioBuilder struct AwsBuilder { - struct init_credentials_tag_t - {}; - static constexpr init_credentials_tag_t init_credentials_tag = {}; - AwsBuilder( std::string bucketName, std::string accessKeyId, std::string secretKey); @@ -102,7 +97,6 @@ class ExternalBlockStorage static auto makeStdioSession(std::string directory) -> internal::StdioBuilder; - template static auto makeAwsSession( std::string bucketName, std::string accessKeyId, std::string secretKey) -> internal::AwsBuilder; diff --git 
a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 7672b1016e..3e59ea0260 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -10,15 +10,19 @@ #include #include #include +#include +#include +#include #include +#include #include #include #include #include +#include #include #include -#include namespace { @@ -82,7 +86,7 @@ ExternalBlockStorageStdio::~ExternalBlockStorageStdio() = default; auto ExternalBlockStorageStdio::put( std::string const &identifier, void const *data, size_t len) -> std::string { - auto sanitized = identifier; + auto sanitized = identifier + ".dat"; ExternalBlockStorage::sanitizeString(sanitized); std::string filepath = concat_filepath(m_directory, sanitized); @@ -131,9 +135,74 @@ auto StdioBuilder::setOpenMode(std::string openMode) -> StdioBuilder & ExternalBlockStorageAws::ExternalBlockStorageAws( Aws::S3::S3Client client, std::string bucketName) : m_client{std::move(client)}, m_bucketName(std::move(bucketName)) -{} +{ + Aws::S3::Model::CreateBucketRequest create_request; + create_request.SetBucket(m_bucketName); + auto create_outcome = m_client.CreateBucket(create_request); + if (!create_outcome.IsSuccess()) + { + std::cerr << "[ExternalBlockStorageAws::ExternalBlockStorageAws] " + "Warning: Failed to create bucket (may already exist): " + << create_outcome.GetError().GetMessage() << std::endl; + } + else + { + std::cout << "Bucket created: " << m_bucketName << std::endl; + } +} ExternalBlockStorageAws::~ExternalBlockStorageAws() = default; +namespace +{ + struct membuf : std::streambuf + { + membuf(char const *base, std::size_t size) + { + // hm hm + auto p = const_cast(base); + this->setg(p, p, p + size); // setup get area + } + }; + + struct imemstream : std::iostream + { + imemstream(char const *base, std::size_t size) + : std::iostream(&m_buf), m_buf(base, size) + {} + + private: + membuf m_buf; + }; +} // namespace + +auto ExternalBlockStorageAws::put( + 
std::string const &identifier, void const *data, size_t len) -> std::string +{ + auto sanitized = identifier; + ExternalBlockStorage::sanitizeString(sanitized); + + Aws::S3::Model::PutObjectRequest put_request; + put_request.SetBucket(m_bucketName); + put_request.SetKey(sanitized); + + auto input_data = Aws::MakeShared( + "PutObjectInputStream", reinterpret_cast(data), len); + std::static_pointer_cast(input_data); + + auto put_outcome = m_client.PutObject(put_request); + + if (put_outcome.IsSuccess()) + { + std::cout << "File uploaded successfully to S3!" << std::endl; + } + else + { + std::cerr << "Upload failed: " << put_outcome.GetError().GetMessage() + << std::endl; + } + return sanitized; +} + AwsBuilder::AwsBuilder( std::string bucketName, std::string accessKeyId, std::string secretKey) : m_bucketName(std::move(bucketName)) @@ -263,7 +332,6 @@ auto ExternalBlockStorage::makeStdioSession(std::string directory) return internal::StdioBuilder{std::move(directory)}; } -template auto ExternalBlockStorage::makeAwsSession( std::string bucketName, std::string accessKeyId, std::string secretKey) -> internal::AwsBuilder @@ -360,7 +428,7 @@ auto ExternalBlockStorage::store( block["offset"] = blockOffset; block["extent"] = blockExtent; std::stringstream filesystem_identifier; - filesystem_identifier << path.to_string() << "--" << index_as_str << ".dat"; + filesystem_identifier << path.to_string() << "--" << index_as_str; auto escaped_filesystem_identifier = m_worker->put( filesystem_identifier.str(), data, From 39679a0fe98dedf6dc0f17e6081ad5475cd0aac5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 29 Sep 2025 14:45:54 +0200 Subject: [PATCH 08/36] Reordering --- ...ernalBlockStorage_internal.hpp => Aws.hpp} | 13 ------ include/openPMD/toolkit/AwsBuilder.hpp | 45 +++++++++++++++++++ .../openPMD/toolkit/ExternalBlockStorage.hpp | 45 +------------------ include/openPMD/toolkit/Stdio.hpp | 19 ++++++++ include/openPMD/toolkit/StdioBuilder.hpp | 24 
++++++++++ src/toolkit/ExternalBlockStorage.cpp | 4 +- 6 files changed, 92 insertions(+), 58 deletions(-) rename include/openPMD/toolkit/{ExternalBlockStorage_internal.hpp => Aws.hpp} (58%) create mode 100644 include/openPMD/toolkit/AwsBuilder.hpp create mode 100644 include/openPMD/toolkit/Stdio.hpp create mode 100644 include/openPMD/toolkit/StdioBuilder.hpp diff --git a/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp b/include/openPMD/toolkit/Aws.hpp similarity index 58% rename from include/openPMD/toolkit/ExternalBlockStorage_internal.hpp rename to include/openPMD/toolkit/Aws.hpp index 2ad77e7965..737629ec2b 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp +++ b/include/openPMD/toolkit/Aws.hpp @@ -6,19 +6,6 @@ namespace openPMD::internal { -struct ExternalBlockStorageStdio : ExternalBlockStorageBackend -{ -private: - std::string m_directory; - std::string m_openMode; - -public: - ExternalBlockStorageStdio(std::string directory, std::string openMode); - auto put(std::string const &identifier, void const *data, size_t len) - -> std::string override; - ~ExternalBlockStorageStdio() override; -}; - struct ExternalBlockStorageAws : ExternalBlockStorageBackend { private: diff --git a/include/openPMD/toolkit/AwsBuilder.hpp b/include/openPMD/toolkit/AwsBuilder.hpp new file mode 100644 index 0000000000..3bb8cef491 --- /dev/null +++ b/include/openPMD/toolkit/AwsBuilder.hpp @@ -0,0 +1,45 @@ +#pragma once + +#include +#include +#include +#include + +namespace openPMD +{ +class ExternalBlockStorage; +} + +namespace openPMD::internal +{ +struct AwsBuilder +{ + AwsBuilder( + std::string bucketName, std::string accessKeyId, std::string secretKey); + + enum class Scheme : uint8_t + { + HTTP, + HTTPS + }; + std::string m_bucketName; + std::string m_accessKeyId; + std::string m_secretKey; + std::optional m_sessionToken; + std::initializer_list m_credentials; + std::optional m_endpointOverride; + std::optional m_region; + std::optional m_scheme; + + 
auto setBucketName(std::string bucketName) -> AwsBuilder &; + auto setCredentials(std::string accessKeyId, std::string secretKey) + -> AwsBuilder &; + auto setSessionToken(std::string sessionToken) -> AwsBuilder &; + auto setEndpointOverride(std::string endpoint) -> AwsBuilder &; + auto setRegion(std::string regionName) -> AwsBuilder &; + auto setScheme(Scheme s) -> AwsBuilder &; + + operator ::openPMD::ExternalBlockStorage(); + auto build() -> ::openPMD::ExternalBlockStorage; +}; +} // namespace openPMD::internal diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 53cf0d36ea..f795b34eb3 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -1,6 +1,8 @@ #pragma once #include "openPMD/Dataset.hpp" +#include "openPMD/toolkit/AwsBuilder.hpp" +#include "openPMD/toolkit/StdioBuilder.hpp" #include @@ -24,49 +26,6 @@ struct ExternalBlockStorageBackend -> std::string = 0; virtual ~ExternalBlockStorageBackend(); }; - -struct StdioBuilder -{ - std::string m_directory; - std::optional m_openMode = std::nullopt; - - auto setDirectory(std::string directory) -> StdioBuilder &; - auto setOpenMode(std::string openMode) -> StdioBuilder &; - - operator ExternalBlockStorage(); - auto build() -> ExternalBlockStorage; -}; - -struct AwsBuilder -{ - AwsBuilder( - std::string bucketName, std::string accessKeyId, std::string secretKey); - - enum class Scheme : uint8_t - { - HTTP, - HTTPS - }; - std::string m_bucketName; - std::string m_accessKeyId; - std::string m_secretKey; - std::optional m_sessionToken; - std::initializer_list m_credentials; - std::optional m_endpointOverride; - std::optional m_region; - std::optional m_scheme; - - auto setBucketName(std::string bucketName) -> AwsBuilder &; - auto setCredentials(std::string accessKeyId, std::string secretKey) - -> AwsBuilder &; - auto setSessionToken(std::string sessionToken) -> AwsBuilder &; - auto 
setEndpointOverride(std::string endpoint) -> AwsBuilder &; - auto setRegion(std::string regionName) -> AwsBuilder &; - auto setScheme(Scheme s) -> AwsBuilder &; - - operator ExternalBlockStorage(); - auto build() -> ExternalBlockStorage; -}; } // namespace openPMD::internal namespace openPMD diff --git a/include/openPMD/toolkit/Stdio.hpp b/include/openPMD/toolkit/Stdio.hpp new file mode 100644 index 0000000000..10a3e724be --- /dev/null +++ b/include/openPMD/toolkit/Stdio.hpp @@ -0,0 +1,19 @@ +#pragma once + +#include "openPMD/toolkit/ExternalBlockStorage.hpp" + +namespace openPMD::internal +{ +struct ExternalBlockStorageStdio : ExternalBlockStorageBackend +{ +private: + std::string m_directory; + std::string m_openMode; + +public: + ExternalBlockStorageStdio(std::string directory, std::string openMode); + auto put(std::string const &identifier, void const *data, size_t len) + -> std::string override; + ~ExternalBlockStorageStdio() override; +}; +} // namespace openPMD::internal diff --git a/include/openPMD/toolkit/StdioBuilder.hpp b/include/openPMD/toolkit/StdioBuilder.hpp new file mode 100644 index 0000000000..7d93048167 --- /dev/null +++ b/include/openPMD/toolkit/StdioBuilder.hpp @@ -0,0 +1,24 @@ +#pragma once + +#include +#include + +namespace openPMD +{ +class ExternalBlockStorage; +} + +namespace openPMD::internal +{ +struct StdioBuilder +{ + std::string m_directory; + std::optional m_openMode = std::nullopt; + + auto setDirectory(std::string directory) -> StdioBuilder &; + auto setOpenMode(std::string openMode) -> StdioBuilder &; + + operator ::openPMD::ExternalBlockStorage(); + auto build() -> ::openPMD::ExternalBlockStorage; +}; +} // namespace openPMD::internal diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 3e59ea0260..fc08eadc4a 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -1,11 +1,11 @@ #include "openPMD/toolkit/ExternalBlockStorage.hpp" -#include 
"openPMD/toolkit/ExternalBlockStorage_internal.hpp" - #include "openPMD/DatatypeMacros.hpp" #include "openPMD/IO/JSON/JSONIOHandlerImpl.hpp" #include "openPMD/auxiliary/Filesystem.hpp" +#include "openPMD/toolkit/Aws.hpp" +#include "openPMD/toolkit/Stdio.hpp" #include #include From 6ff99e4be2fa37095b820cdb691b1246fb58b952 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 29 Sep 2025 15:00:06 +0200 Subject: [PATCH 09/36] continue restructuring --- CMakeLists.txt | 6 +- src/toolkit/Aws.cpp | 80 +++++++ src/toolkit/AwsBuilder.cpp | 117 ++++++++++ src/toolkit/ExternalBlockStorage.cpp | 321 +-------------------------- src/toolkit/Stdio.cpp | 68 ++++++ src/toolkit/StdioBuilder.cpp | 31 +++ 6 files changed, 305 insertions(+), 318 deletions(-) create mode 100644 src/toolkit/Aws.cpp create mode 100644 src/toolkit/AwsBuilder.cpp create mode 100644 src/toolkit/Stdio.cpp create mode 100644 src/toolkit/StdioBuilder.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index a0de1cfa29..95edb7a266 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -437,7 +437,11 @@ set(CORE_SOURCE src/snapshots/RandomAccessIterator.cpp src/snapshots/Snapshots.cpp src/snapshots/StatefulIterator.cpp - src/toolkit/ExternalBlockStorage.cpp) + src/toolkit/ExternalBlockStorage.cpp + src/toolkit/AwsBuilder.cpp + src/toolkit/Aws.cpp + src/toolkit/StdioBuilder.cpp + src/toolkit/Stdio.cpp) set(IO_SOURCE src/IO/AbstractIOHandler.cpp src/IO/AbstractIOHandlerImpl.cpp diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp new file mode 100644 index 0000000000..2f05ec9402 --- /dev/null +++ b/src/toolkit/Aws.cpp @@ -0,0 +1,80 @@ +#include "openPMD/toolkit/Aws.hpp" + +#include +#include + +#include + +namespace +{ +struct membuf : std::streambuf +{ + membuf(char const *base, std::size_t size) + { + auto p = const_cast(base); + this->setg(p, p, p + size); + } +}; + +struct imemstream : std::iostream +{ + imemstream(char const *base, std::size_t size) + : std::iostream(&m_buf), m_buf(base, 
size) + {} + +private: + membuf m_buf; +}; +} // namespace + +namespace openPMD::internal +{ +ExternalBlockStorageAws::ExternalBlockStorageAws( + Aws::S3::S3Client client, std::string bucketName) + : m_client{std::move(client)}, m_bucketName(std::move(bucketName)) +{ + Aws::S3::Model::CreateBucketRequest create_request; + create_request.SetBucket(m_bucketName); + auto create_outcome = m_client.CreateBucket(create_request); + if (!create_outcome.IsSuccess()) + { + std::cerr << "[ExternalBlockStorageAws::ExternalBlockStorageAws] " + "Warning: Failed to create bucket (may already exist): " + << create_outcome.GetError().GetMessage() << std::endl; + } + else + { + std::cout << "Bucket created: " << m_bucketName << std::endl; + } +} +ExternalBlockStorageAws::~ExternalBlockStorageAws() = default; + +auto ExternalBlockStorageAws::put( + std::string const &identifier, void const *data, size_t len) -> std::string +{ + auto sanitized = identifier; + ExternalBlockStorage::sanitizeString(sanitized); + + Aws::S3::Model::PutObjectRequest put_request; + put_request.SetBucket(m_bucketName); + put_request.SetKey(sanitized); + + auto input_data = Aws::MakeShared( + "PutObjectInputStream", reinterpret_cast(data), len); + std::static_pointer_cast(input_data); + + auto put_outcome = m_client.PutObject(put_request); + + if (put_outcome.IsSuccess()) + { + std::cout << "File uploaded successfully to S3!" 
<< std::endl; + } + else + { + std::cerr << "Upload failed: " << put_outcome.GetError().GetMessage() + << std::endl; + } + return sanitized; +} + +} // namespace openPMD::internal diff --git a/src/toolkit/AwsBuilder.cpp b/src/toolkit/AwsBuilder.cpp new file mode 100644 index 0000000000..13caa1f878 --- /dev/null +++ b/src/toolkit/AwsBuilder.cpp @@ -0,0 +1,117 @@ +#include "openPMD/toolkit/AwsBuilder.hpp" + +#include "openPMD/toolkit/Aws.hpp" +#include "openPMD/toolkit/ExternalBlockStorage.hpp" + +#include +#include +#include + +namespace openPMD::internal +{ +AwsBuilder::AwsBuilder( + std::string bucketName, std::string accessKeyId, std::string secretKey) + : m_bucketName(std::move(bucketName)) + , m_accessKeyId(std::move(accessKeyId)) + , m_secretKey(std::move(secretKey)) +{} + +auto AwsBuilder::setBucketName(std::string bucketName) -> AwsBuilder & +{ + m_bucketName = std::move(bucketName); + return *this; +} + +auto internal::AwsBuilder::setCredentials( + std::string accessKeyId, std::string secretKey) -> AwsBuilder & +{ + m_accessKeyId = std::move(accessKeyId); + m_secretKey = std::move(secretKey); + return *this; +} + +auto AwsBuilder::setEndpointOverride(std::string endpoint) -> AwsBuilder & +{ + m_endpointOverride = std::move(endpoint); + return *this; +} + +auto AwsBuilder::setRegion(std::string regionName) -> AwsBuilder & +{ + m_region = std::move(regionName); + return *this; +} + +auto AwsBuilder::setScheme(Scheme s) -> AwsBuilder & +{ + m_scheme = s; + return *this; +} + +auto internal::AwsBuilder::setSessionToken(std::string sessionToken) + -> AwsBuilder & +{ + m_sessionToken = std::move(sessionToken); + return *this; +} + +AwsBuilder::operator ExternalBlockStorage() +{ + Aws::Client::ClientConfiguration config; + + if (m_endpointOverride.has_value()) + { + config.endpointOverride = *m_endpointOverride; + } + if (m_region.has_value()) + { + config.region = *m_region; + } + else + { + config.region = "us-east-1"; + } + if (m_scheme.has_value()) + { + 
switch (*m_scheme) + { + case Scheme::HTTP: + config.scheme = Aws::Http::Scheme::HTTP; + break; + case Scheme::HTTPS: + config.scheme = Aws::Http::Scheme::HTTPS; + break; + break; + } + } + + config.connectTimeoutMs = 5000; + config.requestTimeoutMs = 15000; + + auto aws_credentials = [&]() -> Aws::Auth::AWSCredentials { + if (m_sessionToken.has_value()) + { + return {m_accessKeyId, m_secretKey, *m_sessionToken}; + } + else + { + return {m_accessKeyId, m_secretKey}; + } + }(); + + Aws::S3::S3Client s3_client( + aws_credentials, + config, + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, + false); + + return ExternalBlockStorage{std::make_unique( + std::move(s3_client), std::move(m_bucketName))}; +} + +auto AwsBuilder::build() -> ExternalBlockStorage +{ + return *this; +} + +} // namespace openPMD::internal diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index fc08eadc4a..fefaa00858 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -1,323 +1,19 @@ - #include "openPMD/toolkit/ExternalBlockStorage.hpp" #include "openPMD/DatatypeMacros.hpp" #include "openPMD/IO/JSON/JSONIOHandlerImpl.hpp" -#include "openPMD/auxiliary/Filesystem.hpp" -#include "openPMD/toolkit/Aws.hpp" -#include "openPMD/toolkit/Stdio.hpp" -#include -#include -#include -#include -#include -#include +#include -#include -#include -#include #include #include #include -#include -#include -#include - -namespace -{ -auto concat_filepath(std::string const &s1, std::string const &s2) - -> std::string -{ - if (s1.empty()) - { - return s2; - } - if (s2.empty()) - { - return s1; - } - bool ends_with_slash = - *s1.crbegin() == openPMD::auxiliary::directory_separator; - bool starts_with_slash = - *s2.cbegin() == openPMD::auxiliary::directory_separator; - - if (ends_with_slash ^ starts_with_slash) - { - return s1 + s2; - } - else if (ends_with_slash && starts_with_slash) - { - return s1 + (s2.c_str() + 1); - } - else - { 
- return s1 + openPMD::auxiliary::directory_separator + s2; - } -} -} // namespace namespace openPMD::internal { ExternalBlockStorageBackend::~ExternalBlockStorageBackend() = default; - -ExternalBlockStorageStdio::ExternalBlockStorageStdio( - std::string directory, std::string openMode) - : m_directory(std::move(directory)), m_openMode(std::move(openMode)) -{ - if (m_directory.empty()) - { - throw std::invalid_argument( - "ExternalBlockStorageStdio: directory cannot be empty"); - } - - // Ensure the directory exists and is writable - if (!auxiliary::create_directories(m_directory)) - { - throw std::runtime_error( - "ExternalBlockStorageStdio: failed to create or access " - "directory: " + - m_directory); - } -} - -ExternalBlockStorageStdio::~ExternalBlockStorageStdio() = default; - -auto ExternalBlockStorageStdio::put( - std::string const &identifier, void const *data, size_t len) -> std::string -{ - auto sanitized = identifier + ".dat"; - ExternalBlockStorage::sanitizeString(sanitized); - std::string filepath = concat_filepath(m_directory, sanitized); - - if (len == 0) - { - return filepath; - } - - FILE *file = std::fopen(filepath.c_str(), "wb"); - if (!file) - { - throw std::runtime_error( - "ExternalBlockStorageStdio: failed to open file for writing: " + - filepath); - } - - size_t written = std::fwrite(data, 1, len, file); - if (written != len) - { - throw std::runtime_error( - "ExternalBlockStorageStdio: failed to write full data to file: " + - filepath); - } - - if (std::fclose(file) != 0) - { - throw std::runtime_error( - "ExternalBlockStorageStdio: failed to close file after writing: " + - filepath); - } - - return filepath; -} - -auto StdioBuilder::setDirectory(std::string directory) -> StdioBuilder & -{ - m_directory = std::move(directory); - return *this; -} -auto StdioBuilder::setOpenMode(std::string openMode) -> StdioBuilder & -{ - m_openMode = std::move(openMode); - return *this; } -ExternalBlockStorageAws::ExternalBlockStorageAws( - 
Aws::S3::S3Client client, std::string bucketName) - : m_client{std::move(client)}, m_bucketName(std::move(bucketName)) -{ - Aws::S3::Model::CreateBucketRequest create_request; - create_request.SetBucket(m_bucketName); - auto create_outcome = m_client.CreateBucket(create_request); - if (!create_outcome.IsSuccess()) - { - std::cerr << "[ExternalBlockStorageAws::ExternalBlockStorageAws] " - "Warning: Failed to create bucket (may already exist): " - << create_outcome.GetError().GetMessage() << std::endl; - } - else - { - std::cout << "Bucket created: " << m_bucketName << std::endl; - } -} -ExternalBlockStorageAws::~ExternalBlockStorageAws() = default; - -namespace -{ - struct membuf : std::streambuf - { - membuf(char const *base, std::size_t size) - { - // hm hm - auto p = const_cast(base); - this->setg(p, p, p + size); // setup get area - } - }; - - struct imemstream : std::iostream - { - imemstream(char const *base, std::size_t size) - : std::iostream(&m_buf), m_buf(base, size) - {} - - private: - membuf m_buf; - }; -} // namespace - -auto ExternalBlockStorageAws::put( - std::string const &identifier, void const *data, size_t len) -> std::string -{ - auto sanitized = identifier; - ExternalBlockStorage::sanitizeString(sanitized); - - Aws::S3::Model::PutObjectRequest put_request; - put_request.SetBucket(m_bucketName); - put_request.SetKey(sanitized); - - auto input_data = Aws::MakeShared( - "PutObjectInputStream", reinterpret_cast(data), len); - std::static_pointer_cast(input_data); - - auto put_outcome = m_client.PutObject(put_request); - - if (put_outcome.IsSuccess()) - { - std::cout << "File uploaded successfully to S3!" 
<< std::endl; - } - else - { - std::cerr << "Upload failed: " << put_outcome.GetError().GetMessage() - << std::endl; - } - return sanitized; -} - -AwsBuilder::AwsBuilder( - std::string bucketName, std::string accessKeyId, std::string secretKey) - : m_bucketName(std::move(bucketName)) - , m_accessKeyId(std::move(accessKeyId)) - , m_secretKey(std::move(secretKey)) -{} - -auto AwsBuilder::setBucketName(std::string bucketName) -> AwsBuilder & -{ - m_bucketName = std::move(bucketName); - return *this; -} - -auto internal::AwsBuilder::setCredentials( - std::string accessKeyId, std::string secretKey) -> AwsBuilder & -{ - m_accessKeyId = std::move(accessKeyId); - m_secretKey = std::move(secretKey); - return *this; -} - -auto AwsBuilder::setEndpointOverride(std::string endpoint) -> AwsBuilder & -{ - m_endpointOverride = std::move(endpoint); - return *this; -} - -auto AwsBuilder::setRegion(std::string regionName) -> AwsBuilder & -{ - m_region = std::move(regionName); - return *this; -} - -auto AwsBuilder::setScheme(Scheme s) -> AwsBuilder & -{ - m_scheme = s; - return *this; -} - -auto internal::AwsBuilder::setSessionToken(std::string sessionToken) - -> AwsBuilder & -{ - m_sessionToken = std::move(sessionToken); - return *this; -} - -StdioBuilder::operator ExternalBlockStorage() -{ - return ExternalBlockStorage{std::make_unique( - std::move(m_directory), std::move(m_openMode).value_or("wb"))}; -} - -auto StdioBuilder::build() -> ExternalBlockStorage -{ - return *this; -} - -AwsBuilder::operator ExternalBlockStorage() -{ - Aws::Client::ClientConfiguration config; - - if (m_endpointOverride.has_value()) - { - config.endpointOverride = *m_endpointOverride; - } - if (m_region.has_value()) - { - config.region = *m_region; - } - else - { - config.region = "us-east-1"; - } - if (m_scheme.has_value()) - { - switch (*m_scheme) - { - case Scheme::HTTP: - config.scheme = Aws::Http::Scheme::HTTP; - break; - case Scheme::HTTPS: - config.scheme = Aws::Http::Scheme::HTTPS; - break; - 
break; - } - } - - // default timeout - config.connectTimeoutMs = 5000; - config.requestTimeoutMs = 15000; - - auto aws_credentials = [&]() -> Aws::Auth::AWSCredentials { - if (m_sessionToken.has_value()) - { - return {m_accessKeyId, m_secretKey, *m_sessionToken}; - } - else - { - return {m_accessKeyId, m_secretKey}; - } - }(); - - // Create the S3 client - Aws::S3::S3Client s3_client( - aws_credentials, - config, - Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, - false); - - // Create the AWS storage backend - return ExternalBlockStorage{std::make_unique( - std::move(s3_client), std::move(m_bucketName))}; -} -} // namespace openPMD::internal - namespace openPMD { ExternalBlockStorage::ExternalBlockStorage() = default; @@ -349,17 +45,8 @@ auto ExternalBlockStorage::store( nlohmann::json::json_pointer const &path, T const *data) -> std::string { - // JSON Identifier: running counter. - // Do not use an array to avoid reindexing upon deletion. - - // Filesystem Identifier: JSON path + running counter. - - // For each externally handled data block, store: - // 1. Filesystem identifier - // 2. 
Offset, Extent auto &dataset = fullJsonDataset[path]; - // running_index denotes the last *used* block index in the dataset using running_index_t = uint64_t; running_index_t running_index = [&]() -> running_index_t { if (auto it = dataset.find("_running_index"); it != dataset.end()) @@ -421,8 +108,8 @@ auto ExternalBlockStorage::store( { throw std::runtime_error("Inconsistent chunk storage in datatype."); } - check_metadata("_byte_width", sizeof(T)); - check_metadata("_extent", globalExtent); + check_metadata("byte_width", sizeof(T)); + check_metadata("extent", globalExtent); auto &block = dataset[index_as_str]; block["offset"] = blockOffset; @@ -443,7 +130,6 @@ auto ExternalBlockStorage::store( void ExternalBlockStorage::sanitizeString(std::string &s) { - // Replace invalid characters with underscore for (char &c : s) { if (c == '/' || c == '\\' || c == ':' || c == '*' || c == '?' || @@ -467,4 +153,5 @@ void ExternalBlockStorage::sanitizeString(std::string &s) OPENPMD_INSTANTIATE_DATATYPEHANDLING(internal::JsonDatatypeHandling, type) OPENPMD_FOREACH_DATASET_DATATYPE(OPENPMD_INSTANTIATE) #undef OPENPMD_INSTANTIATE + } // namespace openPMD diff --git a/src/toolkit/Stdio.cpp b/src/toolkit/Stdio.cpp new file mode 100644 index 0000000000..dac83d35c9 --- /dev/null +++ b/src/toolkit/Stdio.cpp @@ -0,0 +1,68 @@ +#include "openPMD/toolkit/Stdio.hpp" + +#include "openPMD/auxiliary/Filesystem.hpp" + +#include +#include + +namespace openPMD::internal +{ +ExternalBlockStorageStdio::ExternalBlockStorageStdio( + std::string directory, std::string openMode) + : m_directory(std::move(directory)), m_openMode(std::move(openMode)) +{ + if (m_directory.empty()) + { + throw std::invalid_argument( + "ExternalBlockStorageStdio: directory cannot be empty"); + } + + if (!auxiliary::create_directories(m_directory)) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to create or access " + "directory: " + + m_directory); + } +} + 
+ExternalBlockStorageStdio::~ExternalBlockStorageStdio() = default; + +auto ExternalBlockStorageStdio::put( + std::string const &identifier, void const *data, size_t len) -> std::string +{ + auto sanitized = identifier + ".dat"; + ExternalBlockStorage::sanitizeString(sanitized); + std::string filepath = m_directory + "/" + sanitized; + + if (len == 0) + { + return filepath; + } + + FILE *file = std::fopen(filepath.c_str(), "wb"); + if (!file) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to open file for writing: " + + filepath); + } + + size_t written = std::fwrite(data, 1, len, file); + if (written != len) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to write full data to file: " + + filepath); + } + + if (std::fclose(file) != 0) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to close file after writing: " + + filepath); + } + + return filepath; +} +} // namespace openPMD::internal diff --git a/src/toolkit/StdioBuilder.cpp b/src/toolkit/StdioBuilder.cpp new file mode 100644 index 0000000000..8fa5f6bb6f --- /dev/null +++ b/src/toolkit/StdioBuilder.cpp @@ -0,0 +1,31 @@ +#include "openPMD/toolkit/StdioBuilder.hpp" + +#include "openPMD/toolkit/ExternalBlockStorage.hpp" +#include "openPMD/toolkit/Stdio.hpp" + +#include + +namespace openPMD::internal +{ +auto StdioBuilder::setDirectory(std::string directory) -> StdioBuilder & +{ + m_directory = std::move(directory); + return *this; +} +auto StdioBuilder::setOpenMode(std::string openMode) -> StdioBuilder & +{ + m_openMode = std::move(openMode); + return *this; +} + +StdioBuilder::operator ExternalBlockStorage() +{ + return ExternalBlockStorage{std::make_unique( + std::move(m_directory), std::move(m_openMode).value_or("wb"))}; +} + +auto StdioBuilder::build() -> ExternalBlockStorage +{ + return *this; +} +} // namespace openPMD::internal From 1a47575e7a1798628218ad4f4b77585246e4bc89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: 
Mon, 29 Sep 2025 15:25:49 +0200 Subject: [PATCH 10/36] Some first little MPI awareness --- .../openPMD/toolkit/ExternalBlockStorage.hpp | 1 + src/IO/JSON/JSONIOHandlerImpl.cpp | 157 ++++++++++-------- src/toolkit/ExternalBlockStorage.cpp | 9 +- 3 files changed, 100 insertions(+), 67 deletions(-) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index f795b34eb3..f1b5b83671 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -68,6 +68,7 @@ class ExternalBlockStorage Extent blockExtent, nlohmann::json &fullJsonDataset, nlohmann::json::json_pointer const &path, + std::optional infix, // e.g. for distinguishing MPI ranks T const *data) -> std::string; static void sanitizeString(std::string &s); diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 10e09add1b..2550a7f02a 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -1148,6 +1148,30 @@ void JSONIOHandlerImpl::deleteAttribute( namespace { + template + auto + write_rank_to_stream_with_sufficient_padding(Stream &s, int rank, int size) + -> Stream & + { + auto num_digits = [](unsigned n) -> unsigned { + constexpr auto max = std::numeric_limits::max(); + unsigned base_10 = 1; + unsigned res = 1; + while (base_10 < max) + { + base_10 *= 10; + if (n / base_10 == 0) + { + return res; + } + ++res; + } + return res; + }; + s << std::setw(num_digits(size - 1)) << std::setfill('0') << rank; + return s; + } + struct StoreExternally { template @@ -1179,7 +1203,22 @@ void JSONIOHandlerImpl::writeDataset( { case DatasetMode::Dataset: break; - case DatasetMode::Template: + case DatasetMode::Template: { + std::optional rankInfix; +#if openPMD_HAVE_MPI + if (m_communicator.has_value()) + { + auto &comm = *m_communicator; + // TODO maybe cache the result for this computation + int rank, size; + MPI_Comm_rank(comm, &rank); + 
MPI_Comm_size(comm, &size); + std::stringstream s; + s << "r"; + write_rank_to_stream_with_sufficient_padding(s, rank, size); + rankInfix = s.str(); + } +#endif switchDatasetType( parameters.dtype, externalBlockStorage, @@ -1188,7 +1227,8 @@ void JSONIOHandlerImpl::writeDataset( parameters.offset, parameters.extent, jsonRoot, - filePosition->id); + filePosition->id, + std::move(rankInfix)); // if (!m_datasetMode.m_skipWarnings) // { // std::cerr @@ -1199,6 +1239,7 @@ void JSONIOHandlerImpl::writeDataset( // } return; } + } switchType(parameters.dtype, j, parameters); @@ -2146,53 +2187,37 @@ auto JSONIOHandlerImpl::putJsonContents( }; #if openPMD_HAVE_MPI - auto num_digits = [](unsigned n) -> unsigned { - constexpr auto max = std::numeric_limits::max(); - unsigned base_10 = 1; - unsigned res = 1; - while (base_10 < max) + auto parallelImplementation = [this, &filename, &writeSingleFile]( + MPI_Comm comm) { + auto path = fullPath(*filename); + auto dirpath = path + ".parallel"; + if (!auxiliary::create_directories(dirpath)) { - base_10 *= 10; - if (n / base_10 == 0) - { - return res; - } - ++res; + throw std::runtime_error( + "Failed creating directory '" + dirpath + + "' for parallel JSON output"); } - return res; - }; - - auto parallelImplementation = - [this, &filename, &writeSingleFile, &num_digits](MPI_Comm comm) { - auto path = fullPath(*filename); - auto dirpath = path + ".parallel"; - if (!auxiliary::create_directories(dirpath)) - { - throw std::runtime_error( - "Failed creating directory '" + dirpath + - "' for parallel JSON output"); - } - int rank = 0, size = 0; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &size); - std::stringstream subfilePath; - // writeSingleFile will prepend the base dir - subfilePath << *filename << ".parallel/mpi_rank_" - << std::setw(num_digits(size - 1)) << std::setfill('0') - << rank << [&]() { - switch (m_fileFormat) - { - case FileFormat::Json: - return ".json"; - case FileFormat::Toml: - return ".toml"; - } - throw 
std::runtime_error("Unreachable!"); - }(); - writeSingleFile(subfilePath.str()); - if (rank == 0) - { - constexpr char const *readme_msg = R"( + int rank = 0, size = 0; + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &size); + std::stringstream subfilePath; + // writeSingleFile will prepend the base dir + subfilePath << *filename << ".parallel/mpi_rank_"; + write_rank_to_stream_with_sufficient_padding(subfilePath, rank, size) + << [&]() { + switch (m_fileFormat) + { + case FileFormat::Json: + return ".json"; + case FileFormat::Toml: + return ".toml"; + } + throw std::runtime_error("Unreachable!"); + }(); + writeSingleFile(subfilePath.str()); + if (rank == 0) + { + constexpr char const *readme_msg = R"( This folder has been created by a parallel instance of the JSON backend in openPMD. There is one JSON file for each parallel writer MPI rank. The parallel JSON backend performs no metadata or data aggregation at all. @@ -2202,26 +2227,26 @@ There is no support in the openPMD-api for reading this folder as a single dataset. For reading purposes, either pick a single .json file and read that, or merge the .json files somehow (no tooling provided for this (yet)). )"; - std::fstream readme_file; - readme_file.open( - dirpath + "/README.txt", - std::ios_base::out | std::ios_base::trunc); - readme_file << readme_msg + 1; - readme_file.close(); - if (!readme_file.good() && - !filename.fileState->printedReadmeWarningAlready) - { - std::cerr - << "[Warning] Something went wrong in trying to create " - "README file at '" - << dirpath - << "/README.txt'. Will ignore and continue. 
The README " - "message would have been:\n----------\n" - << readme_msg + 1 << "----------" << std::endl; - filename.fileState->printedReadmeWarningAlready = true; - } + std::fstream readme_file; + readme_file.open( + dirpath + "/README.txt", + std::ios_base::out | std::ios_base::trunc); + readme_file << readme_msg + 1; + readme_file.close(); + if (!readme_file.good() && + !filename.fileState->printedReadmeWarningAlready) + { + std::cerr + << "[Warning] Something went wrong in trying to create " + "README file at '" + << dirpath + << "/README.txt'. Will ignore and continue. The README " + "message would have been:\n----------\n" + << readme_msg + 1 << "----------" << std::endl; + filename.fileState->printedReadmeWarningAlready = true; } - }; + } + }; std::shared_ptr res; if (m_communicator.has_value()) diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index fefaa00858..2a0f4fc683 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -43,6 +43,7 @@ auto ExternalBlockStorage::store( Extent blockExtent, nlohmann::json &fullJsonDataset, nlohmann::json::json_pointer const &path, + std::optional infix, T const *data) -> std::string { auto &dataset = fullJsonDataset[path]; @@ -115,7 +116,12 @@ auto ExternalBlockStorage::store( block["offset"] = blockOffset; block["extent"] = blockExtent; std::stringstream filesystem_identifier; - filesystem_identifier << path.to_string() << "--" << index_as_str; + filesystem_identifier << path.to_string(); + if (infix.has_value()) + { + filesystem_identifier << "--" << *infix; + } + filesystem_identifier << "--" << index_as_str; auto escaped_filesystem_identifier = m_worker->put( filesystem_identifier.str(), data, @@ -148,6 +154,7 @@ void ExternalBlockStorage::sanitizeString(std::string &s) Extent blockExtent, \ nlohmann::json & fullJsonDataset, \ nlohmann::json::json_pointer const &path, \ + std::optional infix, \ type const *data) -> std::string; #define 
OPENPMD_INSTANTIATE(type) \ OPENPMD_INSTANTIATE_DATATYPEHANDLING(internal::JsonDatatypeHandling, type) From 97b80207baa751362855ca1ead1277b6d07b6944 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 29 Sep 2025 15:59:45 +0200 Subject: [PATCH 11/36] WIP: Config for external block storage from JSON --- include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 132 ++++-- .../openPMD/toolkit/ExternalBlockStorage.hpp | 5 + src/IO/JSON/JSONIOHandlerImpl.cpp | 445 ++++++++++-------- 3 files changed, 341 insertions(+), 241 deletions(-) diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index f8f3cc49c0..2482e30e6d 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -33,6 +33,7 @@ #include #include +#include #if openPMD_HAVE_MPI #include #endif @@ -196,6 +197,15 @@ struct JsonDatatypeHandling namespace openPMD { +namespace dataset_mode_types +{ + struct Dataset_t + {}; + struct Template_t + {}; + using External_t = std::shared_ptr; +} // namespace dataset_mode_types + class JSONIOHandlerImpl : public AbstractIOHandlerImpl { using json = nlohmann::json; @@ -218,8 +228,6 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl std::string originalExtension); #endif - ExternalBlockStorage externalBlockStorage; - void init(openPMD::json::TracingJSON config); ~JSONIOHandlerImpl() override; @@ -286,42 +294,6 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl std::future flush(); -private: -#if openPMD_HAVE_MPI - std::optional m_communicator; -#endif - - using FILEHANDLE = std::fstream; - - // map each Writable to its associated file - // contains only the filename, without the OS path - std::unordered_map m_files; - - std::unordered_map> m_jsonVals; - - // files that have logically, but not physically been written to - std::unordered_set m_dirty; - - /* - * Is set by constructor. 
- */ - FileFormat m_fileFormat{}; - - /* - * Under which key do we find the backend configuration? - * -> "json" for the JSON backend, "toml" for the TOML backend. - */ - std::string backendConfigKey() const; - - /* - * First return value: The location of the JSON value (either "json" or - * "toml") Second return value: The value that was maybe found at this place - */ - std::pair> - getBackendConfig(openPMD::json::TracingJSON &) const; - - std::string m_originalExtension; - /* * Was the config value explicitly user-chosen, or are we still working with * defaults? @@ -336,17 +308,36 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl // Dataset IO mode // ///////////////////// - enum class DatasetMode + struct DatasetMode + : std::variant< + dataset_mode_types::Dataset_t, + dataset_mode_types::Template_t, + dataset_mode_types::External_t> { - Dataset, - Template + using Dataset_t = dataset_mode_types::Dataset_t; + using Template_t = dataset_mode_types::Template_t; + using External_t = dataset_mode_types::External_t; + constexpr static Dataset_t Dataset{}; + constexpr static Template_t Template{}; + + using variant_t = std::variant< + dataset_mode_types::Dataset_t, + dataset_mode_types::Template_t, + External_t>; + using variant_t ::operator=; + + // casts needed because of + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90943 + inline auto as_base() const -> variant_t const & + { + return *this; + } + inline auto as_base() -> variant_t & + { + return *this; + } }; - // IOMode m_mode{}; - // SpecificationVia m_IOModeSpecificationVia = - // SpecificationVia::DefaultValue; bool m_printedSkippedWriteWarningAlready - // = false; - struct DatasetMode_s { // Initialized in init() @@ -361,8 +352,6 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl m_mode, m_specificationVia, m_skipWarnings}; } }; - DatasetMode_s m_datasetMode; - DatasetMode_s retrieveDatasetMode(openPMD::json::TracingJSON &config) const; /////////////////////// // Attribute IO mode // @@ 
-381,8 +370,50 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl AttributeMode m_mode{}; SpecificationVia m_specificationVia = SpecificationVia::DefaultValue; }; - AttributeMode_s m_attributeMode; +private: +#if openPMD_HAVE_MPI + std::optional m_communicator; +#endif + + using FILEHANDLE = std::fstream; + + // map each Writable to its associated file + // contains only the filename, without the OS path + std::unordered_map m_files; + + std::unordered_map> m_jsonVals; + + // files that have logically, but not physically been written to + std::unordered_set m_dirty; + + /* + * Is set by constructor. + */ + FileFormat m_fileFormat{}; + + /* + * Under which key do we find the backend configuration? + * -> "json" for the JSON backend, "toml" for the TOML backend. + */ + std::string backendConfigKey() const; + + /* + * First return value: The location of the JSON value (either "json" or + * "toml") Second return value: The value that was maybe found at this place + */ + std::pair> + getBackendConfig(openPMD::json::TracingJSON &) const; + static std::pair> + getBackendConfig( + openPMD::json::TracingJSON &, std::string const &configLocation); + + std::string m_originalExtension; + + DatasetMode_s m_datasetMode; + DatasetMode_s retrieveDatasetMode(openPMD::json::TracingJSON &config) const; + + AttributeMode_s m_attributeMode; AttributeMode_s retrieveAttributeMode(openPMD::json::TracingJSON &config) const; @@ -432,7 +463,8 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl // essentially: m_i = \prod_{j=0}^{i-1} extent_j static Extent getMultiplicators(Extent const &extent); - static std::pair getExtent(nlohmann::json &j); + static std::pair + getExtent(nlohmann::json &j, DatasetMode const &baseMode); // remove single '/' in the beginning and end of a string static std::string removeSlashes(std::string); diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index f1b5b83671..7a26647970 100644 --- 
a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -71,6 +71,11 @@ class ExternalBlockStorage std::optional infix, // e.g. for distinguishing MPI ranks T const *data) -> std::string; + auto externalStorageLocation() const -> nlohmann::json + { + return "implement me"; + } + static void sanitizeString(std::string &s); }; diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 2550a7f02a..9656a271d0 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -31,6 +31,7 @@ #include "openPMD/auxiliary/Memory.hpp" #include "openPMD/auxiliary/StringManip.hpp" #include "openPMD/auxiliary/TypeTraits.hpp" +#include "openPMD/auxiliary/Variant.hpp" #include "openPMD/backend/Attribute.hpp" #include "openPMD/backend/Writable.hpp" #include "openPMD/toolkit/ExternalBlockStorage.hpp" @@ -43,6 +44,7 @@ #include #include #include +#include namespace openPMD { @@ -257,15 +259,74 @@ namespace internal } } // namespace internal +namespace +{ + void parse_internal_mode( + nlohmann::json const &mode_j, + std::string const &configLocation, + JSONIOHandlerImpl::DatasetMode_s &res) + { + using DatasetMode = JSONIOHandlerImpl::DatasetMode; + using SpecificationVia = JSONIOHandlerImpl::SpecificationVia; + + DatasetMode &ioMode = res.m_mode; + SpecificationVia &specificationVia = res.m_specificationVia; + bool &skipWarnings = res.m_skipWarnings; + + auto modeOption = openPMD::json::asLowerCaseStringDynamic(mode_j); + if (!modeOption.has_value()) + { + throw error::BackendConfigSchema( + {configLocation, "mode"}, + "Invalid value of non-string type (accepted values are " + "'dataset' and 'template'."); + } + auto mode = modeOption.value(); + if (mode == "dataset") + { + ioMode = DatasetMode::Dataset; + specificationVia = SpecificationVia::Manually; + } + else if (mode == "template") + { + ioMode = DatasetMode::Template; + specificationVia = SpecificationVia::Manually; + } + 
else if (mode == "template_no_warn") + { + ioMode = DatasetMode::Template; + specificationVia = SpecificationVia::Manually; + skipWarnings = true; + } + else + { + throw error::BackendConfigSchema( + {configLocation, "dataset", "mode"}, + "Invalid value: '" + mode + + "' (accepted values are 'dataset' and 'template'."); + } + } + void parse_external_mode( + [[maybe_unused]] json::TracingJSON mode, + [[maybe_unused]] std::string const &configLocation, + JSONIOHandlerImpl::DatasetMode_s &res) + { + using SpecificationVia = JSONIOHandlerImpl::SpecificationVia; + + res.m_mode = std::make_shared( + ExternalBlockStorage::makeStdioSession("./external_blocks")); + res.m_specificationVia = SpecificationVia::Manually; + } +} // namespace + auto JSONIOHandlerImpl::retrieveDatasetMode( openPMD::json::TracingJSON &config) const -> DatasetMode_s { // start with / copy from current config auto res = m_datasetMode; - DatasetMode &ioMode = res.m_mode; - SpecificationVia &specificationVia = res.m_specificationVia; - bool &skipWarnings = res.m_skipWarnings; - if (auto [configLocation, maybeConfig] = getBackendConfig(config); + + if (auto [configLocation, maybeConfig] = + getBackendConfig(config, backendConfigKey()); maybeConfig.has_value()) { auto jsonConfig = maybeConfig.value(); @@ -274,38 +335,14 @@ auto JSONIOHandlerImpl::retrieveDatasetMode( auto datasetConfig = jsonConfig["dataset"]; if (datasetConfig.json().contains("mode")) { - auto modeOption = openPMD::json::asLowerCaseStringDynamic( - datasetConfig["mode"].json()); - if (!modeOption.has_value()) - { - throw error::BackendConfigSchema( - {configLocation, "mode"}, - "Invalid value of non-string type (accepted values are " - "'dataset' and 'template'."); - } - auto mode = modeOption.value(); - if (mode == "dataset") - { - ioMode = DatasetMode::Dataset; - specificationVia = SpecificationVia::Manually; - } - else if (mode == "template") - { - ioMode = DatasetMode::Template; - specificationVia = SpecificationVia::Manually; - } - 
else if (mode == "template_no_warn") + auto mode = datasetConfig["mode"]; + if (mode.json().is_object()) { - ioMode = DatasetMode::Template; - specificationVia = SpecificationVia::Manually; - skipWarnings = true; + parse_external_mode(std::move(mode), configLocation, res); } else { - throw error::BackendConfigSchema( - {configLocation, "dataset", "mode"}, - "Invalid value: '" + mode + - "' (accepted values are 'dataset' and 'template'."); + parse_internal_mode(mode.json(), configLocation, res); } } } @@ -377,7 +414,13 @@ std::string JSONIOHandlerImpl::backendConfigKey() const std::pair> JSONIOHandlerImpl::getBackendConfig(openPMD::json::TracingJSON &config) const { - std::string configLocation = backendConfigKey(); + return getBackendConfig(config, backendConfigKey()); +} + +std::pair> +JSONIOHandlerImpl::getBackendConfig( + openPMD::json::TracingJSON &config, std::string const &configLocation) +{ if (config.json().contains(configLocation)) { return std::make_pair( @@ -444,9 +487,6 @@ void JSONIOHandlerImpl::init(openPMD::json::TracingJSON config) (void)_; warnUnusedJson(backendConfig.value()); } - - externalBlockStorage = - ExternalBlockStorage::makeStdioSession("./external_blocks/"); } JSONIOHandlerImpl::~JSONIOHandlerImpl() = default; @@ -642,47 +682,51 @@ void JSONIOHandlerImpl::createDataset( auto &dset = jsonVal[name]; dset["datatype"] = internal::jsonDatatypeToString(parameter.dtype); - switch (localMode) - { - case DatasetMode::Dataset: { - auto extent = parameter.extent; - switch (parameter.dtype) - { - case Datatype::CFLOAT: - case Datatype::CDOUBLE: - case Datatype::CLONG_DOUBLE: { - extent.push_back(2); - break; - } - default: - break; - } - if (parameter.extent.size() != 1 || - parameter.extent[0] != Dataset::UNDEFINED_EXTENT) - { - // TOML does not support nulls, so initialize with zero - dset["data"] = initializeNDArray( - extent, - m_fileFormat == FileFormat::Json ? 
std::optional{} - : parameter.dtype); - } - break; - } - case DatasetMode::Template: - if (parameter.extent != Extent{0} && - parameter.extent[0] != Dataset::UNDEFINED_EXTENT) - { - dset["extent"] = parameter.extent; - } - else - { - // no-op - // If extent is empty or no datatype is defined, don't bother - // writing it. - // The datatype is written above anyway. - } - break; - } + std::visit( + auxiliary::overloaded{ + [&](DatasetMode::Dataset_t const &) { + auto extent = parameter.extent; + switch (parameter.dtype) + { + case Datatype::CFLOAT: + case Datatype::CDOUBLE: + case Datatype::CLONG_DOUBLE: { + extent.push_back(2); + break; + } + default: + break; + } + if (parameter.extent.size() != 1 || + parameter.extent[0] != Dataset::UNDEFINED_EXTENT) + { + // TOML does not support nulls, so initialize with zero + dset["data"] = initializeNDArray( + extent, + m_fileFormat == FileFormat::Json + ? std::optional{} + : parameter.dtype); + } + }, + [&](DatasetMode::Template_t const &) { + if (parameter.extent != Extent{0} && + parameter.extent[0] != Dataset::UNDEFINED_EXTENT) + { + dset["extent"] = parameter.extent; + } + else + { + // no-op + // If extent is empty or no datatype is defined, don't + // bother writing it. The datatype is written above + // anyway. 
+ } + }, + [&](DatasetMode::External_t const &) { + dset["extent"] = parameter.extent; + }}, + localMode.as_base()); + writable->written = true; m_dirty.emplace(file); } @@ -732,7 +776,8 @@ void JSONIOHandlerImpl::extendDataset( try { Extent datasetExtent; - std::tie(datasetExtent, localIOMode) = getExtent(j); + std::tie(datasetExtent, localIOMode) = + getExtent(j, m_datasetMode.m_mode); VERIFY_ALWAYS( datasetExtent.size() == parameters.extent.size(), "[JSON] Cannot change dimensionality of a dataset") @@ -750,38 +795,40 @@ void JSONIOHandlerImpl::extendDataset( "[JSON] The specified location contains no valid dataset"); } - switch (localIOMode) - { - case DatasetMode::Dataset: { - auto extent = parameters.extent; - auto datatype = stringToDatatype(j["datatype"].get()); - switch (datatype) - { - case Datatype::CFLOAT: - case Datatype::CDOUBLE: - case Datatype::CLONG_DOUBLE: { - extent.push_back(2); - break; - } - default: - // nothing to do - break; - } - // TOML does not support nulls, so initialize with zero - nlohmann::json newData = initializeNDArray( - extent, - m_fileFormat == FileFormat::Json ? std::optional{} - : datatype); - nlohmann::json &oldData = j["data"]; - mergeInto(newData, oldData); - j["data"] = newData; - } - break; - case DatasetMode::Template: { - j["extent"] = parameters.extent; - } - break; - } + std::visit( + auxiliary::overloaded{ + [&](DatasetMode::Dataset_t const &) { + auto extent = parameters.extent; + auto datatype = + stringToDatatype(j["datatype"].get()); + switch (datatype) + { + case Datatype::CFLOAT: + case Datatype::CDOUBLE: + case Datatype::CLONG_DOUBLE: { + extent.push_back(2); + break; + } + default: + // nothing to do + break; + } + // TOML does not support nulls, so initialize with zero + nlohmann::json newData = initializeNDArray( + extent, + m_fileFormat == FileFormat::Json ? 
std::optional{} + : datatype); + nlohmann::json &oldData = j["data"]; + mergeInto(newData, oldData); + j["data"] = newData; + }, + [&](DatasetMode::Template_t const &) { + j["extent"] = parameters.extent; + }, + [&](DatasetMode::External_t const &) { + j["extent"] = parameters.extent; + }}, + localIOMode.as_base()); writable->written = true; } @@ -977,7 +1024,7 @@ void JSONIOHandlerImpl::openDataset( *parameters.dtype = Datatype(stringToDatatype(datasetJson["datatype"].get())); - *parameters.extent = getExtent(datasetJson).first; + *parameters.extent = getExtent(datasetJson, m_datasetMode.m_mode).first; writable->written = true; } @@ -1199,49 +1246,49 @@ void JSONIOHandlerImpl::writeDataset( auto &jsonRoot = *obtainJsonContents(file); auto &j = jsonRoot[filePosition->id]; - switch (verifyDataset(parameters, j)) - { - case DatasetMode::Dataset: - break; - case DatasetMode::Template: { - std::optional rankInfix; + std::visit( + auxiliary::overloaded{ + [&](DatasetMode::Dataset_t const &) { + switchType(parameters.dtype, j, parameters); + }, + [&](DatasetMode::Template_t const &) { + if (!m_datasetMode.m_skipWarnings) + { + std::cerr << "[JSON/TOML backend: Warning] Trying to write " + "data to a " + "template dataset. Will skip." 
+ << '\n'; + m_datasetMode.m_skipWarnings = true; + } + }, + [&](DatasetMode::External_t const &external) { + std::optional rankInfix; #if openPMD_HAVE_MPI - if (m_communicator.has_value()) - { - auto &comm = *m_communicator; - // TODO maybe cache the result for this computation - int rank, size; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &size); - std::stringstream s; - s << "r"; - write_rank_to_stream_with_sufficient_padding(s, rank, size); - rankInfix = s.str(); - } + if (m_communicator.has_value()) + { + auto &comm = *m_communicator; + // TODO maybe cache the result for this computation + int rank, size; + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &size); + std::stringstream s; + s << "r"; + write_rank_to_stream_with_sufficient_padding(s, rank, size); + rankInfix = s.str(); + } #endif - switchDatasetType( - parameters.dtype, - externalBlockStorage, - parameters.data.get(), - j.at("extent").get(), - parameters.offset, - parameters.extent, - jsonRoot, - filePosition->id, - std::move(rankInfix)); - // if (!m_datasetMode.m_skipWarnings) - // { - // std::cerr - // << "[JSON/TOML backend: Warning] Trying to write data to a " - // "template dataset. Will skip." - // << '\n'; - // m_datasetMode.m_skipWarnings = true; - // } - return; - } - } - - switchType(parameters.dtype, j, parameters); + switchDatasetType( + parameters.dtype, + *external, + parameters.data.get(), + j.at("extent").get(), + parameters.offset, + parameters.extent, + jsonRoot, + filePosition->id, + std::move(rankInfix)); + }}, + verifyDataset(parameters, j).as_base()); writable->written = true; } @@ -1320,30 +1367,35 @@ void JSONIOHandlerImpl::readDataset( auto &j = obtainJsonContents(writable); DatasetMode localMode = verifyDataset(parameters, j); - switch (localMode) - { - case DatasetMode::Template: - std::cerr << "[Warning] Cannot read chunks in Template mode of JSON " - "backend. Will fill with zeroes instead." 
- << '\n'; - switchNonVectorType( - parameters.dtype, parameters.data.get(), parameters.extent); - return; - case DatasetMode::Dataset: - try - { - switchType(parameters.dtype, j["data"], parameters); - } - catch (json::basic_json::type_error &) - { - throw error::ReadError( - error::AffectedObject::Dataset, - error::Reason::UnexpectedContent, - "JSON", - "The given path does not contain a valid dataset."); - } - break; - } + std::visit( + auxiliary::overloaded{ + [&](DatasetMode::Dataset_t const &) { + try + { + switchType( + parameters.dtype, j["data"], parameters); + } + catch (json::basic_json::type_error &) + { + throw error::ReadError( + error::AffectedObject::Dataset, + error::Reason::UnexpectedContent, + "JSON", + "The given path does not contain a valid dataset."); + } + }, + [&](DatasetMode::Template_t const &) { + std::cerr + << "[Warning] Cannot read chunks in Template mode of JSON " + "backend. Will fill with zeroes instead." + << '\n'; + switchNonVectorType( + parameters.dtype, parameters.data.get(), parameters.extent); + }, + [&](DatasetMode::External_t const &) { + throw std::runtime_error("Unimplemented"); + }}, + localMode.as_base()); } namespace @@ -1865,7 +1917,8 @@ Extent JSONIOHandlerImpl::getMultiplicators(Extent const &extent) return res; } -auto JSONIOHandlerImpl::getExtent(nlohmann::json &j) +auto JSONIOHandlerImpl::getExtent( + nlohmann::json &j, DatasetMode const &baseMode) -> std::pair { Extent res; @@ -1894,7 +1947,10 @@ auto JSONIOHandlerImpl::getExtent(nlohmann::json &j) } else if (j.contains("extent")) { - ioMode = DatasetMode::Template; + ioMode = + std::holds_alternative(baseMode.as_base()) + ? 
baseMode + : DatasetMode{DatasetMode::Template}; res = j["extent"].get(); } else @@ -2137,18 +2193,25 @@ auto JSONIOHandlerImpl::putJsonContents( return it; } - switch (m_datasetMode.m_mode) - { - case DatasetMode::Dataset: - (*it->second)["platform_byte_widths"] = platformSpecifics(); - (*it->second)[JSONDefaults::openpmd_internal] - [JSONDefaults::DatasetMode] = "dataset"; - break; - case DatasetMode::Template: - (*it->second)[JSONDefaults::openpmd_internal] - [JSONDefaults::DatasetMode] = "template"; - break; - } + std::visit( + auxiliary::overloaded{ + [&](DatasetMode::Dataset_t const &) { + (*it->second)["platform_byte_widths"] = platformSpecifics(); + (*it->second)[JSONDefaults::openpmd_internal] + [JSONDefaults::DatasetMode] = "dataset"; + }, + [&](DatasetMode::Template_t const &) { + (*it->second)[JSONDefaults::openpmd_internal] + [JSONDefaults::DatasetMode] = "template"; + }, + [&](DatasetMode::External_t const &external) { + (*it->second)["platform_byte_widths"] = platformSpecifics(); + (*it->second)["external_storage"] = + external->externalStorageLocation(); + (*it->second)[JSONDefaults::openpmd_internal] + [JSONDefaults::DatasetMode] = "external"; + }}, + m_datasetMode.m_mode.as_base()); switch (m_attributeMode.m_mode) { @@ -2377,7 +2440,7 @@ auto JSONIOHandlerImpl::verifyDataset( try { Extent datasetExtent; - std::tie(datasetExtent, res) = getExtent(j); + std::tie(datasetExtent, res) = getExtent(j, m_datasetMode.m_mode); VERIFY_ALWAYS( datasetExtent.size() == parameters.extent.size(), "[JSON] Read/Write request does not fit the dataset's dimension"); From b2ccf98599f4324f24ad3d787d934476bfb9bebf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 30 Sep 2025 15:34:00 +0200 Subject: [PATCH 12/36] Add configuration --- src/IO/JSON/JSONIOHandlerImpl.cpp | 116 ++++++++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 4 deletions(-) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 
9656a271d0..686d8fe641 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -306,15 +306,123 @@ namespace "' (accepted values are 'dataset' and 'template'."); } } + + template + auto optionalOrElse(std::optional o, OrElse &&orElse) -> T + { + if (o.has_value()) + { + return *std::move(o); + } + else + { + return std::forward(orElse)(); + } + } + void parse_external_mode( - [[maybe_unused]] json::TracingJSON mode, - [[maybe_unused]] std::string const &configLocation, + json::TracingJSON mode, + std::string const &configLocation, JSONIOHandlerImpl::DatasetMode_s &res) { using SpecificationVia = JSONIOHandlerImpl::SpecificationVia; + using ExternalBlockStorage = openPMD::ExternalBlockStorage; + + auto get_mandatory = [&](char const *key, + bool lowercase) -> std::string { + if (!mode.json().contains(key)) + { + throw error::BackendConfigSchema( + {configLocation, "mode", key}, "Mandatory key."); + } + auto const &val = mode.json({key}); + return optionalOrElse( + lowercase ? openPMD::json::asLowerCaseStringDynamic(val) + : openPMD::json::asStringDynamic(val), + [&]() -> std::string { + throw error::BackendConfigSchema( + {configLocation, "mode", key}, + "Must be of string type."); + }); + }; + auto if_contains_optional = + [&](char const *key, bool lowercase, auto &&then) { + if (!mode.json().contains(key)) + { + return; + } + auto const &val = mode.json({key}); + static_cast(then)(optionalOrElse( + lowercase ? 
openPMD::json::asLowerCaseStringDynamic(val) + : openPMD::json::asStringDynamic(val), + [&]() -> std::string { + throw error::BackendConfigSchema( + {configLocation, "mode", key}, + "Must be of string type."); + })); + }; + auto modeString = get_mandatory("type", true); + + if (modeString == "stdio") + { + auto builder = ExternalBlockStorage::makeStdioSession( + get_mandatory("directory", false)); + + if_contains_optional("open_mode", false, [&](std::string openMode) { + builder.setOpenMode(std::move(openMode)); + }); + + res.m_mode = + std::make_shared(builder.build()); + } + else if (modeString == "aws") + { + openPMD::internal::AwsBuilder builder( + get_mandatory("bucket_name", false), + get_mandatory("access_key_id", false), + get_mandatory("secret_access_key", false)); + + if_contains_optional( + "session_token", false, [&](std::string sessionToken) { + builder.setSessionToken(std::move(sessionToken)); + }); + if_contains_optional( + "endpoint", false, [&](std::string endpointOverride) { + builder.setEndpointOverride(std::move(endpointOverride)); + }); + if_contains_optional("region", false, [&](std::string region) { + builder.setRegion(std::move(region)); + }); + if_contains_optional( + "scheme", true, [&](std::string const &scheme) { + if (scheme == "http") + { + builder.setScheme( + openPMD::internal::AwsBuilder::Scheme::HTTP); + } + else if (scheme == "https") + { + builder.setScheme( + openPMD::internal::AwsBuilder::Scheme::HTTPS); + } + else + { + throw error::BackendConfigSchema( + {configLocation, "mode", "scheme"}, + "Must be either 'http' or 'https'."); + } + }); + + res.m_mode = + std::make_shared(builder.build()); + } + else + { + throw error::BackendConfigSchema( + {configLocation, "mode", "type"}, + "Must be either 'stdio' or 'aws'."); + } - res.m_mode = std::make_shared( - ExternalBlockStorage::makeStdioSession("./external_blocks")); res.m_specificationVia = SpecificationVia::Manually; } } // namespace From 
2a6684106acd767ce13d76e8928696292af86709 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 30 Sep 2025 18:31:16 +0200 Subject: [PATCH 13/36] Add option to init AWS API --- include/openPMD/Series.hpp | 3 + src/Series.cpp | 121 ++++++++++++++++++++++++------------- 2 files changed, 83 insertions(+), 41 deletions(-) diff --git a/include/openPMD/Series.hpp b/include/openPMD/Series.hpp index 75c91675d0..91035fa3e8 100644 --- a/include/openPMD/Series.hpp +++ b/include/openPMD/Series.hpp @@ -35,6 +35,7 @@ #include "openPMD/config.hpp" #include "openPMD/snapshots/Snapshots.hpp" #include "openPMD/version.hpp" +#include #if openPMD_HAVE_MPI #include @@ -239,6 +240,8 @@ namespace internal std::optional> m_deferred_initialization = std::nullopt; + std::optional m_manageAwsAPI = std::nullopt; + void close(); #if openPMD_HAVE_MPI diff --git a/src/Series.cpp b/src/Series.cpp index a1411a2dbb..a5b8a0672d 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -47,6 +47,8 @@ #include "openPMD/snapshots/StatefulIterator.hpp" #include "openPMD/version.hpp" +#include + #include #include #include @@ -1043,38 +1045,25 @@ void Series::init( } } -template -auto Series::initIOHandler( - std::string const &filepath, - std::string const &options, - Access at, - bool resolve_generic_extension, - MPI_Communicator &&...comm) - -> std::tuple, TracingJSON> +namespace { - auto &series = get(); - - json::TracingJSON optionsJson = json::parseOptions( - options, - std::forward(comm)..., - /* considerFiles = */ true); - auto input = parseInput(filepath); - if (resolve_generic_extension && input->format == Format::GENERIC && - !access::create(at)) + template + void do_resolve_generic_extension_read( + ParsedInput_t &input, std::string const &filepath, Access at) { auto isPartOfSeries = - input->iterationEncoding == IterationEncoding::fileBased + input.iterationEncoding == IterationEncoding::fileBased ? 
matcher( - input->filenamePrefix, - input->filenamePadding, - input->filenamePostfix, + input.filenamePrefix, + input.filenamePadding, + input.filenamePostfix, std::nullopt) - : matcher(input->name, -1, "", std::nullopt); + : matcher(input.name, -1, "", std::nullopt); std::optional extension; std::set additional_extensions; autoDetectPadding( isPartOfSeries, - input->path, + input.path, [&extension, &additional_extensions](std::string const &, Match const &match) { auto const &ext = match.extension.value(); @@ -1107,8 +1096,8 @@ auto Series::initIOHandler( std::nullopt, error.str()); } - input->filenameExtension = *extension; - input->format = determineFormat(*extension); + input.filenameExtension = *extension; + input.format = determineFormat(*extension); } else if (access::read(at)) { @@ -1120,30 +1109,68 @@ auto Series::initIOHandler( } } + template + void do_resolve_generic_extension_write(ParsedInput_t &input) + { + { + if (input.format == /* still */ Format::GENERIC) + { + throw error::WrongAPIUsage( + "Unable to automatically determine filename extension. 
" + "Please " + "specify in some way."); + } + else if (input.format == Format::ADIOS2_BP) + { + // Since ADIOS2 has multiple extensions depending on the engine, + // we need to pass this job on to the backend + input.filenameExtension = ".%E"; + } + else + { + input.filenameExtension = suffix(input.format); + } + } + } +} // namespace + +template +auto Series::initIOHandler( + std::string const &filepath, + std::string const &options, + Access at, + bool resolve_generic_extension, + MPI_Communicator &&...comm) + -> std::tuple, TracingJSON> +{ + auto &series = get(); + + json::TracingJSON optionsJson = json::parseOptions( + options, + std::forward(comm)..., + /* considerFiles = */ true); + auto input = parseInput(filepath); + + if (resolve_generic_extension && input->format == Format::GENERIC && + !access::create(at)) + { + do_resolve_generic_extension_read(*input, filepath, at); + } + // default options series.m_parseLazily = at == Access::READ_LINEAR; // now check for user-specified options parseJsonOptions(optionsJson, *input); + if (series.m_manageAwsAPI.has_value()) + { + Aws::InitAPI(*series.m_manageAwsAPI); + } + if (resolve_generic_extension && !input->filenameExtension.has_value()) { - if (input->format == /* still */ Format::GENERIC) - { - throw error::WrongAPIUsage( - "Unable to automatically determine filename extension. 
Please " - "specify in some way."); - } - else if (input->format == Format::ADIOS2_BP) - { - // Since ADIOS2 has multiple extensions depending on the engine, - // we need to pass this job on to the backend - input->filenameExtension = ".%E"; - } - else - { - input->filenameExtension = suffix(input->format); - } + do_resolve_generic_extension_write(*input); } return std::make_tuple(std::move(input), std::move(optionsJson)); } @@ -3146,6 +3173,14 @@ void Series::parseJsonOptions(TracingJSON &options, ParsedInput &input) { series.m_rankTable.m_rankTableSource = std::move(rankTableSource); } + { + bool doManageAwsAPI = false; + getJsonOption(options, "init_aws_api", doManageAwsAPI); + if (doManageAwsAPI) + { + series.m_manageAwsAPI = std::make_optional(); + } + } // backend key { std::map const backendDescriptors{ @@ -3232,6 +3267,10 @@ namespace internal // we must not throw in a destructor try { + if (m_manageAwsAPI.has_value()) + { + Aws::ShutdownAPI(*m_manageAwsAPI); + } close(); } catch (std::exception const &ex) From 2a1c520851a2f523815d5b7a2bfd1291eb40479c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 1 Oct 2025 14:47:55 +0200 Subject: [PATCH 14/36] Add verifySSL parameter --- include/openPMD/toolkit/AwsBuilder.hpp | 2 ++ src/IO/JSON/JSONIOHandlerImpl.cpp | 16 ++++++++++++++++ src/toolkit/AwsBuilder.cpp | 11 +++++++++++ 3 files changed, 29 insertions(+) diff --git a/include/openPMD/toolkit/AwsBuilder.hpp b/include/openPMD/toolkit/AwsBuilder.hpp index 3bb8cef491..4a8ad691b9 100644 --- a/include/openPMD/toolkit/AwsBuilder.hpp +++ b/include/openPMD/toolkit/AwsBuilder.hpp @@ -30,6 +30,7 @@ struct AwsBuilder std::optional m_endpointOverride; std::optional m_region; std::optional m_scheme; + std::optional m_verifySSL; auto setBucketName(std::string bucketName) -> AwsBuilder &; auto setCredentials(std::string accessKeyId, std::string secretKey) @@ -38,6 +39,7 @@ struct AwsBuilder auto setEndpointOverride(std::string endpoint) -> AwsBuilder 
&; auto setRegion(std::string regionName) -> AwsBuilder &; auto setScheme(Scheme s) -> AwsBuilder &; + auto setVerifySSL(bool verify) -> AwsBuilder &; operator ::openPMD::ExternalBlockStorage(); auto build() -> ::openPMD::ExternalBlockStorage; diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 686d8fe641..8695f5958d 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -361,6 +361,19 @@ namespace "Must be of string type."); })); }; + auto if_contains_optional_bool = [&](char const *key, auto &&then) { + if (!mode.json().contains(key)) + { + return; + } + auto const &val = mode.json({key}); + if (!val.is_boolean()) + { + throw error::BackendConfigSchema( + {configLocation, "mode", key}, "Must be of boolean type."); + } + static_cast(then)(val.get()); + }; auto modeString = get_mandatory("type", true); if (modeString == "stdio") @@ -393,6 +406,9 @@ namespace if_contains_optional("region", false, [&](std::string region) { builder.setRegion(std::move(region)); }); + if_contains_optional_bool("verify_ssl", [&](bool verifySSL) { + builder.setVerifySSL(verifySSL); + }); if_contains_optional( "scheme", true, [&](std::string const &scheme) { if (scheme == "http") diff --git a/src/toolkit/AwsBuilder.cpp b/src/toolkit/AwsBuilder.cpp index 13caa1f878..cb21fd399a 100644 --- a/src/toolkit/AwsBuilder.cpp +++ b/src/toolkit/AwsBuilder.cpp @@ -48,6 +48,12 @@ auto AwsBuilder::setScheme(Scheme s) -> AwsBuilder & return *this; } +auto AwsBuilder::setVerifySSL(bool verify) -> AwsBuilder & +{ + m_verifySSL = verify; + return *this; +} + auto internal::AwsBuilder::setSessionToken(std::string sessionToken) -> AwsBuilder & { @@ -88,6 +94,11 @@ AwsBuilder::operator ExternalBlockStorage() config.connectTimeoutMs = 5000; config.requestTimeoutMs = 15000; + if (m_verifySSL.has_value()) + { + config.verifySSL = *m_verifySSL; + } + auto aws_credentials = [&]() -> Aws::Auth::AWSCredentials { if (m_sessionToken.has_value()) { 
From 99669e3d250cac878097295cb63fb6df915873ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 1 Oct 2025 14:48:12 +0200 Subject: [PATCH 15/36] Add TODO comment --- src/IO/JSON/JSONIOHandlerImpl.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 8695f5958d..7f953d8ac5 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -391,6 +391,8 @@ namespace else if (modeString == "aws") { openPMD::internal::AwsBuilder builder( + // TODO: bucket_name: introduce expansion pattern for openPMD + // file name get_mandatory("bucket_name", false), get_mandatory("access_key_id", false), get_mandatory("secret_access_key", false)); From 57a0505c3358c368ce87377d939c892d8fea98f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 1 Oct 2025 15:22:16 +0200 Subject: [PATCH 16/36] Add meta information object --- include/openPMD/toolkit/Aws.hpp | 8 +++- .../openPMD/toolkit/ExternalBlockStorage.hpp | 8 ++-- include/openPMD/toolkit/Stdio.hpp | 2 + src/toolkit/Aws.cpp | 21 ++++++++- src/toolkit/AwsBuilder.cpp | 4 +- src/toolkit/ExternalBlockStorage.cpp | 6 +++ src/toolkit/Stdio.cpp | 47 ++++++++++++++++++- 7 files changed, 86 insertions(+), 10 deletions(-) diff --git a/include/openPMD/toolkit/Aws.hpp b/include/openPMD/toolkit/Aws.hpp index 737629ec2b..5051fa2fbc 100644 --- a/include/openPMD/toolkit/Aws.hpp +++ b/include/openPMD/toolkit/Aws.hpp @@ -11,11 +11,17 @@ struct ExternalBlockStorageAws : ExternalBlockStorageBackend private: Aws::S3::S3Client m_client; std::string m_bucketName; + std::optional m_endpoint; public: - ExternalBlockStorageAws(Aws::S3::S3Client, std::string bucketName); + ExternalBlockStorageAws( + Aws::S3::S3Client, + std::string bucketName, + std::optional endpoint); auto put(std::string const &identifier, void const *data, size_t len) -> std::string override; + [[nodiscard]] auto externalStorageLocation() 
const + -> nlohmann::json override; ~ExternalBlockStorageAws() override; }; } // namespace openPMD::internal diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 7a26647970..6634321809 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -24,6 +24,9 @@ struct ExternalBlockStorageBackend virtual auto put(std::string const &identifier, void const *data, size_t len) -> std::string = 0; + [[nodiscard]] virtual auto externalStorageLocation() const + -> nlohmann::json = 0; + virtual ~ExternalBlockStorageBackend(); }; } // namespace openPMD::internal @@ -71,10 +74,7 @@ class ExternalBlockStorage std::optional infix, // e.g. for distinguishing MPI ranks T const *data) -> std::string; - auto externalStorageLocation() const -> nlohmann::json - { - return "implement me"; - } + [[nodiscard]] auto externalStorageLocation() const -> nlohmann::json; static void sanitizeString(std::string &s); }; diff --git a/include/openPMD/toolkit/Stdio.hpp b/include/openPMD/toolkit/Stdio.hpp index 10a3e724be..1fb8713b67 100644 --- a/include/openPMD/toolkit/Stdio.hpp +++ b/include/openPMD/toolkit/Stdio.hpp @@ -14,6 +14,8 @@ struct ExternalBlockStorageStdio : ExternalBlockStorageBackend ExternalBlockStorageStdio(std::string directory, std::string openMode); auto put(std::string const &identifier, void const *data, size_t len) -> std::string override; + [[nodiscard]] auto externalStorageLocation() const + -> nlohmann::json override; ~ExternalBlockStorageStdio() override; }; } // namespace openPMD::internal diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp index 2f05ec9402..50aff10007 100644 --- a/src/toolkit/Aws.cpp +++ b/src/toolkit/Aws.cpp @@ -30,8 +30,12 @@ struct imemstream : std::iostream namespace openPMD::internal { ExternalBlockStorageAws::ExternalBlockStorageAws( - Aws::S3::S3Client client, std::string bucketName) - : m_client{std::move(client)}, 
m_bucketName(std::move(bucketName)) + Aws::S3::S3Client client, + std::string bucketName, + std::optional endpoint) + : m_client{std::move(client)} + , m_bucketName(std::move(bucketName)) + , m_endpoint(std::move(endpoint)) { Aws::S3::Model::CreateBucketRequest create_request; create_request.SetBucket(m_bucketName); @@ -77,4 +81,17 @@ auto ExternalBlockStorageAws::put( return sanitized; } +[[nodiscard]] auto ExternalBlockStorageAws::externalStorageLocation() const + -> nlohmann::json +{ + nlohmann::json j; + j["provider"] = "s3"; + if (m_endpoint.has_value()) + { + j["endpoint"] = *m_endpoint; + } + j["bucket"] = m_bucketName; + return j; +} + } // namespace openPMD::internal diff --git a/src/toolkit/AwsBuilder.cpp b/src/toolkit/AwsBuilder.cpp index cb21fd399a..cc3cdc87ef 100644 --- a/src/toolkit/AwsBuilder.cpp +++ b/src/toolkit/AwsBuilder.cpp @@ -117,7 +117,9 @@ AwsBuilder::operator ExternalBlockStorage() false); return ExternalBlockStorage{std::make_unique( - std::move(s3_client), std::move(m_bucketName))}; + std::move(s3_client), + std::move(m_bucketName), + std::move(m_endpointOverride))}; } auto AwsBuilder::build() -> ExternalBlockStorage diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 2a0f4fc683..2d29023c9c 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -134,6 +134,12 @@ auto ExternalBlockStorage::store( return index_as_str; } +[[nodiscard]] auto ExternalBlockStorage::externalStorageLocation() const + -> nlohmann::json +{ + return m_worker->externalStorageLocation(); +} + void ExternalBlockStorage::sanitizeString(std::string &s) { for (char &c : s) diff --git a/src/toolkit/Stdio.cpp b/src/toolkit/Stdio.cpp index dac83d35c9..c3fecf6f2a 100644 --- a/src/toolkit/Stdio.cpp +++ b/src/toolkit/Stdio.cpp @@ -5,6 +5,39 @@ #include #include +namespace +{ +auto concat_filepath(std::string const &s1, std::string const &s2) + -> std::string +{ + if (s1.empty()) + { + return s2; 
+ } + if (s2.empty()) + { + return s1; + } + bool ends_with_slash = + *s1.crbegin() == openPMD::auxiliary::directory_separator; + bool starts_with_slash = + *s2.cbegin() == openPMD::auxiliary::directory_separator; + + if (ends_with_slash ^ starts_with_slash) + { + return s1 + s2; + } + else if (ends_with_slash && starts_with_slash) + { + return s1 + (s2.c_str() + 1); + } + else + { + return s1 + openPMD::auxiliary::directory_separator + s2; + } +} +} // namespace + namespace openPMD::internal { ExternalBlockStorageStdio::ExternalBlockStorageStdio( @@ -33,7 +66,7 @@ auto ExternalBlockStorageStdio::put( { auto sanitized = identifier + ".dat"; ExternalBlockStorage::sanitizeString(sanitized); - std::string filepath = m_directory + "/" + sanitized; + std::string filepath = concat_filepath(m_directory, sanitized); if (len == 0) { @@ -63,6 +96,16 @@ auto ExternalBlockStorageStdio::put( filepath); } - return filepath; + return sanitized; +} + +[[nodiscard]] auto ExternalBlockStorageStdio::externalStorageLocation() const + -> nlohmann::json +{ + nlohmann::json j; + j["provider"] = "stdio"; + j["directory"] = m_directory; + j["open_mode"] = m_openMode; + return j; } } // namespace openPMD::internal From a5d89c1cec7fe818cab22ac89b32006ebc27fdef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 5 Dec 2025 17:02:04 +0100 Subject: [PATCH 17/36] Prepare reloading ext block storage from old file --- .../openPMD/toolkit/ExternalBlockStorage.hpp | 7 +++ src/IO/JSON/JSONIOHandlerImpl.cpp | 43 ++++++++++++++----- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 6634321809..6f164dfab0 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -37,9 +37,16 @@ namespace openPMD // TODO: replace this with a concept upon switching to C++20 struct DatatypeHandling_Interface { + /* + * 
Returns false if the same JSON location was previously encoded as + * another datatype. + */ template static auto encodeDatatype(nlohmann::json &) -> bool; + /* + * Returns false if no encoded datatype could be found + */ template static auto decodeDatatype(nlohmann::json const &j, Args &&...args) -> bool; }; diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 7f953d8ac5..24feb8a9f2 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -322,20 +322,38 @@ namespace void parse_external_mode( json::TracingJSON mode, + // In read mode, the metadata section stored under 'external_storage' + // These are default values, overridable with the first argument + std::optional previousCfg, std::string const &configLocation, JSONIOHandlerImpl::DatasetMode_s &res) { using SpecificationVia = JSONIOHandlerImpl::SpecificationVia; using ExternalBlockStorage = openPMD::ExternalBlockStorage; - auto get_mandatory = [&](char const *key, - bool lowercase) -> std::string { - if (!mode.json().contains(key)) + auto get_key = + [&](char const *key) -> std::optional { + if (mode.json().contains(key)) { - throw error::BackendConfigSchema( - {configLocation, "mode", key}, "Mandatory key."); + return {&mode.json({key})}; + } + else if (previousCfg.has_value() && (*previousCfg)->contains(key)) + { + return {&(**previousCfg).at(key)}; + } + else + { + return std::nullopt; } - auto const &val = mode.json({key}); + }; + + auto get_mandatory = [&](char const *key, + bool lowercase) -> std::string { + auto const &val = *optionalOrElse( + get_key("mode"), [&]() -> nlohmann::json const * { + throw error::BackendConfigSchema( + {configLocation, "mode", key}, "Mandatory key."); + }); return optionalOrElse( lowercase ? 
openPMD::json::asLowerCaseStringDynamic(val) : openPMD::json::asStringDynamic(val), @@ -347,11 +365,12 @@ namespace }; auto if_contains_optional = [&](char const *key, bool lowercase, auto &&then) { - if (!mode.json().contains(key)) + auto const maybeVal = get_key(key); + if (!maybeVal.has_value()) { return; } - auto const &val = mode.json({key}); + auto const &val = **maybeVal; static_cast(then)(optionalOrElse( lowercase ? openPMD::json::asLowerCaseStringDynamic(val) : openPMD::json::asStringDynamic(val), @@ -362,11 +381,12 @@ namespace })); }; auto if_contains_optional_bool = [&](char const *key, auto &&then) { - if (!mode.json().contains(key)) + auto const maybeVal = get_key(key); + if (!maybeVal.has_value()) { return; } - auto const &val = mode.json({key}); + auto const &val = **maybeVal; if (!val.is_boolean()) { throw error::BackendConfigSchema( @@ -464,7 +484,8 @@ auto JSONIOHandlerImpl::retrieveDatasetMode( auto mode = datasetConfig["mode"]; if (mode.json().is_object()) { - parse_external_mode(std::move(mode), configLocation, res); + parse_external_mode( + std::move(mode), std::nullopt, configLocation, res); } else { From 7b045fe939a6fd7cd5b252de2867be64d5797a41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 5 Dec 2025 17:45:52 +0100 Subject: [PATCH 18/36] Reload config when reading from a JSON file --- include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 9 +- src/IO/JSON/JSONIOHandlerImpl.cpp | 89 +++++++++++++++++-- 2 files changed, 90 insertions(+), 8 deletions(-) diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index 2482e30e6d..1353352908 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -410,8 +410,15 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl std::string m_originalExtension; + /* + * In read mode, we can only open the external block storage backend upon + * opening the JSON file, because it 
contains meta information relevant + * for configuring the backend. + */ + std::optional + m_deferredExternalBlockstorageConfig; DatasetMode_s m_datasetMode; - DatasetMode_s retrieveDatasetMode(openPMD::json::TracingJSON &config) const; + DatasetMode_s retrieveDatasetMode(openPMD::json::TracingJSON &config); AttributeMode_s m_attributeMode; AttributeMode_s diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 24feb8a9f2..2dea756140 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -24,6 +24,7 @@ #include "openPMD/Error.hpp" #include "openPMD/IO/AbstractIOHandler.hpp" #include "openPMD/IO/AbstractIOHandlerImpl.hpp" +#include "openPMD/IO/Access.hpp" #include "openPMD/ThrowError.hpp" #include "openPMD/auxiliary/Filesystem.hpp" #include "openPMD/auxiliary/JSONMatcher.hpp" @@ -349,8 +350,8 @@ namespace auto get_mandatory = [&](char const *key, bool lowercase) -> std::string { - auto const &val = *optionalOrElse( - get_key("mode"), [&]() -> nlohmann::json const * { + auto const &val = + *optionalOrElse(get_key(key), [&]() -> nlohmann::json const * { throw error::BackendConfigSchema( {configLocation, "mode", key}, "Mandatory key."); }); @@ -394,7 +395,7 @@ namespace } static_cast(then)(val.get()); }; - auto modeString = get_mandatory("type", true); + auto modeString = get_mandatory("provider", true); if (modeString == "stdio") { @@ -465,8 +466,8 @@ namespace } } // namespace -auto JSONIOHandlerImpl::retrieveDatasetMode( - openPMD::json::TracingJSON &config) const -> DatasetMode_s +auto JSONIOHandlerImpl::retrieveDatasetMode(openPMD::json::TracingJSON &config) + -> DatasetMode_s { // start with / copy from current config auto res = m_datasetMode; @@ -484,8 +485,20 @@ auto JSONIOHandlerImpl::retrieveDatasetMode( auto mode = datasetConfig["mode"]; if (mode.json().is_object()) { - parse_external_mode( - std::move(mode), std::nullopt, configLocation, res); + if 
(access::writeOnly(m_handler->m_backendAccess)) + { + parse_external_mode( + std::move(mode), std::nullopt, configLocation, res); + } + else + { + // sic! initialize the deferred json config as a new + // tracing object + m_deferredExternalBlockstorageConfig = + std::make_optional( + config.json(), config.originallySpecifiedAs); + config.declareFullyRead(); + } } else { @@ -661,6 +674,14 @@ void JSONIOHandlerImpl::createFile( access::write(m_handler->m_backendAccess), "[JSON] Creating a file in read-only mode is not possible."); + if (m_deferredExternalBlockstorageConfig.has_value()) + { + throw error::Internal( + "Creation of external block storage backend was deferred until " + "opening the first file, but a file is created before any was " + "opened."); + } + /* * Need to resolve this later than init() since the openPMD version might be * specified after the creation of the IOHandler. @@ -1104,6 +1125,28 @@ void JSONIOHandlerImpl::openFile( auto file = std::get<0>(getPossiblyExisting(name)); + if (m_deferredExternalBlockstorageConfig.has_value()) + { + auto const &contents = obtainJsonContents(file); + auto previousConfig = [&]() -> std::optional { + if (contents->contains("external_storage")) + { + return std::make_optional( + &contents->at("external_storage")); + } + else + { + return std::nullopt; + } + }(); + parse_external_mode( + std::move(*m_deferredExternalBlockstorageConfig), + previousConfig, + backendConfigKey(), + m_datasetMode); + m_attributeMode.m_specificationVia = SpecificationVia::Manually; + } + associateWithFile(writable, file); writable->written = true; @@ -2242,6 +2285,9 @@ JSONIOHandlerImpl::obtainJsonContents(File const &file) auto res = serialImplementation(); #endif + bool initialize_external_block_storage = + m_deferredExternalBlockstorageConfig.has_value(); + if (res->contains(JSONDefaults::openpmd_internal)) { auto const &openpmd_internal = res->at(JSONDefaults::openpmd_internal); @@ -2272,6 +2318,10 @@ 
JSONIOHandlerImpl::obtainJsonContents(File const &file) { m_datasetMode.m_mode = DatasetMode::Template; } + else if (modeOption.value() == "external") + { + initialize_external_block_storage = true; + } else { std::cerr << "[JSON/TOML backend] Warning: Invalid value '" @@ -2315,6 +2365,31 @@ JSONIOHandlerImpl::obtainJsonContents(File const &file) } } } + + if (initialize_external_block_storage) + { + auto previousConfig = [&]() -> std::optional { + if (res->contains("external_storage")) + { + return std::make_optional( + &res->at("external_storage")); + } + else + { + return std::nullopt; + } + }(); + parse_external_mode( + m_deferredExternalBlockstorageConfig.has_value() + ? std::move(*m_deferredExternalBlockstorageConfig) + : openPMD::json::TracingJSON(), + previousConfig, + backendConfigKey(), + m_datasetMode); + m_attributeMode.m_specificationVia = SpecificationVia::Manually; + m_deferredExternalBlockstorageConfig.reset(); + } + m_jsonVals.emplace(file, res); return res; } From 68602122ad5557f4a17f9d44060c6f795d54ec63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 11:39:53 +0100 Subject: [PATCH 19/36] WIP: Read from EBS --- .../openPMD/toolkit/ExternalBlockStorage.hpp | 15 +++++++ src/IO/JSON/JSONIOHandlerImpl.cpp | 31 ++++++++++--- src/toolkit/ExternalBlockStorage.cpp | 44 ++++++++++++++++++- 3 files changed, 83 insertions(+), 7 deletions(-) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 6f164dfab0..9a69ed8dff 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -81,6 +81,21 @@ class ExternalBlockStorage std::optional infix, // e.g. 
for distinguishing MPI ranks T const *data) -> std::string; + template + void read( + std::string const &identifier, + nlohmann::json &fullJsonDataset, + nlohmann::json::json_pointer const &path, + T *data); + + template + void read( + Offset blockOffset, + Extent blockExtent, + nlohmann::json &fullJsonDataset, + nlohmann::json::json_pointer const &path, + T *data); + [[nodiscard]] auto externalStorageLocation() const -> nlohmann::json; static void sanitizeString(std::string &s); diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 2dea756140..69d5f2b8df 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -1547,14 +1547,28 @@ namespace static constexpr char const *errorMsg = "[JSON Backend] Fill with zeroes."; }; + + struct RetrieveExternally + { + template + static void + call(ExternalBlockStorage &blockStorage, void *ptr, Args &&...args) + { + blockStorage.read( + std::forward(args)..., static_cast(ptr)); + } + + static constexpr char const *errorMsg = "RetrieveExternally"; + }; } // namespace void JSONIOHandlerImpl::readDataset( Writable *writable, Parameter ¶meters) { - refreshFileFromParent(writable); - setAndGetFilePosition(writable); - auto &j = obtainJsonContents(writable); + auto file = refreshFileFromParent(writable); + auto filePosition = setAndGetFilePosition(writable); + auto &jsonRoot = *obtainJsonContents(file); + auto &j = jsonRoot[filePosition->id]; DatasetMode localMode = verifyDataset(parameters, j); std::visit( @@ -1582,8 +1596,15 @@ void JSONIOHandlerImpl::readDataset( switchNonVectorType( parameters.dtype, parameters.data.get(), parameters.extent); }, - [&](DatasetMode::External_t const &) { - throw std::runtime_error("Unimplemented"); + [&](DatasetMode::External_t &external) { + switchDatasetType( + parameters.dtype, + *external, + parameters.data.get(), + parameters.offset, + parameters.extent, + jsonRoot, + filePosition->id); }}, localMode.as_base()); } diff --git 
a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 2d29023c9c..0d2813eccc 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -112,7 +112,7 @@ auto ExternalBlockStorage::store( check_metadata("byte_width", sizeof(T)); check_metadata("extent", globalExtent); - auto &block = dataset[index_as_str]; + auto &block = dataset["external_blocks"][index_as_str]; block["offset"] = blockOffset; block["extent"] = blockExtent; std::stringstream filesystem_identifier; @@ -134,6 +134,35 @@ auto ExternalBlockStorage::store( return index_as_str; } +namespace +{ + template + void read_impl( + ExternalBlockStorageBackend *backend, + nlohmann::json const &external_block, + T *data) + {} +} // namespace + +template +void ExternalBlockStorage::read( + std::string const &identifier, + nlohmann::json &fullJsonDataset, + nlohmann::json::json_pointer const &path, + T *data) +{} + +template +void ExternalBlockStorage::read( + Offset blockOffset, + Extent blockExtent, + nlohmann::json &fullJsonDataset, + nlohmann::json::json_pointer const &path, + T *data) +{ + auto &dataset = fullJsonDataset[path]; +} + [[nodiscard]] auto ExternalBlockStorage::externalStorageLocation() const -> nlohmann::json { @@ -161,7 +190,18 @@ void ExternalBlockStorage::sanitizeString(std::string &s) nlohmann::json & fullJsonDataset, \ nlohmann::json::json_pointer const &path, \ std::optional infix, \ - type const *data) -> std::string; + type const *data) -> std::string; \ + template void ExternalBlockStorage::read( \ + std::string const &identifier, \ + nlohmann::json &fullJsonDataset, \ + nlohmann::json::json_pointer const &path, \ + type *data); \ + template void ExternalBlockStorage::read( \ + Offset blockOffset, \ + Extent blockExtent, \ + nlohmann::json & fullJsonDataset, \ + nlohmann::json::json_pointer const &path, \ + type *data); #define OPENPMD_INSTANTIATE(type) \ OPENPMD_INSTANTIATE_DATATYPEHANDLING(internal::JsonDatatypeHandling, 
type) OPENPMD_FOREACH_DATASET_DATATYPE(OPENPMD_INSTANTIATE) From 7a7557807379213560e2f1dd9db9ef6ecf0e8a02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 12:12:25 +0100 Subject: [PATCH 20/36] Base implementation for get() --- include/openPMD/toolkit/Aws.hpp | 1 + .../openPMD/toolkit/ExternalBlockStorage.hpp | 2 + include/openPMD/toolkit/Stdio.hpp | 1 + src/toolkit/Aws.cpp | 34 +++++++++++++++++ src/toolkit/ExternalBlockStorage.cpp | 2 +- src/toolkit/Stdio.cpp | 37 ++++++++++++++++++- 6 files changed, 75 insertions(+), 2 deletions(-) diff --git a/include/openPMD/toolkit/Aws.hpp b/include/openPMD/toolkit/Aws.hpp index 5051fa2fbc..5c66eee9c0 100644 --- a/include/openPMD/toolkit/Aws.hpp +++ b/include/openPMD/toolkit/Aws.hpp @@ -20,6 +20,7 @@ struct ExternalBlockStorageAws : ExternalBlockStorageBackend std::optional endpoint); auto put(std::string const &identifier, void const *data, size_t len) -> std::string override; + void get(std::string const &external_ref, void *data, size_t len) override; [[nodiscard]] auto externalStorageLocation() const -> nlohmann::json override; ~ExternalBlockStorageAws() override; diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 9a69ed8dff..cb5031cac2 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -24,6 +24,8 @@ struct ExternalBlockStorageBackend virtual auto put(std::string const &identifier, void const *data, size_t len) -> std::string = 0; + virtual void + get(std::string const &external_ref, void *data, size_t len) = 0; [[nodiscard]] virtual auto externalStorageLocation() const -> nlohmann::json = 0; diff --git a/include/openPMD/toolkit/Stdio.hpp b/include/openPMD/toolkit/Stdio.hpp index 1fb8713b67..9428151d2e 100644 --- a/include/openPMD/toolkit/Stdio.hpp +++ b/include/openPMD/toolkit/Stdio.hpp @@ -14,6 +14,7 @@ struct ExternalBlockStorageStdio : 
ExternalBlockStorageBackend ExternalBlockStorageStdio(std::string directory, std::string openMode); auto put(std::string const &identifier, void const *data, size_t len) -> std::string override; + void get(std::string const &external_ref, void *data, size_t len) override; [[nodiscard]] auto externalStorageLocation() const -> nlohmann::json override; ~ExternalBlockStorageStdio() override; diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp index 50aff10007..f536e1fdf7 100644 --- a/src/toolkit/Aws.cpp +++ b/src/toolkit/Aws.cpp @@ -1,9 +1,11 @@ #include "openPMD/toolkit/Aws.hpp" #include +#include #include #include +#include namespace { @@ -81,6 +83,38 @@ auto ExternalBlockStorageAws::put( return sanitized; } +void ExternalBlockStorageAws::get( + std::string const &external_ref, void *data, size_t len) +{ + if (len == 0) + { + return; + } + + Aws::S3::Model::GetObjectRequest get_request; + get_request.SetBucket(m_bucketName); + get_request.SetKey(external_ref); + + auto get_outcome = m_client.GetObject(get_request); + if (!get_outcome.IsSuccess()) + { + throw std::runtime_error( + std::string("ExternalBlockStorageAws::get failed: ") + + get_outcome.GetError().GetMessage()); + } + + auto &body = get_outcome.GetResult().GetBody(); + body.read( + reinterpret_cast(data), static_cast(len)); + std::streamsize read_bytes = body.gcount(); + if (read_bytes != static_cast(len)) + { + throw std::runtime_error( + "ExternalBlockStorageAws: failed to read expected number of bytes " + "from S3 object"); + } +} + [[nodiscard]] auto ExternalBlockStorageAws::externalStorageLocation() const -> nlohmann::json { diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 0d2813eccc..d59cc9a4f6 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -138,7 +138,7 @@ namespace { template void read_impl( - ExternalBlockStorageBackend *backend, + internal::ExternalBlockStorageBackend *backend, nlohmann::json const 
&external_block, T *data) {} diff --git a/src/toolkit/Stdio.cpp b/src/toolkit/Stdio.cpp index c3fecf6f2a..ddf7da7178 100644 --- a/src/toolkit/Stdio.cpp +++ b/src/toolkit/Stdio.cpp @@ -73,7 +73,7 @@ auto ExternalBlockStorageStdio::put( return filepath; } - FILE *file = std::fopen(filepath.c_str(), "wb"); + FILE *file = std::fopen(filepath.c_str(), m_openMode.c_str()); if (!file) { throw std::runtime_error( @@ -99,6 +99,41 @@ auto ExternalBlockStorageStdio::put( return sanitized; } +void ExternalBlockStorageStdio::get( + std::string const &external_ref, void *data, size_t len) +{ + if (len == 0) + { + return; + } + + std::string filepath = concat_filepath(m_directory, external_ref); + + FILE *file = std::fopen(filepath.c_str(), "rb"); + if (!file) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to open file for reading: " + + filepath); + } + + size_t read = std::fread(data, 1, len, file); + if (read != len) + { + std::fclose(file); + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to read full data from file: " + + filepath); + } + + if (std::fclose(file) != 0) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to close file after reading: " + + filepath); + } +} + [[nodiscard]] auto ExternalBlockStorageStdio::externalStorageLocation() const -> nlohmann::json { From aa17b838e4d5d9e12a23b317f0a892aceb0fd7f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 13:43:31 +0100 Subject: [PATCH 21/36] Untested read impl --- include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 15 ++++ .../openPMD/toolkit/ExternalBlockStorage.hpp | 7 ++ src/toolkit/Aws.cpp | 3 +- src/toolkit/ExternalBlockStorage.cpp | 78 +++++++++++++++---- 4 files changed, 87 insertions(+), 16 deletions(-) diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index 1353352908..b3cefec55a 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ 
b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -177,6 +177,21 @@ struct JsonDatatypeHandling } } + template + static auto checkDatatype(nlohmann::json const &j) -> bool + { + auto const &needed_datatype = + jsonDatatypeToString(determineDatatype()); + if (auto it = j.find("datatype"); it != j.end()) + { + return it.value().get() == needed_datatype; + } + else + { + return false; + } + } + template static auto decodeDatatype(nlohmann::json const &j, Args &&...args) -> bool { diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index cb5031cac2..082b767d70 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -46,6 +46,13 @@ struct DatatypeHandling_Interface template static auto encodeDatatype(nlohmann::json &) -> bool; + /* + * Returns false if the encoded datatype does not match T_required + * or if no datatype has been encoded. + */ + template + static auto checkDatatype(nlohmann::json const &j) -> bool; + /* * Returns false if no encoded datatype could be found */ diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp index f536e1fdf7..e8c12ab16b 100644 --- a/src/toolkit/Aws.cpp +++ b/src/toolkit/Aws.cpp @@ -67,7 +67,8 @@ auto ExternalBlockStorageAws::put( auto input_data = Aws::MakeShared( "PutObjectInputStream", reinterpret_cast(data), len); - std::static_pointer_cast(input_data); + put_request.SetBody(input_data); + put_request.SetContentLength(static_cast(len)); auto put_outcome = m_client.PutObject(put_request); diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index d59cc9a4f6..05d02f6a95 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -2,6 +2,7 @@ #include "openPMD/DatatypeMacros.hpp" #include "openPMD/IO/JSON/JSONIOHandlerImpl.hpp" +#include "openPMD/auxiliary/StringManip.hpp" #include @@ -16,6 +17,30 @@ 
ExternalBlockStorageBackend::~ExternalBlockStorageBackend() = default; namespace openPMD { + +namespace +{ + auto flat_extent(Extent const &e) -> size_t + { + return std::accumulate( + e.begin(), e.end(), 1, [](size_t left, size_t right) { + return left * right; + }); + } + + template + void read_impl( + internal::ExternalBlockStorageBackend *backend, + nlohmann::json const &external_block, + T *data, + size_t len) + { + auto const &external_ref = + external_block.at("external_ref").get(); + backend->get(external_ref, data, sizeof(T) * len); + } +} // namespace + ExternalBlockStorage::ExternalBlockStorage() = default; ExternalBlockStorage::ExternalBlockStorage( std::unique_ptr worker) @@ -125,25 +150,11 @@ auto ExternalBlockStorage::store( auto escaped_filesystem_identifier = m_worker->put( filesystem_identifier.str(), data, - std::accumulate( - blockExtent.begin(), - blockExtent.end(), - sizeof(T), - [](size_t left, size_t right) { return left * right; })); + sizeof(T) * flat_extent(blockExtent)); block["external_ref"] = escaped_filesystem_identifier; return index_as_str; } -namespace -{ - template - void read_impl( - internal::ExternalBlockStorageBackend *backend, - nlohmann::json const &external_block, - T *data) - {} -} // namespace - template void ExternalBlockStorage::read( std::string const &identifier, @@ -161,6 +172,43 @@ void ExternalBlockStorage::read( T *data) { auto &dataset = fullJsonDataset[path]; + if (!DatatypeHandling::template checkDatatype(dataset)) + { + throw std::runtime_error("Inconsistent chunk storage in datatype."); + } + auto external_blocks = dataset["external_blocks"]; + bool found_a_precise_match = false; + for (auto it = external_blocks.begin(); it != external_blocks.end(); ++it) + { + auto const &block = it.value(); + try + { + auto const &o = block.at("offset").get(); + auto const &e = block.at("extent").get(); + // Look only for exact matches for now + if (o != blockOffset || e != blockExtent) + { + continue; + } + 
found_a_precise_match = true; + read_impl(m_worker.get(), block, data, flat_extent(blockExtent)); + break; + } + catch (nlohmann::json::exception const &e) + { + std::cerr << "[ExternalBlockStorage::read] Could not parse block '" + << it.key() << "'. Original error was:\n" + << e.what(); + } + } + if (!found_a_precise_match) + { + throw std::runtime_error( + "[ExternalBlockStorage::read] Unable to find a precise match for " + "offset " + + auxiliary::vec_as_string(blockOffset) + " and extent " + + auxiliary::vec_as_string(blockExtent)); + } } [[nodiscard]] auto ExternalBlockStorage::externalStorageLocation() const From ba1d0ffa9e8586a28d88b195b731e4e9cdf48380 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 14:21:00 +0100 Subject: [PATCH 22/36] Basically working reading needed also support for availableChunks --- .../openPMD/toolkit/ExternalBlockStorage.hpp | 2 +- src/IO/JSON/JSONIOHandlerImpl.cpp | 41 +++++++++++++++++-- src/toolkit/ExternalBlockStorage.cpp | 6 +-- 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 082b767d70..cda4e3bd46 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -101,7 +101,7 @@ class ExternalBlockStorage void read( Offset blockOffset, Extent blockExtent, - nlohmann::json &fullJsonDataset, + nlohmann::json const &fullJsonDataset, nlohmann::json::json_pointer const &path, T *data); diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 69d5f2b8df..fe55892ca6 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -1104,9 +1104,44 @@ void JSONIOHandlerImpl::availableChunks( { refreshFileFromParent(writable); auto filePosition = setAndGetFilePosition(writable); - auto &j = obtainJsonContents(writable)["data"]; - *parameters.chunks = chunksInJSON(j); - 
chunk_assignment::mergeChunks(*parameters.chunks); + auto &j = obtainJsonContents(writable); + + auto [extent, datasetmode] = getExtent(j, m_datasetMode.m_mode); + + std::visit( + auxiliary::overloaded{ + [&](DatasetMode::Dataset_t const &) { + *parameters.chunks = chunksInJSON(j.at("data")); + chunk_assignment::mergeChunks(*parameters.chunks); + }, + [&](DatasetMode::Template_t const &) { + /* no-op, no chunks to be loaded */ + }, + [&](DatasetMode::External_t &) { + auto external_blocks = j.at("external_blocks"); + auto &res = *parameters.chunks; + res.reserve(external_blocks.size()); + for (auto it = external_blocks.begin(); + it != external_blocks.end(); + ++it) + { + auto const &block = it.value(); + try + { + auto const &o = block.at("offset").get(); + auto const &e = block.at("extent").get(); + res.emplace_back(o, e); + } + catch (nlohmann::json::exception const &e) + { + std::cerr << "[JSONIOHandlerImpl::availableChunks] " + "Could not parse block '" + << it.key() << "'. Original error was:\n" + << e.what(); + } + } + }}, + datasetmode.as_base()); } void JSONIOHandlerImpl::openFile( diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 05d02f6a95..8e44f9efc6 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -167,7 +167,7 @@ template void ExternalBlockStorage::read( Offset blockOffset, Extent blockExtent, - nlohmann::json &fullJsonDataset, + nlohmann::json const &fullJsonDataset, nlohmann::json::json_pointer const &path, T *data) { @@ -176,7 +176,7 @@ void ExternalBlockStorage::read( { throw std::runtime_error("Inconsistent chunk storage in datatype."); } - auto external_blocks = dataset["external_blocks"]; + auto external_blocks = dataset.at("external_blocks"); bool found_a_precise_match = false; for (auto it = external_blocks.begin(); it != external_blocks.end(); ++it) { @@ -247,7 +247,7 @@ void ExternalBlockStorage::sanitizeString(std::string &s) template void 
ExternalBlockStorage::read( \ Offset blockOffset, \ Extent blockExtent, \ - nlohmann::json & fullJsonDataset, \ + nlohmann::json const &fullJsonDataset, \ nlohmann::json::json_pointer const &path, \ type *data); #define OPENPMD_INSTANTIATE(type) \ From 43814097621e23b0c524a6cc3fac32277c305d5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 14:29:59 +0100 Subject: [PATCH 23/36] cleanup --- .../openPMD/toolkit/ExternalBlockStorage.hpp | 12 +++---- src/toolkit/ExternalBlockStorage.cpp | 32 +++++++++---------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index cda4e3bd46..25b776e620 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -82,9 +82,9 @@ class ExternalBlockStorage // returns created JSON key template auto store( - Extent globalExtent, - Offset blockOffset, - Extent blockExtent, + Extent const &globalExtent, + Offset const &blockOffset, + Extent const &blockExtent, nlohmann::json &fullJsonDataset, nlohmann::json::json_pointer const &path, std::optional infix, // e.g. 
for distinguishing MPI ranks @@ -93,14 +93,14 @@ class ExternalBlockStorage template void read( std::string const &identifier, - nlohmann::json &fullJsonDataset, + nlohmann::json const &fullJsonDataset, nlohmann::json::json_pointer const &path, T *data); template void read( - Offset blockOffset, - Extent blockExtent, + Offset const &blockOffset, + Extent const &blockExtent, nlohmann::json const &fullJsonDataset, nlohmann::json::json_pointer const &path, T *data); diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 8e44f9efc6..cee4a34fd1 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -63,9 +63,9 @@ auto ExternalBlockStorage::makeAwsSession( template auto ExternalBlockStorage::store( - Extent globalExtent, - Offset blockOffset, - Extent blockExtent, + Extent const &globalExtent, + Offset const &blockOffset, + Extent const &blockExtent, nlohmann::json &fullJsonDataset, nlohmann::json::json_pointer const &path, std::optional infix, @@ -157,16 +157,16 @@ auto ExternalBlockStorage::store( template void ExternalBlockStorage::read( - std::string const &identifier, - nlohmann::json &fullJsonDataset, - nlohmann::json::json_pointer const &path, - T *data) + [[maybe_unused]] std::string const &identifier, + [[maybe_unused]] nlohmann::json const &fullJsonDataset, + [[maybe_unused]] nlohmann::json::json_pointer const &path, + [[maybe_unused]] T *data) {} template void ExternalBlockStorage::read( - Offset blockOffset, - Extent blockExtent, + Offset const &blockOffset, + Extent const &blockExtent, nlohmann::json const &fullJsonDataset, nlohmann::json::json_pointer const &path, T *data) @@ -232,21 +232,21 @@ void ExternalBlockStorage::sanitizeString(std::string &s) #define OPENPMD_INSTANTIATE_DATATYPEHANDLING(datatypehandling, type) \ template auto ExternalBlockStorage::store( \ - Extent globalExtent, \ - Offset blockOffset, \ - Extent blockExtent, \ - nlohmann::json & fullJsonDataset, \ + 
Extent const &globalExtent, \ + Offset const &blockOffset, \ + Extent const &blockExtent, \ + nlohmann::json &fullJsonDataset, \ nlohmann::json::json_pointer const &path, \ std::optional infix, \ type const *data) -> std::string; \ template void ExternalBlockStorage::read( \ std::string const &identifier, \ - nlohmann::json &fullJsonDataset, \ + nlohmann::json const &fullJsonDataset, \ nlohmann::json::json_pointer const &path, \ type *data); \ template void ExternalBlockStorage::read( \ - Offset blockOffset, \ - Extent blockExtent, \ + Offset const &blockOffset, \ + Extent const &blockExtent, \ nlohmann::json const &fullJsonDataset, \ nlohmann::json::json_pointer const &path, \ type *data); From 4610ab1bb2fa9fa998b1a25e03a407b06fe5fdfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 15:00:50 +0100 Subject: [PATCH 24/36] Naming fixes --- src/IO/JSON/JSONIOHandlerImpl.cpp | 18 ++++++++++-------- src/toolkit/Aws.cpp | 2 +- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index fe55892ca6..399cbc7eed 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -353,14 +353,15 @@ namespace auto const &val = *optionalOrElse(get_key(key), [&]() -> nlohmann::json const * { throw error::BackendConfigSchema( - {configLocation, "mode", key}, "Mandatory key."); + {configLocation, "dataset", "mode", key}, + "Mandatory key."); }); return optionalOrElse( lowercase ? 
openPMD::json::asLowerCaseStringDynamic(val) : openPMD::json::asStringDynamic(val), [&]() -> std::string { throw error::BackendConfigSchema( - {configLocation, "mode", key}, + {configLocation, "dataset", "mode", key}, "Must be of string type."); }); }; @@ -377,7 +378,7 @@ namespace : openPMD::json::asStringDynamic(val), [&]() -> std::string { throw error::BackendConfigSchema( - {configLocation, "mode", key}, + {configLocation, "dataset", "mode", key}, "Must be of string type."); })); }; @@ -391,7 +392,8 @@ namespace if (!val.is_boolean()) { throw error::BackendConfigSchema( - {configLocation, "mode", key}, "Must be of boolean type."); + {configLocation, "dataset", "mode", key}, + "Must be of boolean type."); } static_cast(then)(val.get()); }; @@ -414,7 +416,7 @@ namespace openPMD::internal::AwsBuilder builder( // TODO: bucket_name: introduce expansion pattern for openPMD // file name - get_mandatory("bucket_name", false), + get_mandatory("bucket", false), get_mandatory("access_key_id", false), get_mandatory("secret_access_key", false)); @@ -447,7 +449,7 @@ namespace else { throw error::BackendConfigSchema( - {configLocation, "mode", "scheme"}, + {configLocation, "dataset", "mode", "scheme"}, "Must be either 'http' or 'https'."); } }); @@ -458,7 +460,7 @@ namespace else { throw error::BackendConfigSchema( - {configLocation, "mode", "type"}, + {configLocation, "dataset", "mode", "provider"}, "Must be either 'stdio' or 'aws'."); } @@ -496,7 +498,7 @@ auto JSONIOHandlerImpl::retrieveDatasetMode(openPMD::json::TracingJSON &config) // tracing object m_deferredExternalBlockstorageConfig = std::make_optional( - config.json(), config.originallySpecifiedAs); + mode.json(), mode.originallySpecifiedAs); config.declareFullyRead(); } } diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp index e8c12ab16b..5a7c17ead4 100644 --- a/src/toolkit/Aws.cpp +++ b/src/toolkit/Aws.cpp @@ -120,7 +120,7 @@ void ExternalBlockStorageAws::get( -> nlohmann::json { nlohmann::json j; - 
j["provider"] = "s3"; + j["provider"] = "aws"; if (m_endpoint.has_value()) { j["endpoint"] = *m_endpoint; From ed239cf061d54e7243bf528a14a778c9f4afec40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 15:48:25 +0100 Subject: [PATCH 25/36] wahhh? --- test/SerialIOTest.cpp | 60 ++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 18 deletions(-) diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 432dd864f3..8c8143776e 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -5064,9 +5064,8 @@ this = "should not warn" // BP3 engine writes files, BP4 writes directories REQUIRE(openPMD::auxiliary::file_exists("../samples/jsonConfiguredBP3.bp")); - REQUIRE( - openPMD::auxiliary::directory_exists( - "../samples/jsonConfiguredBP4.bp")); + REQUIRE(openPMD::auxiliary::directory_exists( + "../samples/jsonConfiguredBP4.bp")); std::string readConfigBP3 = R"END( { @@ -5243,6 +5242,27 @@ TEST_CASE("bp4_steps", "[serial][adios2]") void serial_iterator(std::string const &file) { + auto const write_config = R"( +init_aws_api = true +rank_table = "posix_hostname" + +[json.attribute] +mode = "short" + +[json.dataset.mode] +provider = "aws" +access_key_id = "test" +secret_access_key = "test" +endpoint = "http://localhost:4566" +bucket = "simdata" + )"; + auto const read_config = R"( +init_aws_api = true + +[json.dataset.mode] +access_key_id = "test" +secret_access_key = "test" + )"; constexpr Extent::value_type extent = 1000; { Series writeSeries( @@ -5250,7 +5270,7 @@ void serial_iterator(std::string const &file) Access::CREATE_LINEAR #ifndef _WIN32 , - R"({"rank_table": "posix_hostname"})" + write_config #endif ); auto iterations = writeSeries.snapshots(); @@ -5265,7 +5285,7 @@ void serial_iterator(std::string const &file) } } - Series readSeries(file, Access::READ_ONLY); + Series readSeries(file, Access::READ_ONLY, read_config); size_t last_iteration_index = 0; size_t numberOfIterations = 0; @@ 
-5301,19 +5321,23 @@ void serial_iterator(std::string const &file) TEST_CASE("serial_iterator", "[serial][adios2]") { - for (auto const &t : testedFileExtensions()) - { -#ifdef _WIN32 - serial_iterator("../samples/serial_iterator_filebased_%T." + t); - serial_iterator("../samples/serial_iterator_groupbased." + t); -#else - // Add some regex characters into the file names to see that we can deal - // with that. Don't do that on Windows because Windows does not like - // those characters within file paths. - serial_iterator("../samples/serial_iterator_filebased_+?_%T." + t); - serial_iterator("../samples/serial_iterator_groupbased_+?." + t); -#endif - } + serial_iterator("../samples/serial_iterator.json"); + // for (auto const &t : testedFileExtensions()) + // { + // #ifdef _WIN32 + // serial_iterator("../samples/serial_iterator_filebased_%T." + t); + // serial_iterator("../samples/serial_iterator_groupbased." + t); + // #else + // // Add some regex characters into the file names to see that we + // can deal + // // with that. Don't do that on Windows because Windows does not + // like + // // those characters within file paths. + // serial_iterator("../samples/serial_iterator_filebased_+?_%T." + + // t); serial_iterator("../samples/serial_iterator_groupbased_+?." 
+ + // t); + // #endif + // } } void variableBasedSingleIteration(std::string const &file) From eed60247bc37471aedcee4332dc014653d5f7f8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 16:01:15 +0100 Subject: [PATCH 26/36] Fix double initialization of EBS --- src/IO/JSON/JSONIOHandlerImpl.cpp | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 399cbc7eed..a6a9a058ca 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -1162,27 +1162,15 @@ void JSONIOHandlerImpl::openFile( auto file = std::get<0>(getPossiblyExisting(name)); - if (m_deferredExternalBlockstorageConfig.has_value()) - { - auto const &contents = obtainJsonContents(file); - auto previousConfig = [&]() -> std::optional { - if (contents->contains("external_storage")) - { - return std::make_optional( - &contents->at("external_storage")); - } - else - { - return std::nullopt; - } - }(); - parse_external_mode( - std::move(*m_deferredExternalBlockstorageConfig), - previousConfig, - backendConfigKey(), - m_datasetMode); - m_attributeMode.m_specificationVia = SpecificationVia::Manually; - } + // Need to access data in order to resolve external block storage + // configuration. EBS for read modes is configured at two places: + // + // 1. In the JSON config (stored at m_deferredExternalBlockstorageConfig) + // 2. In the previous JSON file that we are now opening + // + // Since the configuration may exclusively take place in either of the two + // options, files need to be opened now in any case. + obtainJsonContents(file); associateWithFile(writable, file); From 72b41a3855edc5a6eb1206714116d112fb6198d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 16:13:12 +0100 Subject: [PATCH 27/36] Revert "wahhh?" This reverts commit ed239cf061d54e7243bf528a14a778c9f4afec40. 
--- test/SerialIOTest.cpp | 60 +++++++++++++------------------------------ 1 file changed, 18 insertions(+), 42 deletions(-) diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 8c8143776e..432dd864f3 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -5064,8 +5064,9 @@ this = "should not warn" // BP3 engine writes files, BP4 writes directories REQUIRE(openPMD::auxiliary::file_exists("../samples/jsonConfiguredBP3.bp")); - REQUIRE(openPMD::auxiliary::directory_exists( - "../samples/jsonConfiguredBP4.bp")); + REQUIRE( + openPMD::auxiliary::directory_exists( + "../samples/jsonConfiguredBP4.bp")); std::string readConfigBP3 = R"END( { @@ -5242,27 +5243,6 @@ TEST_CASE("bp4_steps", "[serial][adios2]") void serial_iterator(std::string const &file) { - auto const write_config = R"( -init_aws_api = true -rank_table = "posix_hostname" - -[json.attribute] -mode = "short" - -[json.dataset.mode] -provider = "aws" -access_key_id = "test" -secret_access_key = "test" -endpoint = "http://localhost:4566" -bucket = "simdata" - )"; - auto const read_config = R"( -init_aws_api = true - -[json.dataset.mode] -access_key_id = "test" -secret_access_key = "test" - )"; constexpr Extent::value_type extent = 1000; { Series writeSeries( @@ -5270,7 +5250,7 @@ secret_access_key = "test" Access::CREATE_LINEAR #ifndef _WIN32 , - write_config + R"({"rank_table": "posix_hostname"})" #endif ); auto iterations = writeSeries.snapshots(); @@ -5285,7 +5265,7 @@ secret_access_key = "test" } } - Series readSeries(file, Access::READ_ONLY, read_config); + Series readSeries(file, Access::READ_ONLY); size_t last_iteration_index = 0; size_t numberOfIterations = 0; @@ -5321,23 +5301,19 @@ secret_access_key = "test" TEST_CASE("serial_iterator", "[serial][adios2]") { - serial_iterator("../samples/serial_iterator.json"); - // for (auto const &t : testedFileExtensions()) - // { - // #ifdef _WIN32 - // serial_iterator("../samples/serial_iterator_filebased_%T." 
+ t); - // serial_iterator("../samples/serial_iterator_groupbased." + t); - // #else - // // Add some regex characters into the file names to see that we - // can deal - // // with that. Don't do that on Windows because Windows does not - // like - // // those characters within file paths. - // serial_iterator("../samples/serial_iterator_filebased_+?_%T." + - // t); serial_iterator("../samples/serial_iterator_groupbased_+?." + - // t); - // #endif - // } + for (auto const &t : testedFileExtensions()) + { +#ifdef _WIN32 + serial_iterator("../samples/serial_iterator_filebased_%T." + t); + serial_iterator("../samples/serial_iterator_groupbased." + t); +#else + // Add some regex characters into the file names to see that we can deal + // with that. Don't do that on Windows because Windows does not like + // those characters within file paths. + serial_iterator("../samples/serial_iterator_filebased_+?_%T." + t); + serial_iterator("../samples/serial_iterator_groupbased_+?." + t); +#endif + } } void variableBasedSingleIteration(std::string const &file) From 8fdeb39ca1ce22b9cb2a8439a4b8413dca245f1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 16:29:58 +0100 Subject: [PATCH 28/36] Actually use slashes in S3 allow --- src/toolkit/Aws.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp index 5a7c17ead4..c9a7b71fad 100644 --- a/src/toolkit/Aws.cpp +++ b/src/toolkit/Aws.cpp @@ -58,8 +58,9 @@ ExternalBlockStorageAws::~ExternalBlockStorageAws() = default; auto ExternalBlockStorageAws::put( std::string const &identifier, void const *data, size_t len) -> std::string { - auto sanitized = identifier; - ExternalBlockStorage::sanitizeString(sanitized); + auto sanitized = !identifier.empty() && identifier.at(0) == '/' + ? 
identifier.substr(1) + : identifier; Aws::S3::Model::PutObjectRequest put_request; put_request.SetBucket(m_bucketName); From e1295ffe3c364ecb39b585112a275d5df6c28187 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 19:26:50 +0100 Subject: [PATCH 29/36] Warn on unused restart config --- src/IO/JSON/JSONIOHandlerImpl.cpp | 33 ++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index a6a9a058ca..4a2a96da4c 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -144,11 +144,30 @@ namespace return *accum_ptr; } - void warnUnusedJson(openPMD::json::TracingJSON const &jsonConfig) + auto prepend_to_json(nlohmann::json j) -> nlohmann::json + { + return j; + } + + template + auto prepend_to_json(nlohmann::json j, Arg &&arg, Args &&...args) + -> nlohmann::json + { + return nlohmann::json{ + {std::forward(arg), + prepend_to_json(std::move(j), std::forward(args)...)}}; + } + + template + void warnUnusedJson( + openPMD::json::TracingJSON const &jsonConfig, + Args &&...extra_json_hierarchy) { auto shadow = jsonConfig.invertShadow(); if (shadow.size() > 0) { + shadow = prepend_to_json( + std::move(shadow), std::forward(extra_json_hierarchy)...); switch (jsonConfig.originallySpecifiedAs) { case openPMD::json::SupportedLanguages::JSON: @@ -2425,14 +2444,14 @@ JSONIOHandlerImpl::obtainJsonContents(File const &file) return std::nullopt; } }(); + auto manual_config = m_deferredExternalBlockstorageConfig.has_value() + ? std::move(*m_deferredExternalBlockstorageConfig) + : openPMD::json::TracingJSON(); parse_external_mode( - m_deferredExternalBlockstorageConfig.has_value() - ? 
std::move(*m_deferredExternalBlockstorageConfig) - : openPMD::json::TracingJSON(), - previousConfig, - backendConfigKey(), - m_datasetMode); + manual_config, previousConfig, backendConfigKey(), m_datasetMode); + warnUnusedJson(manual_config, "dataset", "mode"); m_attributeMode.m_specificationVia = SpecificationVia::Manually; + m_deferredExternalBlockstorageConfig.reset(); } From 81ab21dcf078c689a7e9fc570a1087818bc712aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 19:27:20 +0100 Subject: [PATCH 30/36] Reapply "wahhh?" This reverts commit 72b41a3855edc5a6eb1206714116d112fb6198d8. --- test/SerialIOTest.cpp | 60 ++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 18 deletions(-) diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 432dd864f3..8c8143776e 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -5064,9 +5064,8 @@ this = "should not warn" // BP3 engine writes files, BP4 writes directories REQUIRE(openPMD::auxiliary::file_exists("../samples/jsonConfiguredBP3.bp")); - REQUIRE( - openPMD::auxiliary::directory_exists( - "../samples/jsonConfiguredBP4.bp")); + REQUIRE(openPMD::auxiliary::directory_exists( + "../samples/jsonConfiguredBP4.bp")); std::string readConfigBP3 = R"END( { @@ -5243,6 +5242,27 @@ TEST_CASE("bp4_steps", "[serial][adios2]") void serial_iterator(std::string const &file) { + auto const write_config = R"( +init_aws_api = true +rank_table = "posix_hostname" + +[json.attribute] +mode = "short" + +[json.dataset.mode] +provider = "aws" +access_key_id = "test" +secret_access_key = "test" +endpoint = "http://localhost:4566" +bucket = "simdata" + )"; + auto const read_config = R"( +init_aws_api = true + +[json.dataset.mode] +access_key_id = "test" +secret_access_key = "test" + )"; constexpr Extent::value_type extent = 1000; { Series writeSeries( @@ -5250,7 +5270,7 @@ void serial_iterator(std::string const &file) Access::CREATE_LINEAR #ifndef _WIN32 , - 
R"({"rank_table": "posix_hostname"})" + write_config #endif ); auto iterations = writeSeries.snapshots(); @@ -5265,7 +5285,7 @@ void serial_iterator(std::string const &file) } } - Series readSeries(file, Access::READ_ONLY); + Series readSeries(file, Access::READ_ONLY, read_config); size_t last_iteration_index = 0; size_t numberOfIterations = 0; @@ -5301,19 +5321,23 @@ void serial_iterator(std::string const &file) TEST_CASE("serial_iterator", "[serial][adios2]") { - for (auto const &t : testedFileExtensions()) - { -#ifdef _WIN32 - serial_iterator("../samples/serial_iterator_filebased_%T." + t); - serial_iterator("../samples/serial_iterator_groupbased." + t); -#else - // Add some regex characters into the file names to see that we can deal - // with that. Don't do that on Windows because Windows does not like - // those characters within file paths. - serial_iterator("../samples/serial_iterator_filebased_+?_%T." + t); - serial_iterator("../samples/serial_iterator_groupbased_+?." + t); -#endif - } + serial_iterator("../samples/serial_iterator.json"); + // for (auto const &t : testedFileExtensions()) + // { + // #ifdef _WIN32 + // serial_iterator("../samples/serial_iterator_filebased_%T." + t); + // serial_iterator("../samples/serial_iterator_groupbased." + t); + // #else + // // Add some regex characters into the file names to see that we + // can deal + // // with that. Don't do that on Windows because Windows does not + // like + // // those characters within file paths. + // serial_iterator("../samples/serial_iterator_filebased_+?_%T." + + // t); serial_iterator("../samples/serial_iterator_groupbased_+?." 
+ + // t); + // #endif + // } } void variableBasedSingleIteration(std::string const &file) From 2440250347ccbbd40f576696316680e0cac1a7e4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 8 Dec 2025 18:35:27 +0000 Subject: [PATCH 31/36] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- test/SerialIOTest.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 8c8143776e..b042f07288 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -5064,8 +5064,9 @@ this = "should not warn" // BP3 engine writes files, BP4 writes directories REQUIRE(openPMD::auxiliary::file_exists("../samples/jsonConfiguredBP3.bp")); - REQUIRE(openPMD::auxiliary::directory_exists( - "../samples/jsonConfiguredBP4.bp")); + REQUIRE( + openPMD::auxiliary::directory_exists( + "../samples/jsonConfiguredBP4.bp")); std::string readConfigBP3 = R"END( { From 6e759561f4222307132812c78d55c128534e39f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 9 Dec 2025 11:21:30 +0100 Subject: [PATCH 32/36] Use rank identifier also in JSON strings necessary precondition for MPI merging --- src/toolkit/ExternalBlockStorage.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index cee4a34fd1..4a4d37fa96 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -90,7 +90,7 @@ auto ExternalBlockStorage::store( }(); constexpr size_t padding = 6; - std::string index_as_str = [running_index]() { + std::string index_as_str = [running_index, &infix]() { auto res = std::to_string(running_index); auto size = res.size(); if (size >= padding) @@ -98,6 +98,10 @@ auto ExternalBlockStorage::store( return res; } std::stringstream padded; + if (infix.has_value()) + { + 
padded << *infix << "--"; + } for (size_t i = 0; i < padding - size; ++i) { padded << '0'; @@ -142,10 +146,6 @@ auto ExternalBlockStorage::store( block["extent"] = blockExtent; std::stringstream filesystem_identifier; filesystem_identifier << path.to_string(); - if (infix.has_value()) - { - filesystem_identifier << "--" << *infix; - } filesystem_identifier << "--" << index_as_str; auto escaped_filesystem_identifier = m_worker->put( filesystem_identifier.str(), From c371105df6ff6408945d04c92e8417509bf5129e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 9 Dec 2025 13:30:37 +0100 Subject: [PATCH 33/36] Extract implementation of convert-toml-json to header --- include/openPMD/cli/convert-toml-json.hpp | 119 ++++++++++++++++++++++ src/cli/convert-toml-json.cpp | 69 ++----------- 2 files changed, 128 insertions(+), 60 deletions(-) create mode 100644 include/openPMD/cli/convert-toml-json.hpp diff --git a/include/openPMD/cli/convert-toml-json.hpp b/include/openPMD/cli/convert-toml-json.hpp new file mode 100644 index 0000000000..0b59a2ca22 --- /dev/null +++ b/include/openPMD/cli/convert-toml-json.hpp @@ -0,0 +1,119 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include + +namespace from_format_to_format +{ +namespace json = openPMD::json; +struct ID +{ + template + static auto call(nlohmann::json const &&val) + // template <> + // auto call(nlohmann::json const &val) -> + // nlohmann::json const& + { + if constexpr (originallySpecifiedAs == json::SupportedLanguages::JSON) + { + return val; + } + else + { + return json::jsonToToml(val); + } + } +}; + +struct switch_ +{ + template + struct other_type; + template + static auto call(nlohmann::json const &&val) + { + return ID::call::value>( + std::move(val)); + } +}; +template <> +struct switch_::other_type +{ + static constexpr json::SupportedLanguages value = + json::SupportedLanguages::TOML; +}; +template <> +struct switch_::other_type +{ + static constexpr 
json::SupportedLanguages value = + json::SupportedLanguages::JSON; +}; +} // namespace from_format_to_format + +template +class convert_json_toml +{ + static void with_parsed_cmdline_args(std::string jsonOrToml) + { + namespace json = openPMD::json; + auto [config, originallySpecifiedAs] = json::parseOptions( + jsonOrToml, + /* considerFiles = */ true, + /* convertLowercase = */ false); + { + // NOLINTNEXTLINE(bugprone-unused-local-non-trivial-variable) + [[maybe_unused]] auto _ = std::move(jsonOrToml); + } + switch (originallySpecifiedAs) + { + using SL = json::SupportedLanguages; + case SL::JSON: { + auto asToml = json::jsonToToml(config); + std::cout << json::format_toml(asToml); + } + break; + case SL::TOML: + std::cout << config << '\n'; + break; + } + } + +public: + static void run_application( + int argc, char const **argv, void (*print_help_message)(char const *)) + { + std::string jsonOrToml; + switch (argc) + { + case 0: + case 1: + // Just read the whole stream into memory + // Not very elegant, but we'll hold the entire JSON/TOML dataset + // in memory at some point anyway, so it doesn't really matter + { + std::stringbuf readEverything; + std::cin >> &readEverything; + jsonOrToml = readEverything.str(); + } + break; + case 2: + if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0) + { + print_help_message(argv[1]); + exit(0); + } + jsonOrToml = argv[1]; + break; + default: + throw std::runtime_error( + std::string("Usage: ") + argv[0] + + " [file location or inline JSON/TOML]"); + } + with_parsed_cmdline_args(std::move(jsonOrToml)); + } +}; diff --git a/src/cli/convert-toml-json.cpp b/src/cli/convert-toml-json.cpp index 60017fd8e4..83b4a16ab3 100644 --- a/src/cli/convert-toml-json.cpp +++ b/src/cli/convert-toml-json.cpp @@ -1,55 +1,8 @@ -#include -#include -#include +#include "openPMD/cli/convert-toml-json.hpp" -#include -#include -#include - -namespace json = openPMD::json; - -void parsed_main(std::string jsonOrToml) -{ - auto [config, 
originallySpecifiedAs] = json::parseOptions( - jsonOrToml, /* considerFiles = */ true, /* convertLowercase = */ false); - { - // NOLINTNEXTLINE(bugprone-unused-local-non-trivial-variable) - [[maybe_unused]] auto _ = std::move(jsonOrToml); - } - switch (originallySpecifiedAs) - { - using SL = json::SupportedLanguages; - case SL::JSON: { - auto asToml = json::jsonToToml(config); - std::cout << json::format_toml(asToml); - } - break; - case SL::TOML: - std::cout << config << '\n'; - break; - } -} - -int main(int argc, char const **argv) +void print_help_message(char const *program_name) { - std::string jsonOrToml; - switch (argc) - { - case 0: - case 1: - // Just read the whole stream into memory - // Not very elegant, but we'll hold the entire JSON/TOML dataset - // in memory at some point anyway, so it doesn't really matter - { - std::stringbuf readEverything; - std::cin >> &readEverything; - jsonOrToml = readEverything.str(); - } - break; - case 2: - if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0) - { - std::cout << "Usage: " << std::string(argv[0]) << R"( [json_or_toml] + std::cout << "Usage: " << std::string(program_name) << R"( [json_or_toml] 'json_or_toml' can be a JSON or TOML dataset specified inline or a reference to a file prepended by an '@'. Inline datasets will be interpreted as JSON if they start with an '{', as TOML @@ -60,14 +13,10 @@ Inline dataset specifications can be replaced by input read from stdin. If the input is JSON, then it will be converted to TOML and written to stdout, equivalently from TOML to JSON. 
)"; - exit(0); - } - jsonOrToml = argv[1]; - break; - default: - throw std::runtime_error( - std::string("Usage: ") + argv[0] + - " [file location or inline JSON/TOML]"); - } - parsed_main(std::move(jsonOrToml)); +} + +int main(int argc, char const **argv) +{ + convert_json_toml::run_application( + argc, argv, print_help_message); } From 854bbdb285255a8939cbc9bb8d2106b411c910aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 9 Dec 2025 13:50:20 +0100 Subject: [PATCH 34/36] Implement merging --- include/openPMD/cli/convert-toml-json.hpp | 47 +++++++++++++++-------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/include/openPMD/cli/convert-toml-json.hpp b/include/openPMD/cli/convert-toml-json.hpp index 0b59a2ca22..5ffcabe127 100644 --- a/include/openPMD/cli/convert-toml-json.hpp +++ b/include/openPMD/cli/convert-toml-json.hpp @@ -6,6 +6,7 @@ #include #include +#include #include namespace from_format_to_format @@ -58,17 +59,11 @@ struct switch_::other_type template class convert_json_toml { - static void with_parsed_cmdline_args(std::string jsonOrToml) + static void + with_parsed_cmdline_args(openPMD::json::ParsedConfig parsed_config) { namespace json = openPMD::json; - auto [config, originallySpecifiedAs] = json::parseOptions( - jsonOrToml, - /* considerFiles = */ true, - /* convertLowercase = */ false); - { - // NOLINTNEXTLINE(bugprone-unused-local-non-trivial-variable) - [[maybe_unused]] auto _ = std::move(jsonOrToml); - } + auto [config, originallySpecifiedAs] = std::move(parsed_config); switch (originallySpecifiedAs) { using SL = json::SupportedLanguages; @@ -83,6 +78,30 @@ class convert_json_toml } } + static auto merge(char const **begin, char const **end) + -> openPMD::json::ParsedConfig + { + namespace json = openPMD::json; + if (begin == end) + { + throw std::runtime_error( + "merge: need at least one JSON/TOML file."); + } + auto config = json::parseOptions( + *begin, + /* considerFiles = */ true, + /* 
convertLowercase = */ false); + for (++begin; begin != end; ++begin) + { + auto [next, _] = json::parseOptions( + *begin, + /* considerFiles = */ true, + /* convertLowercase = */ false); + json::merge_internal(config.config, next, /* do_prune = */ false); + } + return config; + } + public: static void run_application( int argc, char const **argv, void (*print_help_message)(char const *)) @@ -101,19 +120,15 @@ class convert_json_toml jsonOrToml = readEverything.str(); } break; - case 2: + default: if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0) { print_help_message(argv[1]); exit(0); } - jsonOrToml = argv[1]; + auto parsed_config = merge(argv + 1, argv + argc); + with_parsed_cmdline_args(std::move(parsed_config)); break; - default: - throw std::runtime_error( - std::string("Usage: ") + argv[0] + - " [file location or inline JSON/TOML]"); } - with_parsed_cmdline_args(std::move(jsonOrToml)); } }; From f65ce01cc96d880502bffe3515ef16d6303981d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 9 Dec 2025 14:12:30 +0100 Subject: [PATCH 35/36] Add merge-json --- CMakeLists.txt | 1 + include/openPMD/cli/convert-toml-json.hpp | 23 ++++++++++++++++++----- src/cli/convert-toml-json.cpp | 2 +- src/cli/merge-json.cpp | 22 ++++++++++++++++++++++ 4 files changed, 42 insertions(+), 6 deletions(-) create mode 100644 src/cli/merge-json.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 95edb7a266..97f1bdc543 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -713,6 +713,7 @@ set(openPMD_TEST_NAMES set(openPMD_CLI_TOOL_NAMES ls convert-toml-json + merge-json ) set(openPMD_PYTHON_CLI_TOOL_NAMES pipe diff --git a/include/openPMD/cli/convert-toml-json.hpp b/include/openPMD/cli/convert-toml-json.hpp index 5ffcabe127..e632f8107c 100644 --- a/include/openPMD/cli/convert-toml-json.hpp +++ b/include/openPMD/cli/convert-toml-json.hpp @@ -59,6 +59,15 @@ struct switch_::other_type template class convert_json_toml { + static void print(toml::value 
&val) + { + namespace json = openPMD::json; + std::cout << json::format_toml(val); + } + static void print(nlohmann::json const &val) + { + std::cout << val << '\n'; + } static void with_parsed_cmdline_args(openPMD::json::ParsedConfig parsed_config) { @@ -68,13 +77,17 @@ class convert_json_toml { using SL = json::SupportedLanguages; case SL::JSON: { - auto asToml = json::jsonToToml(config); - std::cout << json::format_toml(asToml); + auto for_print = + FromFormatToFormat::template call(std::move(config)); + print(for_print); + } + break; + case SL::TOML: { + auto for_print = + FromFormatToFormat::template call(std::move(config)); + print(for_print); } break; - case SL::TOML: - std::cout << config << '\n'; - break; } } diff --git a/src/cli/convert-toml-json.cpp b/src/cli/convert-toml-json.cpp index 83b4a16ab3..fbb1562b58 100644 --- a/src/cli/convert-toml-json.cpp +++ b/src/cli/convert-toml-json.cpp @@ -17,6 +17,6 @@ equivalently from TOML to JSON. int main(int argc, char const **argv) { - convert_json_toml::run_application( + convert_json_toml::run_application( argc, argv, print_help_message); } diff --git a/src/cli/merge-json.cpp b/src/cli/merge-json.cpp new file mode 100644 index 0000000000..c9cb2def1f --- /dev/null +++ b/src/cli/merge-json.cpp @@ -0,0 +1,22 @@ +#include "openPMD/cli/convert-toml-json.hpp" + +void print_help_message(char const *program_name) +{ + std::cout << "Usage: " << std::string(program_name) << R"( [json_or_toml]+ +'json_or_toml' can be a JSON or TOML dataset specified inline or a reference +to a file prepended by an '@'. +Inline datasets will be interpreted as JSON if they start with an '{', as TOML +otherwise. Datasets from a file will be interpreted as JSON or TOML depending +on the file ending '.json' or '.toml' respectively. +Inline dataset specifications can be replaced by input read from stdin. + +If the JSON/TOML files are mixed, then the output type (JSON or TOML) will be +determined by the type of the first file. 
+)"; +} + +int main(int argc, char const **argv) +{ + convert_json_toml::run_application( + argc, argv, print_help_message); +} From dca7c1817b8e8c96bddca32ee92b6e59309bba29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 9 Dec 2025 15:45:43 +0100 Subject: [PATCH 36/36] Create merge script in parallel json output --- src/IO/JSON/JSONIOHandlerImpl.cpp | 70 ++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 4a2a96da4c..49511587c5 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -37,7 +37,11 @@ #include "openPMD/backend/Writable.hpp" #include "openPMD/toolkit/ExternalBlockStorage.hpp" +#if openPMD_USE_FILESYSTEM_HEADER +#include +#endif #include +#include #include #include @@ -2581,7 +2585,7 @@ merge the .json files somehow (no tooling provided for this (yet)). readme_file.open( dirpath + "/README.txt", std::ios_base::out | std::ios_base::trunc); - readme_file << readme_msg + 1; + readme_file << &readme_msg[1]; readme_file.close(); if (!readme_file.good() && !filename.fileState->printedReadmeWarningAlready) @@ -2595,6 +2599,70 @@ merge the .json files somehow (no tooling provided for this (yet)). << readme_msg + 1 << "----------" << std::endl; filename.fileState->printedReadmeWarningAlready = true; } + + constexpr char const *merge_script = R"END( +#!/usr/bin/env bash + +set -euo pipefail + +parallel_dir="$(dirname "$BASH_SOURCE")" +parallel_dir="$(cd "$parallel_dir" && pwd)" +serial_dir="${parallel_dir%.json.parallel}" +if [[ "$serial_dir" = "$parallel_dir" ]]; then + serial_dir="$parallel_dir/merged.json" +else + serial_dir="$serial_dir.json" +fi +echo "Will merge files to '$serial_dir'." >&2 +if [[ -e "$serial_dir" ]]; then + echo "Target dir already exists, aborting." >&2 + exit 1 +fi +if ! 
which openpmd-merge-json 2>/dev/null; then
+ echo "Did not find 'openpmd-merge-json' on PATH, aborting." >&2
+ exit 1
+fi
+for file in "$parallel_dir"/mpi_rank_*.json; do
+ echo "@$file"
+done |
+ xargs openpmd-merge-json >"$serial_dir"
+# TODO: xargs will only work up to a certain number of files)END";
+        std::string const merge_script_path = dirpath + "/merge.sh";
+        std::fstream merge_file;
+        merge_file.open( // (re)create the helper script, truncating any old one
+            merge_script_path, std::ios_base::out | std::ios_base::trunc);
+        merge_file << &merge_script[1]; // skip the raw string's leading newline so the shebang is the first byte
+        merge_file.close();
+
+        if (!merge_file.good() && // best effort: a failed write only warns
+            !filename.fileState->printedReadmeWarningAlready) // NOTE(review): shares the README warning flag - confirm that one warning per file is intended
+        {
+            std::cerr
+                << "[Warning] Something went wrong in trying to create "
+                   "merge script at '"
+                << merge_script_path << "'. Will ignore and continue."
+                << std::endl;
+            filename.fileState->printedReadmeWarningAlready = true;
+        }
+
+#if openPMD_USE_FILESYSTEM_HEADER
+        try
+        {
+            std::filesystem::permissions( // perm_options::add only sets the listed bits, existing ones are kept
+                merge_script_path,
+                std::filesystem::perms::owner_exec | // fix: the mask previously OR'ed owner_exec three times
+                    std::filesystem::perms::group_exec |
+                    std::filesystem::perms::others_exec,
+                std::filesystem::perm_options::add);
+        }
+        catch (std::filesystem::filesystem_error const &e) // failing to chmod is not fatal
+        {
+            std::cerr << "Failed setting executable permissions on '"
+                      << merge_script_path
+                      << "', will ignore. Original error was:\n"
+                      << e.what() << std::endl;
+        }
+#endif
     }
 };