Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
766cd9e
wip first commit
franzpoeschel Sep 26, 2025
c2e53d5
Some cleaning
franzpoeschel Sep 26, 2025
6a6330f
Hmm maybe usable
franzpoeschel Sep 26, 2025
4f41e60
Quickly use this in the JSON backend
franzpoeschel Sep 26, 2025
7301e94
Better and generalized handling for datatypes
franzpoeschel Sep 29, 2025
ec47d7d
structure for aws
franzpoeschel Sep 29, 2025
c17553c
first untested implementation for S3
franzpoeschel Sep 29, 2025
39679a0
Reordering
franzpoeschel Sep 29, 2025
6ff99e4
continue restructuring
franzpoeschel Sep 29, 2025
1a47575
Some first little MPI awareness
franzpoeschel Sep 29, 2025
97b8020
WIP: Config for external block storage from JSON
franzpoeschel Sep 29, 2025
b2ccf98
Add configuration
franzpoeschel Sep 30, 2025
2a66841
Add option to init AWS API
franzpoeschel Sep 30, 2025
2a1c520
Add verifySSL parameter
franzpoeschel Oct 1, 2025
99669e3
Add TODO comment
franzpoeschel Oct 1, 2025
57a0505
Add meta information object
franzpoeschel Oct 1, 2025
a5d89c1
Prepare reloading ext block storage from old file
franzpoeschel Dec 5, 2025
7b045fe
Reload config when reading from a JSON file
franzpoeschel Dec 5, 2025
6860212
WIP: Read from EBS
franzpoeschel Dec 8, 2025
7a75578
Base implementation for get()
franzpoeschel Dec 8, 2025
aa17b83
Untested read impl
franzpoeschel Dec 8, 2025
ba1d0ff
Basically working reading
franzpoeschel Dec 8, 2025
4381409
cleanup
franzpoeschel Dec 8, 2025
4610ab1
Naming fixes
franzpoeschel Dec 8, 2025
ed239cf
wahhh?
franzpoeschel Dec 8, 2025
eed6024
Fix double initialization of EBS
franzpoeschel Dec 8, 2025
72b41a3
Revert "wahhh?"
franzpoeschel Dec 8, 2025
8fdeb39
Actually use slashes in S3
franzpoeschel Dec 8, 2025
e1295ff
Warn on unused restart config
franzpoeschel Dec 8, 2025
81ab21d
Reapply "wahhh?"
franzpoeschel Dec 8, 2025
2440250
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 8, 2025
6e75956
Use rank identifier also in JSON strings
franzpoeschel Dec 9, 2025
c371105
Extract implementation of convert-toml-json to header
franzpoeschel Dec 9, 2025
854bbdb
Implement merging
franzpoeschel Dec 9, 2025
f65ce01
Add merge-json
franzpoeschel Dec 9, 2025
dca7c18
Create merge script in parallel json output
franzpoeschel Dec 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,8 @@ else()
endif()
unset(openPMD_REQUIRED_ADIOS2_COMPONENTS)

find_package(AWSSDK REQUIRED COMPONENTS s3)

# external library: pybind11 (optional)
include(${openPMD_SOURCE_DIR}/cmake/dependencies/pybind11.cmake)

Expand Down Expand Up @@ -434,7 +436,12 @@ set(CORE_SOURCE
src/snapshots/IteratorTraits.cpp
src/snapshots/RandomAccessIterator.cpp
src/snapshots/Snapshots.cpp
src/snapshots/StatefulIterator.cpp)
src/snapshots/StatefulIterator.cpp
src/toolkit/ExternalBlockStorage.cpp
src/toolkit/AwsBuilder.cpp
src/toolkit/Aws.cpp
src/toolkit/StdioBuilder.cpp
src/toolkit/Stdio.cpp)
set(IO_SOURCE
src/IO/AbstractIOHandler.cpp
src/IO/AbstractIOHandlerImpl.cpp
Expand Down Expand Up @@ -562,6 +569,8 @@ if(openPMD_HAVE_ADIOS2)
endif()
endif()

target_link_libraries(openPMD PUBLIC ${AWSSDK_LIBRARIES})

# Runtime parameter and API status checks ("asserts")
if(openPMD_USE_VERIFY)
target_compile_definitions(openPMD PRIVATE openPMD_USE_VERIFY=1)
Expand Down Expand Up @@ -704,6 +713,7 @@ set(openPMD_TEST_NAMES
set(openPMD_CLI_TOOL_NAMES
ls
convert-toml-json
merge-json
)
set(openPMD_PYTHON_CLI_TOOL_NAMES
pipe
Expand Down
193 changes: 145 additions & 48 deletions include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,11 @@
#include "openPMD/auxiliary/JSON_internal.hpp"
#include "openPMD/backend/Variant_internal.hpp"
#include "openPMD/config.hpp"
#include "openPMD/toolkit/ExternalBlockStorage.hpp"

#include <istream>
#include <nlohmann/json.hpp>
#include <variant>
#if openPMD_HAVE_MPI
#include <mpi.h>
#endif
Expand Down Expand Up @@ -153,8 +155,72 @@ void from_json(const nlohmann::json &j, std::complex<T> &p)
}
} // namespace std

namespace openPMD::internal
{
auto jsonDatatypeToString(Datatype dt) -> std::string;

/**
 * Helpers for writing, checking and dispatching on the "datatype" key of a
 * JSON value. Datatype strings are those produced by jsonDatatypeToString().
 */
struct JsonDatatypeHandling
{
    /**
     * Tag `j` with the datatype belonging to T, or verify an existing tag.
     *
     * @tparam T Type whose datatype string is expected under "datatype".
     * @param j JSON value to inspect; gains a "datatype" key if it has none.
     * @return true if the key was newly written or already holds the string
     *         for T, false if it holds a different string.
     */
    template <typename T>
    static auto encodeDatatype(nlohmann::json &j) -> bool
    {
        std::string const expected =
            jsonDatatypeToString(determineDatatype<T>());
        auto found = j.find("datatype");
        if (found == j.end())
        {
            // Not tagged yet: record the datatype and report success.
            j["datatype"] = expected;
            return true;
        }
        return found.value().get<std::string>() == expected;
    }

    /**
     * Read-only counterpart of encodeDatatype().
     *
     * @tparam T_required Type whose datatype string is expected.
     * @param j JSON value to inspect; never modified.
     * @return true only if a "datatype" key exists and holds the string for
     *         T_required; false if the key is missing or differs.
     */
    template <typename T_required>
    static auto checkDatatype(nlohmann::json const &j) -> bool
    {
        std::string const expected =
            jsonDatatypeToString(determineDatatype<T_required>());
        auto found = j.find("datatype");
        return found != j.end() &&
            found.value().get<std::string>() == expected;
    }

    /**
     * Dispatch Functor on the datatype stored in `j`.
     *
     * The string under "datatype" is converted with stringToDatatype() and
     * handed to switchDatasetType<Functor>() together with the forwarded
     * arguments. The result of that call is discarded.
     *
     * @tparam Functor Functor type passed on to switchDatasetType().
     * @param j JSON value carrying the "datatype" key.
     * @param args Arguments forwarded to the dispatched call.
     * @return true if a "datatype" key was found and the dispatch was run,
     *         false if the key is missing (nothing is called in that case).
     */
    template <typename Functor, typename... Args>
    static auto decodeDatatype(nlohmann::json const &j, Args &&...args) -> bool
    {
        auto found = j.find("datatype");
        if (found == j.end())
        {
            return false;
        }
        switchDatasetType<Functor>(
            stringToDatatype(found.value().get<std::string>()),
            std::forward<Args>(args)...);
        return true;
    }
};
} // namespace openPMD::internal

namespace openPMD
{
/*
 * Types that serve as the alternatives of the std::variant underlying
 * JSONIOHandlerImpl::DatasetMode.
 */
namespace dataset_mode_types
{
// Empty tag type for the "Dataset" alternative.
struct Dataset_t
{};
// Empty tag type for the "Template" alternative.
struct Template_t
{};
// Alternative that carries a shared handle to an ExternalBlockStorage.
using External_t = std::shared_ptr<ExternalBlockStorage>;
} // namespace dataset_mode_types

class JSONIOHandlerImpl : public AbstractIOHandlerImpl
{
using json = nlohmann::json;
Expand Down Expand Up @@ -243,42 +309,6 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl

std::future<void> flush();

private:
#if openPMD_HAVE_MPI
std::optional<MPI_Comm> m_communicator;
#endif

using FILEHANDLE = std::fstream;

// map each Writable to its associated file
// contains only the filename, without the OS path
std::unordered_map<Writable *, File> m_files;

std::unordered_map<File, std::shared_ptr<nlohmann::json>> m_jsonVals;

// files that have logically, but not physically been written to
std::unordered_set<File> m_dirty;

/*
* Is set by constructor.
*/
FileFormat m_fileFormat{};

/*
* Under which key do we find the backend configuration?
* -> "json" for the JSON backend, "toml" for the TOML backend.
*/
std::string backendConfigKey() const;

/*
* First return value: The location of the JSON value (either "json" or
* "toml") Second return value: The value that was maybe found at this place
*/
std::pair<std::string, std::optional<openPMD::json::TracingJSON>>
getBackendConfig(openPMD::json::TracingJSON &) const;

std::string m_originalExtension;

/*
* Was the config value explicitly user-chosen, or are we still working with
* defaults?
Expand All @@ -293,17 +323,36 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl
// Dataset IO mode //
/////////////////////

/*
 * Dataset IO mode: a std::variant over the alternative types declared in
 * dataset_mode_types (Dataset_t, Template_t, External_t).
 *
 * NOTE(review): this span is taken from a diff view. The `enum class
 * DatasetMode` line and the bare `Dataset,` / `Template` lines below appear
 * to be the removed side of the diff (the former enum) interleaved with the
 * new struct -- confirm against the actual header.
 */
enum class DatasetMode
struct DatasetMode
: std::variant<
dataset_mode_types::Dataset_t,
dataset_mode_types::Template_t,
dataset_mode_types::External_t>
{
Dataset,
Template
// Short names for the alternative types.
using Dataset_t = dataset_mode_types::Dataset_t;
using Template_t = dataset_mode_types::Template_t;
using External_t = dataset_mode_types::External_t;
// Ready-made instances of the two empty tag alternatives.
constexpr static Dataset_t Dataset{};
constexpr static Template_t Template{};

// Same variant type as the base class, spelled out again as an alias.
using variant_t = std::variant<
dataset_mode_types::Dataset_t,
dataset_mode_types::Template_t,
External_t>;
// Make the variant's assignment operators usable on DatasetMode itself.
using variant_t ::operator=;

// casts needed because of
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90943
// Returns *this as a reference to the underlying variant (const overload).
inline auto as_base() const -> variant_t const &
{
return *this;
}
// Returns *this as a reference to the underlying variant (mutable overload).
inline auto as_base() -> variant_t &
{
return *this;
}
};

// IOMode m_mode{};
// SpecificationVia m_IOModeSpecificationVia =
// SpecificationVia::DefaultValue; bool m_printedSkippedWriteWarningAlready
// = false;

struct DatasetMode_s
{
// Initialized in init()
Expand All @@ -318,8 +367,6 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl
m_mode, m_specificationVia, m_skipWarnings};
}
};
DatasetMode_s m_datasetMode;
DatasetMode_s retrieveDatasetMode(openPMD::json::TracingJSON &config) const;

///////////////////////
// Attribute IO mode //
Expand All @@ -338,8 +385,57 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl
AttributeMode m_mode{};
SpecificationVia m_specificationVia = SpecificationVia::DefaultValue;
};
AttributeMode_s m_attributeMode;

private:
#if openPMD_HAVE_MPI
std::optional<MPI_Comm> m_communicator;
#endif

using FILEHANDLE = std::fstream;

// map each Writable to its associated file
// contains only the filename, without the OS path
std::unordered_map<Writable *, File> m_files;

std::unordered_map<File, std::shared_ptr<nlohmann::json>> m_jsonVals;

// files that have logically, but not physically been written to
std::unordered_set<File> m_dirty;

/*
* Is set by constructor.
*/
FileFormat m_fileFormat{};

/*
* Under which key do we find the backend configuration?
* -> "json" for the JSON backend, "toml" for the TOML backend.
*/
std::string backendConfigKey() const;

/*
* First return value: The location of the JSON value (either "json" or
* "toml"). Second return value: The value found at that location, if any.
*/
std::pair<std::string, std::optional<openPMD::json::TracingJSON>>
getBackendConfig(openPMD::json::TracingJSON &) const;
static std::pair<std::string, std::optional<openPMD::json::TracingJSON>>
getBackendConfig(
openPMD::json::TracingJSON &, std::string const &configLocation);

std::string m_originalExtension;

/*
* In read mode, we can only open the external block storage backend upon
* opening the JSON file, because it contains meta information relevant
* for configuring the backend.
*/
std::optional<openPMD::json::TracingJSON>
m_deferredExternalBlockstorageConfig;
DatasetMode_s m_datasetMode;
DatasetMode_s retrieveDatasetMode(openPMD::json::TracingJSON &config);

AttributeMode_s m_attributeMode;
AttributeMode_s
retrieveAttributeMode(openPMD::json::TracingJSON &config) const;

Expand Down Expand Up @@ -389,7 +485,8 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl
// essentially: m_i = \prod_{j=0}^{i-1} extent_j
static Extent getMultiplicators(Extent const &extent);

static std::pair<Extent, DatasetMode> getExtent(nlohmann::json &j);
static std::pair<Extent, DatasetMode>
getExtent(nlohmann::json &j, DatasetMode const &baseMode);

// remove single '/' in the beginning and end of a string
static std::string removeSlashes(std::string);
Expand Down
3 changes: 3 additions & 0 deletions include/openPMD/Series.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "openPMD/config.hpp"
#include "openPMD/snapshots/Snapshots.hpp"
#include "openPMD/version.hpp"
#include <aws/core/Aws.h>

#if openPMD_HAVE_MPI
#include <mpi.h>
Expand Down Expand Up @@ -239,6 +240,8 @@ namespace internal
std::optional<std::function<AbstractIOHandler *(Series &)>>
m_deferred_initialization = std::nullopt;

std::optional<Aws::SDKOptions> m_manageAwsAPI = std::nullopt;

void close();

#if openPMD_HAVE_MPI
Expand Down
Loading
Loading