diff --git a/CMakeLists.txt b/CMakeLists.txt
index dd3e5a9efd..e5272ae2de 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -703,6 +703,7 @@ set(openPMD_EXAMPLE_NAMES
     10_streaming_read
     12_span_write
     13_write_dynamic_configuration
+    14_toml_template
 )
 set(openPMD_PYTHON_EXAMPLE_NAMES
     2_read_serial
@@ -1327,6 +1328,9 @@ if(openPMD_BUILD_TESTING)
         ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \
             --infile ../samples/git-sample/thetaMode/data_%T.bp \
             --outfile ../samples/git-sample/thetaMode/data%T.json \
+            --outconfig ' \
+                json.attribute.mode = \"short\" \n\
+                json.dataset.mode = \"template_no_warn\"' \
         "
         WORKING_DIRECTORY ${openPMD_RUNTIME_OUTPUT_DIRECTORY}
     )
diff --git a/docs/source/backends/json.rst b/docs/source/backends/json.rst
index bbae92aaf6..9b618c9e03 100644
--- a/docs/source/backends/json.rst
+++ b/docs/source/backends/json.rst
@@ -38,20 +38,47 @@ when working with the JSON backend.
 Datasets and groups have the same namespace, meaning that there may not be a subgroup and a dataset with the same name contained in one group.
 
-Any **openPMD dataset** is a JSON object with three keys:
+Datasets
+........
 
- * ``attributes``: Attributes associated with the dataset. May be ``null`` or not present if no attributes are associated with the dataset.
- * ``datatype``: A string describing the type of the stored data.
- * ``data`` A nested array storing the actual data in row-major manner.
+Datasets can be stored in two modes, either as actual datasets or as dataset templates.
+The mode is selected by the :ref:`JSON/TOML parameter <backendconfig-other>` ``json.dataset.mode`` (resp. ``toml.dataset.mode``) with possible values ``["dataset", "template"]`` (default: ``"dataset"``).
+
+Stored as an actual dataset, an **openPMD dataset** is a JSON object with three JSON keys:
+
+ * ``datatype`` (required): A string describing the type of the stored data.
+ * ``data`` (required): A nested array storing the actual data in row-major manner.
+   The data needs to be consistent with the fields ``datatype`` and ``extent``.
+   Checking whether this key points to an array can be used to distinguish an actual dataset from a dataset template (groups and datasets in turn are distinguished by the presence of the ``datatype`` key).
+ * ``attributes``: Attributes associated with the dataset. May be ``null`` or not present if no attributes are associated with the dataset.
+
+Stored as a **dataset template**, an openPMD dataset is represented by three JSON keys:
+
+ * ``datatype`` (required): As above.
+ * ``extent`` (required): A list of integers, describing the extent of the dataset.
+   This replaces the ``data`` key from the non-template representation.
+ * ``attributes``: As above.
 
-**Attributes** are stored as a JSON object with a key for each attribute.
+This mode stores only the dataset metadata.
+Chunk load/store operations are ignored.
+The additional value ``"template_no_warn"`` selects template mode without warning when chunk load/store operations are skipped.
+
+Attributes
+..........
+
+In order to avoid name clashes, attributes are generally stored within a separate subgroup ``attributes``.
+
+Attributes can be stored in two formats.
+The format is selected by the :ref:`JSON/TOML parameter <backendconfig-other>` ``json.attribute.mode`` (resp. ``toml.attribute.mode``) with possible values ``["long", "short"]`` (default: ``"long"`` for JSON in openPMD 1.*, ``"short"`` for JSON in openPMD 2.* and always for TOML).
+
+Attributes in **long format** store the datatype explicitly, by representing attributes as JSON objects.
 Every such attribute is itself a JSON object with two keys:
 
  * ``datatype``: A string describing the type of the value.
  * ``value``: The actual value of type ``datatype``.
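+For illustration (the attribute name and value are chosen arbitrarily), a one-dimensional dataset with one attribute, stored as an actual dataset with long-format attributes, might look like this:
+
+.. code-block:: json
+
+   {
+     "datatype": "DOUBLE",
+     "data": [1.0, 2.0, 3.0],
+     "attributes": {
+       "unitSI": {
+         "datatype": "DOUBLE",
+         "value": 1.0
+       }
+     }
+   }
+
+The corresponding dataset template would replace the ``data`` key with ``"extent": [3]`` and keep ``datatype`` and ``attributes`` unchanged.
+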
+Attributes in **short format** are stored as just the simple value corresponding to the attribute.
+Since JSON/TOML values are pretty-printed into a human-readable format, byte-level type details can be lost when reading those values again later on (e.g. the distinction between different integer types).
+
 TOML File Format
 ----------------
diff --git a/docs/source/details/backendconfig.rst b/docs/source/details/backendconfig.rst
index f6d15a7ac8..8b60d21a90 100644
--- a/docs/source/details/backendconfig.rst
+++ b/docs/source/details/backendconfig.rst
@@ -104,6 +104,8 @@ The key ``rank_table`` allows specifying the creation of a **rank table**, used
 Configuration Structure per Backend
 -----------------------------------
 
+Please refer to the respective backends' documentation for further information on their configuration.
+
 .. _backendconfig-adios2:
 
 ADIOS2
@@ -231,8 +233,21 @@ The parameters eligible for being passed to flush calls may be configured global
 
 .. _backendconfig-other:
 
-Other backends
-^^^^^^^^^^^^^^
+JSON/TOML
+^^^^^^^^^
 
-Do currently not read the configuration string.
-Please refer to the respective backends' documentations for further information on their configuration.
+A full configuration of the JSON backend:
+
+.. literalinclude:: json.json
+   :language: json
+
+The TOML backend is configured analogously, replacing the ``"json"`` key with ``"toml"``.
+
+All keys found under ``json.dataset`` are applicable globally as well as per dataset.
+Explanation of the individual keys:
+
+* ``json.dataset.mode`` / ``toml.dataset.mode``: One of ``"dataset"`` (default) or ``"template"``.
+  In "dataset" mode, the dataset will be written as an n-dimensional (recursive) array, padded with nulls (JSON) or zeroes (TOML) for missing values.
+  In "template" mode, only the dataset metadata (type, extent and attributes) is stored and no chunks can be written or read (i.e. write/read operations will be skipped).
+  The additional value ``"template_no_warn"`` also selects template mode, but suppresses the warning that is otherwise printed when a chunk write/read is skipped.
+* ``json.attribute.mode`` / ``toml.attribute.mode``: One of ``"long"`` (default in openPMD 1.*) or ``"short"`` (default in openPMD 2.* and generally in TOML).
+  The long format explicitly encodes the attribute type on disk; the short format only writes the actual attribute as a JSON/TOML value, requiring readers to recover the type.
diff --git a/docs/source/details/json.json b/docs/source/details/json.json
new file mode 100644
index 0000000000..c1491f7245
--- /dev/null
+++ b/docs/source/details/json.json
@@ -0,0 +1,10 @@
+{
+  "json": {
+    "dataset": {
+      "mode": "template"
+    },
+    "attribute": {
+      "mode": "short"
+    }
+  }
+}
diff --git a/examples/14_toml_template.cpp b/examples/14_toml_template.cpp
new file mode 100644
index 0000000000..29d19fb53a
--- /dev/null
+++ b/examples/14_toml_template.cpp
@@ -0,0 +1,111 @@
+#include <openPMD/openPMD.hpp>
+
+std::string backendEnding()
+{
+    auto extensions = openPMD::getFileExtensions();
+    if (auto it = std::find(extensions.begin(), extensions.end(), "toml");
+        it != extensions.end())
+    {
+        return *it;
+    }
+    else
+    {
+        // Fallback for buggy old NVidia compiler
+        return "json";
+    }
+}
+
+void write()
+{
+    std::string config = R"(
+{
+  "iteration_encoding": "variable_based",
+  "json": {
+    "dataset": {"mode": "template"},
+    "attribute": {"mode": "short"}
+  },
+  "toml": {
+    "dataset": {"mode": "template"},
+    "attribute": {"mode": "short"}
+  }
+}
+)";
+
+    openPMD::Series writeTemplate(
+        "../samples/tomlTemplate." + backendEnding(),
+        openPMD::Access::CREATE,
+        config);
+    auto iteration = writeTemplate.writeIterations()[0];
+
+    openPMD::Dataset ds{openPMD::Datatype::FLOAT, {5, 5}};
+
+    auto temperature =
+        iteration.meshes["temperature"][openPMD::RecordComponent::SCALAR];
+    temperature.resetDataset(ds);
+
+    auto E = iteration.meshes["E"];
+    E["x"].resetDataset(ds);
+    E["y"].resetDataset(ds);
+    /*
+     * Don't specify datatype and extent for this one to indicate that this
+     * information is not yet known.
+     */
+    E["z"].resetDataset({});
+
+    ds.extent = {10};
+
+    auto electrons = iteration.particles["e"];
+    electrons["position"]["x"].resetDataset(ds);
+    electrons["position"]["y"].resetDataset(ds);
+    electrons["position"]["z"].resetDataset(ds);
+
+    electrons["positionOffset"]["x"].resetDataset(ds);
+    electrons["positionOffset"]["y"].resetDataset(ds);
+    electrons["positionOffset"]["z"].resetDataset(ds);
+    electrons["positionOffset"]["x"].makeConstant(3.14);
+    electrons["positionOffset"]["y"].makeConstant(3.14);
+    electrons["positionOffset"]["z"].makeConstant(3.14);
+
+    ds.dtype = openPMD::determineDatatype<uint64_t>();
+    electrons.particlePatches["numParticles"][openPMD::RecordComponent::SCALAR]
+        .resetDataset(ds);
+    electrons
+        .particlePatches["numParticlesOffset"][openPMD::RecordComponent::SCALAR]
+        .resetDataset(ds);
+    electrons.particlePatches["offset"]["x"].resetDataset(ds);
+    electrons.particlePatches["offset"]["y"].resetDataset(ds);
+    electrons.particlePatches["offset"]["z"].resetDataset(ds);
+    electrons.particlePatches["extent"]["x"].resetDataset(ds);
+    electrons.particlePatches["extent"]["y"].resetDataset(ds);
+    electrons.particlePatches["extent"]["z"].resetDataset(ds);
+}
+
+void read()
+{
+    /*
+     * The config is entirely optional; these settings are also detected
+     * automatically when reading.
+     */
+
+    // std::string config = R"(
+    // {
+    //   "iteration_encoding": "variable_based",
+    //   "toml": {
+    //     "dataset": {"mode": "template"},
+    //     "attribute": {"mode": "short"}
+    //   }
+    // }
+    // )";
+
+    openPMD::Series read(
+        "../samples/tomlTemplate." + backendEnding(),
+        openPMD::Access::READ_LINEAR);
+    read.parseBase();
+    openPMD::helper::listSeries(read);
+}
+
+int main()
+{
+    write();
+    read();
+}
diff --git a/include/openPMD/Dataset.hpp b/include/openPMD/Dataset.hpp
index 0032888541..d79380105a 100644
--- a/include/openPMD/Dataset.hpp
+++ b/include/openPMD/Dataset.hpp
@@ -41,7 +41,24 @@ class Dataset
 public:
     enum : std::uint64_t
     {
-        JOINED_DIMENSION = std::numeric_limits<std::uint64_t>::max()
+        /**
+         * Setting one dimension of the extent as JOINED_DIMENSION means that
+         * the extent along that dimension will be defined by the sum of all
+         * parallel processes' contributions.
+         * Only one dimension can be joined. For store operations, the offset
+         * should be an empty array and the extent should give the actual
+         * extent of the chunk (i.e. the number of joined elements along the
+         * joined dimension, equal to the global extent in all other
+         * dimensions). For more details, refer to
+         * docs/source/usage/workflow.rst.
+         */
+        JOINED_DIMENSION = std::numeric_limits<std::uint64_t>::max(),
+        /**
+         * Some backends (e.g. JSON and TOML in template mode) support the
+         * creation of datasets with undefined datatype and extent.
+         * For that, the extent should be given as {UNDEFINED_EXTENT}.
+         */
+        UNDEFINED_EXTENT = std::numeric_limits<std::uint64_t>::max() - 1
     };
 
     Dataset(Datatype, Extent, std::string options = "{}");
@@ -49,10 +66,15 @@
     /**
     * @brief Constructor that sets the datatype to undefined.
     *
-     * Helpful for resizing datasets, since datatypes need not be given twice.
+     * Helpful for:
+     *
+     * 1. Resizing datasets, since datatypes need not be given twice.
+     * 2. Initializing datasets as undefined, as used by template mode in the
+     *    JSON/TOML backend. In this case, the default (undefined)
+     *    specification for the Extent may be used.
      *
      */
-    Dataset(Extent);
+    Dataset(Extent = {UNDEFINED_EXTENT});
 
     Dataset &extend(Extent newExtent);
 
diff --git a/include/openPMD/IO/AbstractIOHandler.hpp b/include/openPMD/IO/AbstractIOHandler.hpp
index 649252a877..e8e55457eb 100644
--- a/include/openPMD/IO/AbstractIOHandler.hpp
+++ b/include/openPMD/IO/AbstractIOHandler.hpp
@@ -201,6 +201,7 @@ class AbstractIOHandler
 {
     friend class Series;
     friend class ADIOS2IOHandlerImpl;
+    friend class JSONIOHandlerImpl;
     friend class detail::ADIOS2File;
 
 private:
diff --git a/include/openPMD/IO/JSON/JSONIOHandler.hpp b/include/openPMD/IO/JSON/JSONIOHandler.hpp
index 7cb6870f5b..e22fdb93d1 100644
--- a/include/openPMD/IO/JSON/JSONIOHandler.hpp
+++ b/include/openPMD/IO/JSON/JSONIOHandler.hpp
@@ -23,6 +23,7 @@
 
 #include "openPMD/IO/AbstractIOHandler.hpp"
 #include "openPMD/IO/JSON/JSONIOHandlerImpl.hpp"
+#include "openPMD/auxiliary/JSON_internal.hpp"
 
 #if openPMD_HAVE_MPI
 #include <mpi.h>
diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp
index b67ac9138a..38966e3b82 100644
--- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp
+++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp
@@ -180,6 +180,8 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl
         std::string originalExtension);
 #endif
 
+    void init(openPMD::json::TracingJSON config);
+
     ~JSONIOHandlerImpl() override;
 
     void
@@ -265,8 +267,85 @@
     */
     FileFormat m_fileFormat{};
 
+    /*
+     * Under which key do we find the backend configuration?
+     * -> "json" for the JSON backend, "toml" for the TOML backend.
+     */
+    std::string backendConfigKey() const;
+
+    /*
+     * First return value: The location of the backend configuration (either
+     * "json" or "toml").
+     * Second return value: The configuration value found at that place, if
+     * any.
+     */
+    std::pair<std::string, std::optional<openPMD::json::TracingJSON>>
+    getBackendConfig(openPMD::json::TracingJSON &) const;
+
     std::string m_originalExtension;
 
+    /*
+     * Was the config value explicitly user-chosen, or are we still working with
+     * defaults?
+     */
+    enum class SpecificationVia
+    {
+        DefaultValue,
+        Manually
+    };
+
+    /////////////////////
+    // Dataset IO mode //
+    /////////////////////
+
+    enum class DatasetMode
+    {
+        Dataset,
+        Template
+    };
+
+    struct DatasetMode_s
+    {
+        // Initialized in init()
+        DatasetMode m_mode{};
+        SpecificationVia m_specificationVia;
+        bool m_skipWarnings;
+
+        template <typename Mode, typename Via, typename Warn>
+        operator std::tuple<Mode, Via, Warn>()
+        {
+            return std::tuple<Mode, Via, Warn>{
+                m_mode, m_specificationVia, m_skipWarnings};
+        }
+    };
+    DatasetMode_s m_datasetMode;
+    DatasetMode_s retrieveDatasetMode(openPMD::json::TracingJSON &config) const;
+
+    ///////////////////////
+    // Attribute IO mode //
+    ///////////////////////
+
+    enum class AttributeMode
+    {
+        Short,
+        Long
+    };
+
+    struct AttributeMode_s
+    {
+        // Will be modified in init() based on the openPMD version and the
+        // active file format (JSON/TOML)
+        AttributeMode m_mode{};
+        SpecificationVia m_specificationVia = SpecificationVia::DefaultValue;
+    };
+    AttributeMode_s m_attributeMode;
+
+    AttributeMode_s
+    retrieveAttributeMode(openPMD::json::TracingJSON &config) const;
+
     // HELPER FUNCTIONS
 
     // will use the IOHandler to retrieve the correct directory.
@@ -313,7 +392,7 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl
     // essentially: m_i = \prod_{j=0}^{i-1} extent_j
     static Extent getMultiplicators(Extent const &extent);
 
-    static Extent getExtent(nlohmann::json &j);
+    static std::pair<Extent, DatasetMode> getExtent(nlohmann::json &j);
 
     // remove single '/' in the beginning and end of a string
     static std::string removeSlashes(std::string);
@@ -371,7 +450,7 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl
 
     // check whether the json reference contains a valid dataset
     template <typename Param>
-    void verifyDataset(Param const &parameters, nlohmann::json &);
+    DatasetMode verifyDataset(Param const &parameters, nlohmann::json &);
 
     static nlohmann::json platformSpecifics();
 
diff --git a/include/openPMD/RecordComponent.hpp b/include/openPMD/RecordComponent.hpp
index 9072b93a32..ee29a6d7fa 100644
--- a/include/openPMD/RecordComponent.hpp
+++ b/include/openPMD/RecordComponent.hpp
@@ -173,7 +173,7 @@ class RecordComponent : public BaseRecordComponent
      *
      * @return RecordComponent&
      */
-    virtual RecordComponent &resetDataset(Dataset);
+    RecordComponent &resetDataset(Dataset);
 
     uint8_t getDimensionality() const;
     Extent getExtent() const;
diff --git a/include/openPMD/RecordComponent.tpp b/include/openPMD/RecordComponent.tpp
index 542503e806..7beaae8b9d 100644
--- a/include/openPMD/RecordComponent.tpp
+++ b/include/openPMD/RecordComponent.tpp
@@ -21,6 +21,8 @@
 
 #pragma once
 
+#include "openPMD/Datatype.hpp"
+#include "openPMD/Error.hpp"
 #include "openPMD/RecordComponent.hpp"
 #include "openPMD/Span.hpp"
 #include "openPMD/auxiliary/Memory.hpp"
@@ -93,12 +95,42 @@ inline std::shared_ptr<T> RecordComponent::loadChunk(Offset o, Extent e)
 #endif
 }
 
+namespace detail
+{
+    template <typename T>
+    struct do_convert
+    {
+        template <typename FROM>
+        static std::optional<T> call(Attribute &attr)
+        {
+            if constexpr (std::is_convertible_v<FROM, T>)
+            {
+                return std::make_optional<T>(attr.get<FROM>());
+            }
+            else
+            {
+                return std::nullopt;
+            }
+        }
+
+        static constexpr char const *errorMsg = "is_convertible";
+    };
+} // namespace detail
+
 template <typename T>
 inline void
 RecordComponent::loadChunk(std::shared_ptr<T> data, Offset o, Extent e)
 {
     Datatype dtype = determineDatatype(data);
-    if (dtype != getDatatype())
+    /*
+     * For constant components, we implement type conversion, so there is
+     * a separate check further below.
+     * This is especially useful for the short-attribute representation in the
+     * JSON/TOML backends as they might implicitly turn a LONG into an INT in a
+     * constant component. The frontend needs to catch such edge cases.
+     * Ref. `if (constant())` branch.
+     */
+    if (dtype != getDatatype() && !constant())
         if (!isSameInteger<T>(getDatatype()) &&
             !isSameFloatingPoint<T>(getDatatype()) &&
             !isSameComplexFloatingPoint<T>(getDatatype()) &&
@@ -160,10 +192,25 @@ RecordComponent::loadChunk(std::shared_ptr<T> data, Offset o, Extent e)
             for (auto const &dimensionSize : extent)
                 numPoints *= dimensionSize;
 
-            T value = rc.m_constantValue.get<T>();
+            std::optional<T> val =
+                switchNonVectorType<detail::do_convert<T>>(
+                    /* from = */ getDatatype(), rc.m_constantValue);
 
-            T *raw_ptr = data.get();
-            std::fill(raw_ptr, raw_ptr + numPoints, value);
+            if (val.has_value())
+            {
+                T *raw_ptr = data.get();
+                std::fill(raw_ptr, raw_ptr + numPoints, *val);
+            }
+            else
+            {
+                std::string const data_type_str = datatypeToString(getDatatype());
+                std::string const requ_type_str =
+                    datatypeToString(determineDatatype<T>());
+                std::string err_msg =
+                    "Type conversion during chunk loading not possible! ";
+                err_msg += "Data: " + data_type_str + "; Load as: " + requ_type_str;
+                throw error::WrongAPIUsage(err_msg);
+            }
         }
         else
         {
diff --git a/include/openPMD/backend/PatchRecordComponent.hpp b/include/openPMD/backend/PatchRecordComponent.hpp
index 63875b11e2..5c0cf6bfe7 100644
--- a/include/openPMD/backend/PatchRecordComponent.hpp
+++ b/include/openPMD/backend/PatchRecordComponent.hpp
@@ -66,8 +66,6 @@ class PatchRecordComponent : public RecordComponent
 
     PatchRecordComponent &setUnitSI(double);
 
-    PatchRecordComponent &resetDataset(Dataset) override;
-
     uint8_t getDimensionality() const;
     Extent getExtent() const;
 
diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp
index b1046c4602..a432737188 100644
--- a/src/IO/JSON/JSONIOHandlerImpl.cpp
+++ b/src/IO/JSON/JSONIOHandlerImpl.cpp
@@ -25,12 +25,13 @@
 #include "openPMD/Error.hpp"
 #include "openPMD/IO/AbstractIOHandler.hpp"
 #include "openPMD/IO/AbstractIOHandlerImpl.hpp"
-#include "openPMD/IO/Access.hpp"
+#include "openPMD/ThrowError.hpp"
 #include "openPMD/auxiliary/Filesystem.hpp"
 #include "openPMD/auxiliary/JSON_internal.hpp"
 #include "openPMD/auxiliary/Memory.hpp"
 #include "openPMD/auxiliary/StringManip.hpp"
 #include "openPMD/auxiliary/TypeTraits.hpp"
+#include "openPMD/backend/Attribute.hpp"
 #include "openPMD/backend/Writable.hpp"
 
 #include
@@ -64,6 +65,14 @@
     throw std::runtime_error((TEXT));                                         \
     }
 
+namespace JSONDefaults
+{
+    using const_str = char const *const;
+    constexpr const_str openpmd_internal = "__openPMD_internal";
+    constexpr const_str DatasetMode = "dataset_mode";
+    constexpr const_str AttributeMode = "attribute_mode";
+} // namespace JSONDefaults
+
 namespace
 {
     struct DefaultValue
@@ -94,7 +103,11 @@
     }
 #endif
 
-        static constexpr char const *errorMsg = "JSON default value";
+        template <int n>
+        static nlohmann::json call()
+        {
+            return 0;
+        }
     };
 
     /*
@@ -126,34 +139,311 @@ namespace
         }
         return *accum_ptr;
     }
+
+    void warnUnusedJson(openPMD::json::TracingJSON const &jsonConfig)
+    {
+        auto shadow = jsonConfig.invertShadow();
+        if (shadow.size() > 0)
+        {
+            switch (jsonConfig.originallySpecifiedAs)
+            {
+            case openPMD::json::SupportedLanguages::JSON:
+                std::cerr << "Warning: parts of the backend configuration for "
+                             "JSON/TOML backend remain unused:\n"
+                          << shadow << '\n';
+                break;
+            case openPMD::json::SupportedLanguages::TOML: {
+                auto asToml = openPMD::json::jsonToToml(shadow);
+                std::cerr << "Warning: parts of the backend configuration for "
+                             "JSON/TOML backend remain unused:\n"
+                          << json::format_toml(asToml) << '\n';
+                break;
+            }
+            }
+        }
+    }
+
+    // Does the same as datatypeToString(), but this makes sure that we don't
+    // accidentally change the JSON schema by modifying datatypeToString()
+    std::string jsonDatatypeToString(Datatype dt)
+    {
+        switch (dt)
+        {
+            using DT = Datatype;
+        case DT::CHAR:
+            return "CHAR";
+        case DT::UCHAR:
+            return "UCHAR";
+        case DT::SCHAR:
+            return "SCHAR";
+        case DT::SHORT:
+            return "SHORT";
+        case DT::INT:
+            return "INT";
+        case DT::LONG:
+            return "LONG";
+        case DT::LONGLONG:
+            return "LONGLONG";
+        case DT::USHORT:
+            return "USHORT";
+        case DT::UINT:
+            return "UINT";
+        case DT::ULONG:
+            return "ULONG";
+        case DT::ULONGLONG:
+            return "ULONGLONG";
+        case DT::FLOAT:
+            return "FLOAT";
+        case DT::DOUBLE:
+            return "DOUBLE";
+        case DT::LONG_DOUBLE:
+            return "LONG_DOUBLE";
+        case DT::CFLOAT:
+            return "CFLOAT";
+        case DT::CDOUBLE:
+            return "CDOUBLE";
+        case DT::CLONG_DOUBLE:
+            return "CLONG_DOUBLE";
+        case DT::STRING:
+            return "STRING";
+        case DT::VEC_CHAR:
+            return "VEC_CHAR";
+        case DT::VEC_SHORT:
+            return "VEC_SHORT";
+        case DT::VEC_INT:
+            return "VEC_INT";
+        case DT::VEC_LONG:
+            return "VEC_LONG";
+        case DT::VEC_LONGLONG:
+            return "VEC_LONGLONG";
+        case DT::VEC_UCHAR:
+            return "VEC_UCHAR";
+        case DT::VEC_USHORT:
+            return "VEC_USHORT";
+        case DT::VEC_UINT:
+            return "VEC_UINT";
+        case DT::VEC_ULONG:
+            return "VEC_ULONG";
+        case DT::VEC_ULONGLONG:
+            return "VEC_ULONGLONG";
+        case DT::VEC_FLOAT:
+            return "VEC_FLOAT";
+        case DT::VEC_DOUBLE:
+            return "VEC_DOUBLE";
+        case DT::VEC_LONG_DOUBLE:
+            return "VEC_LONG_DOUBLE";
+        case DT::VEC_CFLOAT:
+            return "VEC_CFLOAT";
+        case DT::VEC_CDOUBLE:
+            return "VEC_CDOUBLE";
+        case DT::VEC_CLONG_DOUBLE:
+            return "VEC_CLONG_DOUBLE";
+        case DT::VEC_SCHAR:
+            return "VEC_SCHAR";
+        case DT::VEC_STRING:
+            return "VEC_STRING";
+        case DT::ARR_DBL_7:
+            return "ARR_DBL_7";
+        case DT::BOOL:
+            return "BOOL";
+        case DT::UNDEFINED:
+            return "UNDEFINED";
+        }
+        return "Unreachable!";
+    }
+} // namespace
+
+auto JSONIOHandlerImpl::retrieveDatasetMode(
+    openPMD::json::TracingJSON &config) const -> DatasetMode_s
+{
+    // start with / copy from current config
+    auto res = m_datasetMode;
+    DatasetMode &ioMode = res.m_mode;
+    SpecificationVia &specificationVia = res.m_specificationVia;
+    bool &skipWarnings = res.m_skipWarnings;
+    if (auto [configLocation, maybeConfig] = getBackendConfig(config);
+        maybeConfig.has_value())
+    {
+        auto jsonConfig = maybeConfig.value();
+        if (jsonConfig.json().contains("dataset"))
+        {
+            auto datasetConfig = jsonConfig["dataset"];
+            if (datasetConfig.json().contains("mode"))
+            {
+                auto modeOption = openPMD::json::asLowerCaseStringDynamic(
+                    datasetConfig["mode"].json());
+                if (!modeOption.has_value())
+                {
+                    throw error::BackendConfigSchema(
+                        {configLocation, "dataset", "mode"},
+                        "Invalid value of non-string type (accepted values "
+                        "are 'dataset', 'template' and 'template_no_warn').");
+                }
+                auto mode = modeOption.value();
+                if (mode == "dataset")
+                {
+                    ioMode = DatasetMode::Dataset;
+                    specificationVia = SpecificationVia::Manually;
+                }
+                else if (mode == "template")
+                {
+                    ioMode = DatasetMode::Template;
+                    specificationVia = SpecificationVia::Manually;
+                }
+                else if (mode == "template_no_warn")
+                {
+                    ioMode = DatasetMode::Template;
+                    specificationVia = SpecificationVia::Manually;
+                    skipWarnings = true;
+                }
+                else
+                {
+                    throw error::BackendConfigSchema(
+                        {configLocation, "dataset", "mode"},
"mode"}, + "Invalid value: '" + mode + + "' (accepted values are 'dataset' and 'template'."); + } + } + } + } + return res; +} + +auto JSONIOHandlerImpl::retrieveAttributeMode( + openPMD::json::TracingJSON &config) const -> AttributeMode_s +{ + // start with / copy from current config + auto res = m_attributeMode; + AttributeMode &mode = res.m_mode; + SpecificationVia &specificationVia = res.m_specificationVia; + if (auto [configLocation, maybeConfig] = getBackendConfig(config); + maybeConfig.has_value()) + { + auto jsonConfig = maybeConfig.value(); + if (jsonConfig.json().contains("attribute")) + { + auto attributeConfig = jsonConfig["attribute"]; + if (attributeConfig.json().contains("mode")) + { + auto modeOption = openPMD::json::asLowerCaseStringDynamic( + attributeConfig["mode"].json()); + if (!modeOption.has_value()) + { + throw error::BackendConfigSchema( + {configLocation, "mode"}, + "Invalid value of non-string type (accepted values are " + "'dataset' and 'template'."); + } + auto modeCfg = modeOption.value(); + if (modeCfg == "short") + { + mode = AttributeMode::Short; + specificationVia = SpecificationVia::Manually; + } + else if (modeCfg == "long") + { + mode = AttributeMode::Long; + specificationVia = SpecificationVia::Manually; + } + else + { + throw error::BackendConfigSchema( + {configLocation, "attribute", "mode"}, + "Invalid value: '" + modeCfg + + "' (accepted values are 'short' and 'long'."); + } + } + } + } + return res; +} + +std::string JSONIOHandlerImpl::backendConfigKey() const +{ + switch (m_fileFormat) + { + case FileFormat::Json: + return "json"; + case FileFormat::Toml: + return "toml"; + } + throw std::runtime_error("Unreachable!"); +} + +std::pair> +JSONIOHandlerImpl::getBackendConfig(openPMD::json::TracingJSON &config) const +{ + std::string configLocation = backendConfigKey(); + if (config.json().contains(configLocation)) + { + return std::make_pair( + std::move(configLocation), config[configLocation]); + } + else + { + return std::make_pair(std::move(configLocation), std::nullopt); + } +} + JSONIOHandlerImpl::JSONIOHandlerImpl( AbstractIOHandler *handler, - // NOLINTNEXTLINE(performance-unnecessary-value-param) - [[maybe_unused]] openPMD::json::TracingJSON config, + openPMD::json::TracingJSON config, FileFormat format, std::string originalExtension) : AbstractIOHandlerImpl(handler) , m_fileFormat{format} , m_originalExtension{std::move(originalExtension)} -{} +{ + init(std::move(config)); +} #if openPMD_HAVE_MPI JSONIOHandlerImpl::JSONIOHandlerImpl( AbstractIOHandler *handler, MPI_Comm comm, - // NOLINTNEXTLINE(performance-unnecessary-value-param) - [[maybe_unused]] openPMD::json::TracingJSON config, + openPMD::json::TracingJSON config, FileFormat format, std::string originalExtension) : AbstractIOHandlerImpl(handler) , m_communicator{comm} , m_fileFormat{format} , m_originalExtension{std::move(originalExtension)} -{} +{ + init(std::move(config)); +} #endif +void JSONIOHandlerImpl::init(openPMD::json::TracingJSON config) +{ + // set the defaults + switch (m_fileFormat) + { + case FileFormat::Json: + // Set the attribute mode to Long for now, needs to be evaluated + // again when creating a new file, since the openPMD version might + // be specified via Series::setOpenPMD() after initialization of the + // JSON backend. 
+        m_attributeMode.m_mode = AttributeMode::Long;
+        m_datasetMode.m_mode = DatasetMode::Dataset;
+        break;
+    case FileFormat::Toml:
+        m_attributeMode.m_mode = AttributeMode::Short;
+        m_datasetMode.m_mode = DatasetMode::Dataset;
+        break;
+    }
+
+    // now modify according to config
+    m_datasetMode = retrieveDatasetMode(config);
+    m_attributeMode = retrieveAttributeMode(config);
+
+    if (auto [_, backendConfig] = getBackendConfig(config);
+        backendConfig.has_value())
+    {
+        (void)_;
+        warnUnusedJson(backendConfig.value());
+    }
+}
+
 JSONIOHandlerImpl::~JSONIOHandlerImpl() = default;
 
 std::future<void> JSONIOHandlerImpl::flush()
@@ -179,6 +469,25 @@ void JSONIOHandlerImpl::createFile(
         access::write(m_handler->m_backendAccess),
         "[JSON] Creating a file in read-only mode is not possible.");
 
+    /*
+     * Need to resolve this later than init() since the openPMD version might
+     * be specified after the creation of the IOHandler.
+     */
+    if (m_attributeMode.m_specificationVia == SpecificationVia::DefaultValue)
+    {
+        switch (m_fileFormat)
+        {
+        case FileFormat::Json:
+            m_attributeMode.m_mode =
+                m_handler->m_standard >= OpenpmdStandard::v_2_0_0
+                ? AttributeMode::Short
+                : AttributeMode::Long;
+            break;
+        default:
+            break;
+        }
+    }
+
     if (!writable->written)
     {
         std::string name = parameters.name + m_originalExtension;
@@ -292,6 +601,25 @@ void JSONIOHandlerImpl::createDataset(
             "JSON",
             "Joined Arrays currently only supported in ADIOS2");
     }
 
+    openPMD::json::TracingJSON config = openPMD::json::parseOptions(
+        parameter.options, /* considerFiles = */ false);
+    // Retrieves mode from dataset-specific configuration, falls back to
+    // global value if not defined
+    auto [localMode, _, skipWarnings] = retrieveDatasetMode(config);
+    (void)_;
+    // No use in introducing logic to skip warnings only for one particular
+    // dataset. If warnings are skipped, then they are skipped consistently.
+    // Use |= since `false` is the default value and we don't wish to reset
+    // the flag.
+    m_datasetMode.m_skipWarnings |= skipWarnings;
+
+    parameter.warnUnusedParameters(
+        config,
+        backendConfigKey(),
+        "Warning: parts of the dataset-specific backend configuration for "
+        "JSON/TOML backend remain unused");
+
     if (!writable->written)
     {
         /* Sanitize name */
@@ -308,24 +636,49 @@
         }
         setAndGetFilePosition(writable, name);
         auto &dset = jsonVal[name];
-        dset["datatype"] = datatypeToString(parameter.dtype);
-        auto extent = parameter.extent;
-        switch (parameter.dtype)
+        dset["datatype"] = jsonDatatypeToString(parameter.dtype);
+
+        switch (localMode)
         {
-        case Datatype::CFLOAT:
-        case Datatype::CDOUBLE:
-        case Datatype::CLONG_DOUBLE: {
-            extent.push_back(2);
+        case DatasetMode::Dataset: {
+            auto extent = parameter.extent;
+            switch (parameter.dtype)
+            {
+            case Datatype::CFLOAT:
+            case Datatype::CDOUBLE:
+            case Datatype::CLONG_DOUBLE: {
+                extent.push_back(2);
+                break;
+            }
+            default:
+                break;
+            }
+            if (parameter.extent.size() != 1 ||
+                parameter.extent[0] != Dataset::UNDEFINED_EXTENT)
+            {
+                // TOML does not support nulls, so initialize with zero
+                dset["data"] = initializeNDArray(
+                    extent,
+                    m_fileFormat == FileFormat::Json ? std::optional<Datatype>{}
+                                                     : parameter.dtype);
+            }
             break;
         }
-        default:
+        case DatasetMode::Template:
+            if (parameter.extent != Extent{0} &&
+                parameter.extent[0] != Dataset::UNDEFINED_EXTENT)
+            {
+                dset["extent"] = parameter.extent;
+            }
+            else
+            {
+                // no-op
+                // If the extent is empty or undefined, don't bother
+                // writing it.
+                // The datatype is written above anyway.
+            }
+            break;
+        }
-        // TOML does not support nulls, so initialize with zero
-        dset["data"] = initializeNDArray(
-            extent,
-            m_fileFormat == FileFormat::Json ? std::optional<Datatype>()
-                                             : parameter.dtype);
 
         writable->written = true;
         m_dirty.emplace(file);
     }
@@ -364,9 +717,11 @@ void JSONIOHandlerImpl::extendDataset(
 
     refreshFileFromParent(writable);
     auto &j = obtainJsonContents(writable);
+    DatasetMode localIOMode;
     try
     {
-        auto datasetExtent = getExtent(j);
+        Extent datasetExtent;
+        std::tie(datasetExtent, localIOMode) = getExtent(j);
         VERIFY_ALWAYS(
             datasetExtent.size() == parameters.extent.size(),
             "[JSON] Cannot change dimensionality of a dataset")
@@ -383,28 +738,40 @@
         throw std::runtime_error(
             "[JSON] The specified location contains no valid dataset");
     }
-    auto extent = parameters.extent;
-    auto datatype = stringToDatatype(j["datatype"].get<std::string>());
-    switch (datatype)
+
+    switch (localIOMode)
     {
-    case Datatype::CFLOAT:
-    case Datatype::CDOUBLE:
-    case Datatype::CLONG_DOUBLE: {
-        extent.push_back(2);
-        break;
+    case DatasetMode::Dataset: {
+        auto extent = parameters.extent;
+        auto datatype = stringToDatatype(j["datatype"].get<std::string>());
+        switch (datatype)
+        {
+        case Datatype::CFLOAT:
+        case Datatype::CDOUBLE:
+        case Datatype::CLONG_DOUBLE: {
+            extent.push_back(2);
+            break;
+        }
+        default:
+            // nothing to do
+            break;
+        }
+        // TOML does not support nulls, so initialize with zero
+        nlohmann::json newData = initializeNDArray(
+            extent,
+            m_fileFormat == FileFormat::Json ? std::optional<Datatype>{}
+                                             : datatype);
+        nlohmann::json &oldData = j["data"];
+        mergeInto(newData, oldData);
+        j["data"] = newData;
     }
-    default:
-        // nothing to do
-        break;
+    break;
+    case DatasetMode::Template: {
+        j["extent"] = parameters.extent;
+    }
+    break;
     }
-    // TOML does not support nulls, so initialize with zero
-    nlohmann::json newData = initializeNDArray(
-        extent,
-        m_fileFormat == FileFormat::Json ? std::optional<Datatype>()
-                                         : datatype);
-    nlohmann::json &oldData = j["data"];
-    mergeInto(newData, oldData);
-    j["data"] = newData;
+
     writable->written = true;
 }
@@ -700,7 +1067,7 @@ void JSONIOHandlerImpl::openDataset(
     *parameters.dtype =
         Datatype(stringToDatatype(datasetJson["datatype"].get<std::string>()));
 
-    *parameters.extent = getExtent(datasetJson);
+    *parameters.extent = getExtent(datasetJson).first;
     writable->written = true;
 }
@@ -883,7 +1250,21 @@ void JSONIOHandlerImpl::writeDataset(
     auto file = refreshFileFromParent(writable);
 
     auto &j = obtainJsonContents(writable);
 
-    verifyDataset(parameters, j);
+    switch (verifyDataset(parameters, j))
+    {
+    case DatasetMode::Dataset:
+        break;
+    case DatasetMode::Template:
+        if (!m_datasetMode.m_skipWarnings)
+        {
+            std::cerr
+                << "[JSON/TOML backend: Warning] Trying to write data to a "
+                   "template dataset. Will skip."
+                << '\n';
+            m_datasetMode.m_skipWarnings = true;
+        }
+        return;
+    }
 
     switchType<DatasetWriter>(parameters.dtype, j, parameters);
 
@@ -919,30 +1300,334 @@ void JSONIOHandlerImpl::writeAttribute(
     }
     nlohmann::json value;
     switchType<AttributeWriter>(parameter.dtype, value, parameter.resource);
-    (*jsonVal)[filePosition->id]["attributes"][parameter.name] = {
-        {"datatype", datatypeToString(parameter.dtype)}, {"value", value}};
+    switch (m_attributeMode.m_mode)
+    {
+    case AttributeMode::Long:
+        (*jsonVal)[filePosition->id]["attributes"][parameter.name] = {
+            {"datatype", jsonDatatypeToString(parameter.dtype)},
+            {"value", value}};
+        break;
+    case AttributeMode::Short:
+        // short form
+        (*jsonVal)[filePosition->id]["attributes"][parameter.name] = value;
+        break;
+    }
     writable->written = true;
     m_dirty.emplace(file);
 }
 
+namespace
+{
+    struct FillWithZeroes
+    {
+        template <typename T>
+        static void call(void *ptr, Extent const &extent)
+        {
+            T *casted = static_cast<T *>(ptr);
+            size_t flattenedExtent = std::accumulate(
+                extent.begin(),
+                extent.end(),
+                size_t(1),
+                [](size_t left, size_t right) { return left * right; });
+            std::fill_n(casted, flattenedExtent, T{});
+        }
+
+        static constexpr char const *errorMsg =
+            "[JSON Backend] Fill with zeroes.";
+    };
+} // namespace
+
 void JSONIOHandlerImpl::readDataset(
     Writable *writable, Parameter<Operation::READ_DATASET> &parameters)
 {
     refreshFileFromParent(writable);
     setAndGetFilePosition(writable);
     auto &j = obtainJsonContents(writable);
-    verifyDataset(parameters, j);
+    DatasetMode localMode = verifyDataset(parameters, j);
 
-    try
+    switch (localMode)
     {
-        switchType<DatasetReader>(parameters.dtype, j["data"], parameters);
+    case DatasetMode::Template:
+        std::cerr << "[Warning] Cannot read chunks in Template mode of JSON "
+                     "backend. Will fill with zeroes instead."
+                  << '\n';
+        switchNonVectorType<FillWithZeroes>(
+            parameters.dtype, parameters.data.get(), parameters.extent);
+        return;
+    case DatasetMode::Dataset:
+        try
+        {
+            switchType<DatasetReader>(parameters.dtype, j["data"], parameters);
+        }
+        catch (json::basic_json::type_error &)
+        {
+            throw error::ReadError(
+                error::AffectedObject::Dataset,
+                error::Reason::UnexpectedContent,
+                "JSON",
+                "The given path does not contain a valid dataset.");
+        }
+        break;
     }
-    catch (json::basic_json::type_error &)
+}
+
+namespace
+{
+    /*
+     * While the short attribute representation is more easily human-readable
+     * (and ultimately also closer to the idea of JSON), this means that
+     * recovering the actual datatype of an attribute is now more difficult.
+     * The functions in this anonymous namespace take care of doing that.
+     */
+
+    /*
+     * Input: Element type `T` that has already been resolved and a JSON value
+     * `j` containing a flat array with elements of type `T`.
+     * Output: An openPMD Attribute containing that array.
+     */
+    template <typename T>
+    Attribute recoverVectorAttributeFromJson(nlohmann::json const &j)
     {
-        throw std::runtime_error(
-            "[JSON] The given path does not contain a valid dataset.");
+        if (!j.is_array())
+        {
+            throw std::runtime_error(
+                "[JSON backend: recoverVectorAttributeFromJson] Internal "
+                "control flow error.");
+        }
+
+        if (j.size() == 7 &&
+            (std::is_same_v<T, nlohmann::json::number_float_t> ||
+             std::is_same_v<T, nlohmann::json::number_integer_t> ||
+             std::is_same_v<T, nlohmann::json::number_unsigned_t>))
+        {
+            /*
+             * The JSON value does not contain enough information to distinguish
+             * ARRAY_DOUBLE_7 from other VECTOR types. Return the array type if
+             * it applies, the frontend must deal with correctly converting
+             * to vector types when needed.
+             */
+            std::array<double, 7> res;
+            for (size_t i = 0; i < 7; ++i)
+            {
+                res[i] = j[i].get<double>();
+            }
+            return res;
+        }
+        else
+        {
+            std::vector<T> res;
+            res.reserve(j.size());
+            for (auto const &i : j)
+            {
+                res.push_back(i.get<T>());
+            }
+            return res;
+        }
+    }
+
+    /*
+     * Input: A JSON array whose first element has been found to be some numeric
+     * type.
+     *
+     * We now need to decide if the array has type unsigned, integer or
+     * float. All elements need to be inspected for this since the first element
+     * might be `1`, but the third might be `-3.14`, and we need a datatype
+     * generic enough to represent all elements.
+     *
+     * Output: That datatype as instance of the nlohmann::json::value_t enum.
+     */
+    nlohmann::json::value_t unifyNumericType(nlohmann::json const &j)
+    {
+        if (!j.is_array() || j.empty())
+        {
+            throw std::runtime_error(
+                "[JSON backend: unifyNumericType] Internal "
+                "control flow error.");
+        }
+        auto dtypeRanking = [](nlohmann::json::value_t dtype) -> unsigned {
+            switch (dtype)
+            {
+            case nlohmann::json::value_t::number_unsigned:
+                return 0;
+            case nlohmann::json::value_t::number_integer:
+                return 1;
+            case nlohmann::json::value_t::number_float:
+                return 2;
+            default:
+                throw std::runtime_error(
+                    "[JSON backend] Encountered vector with mixed number and "
+                    "non-number datatypes.");
+            }
+        };
+        auto higherDtype =
+            [&dtypeRanking](
+                nlohmann::json::value_t dt1,
+                nlohmann::json::value_t dt2) -> nlohmann::json::value_t {
+            if (dtypeRanking(dt1) > dtypeRanking(dt2))
+            {
+                return dt1;
+            }
+            else
+            {
+                return dt2;
+            }
+        };
+
+        nlohmann::json::value_t res = j[0].type();
+        for (size_t i = 1; i < j.size(); ++i)
+        {
+            res = higherDtype(res, j[i].type());
+        }
+        return res;
+    }
+
+    /* Input: A JSON array `j`, additionally its name for use in error messages.
+     * Output: The array as an openPMD Attribute with an adequately recovered
+     * datatype.
+     */
+    Attribute recoverVectorAttributeFromJson(
+        nlohmann::json const &j, std::string const &nameForErrorMessages)
+    {
+        if (j.empty())
+        {
+#if 0 // probably no need to warn here
+            std::cerr << "Cannot recover datatype of empty vector without "
+                         "explicit type annotation for attribute '"
+                      << nameForErrorMessages
+                      << "'. Will continue with VEC_INT datatype."
+                      << '\n';
+#endif
+            /*
+             * Since an empty array's datatype cannot be recovered without
+             * type annotations, we need to use some type.
+             * In that case, use integers.
+             */
+            return std::vector<int>{};
+        }
+
+        auto valueType = j[0].type();
+        /*
+         * If the vector is of numeric type, it might happen that the
+         * first entry is an integer, but a later entry is a float.
+         * We need to pick the most generic datatype in that case.
+         */
+        if (valueType == nlohmann::json::value_t::number_float ||
+            valueType == nlohmann::json::value_t::number_unsigned ||
+            valueType == nlohmann::json::value_t::number_integer)
+        {
+            valueType = unifyNumericType(j);
+        }
+        switch (valueType)
+        {
+        case nlohmann::json::value_t::null:
+            throw error::ReadError(
+                error::AffectedObject::Attribute,
+                error::Reason::UnexpectedContent,
+                "JSON",
+                "Attribute must not be null: '" + nameForErrorMessages + "'.");
+        case nlohmann::json::value_t::object:
+            throw error::ReadError(
+                error::AffectedObject::Attribute,
+                error::Reason::UnexpectedContent,
+                "JSON",
+                "Invalid contained datatype (object) "
+                "inside vector-type attribute: '" +
+                    nameForErrorMessages + "'.");
+        case nlohmann::json::value_t::array:
+            throw error::ReadError(
+                error::AffectedObject::Attribute,
+                error::Reason::UnexpectedContent,
+                "JSON",
+                "Invalid contained datatype (array) "
+                "inside vector-type attribute: '" +
+                    nameForErrorMessages + "'.");
+        case nlohmann::json::value_t::string:
+            return recoverVectorAttributeFromJson<std::string>(j);
+        case nlohmann::json::value_t::boolean:
+            throw error::ReadError(
+                error::AffectedObject::Attribute,
+                error::Reason::UnexpectedContent,
+                "JSON",
+                "Attribute must not be vector of bool: "
+                "'" +
+                    nameForErrorMessages + "'.");
+        case nlohmann::json::value_t::number_integer:
+            return recoverVectorAttributeFromJson<
+                nlohmann::json::number_integer_t>(j);
+        case nlohmann::json::value_t::number_unsigned:
+            return recoverVectorAttributeFromJson<
+                nlohmann::json::number_unsigned_t>(j);
+        case nlohmann::json::value_t::number_float:
+            return recoverVectorAttributeFromJson<
+                nlohmann::json::number_float_t>(j);
+        case nlohmann::json::value_t::binary:
+            throw error::ReadError(
+                error::AffectedObject::Attribute,
+                error::Reason::UnexpectedContent,
+                "JSON",
+                "Attribute must not have binary type: "
+                "'" +
+                    nameForErrorMessages + "'.");
+        case nlohmann::json::value_t::discarded:
+            throw std::runtime_error(
+                "Internal JSON parser datatype leaked into JSON "
+                "value.");
+        }
+        throw std::runtime_error("Unreachable!");
+    }
+
+    /*
+     * Read a shorthand-type JSON attribute into an openPMD attribute,
+     * recovering the datatype from the JSON value.
+     * Note that precise datatype-preserving roundtrips are not possible due to
+     * JSON not encoding byte-level type details.
+     */
+    Attribute recoverAttributeFromJson(
+        nlohmann::json const &j, std::string const &nameForErrorMessages)
+    {
+        // @todo use ReadError once it's mainlined
+        switch (j.type())
+        {
+        case nlohmann::json::value_t::null:
+            throw error::ReadError(
+                error::AffectedObject::Attribute,
+                error::Reason::UnexpectedContent,
+                "JSON",
+                "Attribute must not be null: '" + nameForErrorMessages + "'.");
+        case nlohmann::json::value_t::object:
+            throw error::ReadError(
+                error::AffectedObject::Attribute,
+                error::Reason::UnexpectedContent,
+                "JSON",
+                "Shorthand-style attribute must not be an "
+                "object: '" +
+                    nameForErrorMessages + "'.");
+        case nlohmann::json::value_t::array:
+            return recoverVectorAttributeFromJson(j, nameForErrorMessages);
+        case nlohmann::json::value_t::string:
+            return j.get<std::string>();
+        case nlohmann::json::value_t::boolean:
+            return j.get<bool>();
+        case nlohmann::json::value_t::number_integer:
+            return j.get<nlohmann::json::number_integer_t>();
+        case nlohmann::json::value_t::number_unsigned:
+            return j.get<nlohmann::json::number_unsigned_t>();
+        case nlohmann::json::value_t::number_float:
+            return j.get<nlohmann::json::number_float_t>();
+        case nlohmann::json::value_t::binary:
+            throw error::ReadError(
+                error::AffectedObject::Attribute,
+                error::Reason::UnexpectedContent,
+                "JSON",
+                "Attribute must not have binary type: '" +
+                    nameForErrorMessages + "'.");
+        case nlohmann::json::value_t::discarded:
+            throw std::runtime_error(
+                "Internal JSON parser datatype leaked into JSON value.");
+        }
+        throw std::runtime_error("Unreachable!");
+    }
+} // namespace
 
 void JSONIOHandlerImpl::readAttribute(
     Writable *writable, Parameter<Operation::READ_ATT> &parameters)
@@ -968,9 +1653,19 @@
     auto &j = jsonLoc[name];
     try
     {
-        *parameters.dtype =
-            Datatype(stringToDatatype(j["datatype"].get<std::string>()));
-        switchType<AttributeReader>(*parameters.dtype, j["value"], parameters);
+        if (j.is_object())
+        {
+            *parameters.dtype =
+                Datatype(stringToDatatype(j["datatype"].get<std::string>()));
+            switchType<AttributeReader>(
+                *parameters.dtype, j["value"], parameters);
+        }
+        else
+        {
+            Attribute attr = recoverAttributeFromJson(j, name);
+            *parameters.dtype = attr.dtype;
+            *parameters.resource = attr.getResource();
+        }
     }
     catch (json::type_error &)
     {
@@ -1196,28 +1891,44 @@ Extent JSONIOHandlerImpl::getMultiplicators(Extent const &extent)
     return res;
 }
 
-Extent JSONIOHandlerImpl::getExtent(nlohmann::json &j)
+auto JSONIOHandlerImpl::getExtent(nlohmann::json &j)
+    -> std::pair<Extent, DatasetMode>
 {
     Extent res;
-    nlohmann::json *ptr = &j["data"];
-    while (ptr->is_array())
+    DatasetMode ioMode;
+    if (j.contains("data"))
+    {
+        ioMode = DatasetMode::Dataset;
+        nlohmann::json *ptr = &j["data"];
+        while (ptr->is_array())
+        {
+            res.push_back(ptr->size());
+            ptr = &(*ptr)[0];
+        }
+        switch (stringToDatatype(j["datatype"].get<std::string>()))
+        {
+        case Datatype::CFLOAT:
+        case Datatype::CDOUBLE:
+        case Datatype::CLONG_DOUBLE:
+            // the last "dimension" is only the two entries for the complex
+            // number, so remove that again
+            res.erase(res.end() - 1);
+            break;
+        default:
+            break;
+        }
+    }
+    else if (j.contains("extent"))
     {
-        res.push_back(ptr->size());
-        ptr = &(*ptr)[0];
+        ioMode = DatasetMode::Template;
+        res = j["extent"].get<Extent>();
     }
-    switch (stringToDatatype(j["datatype"].get<std::string>()))
+    else
     {
-    case Datatype::CFLOAT:
-    case Datatype::CDOUBLE:
-    case Datatype::CLONG_DOUBLE:
-        // the last "dimension" is only the two entries for the complex
-        // number, so remove that again
-        res.erase(res.end() - 1);
-        break;
-    default:
-        break;
+        ioMode = DatasetMode::Template;
+        res = {Dataset::UNDEFINED_EXTENT};
     }
-    return res;
+    return std::make_pair(std::move(res), ioMode);
 }
 
 std::string
JSONIOHandlerImpl::removeSlashes(std::string s)
@@ -1354,6 +2065,79 @@ JSONIOHandlerImpl::obtainJsonContents(File const &file)
     auto res = serialImplementation();
 #endif
 
+    if (res->contains(JSONDefaults::openpmd_internal))
+    {
+        auto const &openpmd_internal = res->at(JSONDefaults::openpmd_internal);
+
+        // Init dataset mode according to the file's default.
+        // Note that dataset parsing will expect and properly deal with both
+        // representations. The mode detected here will determine the
+        // layout of newly created datasets, e.g. in READ_WRITE or APPEND mode.
+        if (m_datasetMode.m_specificationVia ==
+                SpecificationVia::DefaultValue &&
+            openpmd_internal.contains(JSONDefaults::DatasetMode))
+        {
+            auto modeOption = openPMD::json::asLowerCaseStringDynamic(
+                openpmd_internal.at(JSONDefaults::DatasetMode));
+            if (!modeOption.has_value())
+            {
+                std::cerr
+                    << "[JSON/TOML backend] Warning: Invalid value of "
+                       "non-string type at internal meta table for entry '"
+                    << JSONDefaults::DatasetMode
+                    << "'. Will ignore and continue.\n";
+            }
+            else if (modeOption.value() == "dataset")
+            {
+                m_datasetMode.m_mode = DatasetMode::Dataset;
+            }
+            else if (modeOption.value() == "template")
+            {
+                m_datasetMode.m_mode = DatasetMode::Template;
+            }
+            else
+            {
+                std::cerr << "[JSON/TOML backend] Warning: Invalid value '"
+                          << modeOption.value()
+                          << "' at internal meta table for entry '"
+                          << JSONDefaults::DatasetMode
+                          << "'. Will ignore and continue." << '\n';
+            }
+        }
+
+        // Same for attribute mode
+        if (m_attributeMode.m_specificationVia ==
+                SpecificationVia::DefaultValue &&
+            openpmd_internal.contains(JSONDefaults::AttributeMode))
+        {
+            auto modeOption = openPMD::json::asLowerCaseStringDynamic(
+                openpmd_internal.at(JSONDefaults::AttributeMode));
+            if (!modeOption.has_value())
+            {
+                std::cerr
+                    << "[JSON/TOML backend] Warning: Invalid value of "
+                       "non-string type at internal meta table for entry '"
+                    << JSONDefaults::AttributeMode
+                    << "'. Will ignore and continue." << '\n';
+            }
+            else if (modeOption.value() == "long")
+            {
+                m_attributeMode.m_mode = AttributeMode::Long;
+            }
+            else if (modeOption.value() == "short")
+            {
+                m_attributeMode.m_mode = AttributeMode::Short;
+            }
+            else
+            {
+                std::cerr << "[JSON/TOML backend] Warning: Invalid value '"
+                          << modeOption.value()
+                          << "' at internal meta table for entry '"
+                          << JSONDefaults::AttributeMode
+                          << "'. Will ignore and continue." << '\n';
+            }
+        }
+    }
     m_jsonVals.emplace(file, res);
     return res;
 }
@@ -1379,7 +2163,30 @@
auto JSONIOHandlerImpl::putJsonContents(
         return it;
     }
 
-    (*it->second)["platform_byte_widths"] = platformSpecifics();
+    switch (m_datasetMode.m_mode)
+    {
+    case DatasetMode::Dataset:
+        (*it->second)["platform_byte_widths"] = platformSpecifics();
+        (*it->second)[JSONDefaults::openpmd_internal]
+                     [JSONDefaults::DatasetMode] = "dataset";
+        break;
+    case DatasetMode::Template:
+        (*it->second)[JSONDefaults::openpmd_internal]
+                     [JSONDefaults::DatasetMode] = "template";
+        break;
+    }
+
+    switch (m_attributeMode.m_mode)
+    {
+    case AttributeMode::Short:
+        (*it->second)[JSONDefaults::openpmd_internal]
+                     [JSONDefaults::AttributeMode] = "short";
+        break;
+    case AttributeMode::Long:
+        (*it->second)[JSONDefaults::openpmd_internal]
+                     [JSONDefaults::AttributeMode] = "long";
+        break;
+    }
 
     auto writeSingleFile = [this, &it](std::string const &writeThisFile) {
         auto [fh, _, fh_with_precision] =
@@ -1496,6 +2303,7 @@
merge the .json files somehow (no tooling provided for this (yet)).
 #else
     serialImplementation();
 #endif
+
     if (unsetDirty)
     {
         m_dirty.erase(filename);
@@ -1582,8 +2390,8 @@ bool JSONIOHandlerImpl::isDataset(nlohmann::json const &j)
     {
         return false;
     }
-    auto i = j.find("data");
-    return i != j.end() && i.value().is_array();
+    auto i = j.find("datatype");
+    return i != j.end() && i.value().is_string();
 }
 
 bool JSONIOHandlerImpl::isGroup(nlohmann::json::const_iterator const &it)
@@ -1594,21 +2402,24 @@ bool JSONIOHandlerImpl::isGroup(nlohmann::json::const_iterator const &it)
     {
         return false;
     }
-    auto i = j.find("data");
-    return i == j.end() || !i.value().is_array();
+
+    auto i = j.find("datatype");
+    return i == j.end() || !i.value().is_string();
 }
 
 template <typename Param>
-void JSONIOHandlerImpl::verifyDataset(
-    Param const &parameters, nlohmann::json &j)
+auto JSONIOHandlerImpl::verifyDataset(
+    Param const &parameters, nlohmann::json &j) -> DatasetMode
 {
     VERIFY_ALWAYS(
         isDataset(j),
         "[JSON] Specified dataset does not exist or is not a dataset.");
 
+    DatasetMode res;
     try
     {
-        auto datasetExtent = getExtent(j);
+        Extent datasetExtent;
+        std::tie(datasetExtent, res) = getExtent(j);
         VERIFY_ALWAYS(
             datasetExtent.size() == parameters.extent.size(),
             "[JSON] Read/Write request does not fit the dataset's dimension");
@@ -1630,6 +2441,7 @@
         throw std::runtime_error(
            "[JSON] The given path does not contain a valid dataset.");
     }
+    return res;
 }
 
 nlohmann::json JSONIOHandlerImpl::platformSpecifics()
@@ -1655,7 +2467,7 @@ nlohmann::json JSONIOHandlerImpl::platformSpecifics()
         Datatype::BOOL};
     for (auto it = std::begin(datatypes); it != std::end(datatypes); it++)
     {
-        res[datatypeToString(*it)] = toBytes(*it);
+        res[jsonDatatypeToString(*it)] = toBytes(*it);
     }
     return res;
 }
@@ -1711,7 +2523,7 @@ nlohmann::json JSONIOHandlerImpl::CppToJSON<T>::operator()(const T &val)
 }
 
 template <typename T>
-nlohmann::json JSONIOHandlerImpl::CppToJSON<std::vector<T> >::operator()(
+nlohmann::json JSONIOHandlerImpl::CppToJSON<std::vector<T>>::operator()(
     const std::vector<T> &v)
 {
     nlohmann::json j;
@@ -1724,7 +2536,7 @@
 }
 
 template <typename T, int n>
-nlohmann::json JSONIOHandlerImpl::CppToJSON<std::array<T, n> >::operator()(
+nlohmann::json JSONIOHandlerImpl::CppToJSON<std::array<T, n>>::operator()(
     const std::array<T, n> &v)
 {
     nlohmann::json j;
@@ -1743,7 +2555,7 @@
 T JSONIOHandlerImpl::JsonToCpp<T>::operator()(nlohmann::json const &json)
 }
 
 template <typename T>
-std::vector<T> JSONIOHandlerImpl::JsonToCpp<std::vector<T> >::operator()(
+std::vector<T> JSONIOHandlerImpl::JsonToCpp<std::vector<T>>::operator()(
     nlohmann::json const &json)
 {
     std::vector<T> v;
@@ -1756,7 +2568,7 @@
 }
 
 template <typename T, int n>
-std::array<T, n> JSONIOHandlerImpl::JsonToCpp<std::array<T, n> >::operator()(
+std::array<T, n> JSONIOHandlerImpl::JsonToCpp<std::array<T, n>>::operator()(
     nlohmann::json const &json)
 {
     std::array<T, n> a;
diff --git a/src/RecordComponent.cpp b/src/RecordComponent.cpp
index 0387268514..fc17909fc6 100644
--- a/src/RecordComponent.cpp
+++ b/src/RecordComponent.cpp
@@ -104,15 +104,20 @@ RecordComponent &RecordComponent::resetDataset(Dataset d)
         rc.m_hasBeenExtended = true;
     }
 
-    if (d.dtype == Datatype::UNDEFINED)
+    if (d.extent.empty())
+        throw std::runtime_error("Dataset extent must be at least 1D.");
+    if (d.empty())
     {
-        throw error::WrongAPIUsage(
-            "[RecordComponent] Must set specific datatype.");
+        if (d.dtype != Datatype::UNDEFINED)
+        {
+            return makeEmpty(std::move(d));
+        }
+        else
+        {
+            rc.m_dataset = std::move(d);
+            return *this;
+        }
     }
-    // if( d.extent.empty() )
-    //     throw std::runtime_error("Dataset extent must be at least 1D.");
-    if (d.empty())
-        return makeEmpty(std::move(d));
 
     rc.m_isEmpty = false;
     if (written())
diff --git a/src/Series.cpp b/src/Series.cpp
index cfd92f84e7..792be0555f 100644
--- a/src/Series.cpp
+++ b/src/Series.cpp
@@ -3238,19 +3238,13 @@ auto Series::currentSnapshot() const
     if (series.iterations.containsAttribute("snapshot"))
     {
         auto const &attribute = series.iterations.getAttribute("snapshot");
-        switch (attribute.dtype)
+        auto res = attribute.getOptional<vec_t>();
+        if (res.has_value())
         {
-        case Datatype::ULONGLONG:
-        case Datatype::VEC_ULONGLONG: {
-            auto const &vec = attribute.get<std::vector<unsigned long long>>();
-            return vec_t{vec.begin(), vec.end()};
+            return res.value();
         }
-        case Datatype::ULONG:
-        case Datatype::VEC_ULONG: {
-            auto const &vec = attribute.get<std::vector<unsigned long>>();
-            return vec_t{vec.begin(), vec.end()};
-        }
-        default: {
+        else
+        {
             std::stringstream s;
             s << "Unexpected datatype for '/data/snapshot': " << attribute.dtype
               << " (expected a vector of integer, found " +
@@ -3262,7 +3256,6 @@ auto Series::currentSnapshot() const
                 {},
                 s.str());
         }
-        }
     }
     else
     {
diff --git a/src/backend/PatchRecordComponent.cpp b/src/backend/PatchRecordComponent.cpp
index af19923fad..2ac202e44a 100644
--- a/src/backend/PatchRecordComponent.cpp
+++ b/src/backend/PatchRecordComponent.cpp
@@ -34,23 +34,6 @@ PatchRecordComponent &PatchRecordComponent::setUnitSI(double usi)
     return *this;
 }
 
-PatchRecordComponent &PatchRecordComponent::resetDataset(Dataset d)
-{
-    if (written())
-        throw std::runtime_error(
-            "A Records Dataset can not (yet) be changed after it has been "
-            "written.");
-    if (d.extent.empty())
-        throw std::runtime_error("Dataset extent must be at least 1D.");
-    if (d.empty())
-        throw std::runtime_error(
-            "Dataset extent must not be zero in any dimension.");
-
-    get().m_dataset = std::move(d);
-    setDirty(true);
-    return *this;
-}
-
 uint8_t PatchRecordComponent::getDimensionality() const
 {
     return 1;
diff --git a/test/JSONTest.cpp b/test/JSONTest.cpp
index 161f1fa3a3..067919bd89 100644
--- a/test/JSONTest.cpp
+++ b/test/JSONTest.cpp
@@ -1,6 +1,7 @@
 #include "openPMD/auxiliary/JSON.hpp"
 #include "openPMD/Error.hpp"
 #include "openPMD/auxiliary/JSON_internal.hpp"
+#include "openPMD/helper/list_series.hpp"
 #include "openPMD/openPMD.hpp"
 
 #include
@@ -9,6 +10,7 @@
 #include
 #include
 #include
+#include <toml.hpp>
 #include
 #include
 
@@ -306,3 +308,203 @@ TEST_CASE("variableBasedModifiedSnapshot", "[auxiliary]")
 
     testRead(std::vector<int>{1, 2, 3, 4, 5});
 }
+
+namespace auxiliary
+{
+template <typename Callable, typename AccumulatorTuple>
+void test_matrix_impl(Callable &callable, AccumulatorTuple tuple)
+{
+    std::apply(callable, std::move(tuple));
+}
+
+template <
+    typename Callable,
+    typename AccumulatorTuple,
+    typename Arg,
+    typename... Args>
+void test_matrix_impl(
+    Callable &callable,
+    AccumulatorTuple tuple,
+    std::vector<Arg> const &arg,
+    std::vector<Args> const &...args)
+{
+    for (auto &val : arg)
+    {
+        test_matrix_impl(
+            callable, std::tuple_cat(tuple, std::tuple{val}), args...);
+    }
+}
+
+template <typename Callable, typename... Args>
+void test_matrix(Callable &&callable, std::vector<Args> const &...matrix)
+{
+    // no std::forward, callable is called multiple times, so the impl takes
+    // a simple reference
+    test_matrix_impl(callable, std::tuple<>(), matrix...);
+}
+} // namespace auxiliary
+
+void json_short_modes(
+    std::optional<bool> short_attributes,
+    std::optional<bool> template_datasets,
+    std::string const &standardVersion,
+    std::string const &backend,
+    unsigned int *name_counter)
+{
+    nlohmann::json config = nlohmann::json::object();
+    if (short_attributes.has_value())
+    {
+        config[backend]["attribute"]["mode"] =
"short" : "long"; + } + if (template_datasets.has_value()) + { + config[backend]["dataset"]["mode"] = + *template_datasets ? "template" : "dataset"; + } + std::string name = "../samples/json_short_modes/test" + + std::to_string((*name_counter)++) + "." + backend; + + auto config_str = [&]() { + std::stringstream res; + res << config; + return res.str(); + }(); + Series output(name, Access::CREATE, config_str); + output.setOpenPMD(standardVersion); + auto iteration = output.writeIterations()[0]; + + auto default_configured = iteration.meshes["default_configured"]; + Dataset ds1(Datatype::INT, {5}); + default_configured.resetDataset(ds1); + + auto explicitly_templated = iteration.meshes["explicitly_templated"]; + Dataset ds2 = ds1; + ds2.options = + R"({")" + backend + R"(": {"dataset": {"mode": "template"}}})"; + explicitly_templated.resetDataset(ds2); + + auto explicitly_not_templated = + iteration.meshes["explicitly_not_templated"]; + Dataset ds3 = ds1; + ds3.options = R"({")" + backend + R"(": {"dataset": {"mode": "dataset"}}})"; + explicitly_not_templated.resetDataset(ds3); + + auto undefined_dataset = iteration.meshes["undefined_dataset"]; + Dataset d4(Datatype::UNDEFINED, {Dataset::UNDEFINED_EXTENT}); + undefined_dataset.resetDataset(d4); + + output.close(); + + bool expect_template_datasets = template_datasets.value_or(false); + bool expect_short_attributes = short_attributes.value_or( + backend == "toml" || standardVersion == "2.0.0"); + + nlohmann::json resulting_dataset = [&]() { + std::fstream handle; + handle.open(name, std::ios_base::binary | std::ios_base::in); + if (backend == "json") + { + nlohmann::json res; + handle >> res; + return res; + } + else + { + auto toml_val = toml::parse(handle, name); + return json::tomlToJson(toml_val); + } + }(); + + if (expect_short_attributes) + { + REQUIRE( + resulting_dataset["attributes"]["openPMD"] == + nlohmann::json::string_t{standardVersion}); + REQUIRE( + resulting_dataset["__openPMD_internal"]["attribute_mode"] == + nlohmann::json::string_t{"short"}); + } + else + { + REQUIRE( + resulting_dataset["attributes"]["openPMD"] == + nlohmann::json{{"datatype", "STRING"}, {"value", standardVersion}}); + REQUIRE( + resulting_dataset["__openPMD_internal"]["attribute_mode"] == + nlohmann::json::string_t{"long"}); + } + + auto verify_full_dataset = [&](nlohmann::json const &j) { + REQUIRE(j["datatype"] == "INT"); + if (backend == "json") + { + nlohmann::json null; + REQUIRE( + j["data"] == + nlohmann::json::array_t{null, null, null, null, null}); + } + else + { + REQUIRE(j["data"] == nlohmann::json::array_t{0, 0, 0, 0, 0}); + } + // `data` key, `datatype` key, and `attributes` key + REQUIRE(j.size() == 3); + }; + auto verify_template_dataset = [](nlohmann::json const &j) { + REQUIRE(j["datatype"] == "INT"); + REQUIRE(j["extent"] == nlohmann::json::array_t{5}); + // `extent` key, `datatype` key, and `attributes` key + REQUIRE(j.size() == 3); + }; + + // Undefined datasets write neither `extent` nor `data` key, so they are + // not distinguished between template and nontemplate mode. 
+    REQUIRE(
+        resulting_dataset["data"]["0"]["meshes"]["undefined_dataset"]
+                         ["datatype"] == nlohmann::json::string_t{"UNDEFINED"});
+    REQUIRE(
+        resulting_dataset["data"]["0"]["meshes"]["undefined_dataset"].size() ==
+        2);
+    if (expect_template_datasets)
+    {
+        REQUIRE(
+            resulting_dataset["__openPMD_internal"]["dataset_mode"] ==
+            nlohmann::json::string_t{"template"});
+        verify_template_dataset(
+            resulting_dataset["data"]["0"]["meshes"]["default_configured"]);
+    }
+    else
+    {
+        REQUIRE(
+            resulting_dataset["__openPMD_internal"]["dataset_mode"] ==
+            nlohmann::json::string_t{"dataset"});
+        verify_full_dataset(
+            resulting_dataset["data"]["0"]["meshes"]["default_configured"]);
+    }
+    verify_template_dataset(
+        resulting_dataset["data"]["0"]["meshes"]["explicitly_templated"]);
+    verify_full_dataset(
+        resulting_dataset["data"]["0"]["meshes"]["explicitly_not_templated"]);
+
+    Series read(name, Access::READ_ONLY);
+    helper::listSeries(read);
+}
+
+TEST_CASE("json_short_modes")
+{
+    unsigned int name_counter = 0;
+    ::auxiliary::test_matrix(
+        &json_short_modes,
+        std::vector<std::optional<bool>>{std::nullopt, true, false},
+        std::vector<std::optional<bool>>{std::nullopt, true, false},
+        std::vector<std::string>{getStandardDefault(), getStandardMaximum()},
+        std::vector<std::string>{
+            "json"
+#if !__NVCOMPILER // see https://github.com/ToruNiina/toml11/issues/205
+            ,
+            "toml"
+#endif
+        },
+        std::vector<unsigned int *>{&name_counter});
+}
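A minimal writer-side sketch of the template mode exercised by the test above, using only API calls that appear elsewhere in this patch (the file name is illustrative):

    #include <openPMD/openPMD.hpp>

    int main()
    {
        // Series-level default: write dataset templates and short attributes.
        openPMD::Series series(
            "../samples/template_sketch.json",
            openPMD::Access::CREATE,
            R"({"json": {"dataset": {"mode": "template"},
                         "attribute": {"mode": "short"}}})");
        auto mesh = series.writeIterations()[0].meshes["E"]["x"];
        // Template mode stores only datatype and extent on disk; chunk
        // store/load operations are skipped.
        openPMD::Dataset ds(openPMD::Datatype::FLOAT, {16, 16});
        mesh.resetDataset(ds);
        series.close();
        return 0;
    }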
diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp
index 41de1bdbcd..9c06ac0e3e 100644
--- a/test/SerialIOTest.cpp
+++ b/test/SerialIOTest.cpp
@@ -2,7 +2,7 @@
 #include "openPMD/ChunkInfo_internal.hpp"
 #include "openPMD/Datatype.hpp"
 #include "openPMD/IO/Access.hpp"
-#include "openPMD/UnitDimension.hpp"
+#include "openPMD/auxiliary/JSON.hpp"
 #if openPMD_USE_INVASIVE_TESTS
 #define OPENPMD_private public:
 #define OPENPMD_protected public:
@@ -1276,13 +1276,24 @@ TEST_CASE("particle_patches", "[serial]")
     }
 }
 
-inline void dtype_test(const std::string &backend)
+inline void dtype_test(
+    const std::string &backend,
+    std::optional<std::string> activateTemplateMode = {})
 {
     bool test_long_double = backend != "json" && backend != "toml";
     bool test_long_long = (backend != "json") || sizeof(long long) <= 8;
     {
-        Series s = Series("../samples/dtype_test." + backend, Access::CREATE);
-
+        Series s = activateTemplateMode.has_value()
+            ? Series(
+                  "../samples/dtype_test." + backend,
+                  Access::CREATE,
+                  activateTemplateMode.value())
+            :
+            // test TOML long attribute mode by default
+            Series(
+                "../samples/dtype_test." + backend,
+                Access::CREATE,
+                R"({"toml":{"attribute":{"mode":"long"}}})");
         char c = 'c';
         s.setAttribute("char", c);
         unsigned char uc = 'u';
@@ -1403,8 +1414,12 @@ inline void dtype_test(const std::string &backend)
         }
     }
 
-    Series s = Series("../samples/dtype_test." + backend, Access::READ_ONLY);
-
+    Series s = activateTemplateMode.has_value()
+        ? Series(
+              "../samples/dtype_test." + backend,
+              Access::READ_ONLY,
+              activateTemplateMode.value())
+        : Series("../samples/dtype_test." + backend, Access::READ_ONLY);
     REQUIRE(s.getAttribute("char").get<char>() == 'c');
     REQUIRE(s.getAttribute("uchar").get<unsigned char>() == 'u');
     REQUIRE(s.getAttribute("schar").get<signed char>() == 's');
@@ -1474,6 +1489,10 @@ inline void dtype_test(const std::string &backend)
     REQUIRE(s.getAttribute("bool").get<bool>() == true);
     REQUIRE(s.getAttribute("boolF").get<bool>() == false);
 
+    if (activateTemplateMode.has_value())
+    {
+        return;
+    }
     // same implementation types (not necessary aliases) detection
 #if !defined(_MSC_VER)
     REQUIRE(s.getAttribute("short").dtype == Datatype::SHORT);
@@ -1546,6 +1565,17 @@ TEST_CASE("dtype_test", "[serial]")
     {
         dtype_test(t);
     }
+    dtype_test("json", R"(
+{
+    "json": {
+        "dataset": {
+            "mode": "template"
+        },
+        "attribute": {
+            "mode": "short"
+        }
+    }
+})");
     if (auto extensions = getFileExtensions();
         std::find(extensions.begin(), extensions.end(), "toml") !=
         extensions.end())
@@ -1554,6 +1584,17 @@ TEST_CASE("dtype_test", "[serial]")
          * testing it here.
          */
         dtype_test("toml");
+        dtype_test("toml", R"(
+{
+    "toml": {
+        "dataset": {
+            "mode": "template"
+        },
+        "attribute": {
+            "mode": "short"
+        }
+    }
+})");
     }
 }
@@ -1566,12 +1607,13 @@ struct ReadFromAnyType
     }
 };
 
-inline void write_test(const std::string &backend)
+inline void write_test(
+    const std::string &backend,
+    std::string jsonCfg = "{}",
+    bool test_rank_table = true)
 {
-#ifdef _WIN32
-    std::string jsonCfg = "{}";
-#else
-    std::string jsonCfg = R"({"rank_table": "posix_hostname"})";
+#ifndef _WIN32
+    jsonCfg = json::merge(jsonCfg, R"({"rank_table": "posix_hostname"})");
 
     chunk_assignment::RankMeta compare{
         {0,
          host_info::byMethod(
@@ -1604,8 +1646,10 @@ inline void write_test(const std::string &backend)
             return posOff++;
         });
     std::shared_ptr<uint64_t> positionOffset_local_1(new uint64_t);
-    e_1["positionOffset"]["x"].resetDataset(
-        Dataset(determineDatatype(positionOffset_local_1), {4}));
+    e_1["positionOffset"]["x"].resetDataset(Dataset(
+        determineDatatype(positionOffset_local_1),
+        {4},
+        R"({"json":{"dataset":{"mode":"dataset"}}})"));
 
     for (uint64_t i = 0; i < 4; ++i)
     {
@@ -1691,7 +1735,10 @@ inline void write_test(const std::string &backend)
         variantTypeDataset);
 
 #ifndef _WIN32
-    REQUIRE(read.rankTable(/* collective = */ false) == compare);
+    if (test_rank_table)
+    {
+        REQUIRE(read.rankTable(/* collective = */ false) == compare);
+    }
 #endif
 }
@@ -1699,7 +1746,41 @@ TEST_CASE("write_test", "[serial]")
 {
     for (auto const &t : testedFileExtensions())
     {
-        write_test(t);
+        if (t == "json")
+        {
+            write_test(
+                "template." + t,
+                R"(
+{
+    "json": {
+        "dataset": {
+            "mode": "template"
+        },
+        "attribute": {
+            "mode": "short"
+        }
+    }
+})",
+                false);
+            write_test(
+                t,
+                R"(
+{
+    "json": {
+        "dataset": {
+            "mode": "dataset"
+        },
+        "attribute": {
+            "mode": "short"
+        }
+    }
+})",
+                true);
+        }
+        else
+        {
+            write_test(t);
+        }
         Series list{"../samples/serial_write." + t, Access::READ_ONLY};
         helper::listSeries(list);
     }
@@ -1855,7 +1936,7 @@ inline void fileBased_write_test(const std::string &backend)
     Series o = Series(
         "../samples/subdir/serial_fileBased_write%03T." + backend,
         Access::CREATE,
-        jsonCfg);
+        json::merge(jsonCfg, R"({"toml":{"dataset":{"mode":"dataset"}}})"));
     REQUIRE(
         auxiliary::replace_all(o.myPath().filePath(), "\\", "/") ==
         auxiliary::replace_all(
@@ -7513,7 +7594,10 @@ void groupbased_read_write(std::string const &ext)
     std::string filename = "../samples/groupbased_read_write." + ext;
 
     {
-        Series write(filename, Access::CREATE);
+        Series write(
+            filename,
+            Access::CREATE,
+            R"({"toml":{"dataset":{"mode":"dataset"}}})");
         auto E_x = write.iterations[0].meshes["E"]["x"];
         auto E_y = write.iterations[0].meshes["E"]["y"];
         E_x.resetDataset(ds);
@@ -7528,7 +7612,10 @@ void groupbased_read_write(std::string const &ext)
     }
 
     {
-        Series write(filename, Access::READ_WRITE);
+        Series write(
+            filename,
+            Access::READ_WRITE,
+            R"({"toml":{"dataset":{"mode":"dataset"}}})");
         // create a new iteration
         auto E_x = write.iterations[1].meshes["E"]["x"];
         E_x.resetDataset(ds);
@@ -7574,7 +7661,10 @@ void groupbased_read_write(std::string const &ext)
 
     // check that truncation works correctly
     {
-        Series write(filename, Access::CREATE);
+        Series write(
+            filename,
+            Access::CREATE,
+            R"({"toml":{"dataset":{"mode":"dataset"}}})");
         // create a new iteration
         auto E_x = write.iterations[2].meshes["E"]["x"];
         E_x.resetDataset(ds);
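The json::merge helper included from "openPMD/auxiliary/JSON.hpp" above merges two JSON/TOML configuration strings, keys from the second argument taking precedence, which is how these tests force "dataset" mode on top of a caller-supplied config. A standalone sketch, assuming the documented signature std::string merge(std::string const &defaultValue, std::string const &overwrite):

    #include <openPMD/auxiliary/JSON.hpp>

    #include <iostream>

    int main()
    {
        auto merged = openPMD::json::merge(
            R"({"toml": {"dataset": {"mode": "template"}}})",
            R"({"toml": {"dataset": {"mode": "dataset"}}})");
        // Keys from the second argument win: the merged configuration
        // requests "dataset" mode.
        std::cout << merged << std::endl;
        return 0;
    }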