diff --git a/include/gul17/DataTree.h b/include/gul17/DataTree.h
new file mode 100644
index 0000000..bb9a27b
--- /dev/null
+++ b/include/gul17/DataTree.h
@@ -0,0 +1,657 @@
+/**
+ * \file DataTree.h
+ * \author Jan Behrens
+ * \date Created on 19 November 2025
+ * \brief Declaration of the DataTree class.
+ *
+ * \copyright Copyright 2018-2025 Deutsches Elektronen-Synchrotron (DESY), Hamburg
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 2.1 of the license, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GUL17_DATA_TREE_H_
+#define GUL17_DATA_TREE_H_
+
+#include
+#include
+#include
+#include
+#include
+
+namespace gul17 {
+
+/**
+ * \addtogroup DataTree_h gul17/DataTree.h
+ * \brief A hierarchical data structure for representing various data types.
+ * @{
+ */
+
+/**
+ * A hierarchical data structure that can represent various data types including
+ * null, boolean, number, string, array, and object.
+ *
+ * The DataTree class can be used to create, manipulate, and access data in a
+ * tree-like structure. It supports dynamic typing and can hold different types
+ * of data at each node.
+ * It can be used to represent data formats such as JSON, YAML, or XML.
+ *
+ * \code
+ * // Create a data tree object
+ * DataTree tree;
+ *
+ * tree["foo"] = "bar"; // String
+ * tree["answer"] = 42; // Number
+ * tree["is_valid"] = true; // Boolean
+ * tree["items"] = DataTree::Array{1, 2, 3}; // Array
+ * tree["config"] = DataTree::Object{ {"key1", "value1"}, {"key2", 42} }; // Object
+ *
+ * tree["items"].push_back(4); // Add an element to the array
+ * tree["config"]["key3"] = 3.14; // Add a key-value pair to the object
+ * \endcode
+ *
+ * \since GUL version x.y.z
+ */
+class DataTree
+{
+public:
+ // Type definitions
+
+    /// Type of an object: string keys mapped to child nodes, ordered by key.
+    using Object = std::map<std::string, DataTree>;
+    /// Type of an array: an ordered list of child nodes.
+    using Array = std::vector<DataTree>;
+    /// Underlying variant type; exactly one alternative is active at any time.
+    using Value = std::variant<
+        std::nullptr_t, // null
+        bool,           // boolean
+        int,            // integer
+        double,         // floating-point
+        std::string,    // string
+        Array,          // array
+        Object          // object
+    >;
+
+ // Constructors
+
+ /**
+ * Create an empty DataTree object (default to an empty object).
+ */
+ DataTree() : value_(Object()) {} // default to an empty object
+
+ /**
+ * Create a DataTree object holding a null value.
+ */
+ DataTree(std::nullptr_t) : value_(nullptr) {}
+
+ /**
+ * Create a DataTree object holding a boolean value.
+ */
+ DataTree(bool b) : value_(b) {}
+
+ /**
+ * Create a DataTree object holding an integer value.
+ */
+ DataTree(int i) : value_(i) {}
+
+ /**
+ * Create a DataTree object holding a floating-point value.
+ */
+ DataTree(double d) : value_(d) {}
+
+ /**
+ * Create a DataTree object holding a string value.
+ */
+ DataTree(const std::string& s) : value_(s) {}
+
+ /**
+ * Create a DataTree object holding a C-style string value.
+ */
+ DataTree(const char* s) : value_(std::string(s)) {}
+
+ /**
+ * Create a DataTree object holding an array value.
+ */
+ DataTree(const Array& a) : value_(a) {}
+
+ /**
+ * Create a DataTree object holding an object value.
+ */
+ DataTree(const Object& o) : value_(o) {}
+
+    // Factory methods
+
+    /**
+     * Return a new node that holds null.
+     */
+    static DataTree make_null() { return DataTree{nullptr}; }
+
+    /**
+     * Return a new node that holds an array without elements.
+     */
+    static DataTree make_array() { return DataTree{Array{}}; }
+
+    /**
+     * Return a new node that holds an object without members.
+     */
+    static DataTree make_object() { return DataTree{Object{}}; }
+
+ // Move constructors
+
+ /**
+ * Create a DataTree object by moving an array into it.
+ */
+ DataTree(Array&& a) : value_(std::move(a)) {}
+
+ /**
+ * Create a DataTree object by moving an object into it.
+ */
+ DataTree(Object&& o) : value_(std::move(o)) {}
+
+ /**
+ * Create a DataTree object by moving a string into it.
+ */
+ DataTree(std::string&& s) : value_(std::move(s)) {}
+
+ /**
+ * Move another DataTree object into this one.
+ */
+ DataTree(DataTree&& other) = default;
+
+ // Copy constructor
+
+ /**
+ * Create a copy of another DataTree object.
+ */
+ DataTree(const DataTree& other) = default;
+
+ // Assignment operators
+
+ /**
+ * Assign another DataTree object to this one.
+ */
+ DataTree& operator=(const DataTree& other) = default;
+
+ /**
+ * Move-assign another DataTree object to this one.
+ */
+ DataTree& operator=(DataTree&& other) = default;
+
+    // Type checking
+
+    /**
+     * Check if the DataTree holds a null value.
+     * \return True if the DataTree is null, false otherwise.
+     */
+    bool is_null() const { return std::holds_alternative<std::nullptr_t>(value_); }
+
+    /**
+     * Check if the DataTree holds a boolean value.
+     * \return True if the DataTree is boolean, false otherwise.
+     */
+    bool is_boolean() const { return std::holds_alternative<bool>(value_); }
+
+    /**
+     * Check if the DataTree holds an integer value.
+     * \return True if the DataTree is integer, false otherwise.
+     */
+    bool is_int() const { return std::holds_alternative<int>(value_); }
+
+    /**
+     * Check if the DataTree holds a floating-point value.
+     * \return True if the DataTree is floating-point, false otherwise.
+     */
+    bool is_double() const { return std::holds_alternative<double>(value_); }
+
+    /**
+     * Check if the DataTree holds a numeric value.
+     * \return True if the DataTree is numeric (integer or floating-point), false otherwise.
+     */
+    bool is_number() const { return is_int() || is_double(); }
+
+    /**
+     * Check if the DataTree holds a string value.
+     * \return True if the DataTree is string, false otherwise.
+     */
+    bool is_string() const { return std::holds_alternative<std::string>(value_); }
+
+    /**
+     * Check if the DataTree holds an array value.
+     * \return True if the DataTree is array, false otherwise.
+     */
+    bool is_array() const { return std::holds_alternative<Array>(value_); }
+
+    /**
+     * Check if the DataTree holds an object value.
+     * \return True if the DataTree is object, false otherwise.
+     */
+    bool is_object() const { return std::holds_alternative<Object>(value_); }
+
+    /**
+     * Check if the DataTree is empty.
+     * A DataTree is considered empty if it is null, or if it is a string, array,
+     * or object that contains no elements. Booleans and numbers are never empty.
+     *
+     * \return True if the DataTree is empty, false otherwise.
+     */
+    bool is_empty() const
+    {
+        if (is_null()) return true;
+        if (const auto* str = std::get_if<std::string>(&value_)) return str->empty();
+        if (const auto* arr = std::get_if<Array>(&value_)) return arr->empty();
+        if (const auto* obj = std::get_if<Object>(&value_)) return obj->empty();
+        return false;
+    }
+
+    /**
+     * Check if the object contains the specified key.
+     * \param key The key to check for.
+     * \return True if the key exists, false otherwise.
+     * \exception std::runtime_error is thrown if the DataTree is not an object.
+     */
+    bool has_key(const std::string& key) const
+    {
+        const auto* obj = std::get_if<Object>(&value_);
+        if (obj == nullptr)
+            throw std::runtime_error("DataTree is not an object");
+        return obj->find(key) != obj->end();
+    }
+
+    /**
+     * Get the number of elements of the array or the number of members of the object.
+     * \return The number of elements in the array or object.
+     * \exception std::runtime_error is thrown if the DataTree is neither an array nor an object.
+     */
+    size_t size() const
+    {
+        if (const auto* arr = std::get_if<Array>(&value_))
+            return arr->size();
+        if (const auto* obj = std::get_if<Object>(&value_))
+            return obj->size();
+
+        throw std::runtime_error("DataTree is neither array nor object");
+    }
+
+    /**
+     * Append a copy of \c val to the end of the array.
+     * Throws a std::runtime_error if the DataTree is not an array.
+     * \param val The value to add.
+     */
+    void push_back(const DataTree& val)
+    {
+        if (!is_array())
+            throw std::runtime_error("DataTree is not an array");
+        std::get<Array>(value_).push_back(val);
+    }
+
+    /**
+     * Append \c val to the end of the array by moving it.
+     * Throws a std::runtime_error if the DataTree is not an array.
+     * \param val The value to add; it is left in a moved-from state.
+     */
+    void emplace_back(DataTree&& val)
+    {
+        if (!is_array())
+            throw std::runtime_error("DataTree is not an array");
+        std::get<Array>(value_).emplace_back(std::move(val));
+    }
+
+    /**
+     * Insert a key-value pair into the object; an existing entry with the same key is overwritten.
+     * Throws a std::runtime_error if the DataTree is not an object.
+     * \param key The key to insert.
+     * \param val The value to insert.
+     */
+    void insert(const std::string& key, const DataTree& val)
+    {
+        if (!is_object())
+            throw std::runtime_error("DataTree is not an object");
+        std::get<Object>(value_).insert_or_assign(key, val);
+    }
+
+    /**
+     * Insert a value into the array in front of the element at the specified index.
+     * An index equal to the array size appends the value.
+     * Throws a std::runtime_error if the DataTree is not an array.
+     * Throws a std::out_of_range if the index is greater than the array size.
+     * \param index The index at which to insert the value.
+     * \param val The value to insert.
+     */
+    void insert(size_t index, const DataTree& val)
+    {
+        if (!is_array())
+            throw std::runtime_error("DataTree is not an array");
+        auto& arr = std::get<Array>(value_);
+        if (index > arr.size())
+            throw std::out_of_range("Index out of range: " + std::to_string(index));
+        arr.insert(arr.begin() + static_cast<Array::difference_type>(index), val);
+    }
+
+    /**
+     * Remove all elements from the array or all members from the object.
+     * Throws a std::runtime_error if the DataTree is neither an array nor an object.
+     */
+    void clear()
+    {
+        if (auto* arr = std::get_if<Array>(&value_))
+            arr->clear();
+        else if (auto* obj = std::get_if<Object>(&value_))
+            obj->clear();
+        else
+            throw std::runtime_error("DataTree is neither array nor object");
+    }
+
+    // Iterator types; iteration is only supported for arrays
+    using iterator = std::vector<DataTree>::iterator;
+    using const_iterator = std::vector<DataTree>::const_iterator;
+
+    // Iterators
+
+    /**
+     * Return an iterator to the beginning of the array.
+     * Throws a std::runtime_error if the DataTree is not an array.
+     * \return An iterator to the first element.
+     */
+    iterator begin()
+    {
+        if (!is_array())
+            throw std::runtime_error("DataTree is not an array");
+        return std::get<Array>(value_).begin();
+    }
+
+    /**
+     * Return an iterator to the end of the array.
+     * Throws a std::runtime_error if the DataTree is not an array.
+     * \return An iterator to the position one past the last element.
+     */
+    iterator end()
+    {
+        if (!is_array())
+            throw std::runtime_error("DataTree is not an array");
+        return std::get<Array>(value_).end();
+    }
+
+    /**
+     * Return a const iterator to the beginning of the array.
+     * Throws a std::runtime_error if the DataTree is not an array.
+     * \return A const iterator to the first element.
+     */
+    const_iterator cbegin() const
+    {
+        if (!is_array())
+            throw std::runtime_error("DataTree is not an array");
+        return std::get<Array>(value_).cbegin();
+    }
+
+    /**
+     * Return a const iterator to the end of the array.
+     * Throws a std::runtime_error if the DataTree is not an array.
+     * \return A const iterator to the position one past the last element.
+     */
+    const_iterator cend() const
+    {
+        if (!is_array())
+            throw std::runtime_error("DataTree is not an array");
+        return std::get<Array>(value_).cend();
+    }
+
+    /// Same as cbegin(); lets a const DataTree be used in a range-based for loop.
+    const_iterator begin() const { return cbegin(); }
+
+    /// Same as cend(); lets a const DataTree be used in a range-based for loop.
+    const_iterator end() const { return cend(); }
+
+    // Accessors with bounds checking
+
+    /**
+     * Get a const reference to the value associated with the specified key in the
+     * object.
+     * \param key The key to look up in the object.
+     * \return A const reference to the corresponding DataTree value.
+     * \exception std::runtime_error is thrown if the DataTree is not an object.
+     * \exception std::out_of_range is thrown if the key does not exist.
+     * \see at(size_t) const for the array counterpart.
+     */
+    const DataTree& at(const std::string& key) const
+    {
+        if (!is_object())
+            throw std::runtime_error("DataTree is not an object");
+        const auto& obj = std::get<Object>(value_);
+        const auto it = obj.find(key);
+        if (it == obj.end())
+            throw std::out_of_range("Key not found in object: " + key);
+        return it->second;
+    }
+
+    /**
+     * Get a reference to the value associated with the specified key in the object.
+     * \param key The key to look up in the object.
+     * \return A reference to the corresponding DataTree value.
+     * \exception std::runtime_error is thrown if the DataTree is not an object.
+     * \exception std::out_of_range is thrown if the key does not exist.
+     */
+    DataTree& at(const std::string& key)
+    {
+        return const_cast<DataTree&>(static_cast<const DataTree&>(*this).at(key));
+    }
+
+    /**
+     * Get a const reference to the value at the specified index in the array.
+     * \param index The index to look up in the array.
+     * \return A const reference to the corresponding DataTree value.
+     * \exception std::runtime_error is thrown if the DataTree is not an array.
+     * \exception std::out_of_range is thrown if the index is out of bounds.
+     */
+    const DataTree& at(size_t index) const
+    {
+        if (!is_array())
+            throw std::runtime_error("DataTree is not an array");
+        const auto& arr = std::get<Array>(value_);
+        if (index >= arr.size())
+            throw std::out_of_range("Index out of range: " + std::to_string(index));
+        return arr[index];
+    }
+
+    /**
+     * Get a reference to the value at the specified index in the array.
+     * \param index The index to look up in the array.
+     * \return A reference to the corresponding DataTree value.
+     * \exception std::runtime_error is thrown if the DataTree is not an array.
+     * \exception std::out_of_range is thrown if the index is out of bounds.
+     */
+    DataTree& at(size_t index)
+    {
+        return const_cast<DataTree&>(static_cast<const DataTree&>(*this).at(index));
+    }
+
+    // Operator[] without bounds checking
+
+    /**
+     * Get a reference to the value associated with the specified key in the object.
+     * If the key does not exist yet, it is inserted with a default-constructed
+     * DataTree (an empty object) as its value.
+     * \param key The key to look up in the object.
+     * \return A reference to the corresponding DataTree value.
+     * \exception std::runtime_error is thrown if the DataTree is not an object.
+     */
+    DataTree& operator[](const std::string& key)
+    {
+        if (!is_object())
+            throw std::runtime_error("DataTree is not an object");
+        return std::get<Object>(value_)[key];
+    }
+
+    /**
+     * Get a const reference to the value associated with the specified key in the
+     * object. A const DataTree cannot be extended, so a missing key is an error.
+     * \param key The key to look up in the object.
+     * \return A const reference to the corresponding DataTree value.
+     * \exception std::runtime_error is thrown if the DataTree is not an object.
+     * \exception std::out_of_range is thrown if the key does not exist.
+     */
+    const DataTree& operator[](const std::string& key) const
+    {
+        return at(key); // never inserts, unlike the non-const overload
+    }
+
+    /**
+     * Get a reference to the array element at \c index (no bounds check).
+     * \param index The index to look up in the array.
+     * \return A reference to the corresponding DataTree value.
+     * \exception std::runtime_error is thrown if the DataTree is not an array.
+     */
+    DataTree& operator[](size_t index)
+    {
+        if (!is_array())
+            throw std::runtime_error("DataTree is not an array");
+        return std::get<Array>(value_)[index];
+    }
+
+    /**
+     * Get a const reference to the array element at \c index (no bounds check).
+     * \param index The index to look up in the array.
+     * \return A const reference to the corresponding DataTree value.
+     * \exception std::runtime_error is thrown if the DataTree is not an array.
+     */
+    const DataTree& operator[](size_t index) const
+    {
+        if (!is_array()) throw std::runtime_error("DataTree is not an array");
+        return std::get<Array>(value_)[index];
+    }
+
+    // Type checking for template types
+
+    /**
+     * Check if the DataTree holds a value of type T.
+     * \tparam T The type to check against; any type that is not an alternative of Value yields false.
+     * \return True if the DataTree holds a value of type T, false otherwise.
+     */
+    template <typename T>
+    bool is() const
+    {
+        if constexpr (std::is_same_v<T, std::nullptr_t>)
+        {
+            return is_null();
+        }
+        else if constexpr (std::is_same_v<T, bool>)
+        {
+            return is_boolean();
+        }
+        else if constexpr (std::is_same_v<T, int>)
+        {
+            return is_int();
+        }
+        else if constexpr (std::is_same_v<T, double>)
+        {
+            return is_double();
+        }
+        else if constexpr (std::is_same_v<T, std::string>)
+        {
+            return is_string();
+        }
+        else if constexpr (std::is_same_v<T, Array>)
+        {
+            return is_array();
+        }
+        else if constexpr (std::is_same_v<T, Object>)
+        {
+            return is_object();
+        }
+        return false;
+    }
+
+    // Conversion
+
+    /**
+     * Convert the DataTree to the specified type T.
+     * Throws a std::bad_variant_access if the conversion is not possible.
+     * \tparam T The type to convert to.
+     * \return The converted value.
+     *
+     * \code
+     * DataTree tree = 42;
+     * int value = tree.as<int>(); // value == 42
+     * \endcode
+     *
+     * Note that implicit conversions are supported for some types:
+     * - int to double and int to boolean (non-zero is true)
+     * - boolean to int
+     * - int/double/boolean/null to string
+     *
+     * It is not possible to convert complex types (array/object) to primitive types.
+     */
+    template <typename T>
+    T as() const
+    {
+        if constexpr (std::is_same_v<T, std::nullptr_t>)
+        {
+            if (is_null()) return std::get<std::nullptr_t>(value_);
+        }
+        else if constexpr (std::is_same_v<T, bool>)
+        {
+            if (is_boolean()) return std::get<bool>(value_);
+            if (is_int()) return std::get<int>(value_) != 0;
+        }
+        else if constexpr (std::is_same_v<T, int>)
+        {
+            if (is_int()) return std::get<int>(value_);
+            if (is_boolean()) return static_cast<int>(std::get<bool>(value_));
+        }
+        else if constexpr (std::is_same_v<T, double>)
+        {
+            if (is_double()) return std::get<double>(value_);
+            if (is_int()) return static_cast<double>(std::get<int>(value_));
+        }
+        else if constexpr (std::is_same_v<T, std::string>)
+        {
+            if (is_string()) return std::get<std::string>(value_);
+            if (is_int()) return std::to_string(std::get<int>(value_));
+            if (is_double()) return std::to_string(std::get<double>(value_));
+            if (is_boolean()) return std::get<bool>(value_) ? "true" : "false";
+            if (is_null()) return "null";
+        }
+        else if constexpr (std::is_same_v<T, Array>)
+        {
+            if (is_array()) return std::get<Array>(value_);
+        }
+        else if constexpr (std::is_same_v<T, Object>)
+        {
+            if (is_object()) return std::get<Object>(value_);
+        }
+
+        throw std::bad_variant_access();
+    }
+
+ // Get underlying value
+
+ /**
+ * Get a mutable reference to the underlying variant; assigning to it replaces the held value directly.
+ * \return A reference to the underlying Value variant.
+ */
+ Value& get_value() { return value_; }
+
+ /**
+ * Get read-only access to the underlying variant, e.g. for use with std::visit.
+ * \return A const reference to the underlying Value variant.
+ */
+ const Value& get_value() const { return value_; }
+
+private:
+ Value value_;
+};
+
+/// @}
+
+} // namespace gul17
+
+#endif // GUL17_DATA_TREE_H_
+
+// vi:ts=4:sw=4:sts=4:et
diff --git a/include/gul17/data_processors.h b/include/gul17/data_processors.h
new file mode 100644
index 0000000..63268c7
--- /dev/null
+++ b/include/gul17/data_processors.h
@@ -0,0 +1,167 @@
+/**
+ * \file data_processors.h
+ * \author Jan Behrens
+ * \date Created on 20 November 2025
+ * \brief Declaration of the data processor utility functions.
+ *
+ * \copyright Copyright 2018-2025 Deutsches Elektronen-Synchrotron (DESY), Hamburg
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 2.1 of the license, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GUL17_DATA_PROCESSORS_H_
+#define GUL17_DATA_PROCESSORS_H_
+
+#include "DataTree.h"
+
+#include "gul17/internal.h"
+
+#include
+
+namespace gul17 {
+
+/**
+ * \addtogroup data_processors_h gul17/data_processors.h
+ * \brief Various data processor utility functions.
+ * @{
+ */
+
+/**
+ * Parse a JSON string and return the corresponding DataTree representation.
+ *
+ * The function parses the input JSON string and constructs a DataTree object
+ * representing the hierarchical structure and data contained in the JSON.
+ * Throws a std::runtime_error if the input string is not valid JSON.
+ *
+ * \code
+ * auto a = from_json_string(R"({"foo": "bar"})"); // a["foo"] holds the string "bar"
+ * \endcode
+ *
+ * \param data The JSON string to be parsed.
+ * \return A DataTree holding the parsed value.
+ * \see to_json_string()
+ *
+ * \since GUL version x.y.z
+ */
+GUL_EXPORT
+DataTree from_json_string(const std::string_view& data);
+
+/**
+ * Serialize a DataTree object to a JSON string.
+ *
+ * The function serializes the given DataTree object into a JSON-formatted string.
+ * The optional \c indent parameter specifies the number of spaces to use for
+ * indentation in the output string (default is 0, meaning no pretty-printing).
+ *
+ * \code
+ * auto a = to_json_string(DataTree::Object{{"foo", "bar"}}); // a == "{\"foo\": \"bar\"}"
+ * \endcode
+ *
+ * \param value The DataTree object to be serialized.
+ * \param indent Number of spaces per nesting level; 0 writes everything on one line.
+ * \return The JSON text.
+ * \see from_json_string()
+ * \since GUL version x.y.z
+ */
+GUL_EXPORT
+std::string to_json_string(const DataTree& value, size_t indent = 0);
+
+/**
+ * Parse an XML string and return the corresponding DataTree representation.
+ *
+ * The function parses the input XML string and constructs a DataTree object
+ * representing the hierarchical structure and data contained in the XML.
+ * Throws a std::runtime_error if the input string is not valid XML.
+ *
+ * \code
+ * auto a = from_xml_string(R"(<root><foo>bar</foo></root>)"); // a["foo"] holds "bar"
+ * \endcode
+ *
+ * \param data The XML string to be parsed.
+ * \return A DataTree holding the content of the document's root element.
+ * \see to_xml_string()
+ *
+ * \since GUL version x.y.z
+ */
+GUL_EXPORT
+DataTree from_xml_string(const std::string_view& data);
+
+/**
+ * Serialize a DataTree object to an XML string.
+ *
+ * The function serializes the given DataTree object into an XML-formatted string.
+ * The optional \c indent parameter specifies the number of spaces to use for
+ * indentation in the output string (default is 0, meaning no pretty-printing).
+ *
+ * \code
+ * auto a = to_xml_string(DataTree::Object{{"foo", "bar"}}); // XML text describing foo = "bar"
+ * \endcode
+ * \param value The DataTree object to be serialized.
+ * \param indent Number of spaces to use for indentation (0 = no pretty-printing).
+ * \param root_tag_name Presumably the tag name of the enclosing root element (default "root").
+ * \return The XML text.
+ * \see from_xml_string()
+ * \since GUL version x.y.z
+ */
+GUL_EXPORT
+std::string to_xml_string(const DataTree& value, size_t indent = 0,
+ const std::string& root_tag_name = "root");
+
+/**
+ * Parse a YAML string and return the corresponding DataTree representation.
+ *
+ * The function parses the input YAML string and constructs a DataTree object
+ * representing the hierarchical structure and data contained in the YAML.
+ * Throws a std::runtime_error if the input string is not valid YAML.
+ *
+ * \code
+ * auto a = from_yaml_string(R"(foo: bar)"); // a["foo"] holds the string "bar"
+ * \endcode
+ *
+ * \param data The YAML string to be parsed.
+ * \return A DataTree holding the parsed document.
+ * \see to_yaml_string()
+ *
+ * \since GUL version x.y.z
+ */
+GUL_EXPORT
+DataTree from_yaml_string(const std::string_view& data);
+
+/**
+ * Serialize a DataTree object to a YAML string.
+ *
+ * The function serializes the given DataTree object into a YAML-formatted string.
+ * The optional \c indent parameter specifies the number of spaces to use for
+ * indentation in the output string (default is 2).
+ *
+ * \code
+ * auto a = to_yaml_string(DataTree::Object{{"foo", "bar"}}); // a == "foo: bar\n"
+ * \endcode
+ *
+ * \param value The DataTree object to be serialized.
+ * \param indent Number of spaces to use for indentation.
+ * \return The YAML text.
+ * \see from_yaml_string()
+ * \since GUL version x.y.z
+ */
+GUL_EXPORT
+std::string to_yaml_string(const DataTree& value, size_t indent = 2);
+
+/// @}
+
+} // namespace gul17
+
+#endif // GUL17_DATA_PROCESSORS_H_
+
+// vi:ts=4:sw=4:sts=4:et
diff --git a/include/gul17/gul.h b/include/gul17/gul.h
index e8e2b58..19f90eb 100644
--- a/include/gul17/gul.h
+++ b/include/gul17/gul.h
@@ -41,6 +41,7 @@
#include "gul17/cat.h"
// #include "gul17/catch.h" not included because it is only useful for unit tests
// #include "gul17/date.h" not included by default to reduce compile times
+#include "gul17/data_processors.h"
#include "gul17/escape.h"
#include "gul17/expected.h"
#include "gul17/finalizer.h"
diff --git a/include/gul17/meson.build b/include/gul17/meson.build
index db7f45e..10f4594 100644
--- a/include/gul17/meson.build
+++ b/include/gul17/meson.build
@@ -4,6 +4,8 @@ standalone_headers = [
'case_ascii.h',
'cat.h',
'date.h',
+ 'data_processors.h',
+ 'DataTree.h',
'escape.h',
'expected.h',
'finalizer.h',
diff --git a/src/data_processors/json_processor.cc b/src/data_processors/json_processor.cc
new file mode 100644
index 0000000..fb02b1b
--- /dev/null
+++ b/src/data_processors/json_processor.cc
@@ -0,0 +1,514 @@
+/**
+ * \file json_processor.cc
+ * \author Jan Behrens
+ * \date Created on 20 November 2025
+ * \brief Implementation of the JSON data processor functions.
+ *
+ * \copyright Copyright 2018-2025 Deutsches Elektronen-Synchrotron (DESY), Hamburg
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 2.1 of the license, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "gul17/data_processors.h"
+#include "gul17/cat.h"
+
+#include
+#include
+
+using gul17::DataTree;
+
+struct JsonDataParser
+{
+ JsonDataParser(const std::string_view& json_str) // stores the view only; the text is not copied
+ : data_(json_str)
+ {}
+
+ DataTree parse() // parses one value from the start of the text; anything after it is not examined
+ {
+ return parse_value();
+ }
+
+private:
+    // Parse whichever JSON value starts at the cursor, dispatching on its first character.
+    DataTree parse_value()
+    {
+        skip_comment();
+        skip_whitespace();
+
+        const char first = current_char();
+        if (first == '{')
+            return parse_object();
+        if (first == '[')
+            return parse_array();
+        if (first == '"')
+            return parse_string();
+        if (first == 't' || first == 'f')
+            return parse_boolean();
+        if (first == 'n')
+            return parse_null();
+        if (first == '-' || (first >= '0' && first <= '9'))
+            return parse_number();
+
+        throw std::runtime_error(gul17::cat("Unexpected character: ", first, " at position ", pos_));
+    }
+
+    // Parse a JSON object; the cursor must be on '{'. Comments may precede keys, ':', ',' and '}'.
+    DataTree parse_object()
+    {
+        expect('{');
+        DataTree::Object obj;
+
+        skip_comment();
+        skip_whitespace();
+        if (current_char() == '}')
+        {
+            advance();
+            return DataTree(std::move(obj));
+        }
+
+        while (true)
+        {
+            skip_comment();
+            skip_whitespace();
+            std::string key = parse_string().as<std::string>();
+
+            skip_comment();
+            skip_whitespace();
+            expect(':');
+            DataTree value = parse_value();
+            obj.emplace(std::move(key), std::move(value)); // a repeated key keeps its first value
+
+            skip_comment();
+            skip_whitespace();
+            if (current_char() == '}')
+                break;
+            expect(',');
+        }
+        advance(); // consume '}'
+        return DataTree(std::move(obj)); // move: avoids copying the whole map
+    }
+
+    // Parse a JSON array; the cursor must be on '['. Comments may surround the elements.
+    DataTree parse_array()
+    {
+        expect('[');
+        DataTree::Array arr;
+
+        skip_comment();
+        skip_whitespace();
+        if (current_char() == ']')
+        {
+            advance();
+            return DataTree(std::move(arr));
+        }
+
+        while (true)
+        {
+            arr.push_back(parse_value());
+
+            skip_comment();
+            skip_whitespace();
+            if (current_char() == ']')
+                break;
+            expect(',');
+        }
+
+        advance(); // consume ']'
+        return DataTree(std::move(arr)); // move: avoids copying every element
+    }
+
+    // Parse a double-quoted JSON string at the cursor and return it as a string node.
+    // Escape sequences are decoded; \uXXXX escapes (including UTF-16 surrogate pairs)
+    // become UTF-8. Throws std::runtime_error on an unterminated string or a bad escape.
+    DataTree parse_string()
+    {
+        expect('"');
+        std::string result;
+
+        // Read exactly four hex digits starting at data_[from]
+        const auto read_hex4 = [this](size_t from) -> unsigned int
+        {
+            if (from + 4 > data_.size())
+                throw std::runtime_error(gul17::cat("Truncated Unicode escape at position ", pos_));
+            unsigned int code = 0;
+            for (size_t i = from; i < from + 4; ++i)
+            {
+                const char h = data_[i];
+                unsigned int digit = 0;
+                if (h >= '0' && h <= '9') digit = static_cast<unsigned int>(h - '0');
+                else if (h >= 'a' && h <= 'f') digit = static_cast<unsigned int>(h - 'a' + 10);
+                else if (h >= 'A' && h <= 'F') digit = static_cast<unsigned int>(h - 'A' + 10);
+                else throw std::runtime_error(gul17::cat("Invalid number format in Unicode escape at position ", pos_));
+                code = code * 16 + digit;
+            }
+            return code;
+        };
+
+        while (true)
+        {
+            if (!has_remaining_chars()) // without this check the loop would never terminate
+                throw std::runtime_error(gul17::cat("Unterminated string at position ", pos_));
+            const char c = current_char();
+            advance();
+            if (c == '"') break;
+            if (c != '\\')
+            {
+                result += c;
+                continue;
+            }
+
+            const char esc = current_char(); // '\0' at the end of the input, rejected below
+            switch (esc)
+            {
+            case '"': result += '"'; break;
+            case '\\': result += '\\'; break;
+            case '/': result += '/'; break;
+            case 'b': result += '\b'; break;
+            case 'f': result += '\f'; break;
+            case 'n': result += '\n'; break;
+            case 'r': result += '\r'; break;
+            case 't': result += '\t'; break;
+            case 'u':
+            {
+                unsigned int ch = read_hex4(pos_ + 1);
+                pos_ += 4; // cursor is now on the last hex digit
+                if (ch >= 0xD800 && ch <= 0xDBFF)
+                {
+                    // High surrogate: must be followed by a \uDC00..\uDFFF low surrogate
+                    const unsigned int low = data_.substr(pos_ + 1, 2) == "\\u" ? read_hex4(pos_ + 3) : 0;
+                    if (low < 0xDC00 || low > 0xDFFF)
+                        throw std::runtime_error(gul17::cat("Invalid Unicode code point at position ", pos_));
+                    ch = 0x10000 + ((ch - 0xD800) << 10) + (low - 0xDC00);
+                    pos_ += 6;
+                }
+                else if (ch >= 0xDC00 && ch <= 0xDFFF) // low surrogate without a preceding high one
+                    throw std::runtime_error(gul17::cat("Invalid Unicode code point at position ", pos_));
+
+                // UTF-8: one lead byte followed by `extra` continuation bytes of 6 bits each
+                const int extra = ch < 0x80 ? 0 : ch < 0x800 ? 1 : ch < 0x10000 ? 2 : 3;
+                static const unsigned char lead[] = { 0x00, 0xC0, 0xE0, 0xF0 };
+                result += static_cast<char>(lead[extra] | (ch >> (6 * extra)));
+                for (int shift = 6 * (extra - 1); shift >= 0; shift -= 6)
+                    result += static_cast<char>(0x80 | ((ch >> shift) & 0x3F));
+                break;
+            }
+            case 'U':
+                throw std::runtime_error(gul17::cat("Unicode escape sequence (\\UXXXXXXXX) not supported at position ", pos_));
+            default:
+                throw std::runtime_error(gul17::cat("Invalid escape sequence: ", esc, " at position ", pos_));
+            }
+            advance();
+        }
+        return DataTree(std::move(result));
+    }
+
+ DataTree parse_boolean()
+ {
+ if (data_.compare(pos_, 4, "true") == 0)
+ {
+ pos_ += 4;
+ return DataTree(true);
+ }
+ else if (data_.compare(pos_, 5, "false") == 0)
+ {
+ pos_ += 5;
+ return DataTree(false);
+ }
+ else
+ {
+ throw std::runtime_error(gul17::cat("Invalid boolean value at position ", pos_));
+ }
+ }
+
+ DataTree parse_null()
+ {
+ if (data_.compare(pos_, 4, "null") == 0)
+ {
+ pos_ += 4;
+ return DataTree(nullptr);
+ }
+ else
+ {
+ throw std::runtime_error(gul17::cat("Invalid null value at position ", pos_));
+ }
+ }
+
+    // Parse a JSON number at the cursor. Integers that fit into an int become int nodes; numbers
+    // with a fraction or exponent, and integers too large for an int, become double nodes.
+    DataTree parse_number()
+    {
+        const auto is_digit = [](char ch) { return ch >= '0' && ch <= '9'; }; // std::isdigit is UB for negative char
+        const auto start_pos = pos_;
+        bool is_floating = false;
+        if (current_char() == '-') advance();
+        while (is_digit(current_char()))
+            advance();
+        if (current_char() == '.')
+        {
+            is_floating = true;
+            advance();
+            while (is_digit(current_char()))
+                advance();
+        }
+        if (current_char() == 'e' || current_char() == 'E')
+        {
+            is_floating = true;
+            advance();
+            if (current_char() == '+' || current_char() == '-')
+                advance();
+            if (!is_digit(current_char()))
+                throw std::runtime_error(gul17::cat("Invalid number format at position ", start_pos));
+            while (is_digit(current_char()))
+                advance();
+        }
+        const std::string text(data_.substr(start_pos, pos_ - start_pos));
+        try
+        {
+            if (!is_floating)
+            {
+                try { return DataTree(std::stoi(text)); }
+                catch (const std::out_of_range&) { /* does not fit into an int: store as double */ }
+            }
+            return DataTree(std::stod(text));
+        }
+        catch (...)
+        {
+            throw std::runtime_error(gul17::cat("Invalid number format at position ", start_pos));
+        }
+    }
+
+    // Move the cursor past any whitespace at the current position.
+    void skip_whitespace()
+    {
+        // Cast first: std::isspace has undefined behaviour for negative char values
+        while (pos_ < data_.size() && std::isspace(static_cast<unsigned char>(data_[pos_])))
+            advance();
+    }
+
+    // Skip whitespace and any number of consecutive comments: "// ..." up to the
+    // end of the line, or "/* ... */". Whitespace after a comment is skipped too.
+    // Throws std::runtime_error if a '/' does not start a comment or if a block
+    // comment is never closed.
+    void skip_comment()
+    {
+        skip_whitespace();
+
+        while (current_char() == '/')
+        {
+            if (next_char() == '/')
+            {
+                // Single-line comment: stop at the newline or at the end of the input
+                while (has_remaining_chars() && current_char() != '\n')
+                    advance();
+            }
+            else if (next_char() == '*')
+            {
+                // Multi-line comment: search for the closing "*/"
+                const auto comment_start = pos_;
+                advance(2);
+                while (has_remaining_chars() && !(current_char() == '*' && next_char() == '/'))
+                    advance();
+                if (!has_remaining_chars())
+                    throw std::runtime_error(gul17::cat("Unterminated comment at position ", comment_start));
+                advance(2);
+            }
+            else
+            {
+                throw std::runtime_error(gul17::cat("Invalid comment syntax at position ", pos_));
+            }
+
+            skip_whitespace(); // another comment may follow
+        }
+    }
+
+    // Character under the cursor, or '\0' once the input is exhausted.
+    char current_char() const
+    {
+        if (pos_ >= data_.size())
+            return '\0';
+        return data_[pos_];
+    }
+
+    // Character one position after the cursor, or '\0' if there is none.
+    char next_char() const
+    {
+        const size_t following = pos_ + 1;
+        return following < data_.size() ? data_[following] : '\0';
+    }
+
+    // True while the cursor has not moved past the last input character.
+    bool has_remaining_chars() const { return pos_ < data_.size(); }
+
+    // Move the cursor forward by n characters (no bounds check).
+    void advance(size_t n = 1) { pos_ += n; }
+
+    // Consume the character `expected`; throw std::runtime_error if the cursor is on anything else.
+    void expect(char expected)
+    {
+        if (current_char() != expected)
+            throw std::runtime_error(gul17::cat("Expected character not found: ", expected, " at position ", pos_));
+        advance();
+    }
+
+private:
+ std::string_view data_;
+ size_t pos_{0};
+};
+
+struct JsonDataSerializer
+{
+ JsonDataSerializer(const DataTree& tree_root) // keeps a reference; tree_root must outlive this object
+ : tree_root_(tree_root)
+ {}
+
+ std::string serialize(size_t indent) // indent: spaces per nesting level, 0 for single-line output
+ {
+ serialize_value(tree_root_, indent);
+ return output_.str(); // output_ is never cleared, so a second call would repeat the text
+ }
+
+private:
+    // Shortest decimal text (15 to 17 significant digits) that reads back as exactly
+    // `number`; std::to_string() would cut the value off after six decimals.
+    static std::string format_double(double number)
+    {
+        if (number - number != 0.0) return "null"; // NaN or infinity: JSON has no such numbers
+        char buf[32];
+        for (int digits = 15; digits <= 17; ++digits)
+        {
+            double reparsed = 0.0;
+            snprintf(buf, sizeof(buf), "%.*g", digits, number);
+            if (sscanf(buf, "%lf", &reparsed) == 1 && reparsed == number) break;
+        }
+        const std::string text(buf);
+        return text.find_first_of(".e") == std::string::npos ? text + ".0" : text; // stay a float
+    }
+
+    // Append the JSON text for `value` to the output; `indent` is the number of spaces per
+    // nesting level (0 = compact) and `current_indent` the indentation of the enclosing container.
+    void serialize_value(const DataTree& value, size_t indent, size_t current_indent = 0)
+    {
+        if (value.is_null()) output_ << "null";
+        else if (value.is_boolean()) output_ << (value.as<bool>() ? "true" : "false");
+        else if (value.is_int()) output_ << std::to_string(value.as<int>());
+        else if (value.is_double()) output_ << format_double(value.as<double>());
+        else if (value.is_string())
+            output_ << "\"" << escape_string(value.as<std::string>()) << "\"";
+        else if (value.is_array())
+            serialize_array(value.as<DataTree::Array>(), indent, current_indent);
+        else if (value.is_object())
+            serialize_object(value.as<DataTree::Object>(), indent, current_indent);
+    }
+
+    // Write `arr` as a JSON array. With indent > 0 every element goes on its own line,
+    // indented by current_indent + indent spaces; otherwise everything stays on one line.
+    void serialize_array(const DataTree::Array& arr, size_t indent, size_t current_indent)
+    {
+        if (arr.empty())
+        {
+            output_ << "[]";
+            return;
+        }
+        const char* const line_break = indent > 0 ? "\n" : "";
+        const std::string element_padding(current_indent + indent, ' ');
+
+        output_ << "[" << line_break;
+        for (auto it = arr.begin(); it != arr.end(); ++it)
+        {
+            output_ << element_padding;
+            serialize_value(*it, indent, current_indent + indent);
+            output_ << (it + 1 != arr.end() ? "," : "") << line_break;
+        }
+        output_ << std::string(current_indent, ' ') << "]";
+    }
+
+    // Write `obj` as a JSON object. With indent > 0 every member goes on its own line,
+    // indented by current_indent + indent spaces; otherwise everything stays on one line.
+    void serialize_object(const DataTree::Object& obj, size_t indent, size_t current_indent)
+    {
+        if (obj.empty())
+        {
+            output_ << "{}";
+            return;
+        }
+
+        const char* const line_break = indent > 0 ? "\n" : "";
+        const std::string member_padding(current_indent + indent, ' ');
+        size_t remaining = obj.size();
+
+        output_ << "{" << line_break;
+        for (const auto& member : obj)
+        {
+            output_ << member_padding << "\"" << escape_string(member.first) << "\": ";
+            serialize_value(member.second, indent, current_indent + indent);
+            output_ << (--remaining > 0 ? "," : "") << line_break;
+        }
+        output_ << std::string(current_indent, ' ') << "}";
+    }
+
+    // Return `str` with quotes, backslashes and control characters (< 0x20) escaped for JSON.
+    static std::string escape_string(const std::string& str)
+    {
+        std::string result;
+        result.reserve(str.size() + 2); // Reserve space for efficiency
+        for (char c : str)
+        {
+            switch (c)
+            {
+            case '"': result += "\\\""; break;
+            case '\\': result += "\\\\"; break;
+            case '\b': result += "\\b"; break;
+            case '\f': result += "\\f"; break;
+            case '\n': result += "\\n"; break;
+            case '\r': result += "\\r"; break;
+            case '\t': result += "\\t"; break;
+
+            default:
+                // escape the remaining control characters as \u00XX
+                if (static_cast<unsigned char>(c) < 0x20)
+                {
+                    char buf[12];
+                    snprintf(buf, sizeof(buf), "\\u%04x", static_cast<unsigned int>(c));
+                    result += buf;
+                }
+                else
+                {
+                    result += c;
+                }
+            }
+        }
+        return result;
+    }
+
+private:
+ const DataTree& tree_root_;
+ std::ostringstream output_;
+};
+
+namespace gul17 {
+
+// Parse JSON text into a DataTree by running a JsonDataParser over `data`.
+DataTree from_json_string(const std::string_view& data)
+{
+    return JsonDataParser(data).parse();
+}
+
+// Serialize `value` to JSON text using `indent` spaces per nesting level.
+std::string to_json_string(const DataTree& value, size_t indent)
+{
+    return JsonDataSerializer(value).serialize(indent);
+}
+
+} // namespace gul17
+
+// vi:ts=4:sw=4:sts=4:et
diff --git a/src/data_processors/xml_processor.cc b/src/data_processors/xml_processor.cc
new file mode 100644
index 0000000..9f006bd
--- /dev/null
+++ b/src/data_processors/xml_processor.cc
@@ -0,0 +1,567 @@
+/**
+ * \file xml_processor.cc
+ * \author Jan Behrens
+ * \date Created on 20 November 2025
+ * \brief Implementation of the XML data processor functions.
+ *
+ * \copyright Copyright 2018-2025 Deutsches Elektronen-Synchrotron (DESY), Hamburg
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 2.1 of the license, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "gul17/data_processors.h"
+#include "gul17/cat.h"
+
+#include
+#include
+#include
+#include
+
+using gul17::DataTree;
+
+/**
+ * Small recursive-descent XML parser that converts a document into a DataTree.
+ *
+ * Mapping used for each element:
+ * - an element with attributes or child elements becomes an object; attributes are
+ *   stored under "@name", text under "#text", and child elements that share a tag
+ *   name are collected into an array;
+ * - an element that only contains text becomes an int, a double or a string;
+ * - an empty element becomes null.
+ *
+ * The parser only keeps a std::string_view of the input, so the input buffer must
+ * stay alive while parse() runs. Malformed input is reported as std::runtime_error.
+ */
+struct XmlDataParser
+{
+    XmlDataParser(const std::string_view& xml_str)
+        : data_(xml_str)
+    {}
+
+    /**
+     * Parse the document and return the value of its root element.
+     * The tag name of the root element is not part of the result.
+     * \exception std::runtime_error is thrown if the input is malformed.
+     */
+    DataTree parse()
+    {
+        skip_prolog();
+        return parse_xml_element().second;
+    }
+
+private:
+    using KeyValuePair = std::pair<std::string, DataTree>;
+    using AttributesList = std::vector<KeyValuePair>;
+    using ChildrenList = std::vector<KeyValuePair>;
+
+    /// Skip leading whitespace and any "<?...?>" declaration in front of the root element.
+    void skip_prolog()
+    {
+        skip_whitespace();
+        while (data_.compare(pos_, 2, "<?") == 0)
+        {
+            const auto end = data_.find("?>", pos_ + 2);
+            if (end == std::string_view::npos)
+            {
+                throw std::runtime_error(gul17::cat("Malformed XML: unterminated declaration at position ", pos_));
+            }
+            pos_ = end + 2;
+            skip_whitespace();
+        }
+    }
+
+    /**
+     * Parse one element (start tag, content, end tag) at the current position.
+     * \returns the tag name together with the value built from the element.
+     */
+    KeyValuePair parse_xml_element()
+    {
+        // Comments must be skipped before the '<' of the start tag is consumed;
+        // otherwise "<!-- ... -->" would be read as a tag named "!--".
+        strip_comment();
+        expect('<');
+
+        // Parse tag name
+        auto tag_name = std::string(parse_tag_name());
+        if (tag_name.empty())
+        {
+            throw std::runtime_error(gul17::cat("Malformed XML: tag name cannot be empty at position ", pos_));
+        }
+        if (root_name_.empty())
+            root_name_ = tag_name;
+
+        const AttributesList attributes = parse_attributes();
+
+        // Parse children or text content
+        ChildrenList children;
+        std::vector<std::string_view> text_content;
+
+        if (current_char() == '/')
+        {
+            // Self-closing tag
+            advance();
+            expect('>');
+        }
+        else
+        {
+            expect('>');
+            parse_content(children, text_content);
+
+            // Parse closing tag
+            expect('<');
+            expect('/');
+            auto closing_tag = parse_tag_name();
+            skip_whitespace(); // "</tag >" is allowed
+            expect('>');
+
+            if (closing_tag != tag_name)
+            {
+                throw std::runtime_error(gul17::cat("Mismatched tags: ", tag_name, " vs ", closing_tag, " at position ", pos_));
+            }
+        }
+
+        return KeyValuePair(tag_name, make_value(attributes, children, text_content));
+    }
+
+    /// Parse the attribute list of a start tag; stops in front of '>' or '/'.
+    AttributesList parse_attributes()
+    {
+        AttributesList attributes;
+
+        // Whitespace is skipped before every check of the loop condition, so that
+        // "<tag />" and "<tag a='1' >" are not mistaken for an attribute without a name.
+        skip_whitespace();
+        while (has_remaining_chars() && current_char() != '>' && current_char() != '/')
+        {
+            // Parse attribute name
+            const auto attr_name = parse_attribute_name();
+            if (attr_name.empty())
+            {
+                throw std::runtime_error(gul17::cat("Malformed XML: attribute name cannot be empty at position ", pos_));
+            }
+
+            skip_whitespace();
+            expect('=');
+            skip_whitespace();
+
+            // Parse attribute value (must be quoted); an empty value is stored as null
+            const auto attr_value = parse_attribute_value();
+            if (attr_value.empty())
+            {
+                attributes.emplace_back(std::string(attr_name), DataTree(nullptr));
+            }
+            else
+            {
+                attributes.emplace_back(std::string(attr_name), convert_string_to_value(attr_value));
+            }
+
+            skip_whitespace();
+        }
+
+        return attributes;
+    }
+
+    /**
+     * Parse everything between a start tag and its end tag.
+     * Nested elements are appended to \a children, text segments to \a text_content.
+     * Returns in front of "</" (or at the end of the input).
+     */
+    void parse_content(ChildrenList& children, std::vector<std::string_view>& text_content)
+    {
+        while (true)
+        {
+            // Skips whitespace and comments. The end-tag check has to come afterwards
+            // so that whitespace or a comment directly before "</tag>" is accepted.
+            strip_comment();
+
+            if (!has_remaining_chars() || (current_char() == '<' && next_char() == '/'))
+                return;
+
+            if (current_char() == '<')
+            {
+                // Nested element
+                children.push_back(parse_xml_element());
+            }
+            else
+            {
+                // Text content
+                text_content.push_back(parse_text_content());
+            }
+        }
+    }
+
+    /// Decide how an element is represented: object, scalar or null.
+    DataTree make_value(const AttributesList& attributes, const ChildrenList& children,
+                        const std::vector<std::string_view>& text_content)
+    {
+        if (attributes.empty() && children.empty())
+        {
+            // Empty element
+            if (text_content.empty())
+                return DataTree(nullptr);
+
+            if (text_content.size() > 1)
+            {
+                throw std::runtime_error(gul17::cat("Multiple text contents in simple element at position ", pos_));
+            }
+
+            // Simple element with text content, converted to the appropriate type
+            return convert_string_to_value(text_content[0]);
+        }
+
+        DataTree::Object obj;
+
+        // Group children by tag name: a tag that occurs once is stored directly,
+        // a tag that occurs several times becomes an array.
+        std::unordered_map<std::string, DataTree::Array> child_groups;
+        for (const auto& [child_tag, child_value] : children)
+        {
+            child_groups[child_tag].push_back(child_value);
+        }
+        for (const auto& [child_tag, values] : child_groups)
+        {
+            if (values.size() == 1)
+            {
+                obj[child_tag] = values[0];
+            }
+            else
+            {
+                obj[child_tag] = DataTree(values);
+            }
+        }
+
+        // Add attributes
+        for (const auto& [attr_name, attr_value] : attributes)
+        {
+            auto key = "@" + attr_name;
+            if (obj.find(key) != obj.end())
+            {
+                throw std::runtime_error(gul17::cat("Duplicate attribute name: ", attr_name, " at position ", pos_));
+            }
+            obj[key] = attr_value;
+        }
+
+        // Add text content if any. The text is unescaped here as well, so that it is
+        // treated like the text of a simple element.
+        if (text_content.size() == 1)
+        {
+            obj[std::string("#text")] = DataTree(unescape_xml(text_content[0]));
+        }
+        else if (text_content.size() > 1)
+        {
+            DataTree::Array text_array;
+            for (const auto& txt : text_content)
+            {
+                text_array.push_back(DataTree(unescape_xml(txt)));
+            }
+            obj[std::string("#text")] = DataTree(text_array); // Multiple text contents as array
+        }
+
+        return DataTree(obj);
+    }
+
+    /// Skip whitespace and any number of consecutive "<!-- ... -->" comments.
+    void strip_comment()
+    {
+        skip_whitespace();
+
+        while (data_.compare(pos_, 4, "<!--") == 0)
+        {
+            const auto end = data_.find("-->", pos_ + 4);
+            if (end == std::string_view::npos)
+            {
+                throw std::runtime_error(gul17::cat("Malformed XML: unterminated comment at position ", pos_));
+            }
+            pos_ = end + 3; // skip '-->'
+            skip_whitespace();
+        }
+    }
+
+    /// Read an attribute name; it ends at whitespace, '=', '>' or '/'.
+    std::string_view parse_attribute_name()
+    {
+        const auto start_pos = pos_;
+        while (has_remaining_chars() && !is_space(current_char()) &&
+               current_char() != '=' && current_char() != '>' && current_char() != '/')
+        {
+            ++pos_;
+        }
+
+        return data_.substr(start_pos, pos_ - start_pos);
+    }
+
+    /// Read a quoted attribute value (single or double quotes) without the quotes.
+    std::string_view parse_attribute_value()
+    {
+        const char quote_char = current_char();
+        if (quote_char != '"' && quote_char != '\'')
+            throw std::runtime_error(gul17::cat("Expected quote for attribute value at position ", pos_));
+
+        advance(); // skip opening quote
+
+        const auto start_pos = pos_;
+        while (has_remaining_chars() && current_char() != quote_char)
+        {
+            ++pos_;
+        }
+        const auto value = data_.substr(start_pos, pos_ - start_pos);
+        expect(quote_char); // skip closing quote
+
+        return value;
+    }
+
+    /// Read a tag name; it ends at whitespace, '>' or '/'.
+    std::string_view parse_tag_name()
+    {
+        const auto start_pos = pos_;
+        while (has_remaining_chars() && !is_space(current_char()) &&
+               current_char() != '>' && current_char() != '/')
+        {
+            ++pos_;
+        }
+
+        return data_.substr(start_pos, pos_ - start_pos);
+    }
+
+    /// Read text up to the next '<' and return it with surrounding whitespace removed.
+    std::string_view parse_text_content()
+    {
+        const size_t start_pos = pos_;
+        while (has_remaining_chars() && current_char() != '<')
+        {
+            ++pos_;
+        }
+        const auto text = data_.substr(start_pos, pos_ - start_pos);
+
+        // Trim whitespace
+        const auto first = text.find_first_not_of(" \t\n\r");
+        if (first == std::string_view::npos)
+            return std::string_view();
+
+        const auto last = text.find_last_not_of(" \t\n\r");
+        return text.substr(first, last - first + 1);
+    }
+
+    /**
+     * Convert attribute or element text to an int, a double or (after unescaping)
+     * a string.
+     */
+    DataTree convert_string_to_value(const std::string_view& str)
+    {
+        // Only plain decimal notation counts as a number. std::stod() would also
+        // accept words such as "nan" or "infinity", which must stay strings.
+        if (!str.empty() && str.find_first_not_of("0123456789+-.eE") == std::string_view::npos)
+        {
+            const std::string text(str);
+
+            // Try to convert to int
+            try
+            {
+                size_t idx = 0;
+                const int int_val = std::stoi(text, &idx);
+                if (idx == text.size())
+                    return DataTree(int_val);
+            }
+            catch (const std::logic_error&)
+            {
+                // Not an int (or out of range), try double
+            }
+
+            // Try to convert to double
+            try
+            {
+                size_t idx = 0;
+                const double double_val = std::stod(text, &idx);
+                if (idx == text.size())
+                    return DataTree(double_val);
+            }
+            catch (const std::logic_error&)
+            {
+                // Not a number
+            }
+        }
+
+        // Otherwise, return as string
+        return DataTree(unescape_xml(str));
+    }
+
+    /// std::isspace() must not be called with a negative value, hence the cast.
+    static bool is_space(char c)
+    {
+        return std::isspace(static_cast<unsigned char>(c)) != 0;
+    }
+
+    void skip_whitespace()
+    {
+        while (pos_ < data_.size() && is_space(data_[pos_]))
+        {
+            ++pos_;
+        }
+    }
+
+    /// Character at the current position, or '\0' at the end of the input.
+    char current_char() const
+    {
+        return pos_ < data_.size() ? data_[pos_] : '\0';
+    }
+
+    /// Character after the current position, or '\0' if there is none.
+    char next_char() const
+    {
+        return pos_ + 1 < data_.size() ? data_[pos_ + 1] : '\0';
+    }
+
+    bool has_remaining_chars() const
+    {
+        return pos_ < data_.size();
+    }
+
+    void advance(size_t n = 1)
+    {
+        pos_ += n;
+    }
+
+    /// Consume \a expected or throw std::runtime_error if another character is found.
+    void expect(char expected)
+    {
+        if (current_char() != expected)
+        {
+            throw std::runtime_error(gul17::cat("Expected character not found: ", expected, " at position ", pos_));
+        }
+        advance();
+    }
+
+    /**
+     * Replace the five predefined XML entities by the characters they stand for.
+     * Any other '&' is copied unchanged.
+     */
+    static std::string unescape_xml(const std::string_view& str)
+    {
+        struct Entity
+        {
+            std::string_view name;
+            char replacement;
+        };
+        static const Entity entities[] = {
+            { "&amp;", '&' },
+            { "&lt;", '<' },
+            { "&gt;", '>' },
+            { "&quot;", '"' },
+            { "&apos;", '\'' },
+        };
+
+        std::string result;
+        result.reserve(str.size());
+
+        size_t i = 0;
+        while (i < str.length())
+        {
+            bool replaced = false;
+            if (str[i] == '&')
+            {
+                for (const auto& entity : entities)
+                {
+                    if (str.compare(i, entity.name.size(), entity.name) == 0)
+                    {
+                        result += entity.replacement;
+                        i += entity.name.size();
+                        replaced = true;
+                        break;
+                    }
+                }
+            }
+
+            if (!replaced)
+            {
+                result += str[i];
+                ++i;
+            }
+        }
+
+        return result;
+    }
+
+private:
+    std::string_view data_;  // Input document (not owned)
+    size_t pos_{0};          // Index of the next unread character in data_
+    std::string root_name_;  // Tag name of the first element that was parsed
+};
+
+/**
+ * Serializer that writes a DataTree as XML text.
+ *
+ * Object members whose key starts with '@' become attributes, the member "#text"
+ * becomes text content, all other members become child elements. Arrays are written
+ * as repeated elements with the same tag name.
+ *
+ * The serializer only stores a reference to the tree, which therefore has to outlive it.
+ */
+struct XmlDataSerializer
+{
+    XmlDataSerializer(const DataTree& tree_root)
+        : tree_root_(tree_root)
+    {}
+
+    /**
+     * Serialize the tree.
+     * \param indent         Number of spaces per nesting level; with 0, everything is
+     *                       written on one line.
+     * \param root_tag_name  Tag name used for the root element.
+     * \exception std::runtime_error is thrown if the root value is not an object.
+     */
+    std::string serialize(size_t indent, const std::string& root_tag_name)
+    {
+        if (!tree_root_.is_object())
+            throw std::runtime_error("Root value must be an object for XML serialization");
+        serialize_value(tree_root_, root_tag_name, indent, 0);
+        return output_.str();
+    }
+
+private:
+    /// Write \a value as one element (or, for arrays, as several elements) named \a tag_name.
+    void serialize_value(const DataTree& value, const std::string& tag_name, size_t indent, size_t current_indent = 0)
+    {
+        const std::string newline = indent > 0 ? "\n" : ""; // Add newlines if indenting
+        const std::string indent_str(current_indent, ' ');
+
+        const std::string opening_tag = "<" + tag_name;
+        const std::string closing_tag = "</" + tag_name + ">" + newline;
+
+        if (value.is_null())
+        {
+            output_ << indent_str << opening_tag << "/>" << newline;
+        }
+        else if (value.is_boolean() || value.is_int() || value.is_double() || value.is_string())
+        {
+            output_ << indent_str << opening_tag << ">"
+                    << scalar_to_text(value)
+                    << closing_tag;
+        }
+        else if (value.is_array())
+        {
+            // Every item becomes its own element with the same tag name
+            for (const auto& item : value.as<DataTree::Array>())
+            {
+                serialize_value(item, tag_name, indent, current_indent);
+            }
+        }
+        else if (value.is_object())
+        {
+            const auto& obj = value.as<DataTree::Object>();
+
+            // Opening tag with attributes
+            output_ << indent_str << opening_tag;
+            for (const auto& [key, val] : obj)
+            {
+                if (key.rfind("@", 0) != 0)
+                    continue;
+
+                // Attribute: strip the leading '@'; null is written as an empty value.
+                // Values are written according to their type, since attribute values
+                // may be numbers as well as strings.
+                output_ << " " << key.substr(1) << "=\"";
+                if (!val.is_null())
+                {
+                    output_ << scalar_to_text(val);
+                }
+                output_ << "\"";
+            }
+            output_ << ">";
+            output_ << newline;
+
+            // Child elements
+            for (const auto& [key, val] : obj)
+            {
+                // Skip already handled attributes and text content handled later
+                if (key.rfind("@", 0) == 0 || key == "#text")
+                    continue;
+
+                serialize_value(val, key, indent, current_indent + indent);
+            }
+
+            // Text content: either a single string or an array of strings
+            const auto it = obj.find("#text");
+            if (it != obj.end())
+            {
+                const std::string next_indent_str(current_indent + indent, ' ');
+                if (it->second.is_string())
+                {
+                    output_ << next_indent_str << escape_xml(it->second.as<std::string>()) << newline;
+                }
+                else if (it->second.is_array())
+                {
+                    for (const auto& part : it->second.as<DataTree::Array>())
+                    {
+                        if (part.is_string())
+                            output_ << next_indent_str << escape_xml(part.as<std::string>()) << newline;
+                    }
+                }
+            }
+
+            output_ << indent_str;
+            output_ << closing_tag;
+        }
+    }
+
+    /**
+     * Text representation of a scalar as used for element text and attribute values.
+     * Booleans and numbers are formatted by type; everything else is read as a string
+     * and escaped.
+     */
+    static std::string scalar_to_text(const DataTree& value)
+    {
+        if (value.is_boolean())
+            return value.as<bool>() ? "true" : "false";
+        if (value.is_int())
+            return std::to_string(value.as<int>());
+        if (value.is_double())
+            return std::to_string(value.as<double>());
+        return escape_xml(value.as<std::string>());
+    }
+
+    /// Replace the characters & < > " ' by the corresponding predefined XML entities.
+    static std::string escape_xml(const std::string_view& str)
+    {
+        std::string result;
+        result.reserve(str.size());
+        for (const char c : str)
+        {
+            switch (c)
+            {
+            case '&': result += "&amp;"; break;
+            case '<': result += "&lt;"; break;
+            case '>': result += "&gt;"; break;
+            case '"': result += "&quot;"; break;
+            case '\'': result += "&apos;"; break;
+            default: result += c; break;
+            }
+        }
+        return result;
+    }
+
+private:
+    const DataTree& tree_root_;  // Tree to serialize (not owned)
+    std::ostringstream output_;  // Collects the generated XML text
+};
+
+namespace gul17 {
+
+// Build a DataTree from XML text; the result is the value of the root element
+// as returned by XmlDataParser::parse().
+DataTree from_xml_string(const std::string_view& data)
+{
+    return XmlDataParser(data).parse();
+}
+
+// Render \a value as XML text below a root element named \a root_tag_name.
+// XmlDataSerializer::serialize() throws std::runtime_error if \a value is not an object.
+std::string to_xml_string(const DataTree& value, size_t indent, const std::string& root_tag_name)
+{
+    return XmlDataSerializer(value).serialize(indent, root_tag_name);
+}
+
+} // namespace gul17
+
+// vi:ts=4:sw=4:sts=4:et
diff --git a/src/data_processors/yaml_processor.cc b/src/data_processors/yaml_processor.cc
new file mode 100644
index 0000000..689bee4
--- /dev/null
+++ b/src/data_processors/yaml_processor.cc
@@ -0,0 +1,593 @@
+/**
+ * \file yaml_processor.cc
+ * \author Jan Behrens
+ * \date Created on 20 November 2025
+ * \brief Implementation of the YAML data processor functions.
+ *
+ * \copyright Copyright 2018-2025 Deutsches Elektronen-Synchrotron (DESY), Hamburg
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 2.1 of the license, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "gul17/data_processors.h"
+#include "gul17/cat.h"
+#include "gul17/join_split.h"
+
+#include
+#include
+
+using gul17::DataTree;
+
+/**
+ * Line-based parser for a subset of YAML: block mappings, block sequences and
+ * scalars (null, booleans, integers, floating-point numbers, plain and quoted
+ * strings). Flow collections, anchors and multi-line scalars are not handled.
+ *
+ * The parser only keeps std::string_views into the input, so the input buffer must
+ * stay alive while parse() runs. Errors are reported as std::runtime_error.
+ */
+struct YamlDataParser
+{
+    YamlDataParser(const std::string_view& yaml_str)
+        : data_(yaml_str)
+    {}
+
+    /// Parse the document; an input without content yields a null DataTree.
+    DataTree parse()
+    {
+        return parse_document();
+    }
+
+private:
+    DataTree parse_document()
+    {
+        // Split into lines and reset state
+        lines_.clear();
+        current_line_ = 0;
+
+        for (const auto& line : gul17::split_sv(data_, "\n"))
+        {
+            // Remove comments and skip empty lines
+            const auto stripped = strip_comment(line);
+            const auto content = trim(stripped);
+            if (content.empty())
+                continue;
+
+            // A leading "---" only marks the start of the document
+            if (lines_.empty() && content == "---")
+                continue;
+
+            lines_.emplace_back(stripped);
+        }
+
+        if (lines_.empty())
+            return DataTree(nullptr);
+
+        return parse_node();
+    }
+
+    /// Parse the node that starts at the current line (sequence, mapping or scalar).
+    DataTree parse_node(size_t current_indent = 0)
+    {
+        if (current_line_ >= lines_.size())
+            return DataTree(nullptr);
+
+        const auto line = lines_[current_line_];
+        const auto line_indent = get_indentation(line);
+        const auto content = trim(line.substr(line_indent));
+
+        // Check if we're at the wrong indentation level
+        if (line_indent < current_indent)
+            return DataTree(nullptr); // Signal to go back
+
+        // Determine node type
+        if (is_sequence_item(content))
+        {
+            return parse_sequence(current_indent);
+        }
+        else if (is_mapping_item(content))
+        {
+            return parse_mapping(current_indent);
+        }
+        else
+        {
+            // Simple scalar value
+            current_line_++;
+            return parse_scalar(content);
+        }
+    }
+
+    /// Parse consecutive "- item" lines into an array.
+    DataTree parse_sequence(size_t current_indent)
+    {
+        DataTree::Array sequence;
+
+        while (current_line_ < lines_.size())
+        {
+            const auto line = lines_[current_line_];
+            const auto line_indent = get_indentation(line);
+
+            if (line_indent < current_indent)
+                break; // End of this sequence
+
+            const auto content = trim(line.substr(line_indent));
+            if (!is_sequence_item(content))
+                break; // Not a sequence item anymore
+
+            // Remove the sequence marker '-'
+            const auto item_content = trim(content.substr(1));
+            current_line_++;
+
+            if (!item_content.empty())
+            {
+                // Simple scalar on same line
+                sequence.push_back(parse_scalar(item_content));
+                continue;
+            }
+
+            // A bare '-' is followed either by a nested structure on the next,
+            // deeper indented lines or by nothing (null item).
+            if (current_line_ < lines_.size())
+            {
+                const auto next_indent = get_indentation(lines_[current_line_]);
+                if (next_indent > line_indent)
+                {
+                    sequence.push_back(parse_node(next_indent));
+                    continue;
+                }
+            }
+            sequence.push_back(DataTree(nullptr));
+        }
+
+        return DataTree(sequence);
+    }
+
+    /// Parse consecutive "key: value" lines into an object.
+    DataTree parse_mapping(size_t current_indent)
+    {
+        DataTree::Object mapping;
+
+        while (current_line_ < lines_.size())
+        {
+            const auto line = lines_[current_line_];
+            const auto line_indent = get_indentation(line);
+
+            if (line_indent < current_indent)
+                break; // End of this mapping
+
+            const auto content = trim(line.substr(line_indent));
+
+            // The separator is located with the same rules that is_mapping_item()
+            // uses, so that a colon inside a quoted key is never used for splitting.
+            const auto colon_pos = find_key_separator(content);
+            if (colon_pos == std::string_view::npos)
+                break; // Not a mapping item
+
+            const auto key = trim(content.substr(0, colon_pos));
+            const auto value_str = trim(content.substr(colon_pos + 1));
+
+            current_line_++;
+
+            DataTree value(nullptr); // null for empty value
+
+            if (!value_str.empty())
+            {
+                // Simple scalar value
+                value = parse_scalar(value_str);
+            }
+            else if (current_line_ < lines_.size())
+            {
+                // Value might be on next lines (complex value)
+                const auto next_indent = get_indentation(lines_[current_line_]);
+                if (next_indent > line_indent)
+                {
+                    value = parse_node(next_indent);
+                }
+            }
+
+            // Quoted keys are stored without their quotes
+            std::string key_text;
+            if (!try_unquote(key, key_text))
+                key_text = std::string(key);
+
+            mapping[key_text] = value;
+        }
+
+        return DataTree(mapping);
+    }
+
+    /// Convert the text of a scalar to null, a boolean, a number or a string.
+    DataTree parse_scalar(const std::string_view& value)
+    {
+        const auto trimmed = trim(value);
+
+        // Check for null
+        if (trimmed == "null" || trimmed == "~" || trimmed.empty())
+            return DataTree(nullptr);
+
+        // Check for boolean
+        if (trimmed == "true") return DataTree(true);
+        if (trimmed == "false") return DataTree(false);
+
+        // Check for number. The cast avoids calling std::isdigit() with a negative value.
+        if (trimmed[0] == '-' || std::isdigit(static_cast<unsigned char>(trimmed[0])))
+        {
+            const std::string text(trimmed);
+
+            // Integer (only if there is no decimal point)
+            if (text.find('.') == std::string::npos)
+            {
+                try
+                {
+                    size_t pos = 0;
+                    const auto int_val = std::stoi(text, &pos);
+                    if (pos == text.length()) // Entire string was converted
+                    {
+                        return DataTree(int_val);
+                    }
+                }
+                catch (const std::logic_error&)
+                {
+                    // Not an integer, try float
+                }
+            }
+
+            // Float
+            try
+            {
+                size_t pos = 0;
+                const auto double_val = std::stod(text, &pos);
+                if (pos == text.length()) // Entire string was converted
+                {
+                    return DataTree(double_val);
+                }
+            }
+            catch (const std::logic_error&)
+            {
+                // Not a number
+            }
+        }
+
+        // Remove quotes if present and unescape
+        std::string unquoted;
+        if (try_unquote(trimmed, unquoted))
+            return DataTree(unquoted);
+
+        // Default to string
+        return DataTree(std::string(trimmed));
+    }
+
+    /**
+     * If \a text is enclosed in matching quotes, store its content in \a result and
+     * return true. Double-quoted text is processed by unescape_yaml_string();
+     * single-quoted text is taken literally, except that '' stands for one quote.
+     */
+    bool try_unquote(const std::string_view& text, std::string& result)
+    {
+        if (text.size() < 2 || text.front() != text.back())
+            return false;
+
+        const auto inner = text.substr(1, text.size() - 2);
+        if (text.front() == '"')
+        {
+            result = unescape_yaml_string(inner);
+            return true;
+        }
+        if (text.front() == '\'')
+        {
+            result.clear();
+            for (size_t i = 0; i < inner.size(); ++i)
+            {
+                result += inner[i];
+                if (inner[i] == '\'' && i + 1 < inner.size() && inner[i + 1] == '\'')
+                    ++i; // '' -> '
+            }
+            return true;
+        }
+        return false;
+    }
+
+    /// Number of leading spaces of \a line; tabs in the indentation are rejected.
+    size_t get_indentation(const std::string_view& line)
+    {
+        size_t i = 0;
+        while (i < line.length() && (line[i] == ' ' || line[i] == '\t'))
+        {
+            if (line[i] == '\t')
+                throw std::runtime_error(gul17::cat("Tabs are not allowed for indentation in YAML at line ", current_line_ + 1));
+            i++;
+        }
+
+        return i;
+    }
+
+    /**
+     * Return \a line without a trailing comment. A '#' starts a comment only outside
+     * of quotes and only at the start of the line or after whitespace, so that text
+     * such as "http://host/#anchor" is kept.
+     */
+    static std::string_view strip_comment(const std::string_view& line)
+    {
+        bool in_single_quote = false;
+        bool in_double_quote = false;
+        for (size_t i = 0; i < line.size(); ++i)
+        {
+            const char c = line[i];
+            if (c == '\'' && !in_double_quote)
+            {
+                in_single_quote = !in_single_quote;
+            }
+            else if (c == '"' && !in_single_quote)
+            {
+                // Only toggle if not escaped
+                if (i == 0 || line[i - 1] != '\\')
+                    in_double_quote = !in_double_quote;
+            }
+            else if (c == '#' && !in_single_quote && !in_double_quote)
+            {
+                if (i == 0 || line[i - 1] == ' ' || line[i - 1] == '\t')
+                    return line.substr(0, i);
+            }
+        }
+
+        return line;
+    }
+
+    static std::string_view trim(const std::string_view& str)
+    {
+        const auto start = str.find_first_not_of(" \t\n\r");
+        if (start == std::string_view::npos)
+            return std::string_view();
+
+        const auto end = str.find_last_not_of(" \t\n\r");
+        return str.substr(start, end - start + 1);
+    }
+
+    /**
+     * A sequence item is a '-' that is followed by whitespace or stands alone.
+     * A scalar such as "-5" is therefore not a sequence item.
+     */
+    static bool is_sequence_item(const std::string_view& line)
+    {
+        const auto trimmed = trim(line);
+        if (trimmed.empty() || trimmed[0] != '-')
+            return false;
+        return trimmed.size() == 1 || trimmed[1] == ' ' || trimmed[1] == '\t';
+    }
+
+    static bool is_mapping_item(const std::string_view& line)
+    {
+        return find_key_separator(line) != std::string_view::npos;
+    }
+
+    /**
+     * Position of the ':' that separates key and value, or npos if there is none.
+     * The colon must be outside of quotes and be followed by whitespace or the end
+     * of the text, so that scalars such as "12:30" or "http://host" are not split.
+     */
+    static size_t find_key_separator(const std::string_view& content)
+    {
+        bool in_single_quote = false;
+        bool in_double_quote = false;
+        for (size_t i = 0; i < content.length(); ++i)
+        {
+            const char c = content[i];
+            if (c == '\'' && !in_double_quote)
+            {
+                in_single_quote = !in_single_quote;
+            }
+            else if (c == '"' && !in_single_quote)
+            {
+                // Only toggle if not escaped
+                if (i == 0 || content[i - 1] != '\\')
+                    in_double_quote = !in_double_quote;
+            }
+            else if (c == ':' && !in_single_quote && !in_double_quote)
+            {
+                if (i + 1 == content.length() || content[i + 1] == ' ' || content[i + 1] == '\t'
+                    || content[i + 1] == '\r')
+                {
+                    return i;
+                }
+            }
+        }
+        return std::string_view::npos;
+    }
+
+    /// Append \a code_point to \a out in UTF-8 encoding (1 to 4 bytes).
+    static void append_utf8(std::string& out, unsigned long code_point)
+    {
+        if (code_point < 0x80)
+        {
+            out += static_cast<char>(code_point);
+        }
+        else if (code_point < 0x800)
+        {
+            out += static_cast<char>(0xC0 | (code_point >> 6));
+            out += static_cast<char>(0x80 | (code_point & 0x3F));
+        }
+        else if (code_point < 0x10000)
+        {
+            out += static_cast<char>(0xE0 | (code_point >> 12));
+            out += static_cast<char>(0x80 | ((code_point >> 6) & 0x3F));
+            out += static_cast<char>(0x80 | (code_point & 0x3F));
+        }
+        else
+        {
+            out += static_cast<char>(0xF0 | (code_point >> 18));
+            out += static_cast<char>(0x80 | ((code_point >> 12) & 0x3F));
+            out += static_cast<char>(0x80 | ((code_point >> 6) & 0x3F));
+            out += static_cast<char>(0x80 | (code_point & 0x3F));
+        }
+    }
+
+    /**
+     * Read exactly \a count hexadecimal digits from \a str, starting at \a start.
+     * \returns false if there are fewer characters or if one of them is not a hex digit.
+     */
+    static bool parse_hex(const std::string_view& str, size_t start, size_t count, unsigned long& value)
+    {
+        if (start > str.size() || str.size() - start < count)
+            return false;
+
+        value = 0;
+        for (size_t k = 0; k < count; ++k)
+        {
+            const char c = str[start + k];
+            unsigned long digit = 0;
+            if (c >= '0' && c <= '9')
+                digit = static_cast<unsigned long>(c - '0');
+            else if (c >= 'a' && c <= 'f')
+                digit = static_cast<unsigned long>(c - 'a') + 10;
+            else if (c >= 'A' && c <= 'F')
+                digit = static_cast<unsigned long>(c - 'A') + 10;
+            else
+                return false;
+            value = (value << 4) | digit;
+        }
+        return true;
+    }
+
+    /**
+     * Resolve the backslash escapes of a double-quoted scalar.
+     * \exception std::runtime_error is thrown for unknown escapes and for malformed
+     *            or invalid \\u / \\U escapes.
+     */
+    std::string unescape_yaml_string(const std::string_view& str)
+    {
+        std::string result;
+        result.reserve(str.size());
+
+        for (size_t i = 0; i < str.length(); ++i)
+        {
+            // Ordinary character, or a backslash at the very end of the string
+            if (str[i] != '\\' || i + 1 >= str.length())
+            {
+                result += str[i];
+                continue;
+            }
+
+            const char esc = str[++i]; // Character after the backslash
+            switch (esc)
+            {
+            case '"': result += '\"'; break;
+            case '\'': result += '\''; break;
+            case '\\': result += '\\'; break;
+            case '/': result += '/'; break;
+            case '0': result += '\0'; break;
+            case 'a': result += '\a'; break;
+            case 'b': result += '\b'; break;
+            case 'e': result += '\x1B'; break;
+            case 'f': result += '\f'; break;
+            case 'n': result += '\n'; break;
+            case 'r': result += '\r'; break;
+            case 't': result += '\t'; break;
+            case 'v': result += '\v'; break;
+            case ' ': result += ' '; break;
+
+            // YAML-specific escapes
+            case '_': append_utf8(result, 0x00A0); break; // U+00A0 (non-breaking space)
+            case 'N': append_utf8(result, 0x0085); break; // U+0085 (next line)
+            case 'L': append_utf8(result, 0x2028); break; // U+2028 (line separator)
+            case 'P': append_utf8(result, 0x2029); break; // U+2029 (paragraph separator)
+
+            // Hexcode escape (\xXX)
+            case 'x':
+                {
+                    unsigned long code_point = 0;
+                    if (parse_hex(str, i + 1, 2, code_point))
+                    {
+                        append_utf8(result, code_point);
+                        i += 2;
+                    }
+                    else
+                    {
+                        result += esc; // Invalid hex, treat as literal
+                    }
+                }
+                break;
+
+            // Unicode escapes (\uXXXX and \UXXXXXXXX)
+            case 'u':
+            case 'U':
+                {
+                    const size_t digits = (esc == 'u') ? 4 : 8;
+                    unsigned long code_point = 0;
+                    if (!parse_hex(str, i + 1, digits, code_point))
+                    {
+                        throw std::runtime_error(gul17::cat("Invalid number format in Unicode escape at line ", current_line_));
+                    }
+                    // Surrogates and values beyond U+10FFFF cannot be encoded as UTF-8
+                    if (code_point > 0x10FFFF || (code_point >= 0xD800 && code_point <= 0xDFFF))
+                    {
+                        throw std::runtime_error(gul17::cat("Invalid Unicode code point at line ", current_line_));
+                    }
+                    append_utf8(result, code_point);
+                    i += digits;
+                }
+                break;
+
+            default:
+                throw std::runtime_error(gul17::cat("Invalid escape sequence: ", esc, " at line ", current_line_));
+            }
+        }
+
+        return result;
+    }
+
+private:
+    std::string_view data_;                // Input document (not owned)
+    std::vector<std::string_view> lines_;  // Non-empty lines with comments removed
+    size_t current_line_{0};               // Index into lines_ of the next line to parse
+};
+
+/**
+ * Serializer that writes a DataTree as block-style YAML.
+ *
+ * The serializer only stores a reference to the tree, which therefore has to outlive it.
+ */
+struct YamlDataSerializer
+{
+    YamlDataSerializer(const DataTree& tree_root)
+        : tree_root_(tree_root)
+    {}
+
+    /**
+     * Serialize the tree.
+     * \param indent  Number of spaces per nesting level; must be greater than zero.
+     * \exception std::runtime_error is thrown if \a indent is zero.
+     */
+    std::string serialize(size_t indent)
+    {
+        if (indent == 0)
+            throw std::runtime_error("Indentation must be greater than zero for YAML serialization");
+        serialize_yaml(tree_root_, indent);
+        return output_.str();
+    }
+
+private:
+    void serialize_yaml(const DataTree& value, size_t indent, size_t current_indent = 0)
+    {
+        if (value.is_object())
+        {
+            serialize_mapping(value.as<DataTree::Object>(), indent, current_indent);
+        }
+        else if (value.is_array())
+        {
+            serialize_sequence(value.as<DataTree::Array>(), indent, current_indent);
+        }
+        else
+        {
+            serialize_scalar(value);
+        }
+    }
+
+    void serialize_scalar(const DataTree& value)
+    {
+        if (value.is_null())
+        {
+            output_ << "null";
+        }
+        else if (value.is_boolean())
+        {
+            output_ << (value.as<bool>() ? "true" : "false");
+        }
+        else if (value.is_int())
+        {
+            output_ << value.as<int>();
+        }
+        else if (value.is_double())
+        {
+            output_ << value.as<double>();
+        }
+        else if (value.is_string())
+        {
+            const std::string& str = value.as<std::string>();
+            if (needs_quotes(str))
+                write_quoted(str);
+            else
+                output_ << str;
+        }
+    }
+
+    /**
+     * Decide whether a string must be written in double quotes.
+     * This is the case if it is empty, contains characters with a special meaning,
+     * starts or ends with a space, or would otherwise be read back as null,
+     * a boolean or a number.
+     */
+    static bool needs_quotes(const std::string& str)
+    {
+        if (str.empty() || str.find_first_of(":#{}[]&*!|>\"'%\\") != std::string::npos)
+            return true;
+
+        for (const char c : str)
+        {
+            if (static_cast<unsigned char>(c) < 0x20)
+                return true; // Control characters (e.g. line breaks) must be escaped
+        }
+
+        if (str.front() == ' ' || str.back() == ' ')
+            return true;
+
+        if (str == "null" || str == "~" || str == "true" || str == "false")
+            return true;
+
+        // Text that parses completely as a number would lose its string type
+        if (str.front() == '-' || (str.front() >= '0' && str.front() <= '9'))
+        {
+            try
+            {
+                size_t pos = 0;
+                (void)std::stod(str, &pos);
+                if (pos == str.size())
+                    return true;
+            }
+            catch (const std::logic_error&)
+            {
+                // Not a number
+            }
+        }
+
+        return false;
+    }
+
+    /// Write \a str in double quotes, escaping quotes, backslashes and control characters.
+    void write_quoted(const std::string& str)
+    {
+        static const char hex_digits[] = "0123456789abcdef";
+
+        output_ << "\"";
+        for (const char c : str)
+        {
+            const auto byte = static_cast<unsigned char>(c);
+            switch (c)
+            {
+            case '"': output_ << "\\\""; break;
+            case '\\': output_ << "\\\\"; break;
+            case '\n': output_ << "\\n"; break;
+            case '\r': output_ << "\\r"; break;
+            case '\t': output_ << "\\t"; break;
+            default:
+                if (byte < 0x20)
+                    output_ << "\\x" << hex_digits[byte >> 4] << hex_digits[byte & 0x0F];
+                else
+                    output_ << c;
+            }
+        }
+        output_ << "\"";
+    }
+
+    void serialize_sequence(const DataTree::Array& arr, size_t indent, size_t current_indent)
+    {
+        for (const auto& item : arr)
+        {
+            output_ << std::string(current_indent, ' ') << "- ";
+            if (item.is_object() || item.is_array())
+            {
+                // Nested structures start on the next line with deeper indentation
+                output_ << "\n";
+                serialize_yaml(item, indent, current_indent + indent);
+            }
+            else
+            {
+                serialize_scalar(item);
+                output_ << "\n";
+            }
+        }
+    }
+
+    void serialize_mapping(const DataTree::Object& obj, size_t indent, size_t current_indent)
+    {
+        for (const auto& [key, val] : obj)
+        {
+            output_ << std::string(current_indent, ' ') << key << ":";
+
+            if (val.is_object() || val.is_array())
+            {
+                // Nested structures start on the next line with deeper indentation
+                output_ << "\n";
+                serialize_yaml(val, indent, current_indent + indent);
+            }
+            else
+            {
+                output_ << " ";
+                serialize_scalar(val);
+                output_ << "\n";
+            }
+        }
+    }
+
+private:
+    const DataTree& tree_root_;  // Tree to serialize (not owned)
+    std::ostringstream output_;  // Collects the generated YAML text
+};
+
+namespace gul17 {
+
+// Build a DataTree from YAML text by running a YamlDataParser over \a data.
+DataTree from_yaml_string(const std::string_view& data)
+{
+    return YamlDataParser(data).parse();
+}
+
+// Render \a value as YAML text. YamlDataSerializer::serialize() throws
+// std::runtime_error if \a indent is zero.
+std::string to_yaml_string(const DataTree& value, size_t indent)
+{
+    return YamlDataSerializer(value).serialize(indent);
+}
+
+} // namespace gul17
+
+// vi:ts=4:sw=4:sts=4:et
diff --git a/src/meson.build b/src/meson.build
index 39dd8c3..dc97a0f 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -10,6 +10,9 @@ libgul_src = files([
'to_number.cc',
'Trigger.cc',
'trim.cc',
+ 'data_processors/json_processor.cc',
+ 'data_processors/xml_processor.cc',
+ 'data_processors/yaml_processor.cc',
])
inc += include_directories('.')
diff --git a/tests/data_processors/test_json_processor.cc b/tests/data_processors/test_json_processor.cc
new file mode 100644
index 0000000..8ef80e2
--- /dev/null
+++ b/tests/data_processors/test_json_processor.cc
@@ -0,0 +1,130 @@
+/**
+ * \file test_json_processor.cc
+ * \author Jan Behrens
+ * \date Created on November 19, 2025
+ * \brief Test suite for the JsonProcessor class.
+ *
+ * \copyright Copyright 2019-2025 Deutsches Elektronen-Synchrotron (DESY), Hamburg
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 2.1 of the license, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "gul17/data_processors.h"
+
+#include
+#include
+#include
+
+using gul17::DataTree;
+using gul17::from_json_string;
+using gul17::to_json_string;
+
+// Parses one document that contains every supported value type and checks the
+// type and the value of each member.
+TEST_CASE("JsonProcessor: JSON parsing", "[JsonProcessor]")
+{
+    auto tree = from_json_string(
+        R"({"key1": "value1", "key2": 42, "key3": [1, 2, 3], "key4": {"nestedKey": 3.1415}, "key5": null})");
+
+    REQUIRE(tree["key1"].is_string());
+    REQUIRE(tree["key1"].as<std::string>() == "value1");
+
+    REQUIRE(tree["key2"].is_number());
+    REQUIRE(tree["key2"].as<int>() == 42);
+
+    REQUIRE(tree["key3"].is_array());
+    REQUIRE(tree["key3"].size() == 3);
+    REQUIRE(tree["key3"][0].as<int>() == 1);
+    REQUIRE(tree["key3"][1].as<int>() == 2);
+    REQUIRE(tree["key3"][2].as<int>() == 3);
+
+    REQUIRE(tree["key4"].is_object());
+    REQUIRE(tree["key4"]["nestedKey"].is_double());
+    REQUIRE(tree["key4"]["nestedKey"].as<double>() == Catch::Approx(3.1415));
+
+    REQUIRE(tree["key5"].is_null());
+
+    // A key that is not part of the document
+    REQUIRE(tree.has_key("invalid") == false);
+    REQUIRE(tree["invalid"].is_empty());
+}
+
+// A /* ... */ comment inside the document must be ignored by the parser.
+TEST_CASE("JsonProcessor: JSON parsing with comments", "[JsonProcessor]")
+{
+    auto tree = from_json_string(
+R"({
+    /* ignored comment */
+    "key1": "value1",
+    "key2": 42
+})");
+
+    REQUIRE(tree["key1"].is_string());
+    REQUIRE(tree["key1"].as<std::string>() == "value1");
+
+    REQUIRE(tree["key2"].is_number());
+    REQUIRE(tree["key2"].as<int>() == 42);
+}
+
+// Escape sequences in JSON strings (\n, \t, \", \\ and \uXXXX) must be resolved.
+TEST_CASE("JsonProcessor: JSON parsing with escape sequences", "[JsonProcessor]")
+{
+    auto tree = from_json_string(
+R"({
+    "key1": "\nvalue1\t",
+    "key2": "\"value\\2\"",
+    "key3": "\u0032\u0034"
+})");
+
+    REQUIRE(tree["key1"].is_string());
+    REQUIRE(tree["key1"].as<std::string>() == "\nvalue1\t");
+
+    REQUIRE(tree["key2"].is_string());
+    REQUIRE(tree["key2"].as<std::string>() == "\"value\\2\"");
+
+    // U+0032 and U+0034 are the digits '2' and '4'
+    REQUIRE(tree["key3"].is_string());
+    REQUIRE(tree["key3"].as<std::string>() == "24");
+}
+
+// Every malformed document below must be rejected with an exception.
+TEST_CASE("JsonProcessor: JSON parsing with errors", "[JsonProcessor]")
+{
+    const auto trailing_comma_in_object = R"({"key1": "value1", "key2": 42, )";
+    const auto missing_comma = R"({"key1": "value1" "key2": 42})";
+    const auto trailing_comma_in_array = R"({"key1": "value1", "key2": [1, 2, })";
+    const auto missing_closing_brace = R"({"key1": "value1", "key2": 42)";
+
+    REQUIRE_THROWS(from_json_string(trailing_comma_in_object));
+    REQUIRE_THROWS(from_json_string(missing_comma));
+    REQUIRE_THROWS(from_json_string(trailing_comma_in_array));
+    REQUIRE_THROWS(from_json_string(missing_closing_brace));
+}
+
+// Serializes a tree with an indentation of two spaces and compares the result
+// with the expected text character by character.
+TEST_CASE("JsonProcessor: JSON serialization", "[JsonProcessor]")
+{
+    auto tree = DataTree::make_object();
+
+    tree["key1"] = "value1";
+    tree["key2"] = 42;
+    tree["key3"] = DataTree::Array{1, 2, 3};
+    tree["key4"] = DataTree::Object{{"nestedKey", nullptr}};
+    tree["key5"] = nullptr;
+
+    // The literal must use exactly two spaces per nesting level to match indent = 2
+    std::string expected_json =
+R"({
+  "key1": "value1",
+  "key2": 42,
+  "key3": [
+    1,
+    2,
+    3
+  ],
+  "key4": {
+    "nestedKey": null
+  },
+  "key5": null
+})";
+
+    auto json_str = to_json_string(tree, 2);
+    REQUIRE(json_str == expected_json);
+}
diff --git a/tests/data_processors/test_xml_processor.cc b/tests/data_processors/test_xml_processor.cc
new file mode 100644
index 0000000..76f5b9d
--- /dev/null
+++ b/tests/data_processors/test_xml_processor.cc
@@ -0,0 +1,225 @@
+/**
+ * \file test_xml_processor.cc
+ * \author Jan Behrens
+ * \date Created on November 19, 2025
+ * \brief Test suite for the XmlProcessor class.
+ *
+ * \copyright Copyright 2019-2025 Deutsches Elektronen-Synchrotron (DESY), Hamburg
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 2.1 of the license, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "gul17/data_processors.h"
+
+#include
+#include
+#include
+
+using gul17::DataTree;
+using gul17::from_xml_string;
+using gul17::to_xml_string;
+
+TEST_CASE("XmlProcessor: XML parsing", "[XmlProcessor]")
+{   // Checks string, number, array, object and null members of a parsed XML document.
+    auto tree = from_xml_string( // NOTE(review): the element tags of this literal appear to be missing -- restore them before relying on this test
+R"(
+ value1
+ 42
+ 1
+ 2
+ 3
+
+ 3.1415
+
+
+ )");
+
+    REQUIRE(tree["key1"].is_string());
+    REQUIRE(tree["key1"].as<std::string>() == "value1");
+
+    REQUIRE(tree["key2"].is_number());
+    REQUIRE(tree["key2"].as<int>() == 42);
+
+    REQUIRE(tree["key3"].is_array());
+    REQUIRE(tree["key3"].size() == 3);
+    REQUIRE(tree["key3"][0].as<int>() == 1);
+    REQUIRE(tree["key3"][1].as<int>() == 2);
+    REQUIRE(tree["key3"][2].as<int>() == 3);
+
+    REQUIRE(tree["key4"].is_object());
+    REQUIRE(tree["key4"]["nestedKey"].is_double());
+    REQUIRE(tree["key4"]["nestedKey"].as<double>() == Catch::Approx(3.1415));
+
+    REQUIRE(tree["key5"].is_null());
+
+    REQUIRE(tree.has_key("invalid") == false); // keep ahead of the operator[] access below, which may create the key -- TODO confirm
+    REQUIRE(tree["invalid"].is_empty());
+}
+
+TEST_CASE("XmlProcessor: XML parsing with attributes and comments", "[XmlProcessor]")
+{   // Attributes are looked up under "@name" keys and text content under "#text".
+    auto tree = from_xml_string( // NOTE(review): tags, attributes and comments of this literal appear to be missing -- restore them before relying on this test
+R"(
+ TEXT CONTENT 1
+
+ TEXT CONTENT 2
+ value1
+
+ TEXT CONTENT 3
+ )");
+
+    REQUIRE(tree["key1"].is_object());
+    REQUIRE(tree["key1"]["#text"].is_string());
+    REQUIRE(tree["key1"]["#text"].as<std::string>() == "value1");
+    REQUIRE(tree["key1"]["@attr1"].is_string());
+    REQUIRE(tree["key1"]["@attr1"].as<std::string>() == "k1a1");
+
+    REQUIRE(tree["key2"].is_object());
+    REQUIRE(tree["key2"]["#text"].is_empty());
+    REQUIRE(tree["key2"]["@attr1"].is_string());
+    REQUIRE(tree["key2"]["@attr1"].as<std::string>() == "k2a1");
+    REQUIRE(tree["key2"]["@attr2"].is_string());
+    REQUIRE(tree["key2"]["@attr2"].as<std::string>() == "k2a2");
+    REQUIRE(tree["key2"]["@attr3"].is_null());
+
+    REQUIRE(tree["#text"].is_array()); // the three text runs are expected as one array, in document order
+    REQUIRE(tree["#text"].size() == 3);
+    REQUIRE(tree["#text"][0].as<std::string>() == "TEXT CONTENT 1");
+    REQUIRE(tree["#text"][1].as<std::string>() == "TEXT CONTENT 2");
+    REQUIRE(tree["#text"][2].as<std::string>() == "TEXT CONTENT 3");
+}
+
+TEST_CASE("XmlProcessor: XML parsing with escape sequences", "[XmlProcessor]")
+{   // The parsed text must come back as the five literal characters > < & " '.
+    auto tree = from_xml_string( // NOTE(review): this literal shows bare characters and no tags; the entity-escaped markup appears to be missing -- restore it
+R"(
+
+ ><&"'
+
+)");
+
+    REQUIRE(tree["key1"].is_string());
+    REQUIRE(tree["key1"].as<std::string>() == "><&\"'");
+}
+
+TEST_CASE("XmlProcessor: XML parsing with errors", "[XmlProcessor]")
+{ // NOTE(review): all four inputs below are a single blank; the markup named in the trailing comments appears to be missing -- restore it
+ REQUIRE_THROWS(from_xml_string(R"( )")); // Missing closing tag
+ REQUIRE_THROWS(from_xml_string(R"( )")); // Mismatched closing tag
+ REQUIRE_THROWS(from_xml_string(R"( )")); // Malformed attribute
+ REQUIRE_THROWS(from_xml_string(R"( )")); // Duplicate attribute
+}
+
+TEST_CASE("XmlProcessor: XML serialization", "[XmlProcessor]")
+{ // Builds a tree with string, number, array, object and null members and compares its XML text verbatim.
+ auto tree = DataTree::make_object();
+
+ tree["key1"] = "value1";
+ tree["key2"] = 42;
+ tree["key3"] = DataTree::Array{1, 2, 3};
+ tree["key4"] = DataTree::Object{{"nestedKey", nullptr}};
+ tree["key5"] = nullptr;
+
+ std::string expected_xml = // NOTE(review): the literal below contains no element tags; its markup appears to be missing -- restore it
+R"(
+ value1
+ 42
+ 1
+ 2
+ 3
+
+
+
+
+
+)";
+
+ auto xml_str = to_xml_string(tree, 4); // second argument is presumably the indent width -- TODO confirm
+ REQUIRE(xml_str == expected_xml);
+}
+
+TEST_CASE("XmlProcessor: XML serialization with attributes", "[XmlProcessor]")
+{ // Uses "#text" and "@..." keys, presumably mapped to text content and attributes -- TODO confirm against the serializer.
+ auto tree = DataTree::make_object();
+
+ tree["key1"]["#text"] = "value1";
+ tree["key1"]["@attr1"] = "k1a1";
+ tree["key2"]["#text"] = nullptr;
+ tree["key2"]["@attr1"] = "k2a1";
+ tree["key2"]["@attr2"] = "k2a2";
+ tree["key2"]["@attr3"] = nullptr;
+ tree["#text"] = "TEXT CONTENT";
+
+ std::string expected_xml = // NOTE(review): the literal below contains no tags or attributes; its markup appears to be missing -- restore it
+R"(
+
+ value1
+
+
+
+ TEXT CONTENT
+
+)";
+
+ auto xml_str = to_xml_string(tree, 4); // second argument is presumably the indent width -- TODO confirm
+ REQUIRE(xml_str == expected_xml);
+}
+
+TEST_CASE("XmlProcessor: XML parsing of SVR.AUTH string", "[XmlProcessor]")
+{   // Spot-checks the first user/group IDs and the first PERM entry of a larger document.
+    auto tree = from_xml_string( // NOTE(review): the element tags of this literal appear to be missing -- restore them before relying on this test
+R"(
+
+ uid # set operator User ID
+ gid # set operator Group ID
+ uid # set expert User ID
+ gid # set expert Group ID
+ uid # set customer User ID
+ gid # set customer Group ID
+
+ uid # set user 0 User ID
+ uid # set user 1 User ID
+ uid # set user 2 User ID
+ uid # set user 3 User ID
+ uid # set user 4 User ID
+ uid # set user 5 User ID
+ uid # set user 6 User ID
+ uid # set user 7 User ID
+ uid # set user 8 User ID
+ uid # set user 9 User ID
+ uid # set user 10 User ID
+ uid # set user 11 User ID
+ uid # set user 12 User ID
+ uid # set user 13 User ID
+ uid # set user 14 User ID
+ uid # set user 15 User ID
+
+
+ mask
+ name mask
+ name mask
+
+
+)");
+
+    REQUIRE(tree["OPER"].as<std::string>() == "uid");
+    REQUIRE(tree["OPER_GROUP"].as<std::string>() == "gid");
+    // Other user/group IDs omitted for brevity
+
+    REQUIRE(tree["LIST"]["PERM"].is_array());
+    REQUIRE(tree["LIST"]["PERM"].size() == 2);
+    REQUIRE(tree["LIST"]["PERM"][0]["NAME"].as<std::string>() == "name");
+    REQUIRE(tree["LIST"]["PERM"][0]["MASK"].as<std::string>() == "mask");
+    // Second PERM omitted for brevity
+}
diff --git a/tests/data_processors/test_yaml_processor.cc b/tests/data_processors/test_yaml_processor.cc
new file mode 100644
index 0000000..39e2a17
--- /dev/null
+++ b/tests/data_processors/test_yaml_processor.cc
@@ -0,0 +1,134 @@
+/**
+ * \file test_yaml_processor.cc
+ * \author Jan Behrens
+ * \date Created on November 20, 2025
+ * \brief Test suite for the YamlProcessor class.
+ *
+ * \copyright Copyright 2019-2025 Deutsches Elektronen-Synchrotron (DESY), Hamburg
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 2.1 of the license, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "gul17/data_processors.h"
+
+#include
+#include
+#include
+
+using gul17::DataTree;
+using gul17::from_yaml_string;
+using gul17::to_yaml_string;
+
+TEST_CASE("YamlProcessor: YAML parsing", "[YamlProcessor]")
+{   // Checks string, number, sequence, nested mapping and null values of a parsed YAML document.
+    auto tree = from_yaml_string(
+R"(
+key1: value1
+key2: 42
+key3:
+ - 1
+ - 2
+ - 3
+key4:
+ nestedKey: 3.1415
+key5: null
+)");
+
+    REQUIRE(tree["key1"].is_string());
+    REQUIRE(tree["key1"].as<std::string>() == "value1");
+
+    REQUIRE(tree["key2"].is_number());
+    REQUIRE(tree["key2"].as<int>() == 42);
+
+    REQUIRE(tree["key3"].is_array());
+    REQUIRE(tree["key3"].size() == 3);
+    REQUIRE(tree["key3"][0].as<int>() == 1);
+    REQUIRE(tree["key3"][1].as<int>() == 2);
+    REQUIRE(tree["key3"][2].as<int>() == 3);
+
+    REQUIRE(tree["key4"].is_object());
+    REQUIRE(tree["key4"]["nestedKey"].is_double());
+    REQUIRE(tree["key4"]["nestedKey"].as<double>() == Catch::Approx(3.1415));
+
+    REQUIRE(tree["key5"].is_null());
+
+    REQUIRE(tree.has_key("invalid") == false); // keep ahead of the operator[] access below, which may create the key -- TODO confirm
+    REQUIRE(tree["invalid"].is_empty());
+}
+
+TEST_CASE("YamlProcessor: YAML parsing with comments", "[YamlProcessor]")
+{   // Covers a full-line '#' comment and one trailing a value; neither may leak into the parsed data.
+    auto tree = from_yaml_string(
+R"(
+# ignored comment
+key1: value1
+key2: 42 # another comment
+)");
+
+    REQUIRE(tree["key1"].is_string());
+    REQUIRE(tree["key1"].as<std::string>() == "value1");
+
+    REQUIRE(tree["key2"].is_number()); // still a number although a comment trails the value
+    REQUIRE(tree["key2"].as<int>() == 42);
+}
+
+TEST_CASE("YamlProcessor: YAML parsing with escape sequences", "[YamlProcessor]")
+{   // The raw literal keeps every backslash, so decoding the escapes is left to the YAML parser.
+    auto tree = from_yaml_string(
+R"(
+key1: "\nvalue1\t"
+key2: "\"value\\2\""
+key3: "\u0032\u0034"
+)");
+
+    REQUIRE(tree["key1"].is_string());
+    REQUIRE(tree["key1"].as<std::string>() == "\nvalue1\t");
+
+    REQUIRE(tree["key2"].is_string());
+    REQUIRE(tree["key2"].as<std::string>() == "\"value\\2\"");
+
+    REQUIRE(tree["key3"].is_string());
+    REQUIRE(tree["key3"].as<std::string>() == "24"); // U+0032 and U+0034 are the digits '2' and '4'
+}
+
+TEST_CASE("YamlProcessor: YAML parsing with errors", "[YamlProcessor]")
+{
+ // Placeholder without assertions: currently, the parser does not throw exceptions for malformed YAML. TODO: add REQUIRE_THROWS cases once it does.
+}
+
+TEST_CASE("YamlProcessor: YAML serialization", "[YamlProcessor]")
+{   // The serialized text is compared verbatim, so the literal's indentation is significant.
+    auto tree = DataTree::make_object();
+
+    tree["key1"] = "value1";
+    tree["key2"] = 42;
+    tree["key3"] = DataTree::Array{1, 2, 3};
+    tree["key4"] = DataTree::Object{{"nestedKey", 3.1415}};
+    tree["key5"] = nullptr;
+
+    std::string expected_yaml =
+R"(key1: value1
+key2: 42
+key3:
+  - 1
+  - 2
+  - 3
+key4:
+  nestedKey: 3.1415
+key5: null
+)";
+
+    auto yaml_str = to_yaml_string(tree, 2); // second argument is presumably the indent width -- TODO confirm
+    REQUIRE(yaml_str == expected_yaml);
+}
diff --git a/tests/meson.build b/tests/meson.build
index 70a8794..44549e1 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -44,6 +44,9 @@ tests = [
'test_Trigger.cc',
'test_trim.cc',
'test_type_name.cc',
+ 'data_processors/test_json_processor.cc',
+ 'data_processors/test_xml_processor.cc',
+ 'data_processors/test_yaml_processor.cc',
]
test('all',