From 405744eabf5187907a6c072150d7c764ae7255ad Mon Sep 17 00:00:00 2001 From: zykure <54305315+zykure@users.noreply.github.com> Date: Tue, 25 Nov 2025 16:17:53 +0100 Subject: [PATCH 01/12] Add JSON/XML/YAML parsers and serializers [WIP] FIXME: Compiles, but no symbols exported? --- include/gul17/DataTree.h | 323 +++++++++++ include/gul17/data_processors.h | 55 ++ include/gul17/gul.h | 1 + include/gul17/meson.build | 2 + src/data_processors/json_processor.cc | 473 +++++++++++++++++ src/data_processors/xml_processor.cc | 511 ++++++++++++++++++ src/data_processors/yaml_processor.cc | 530 +++++++++++++++++++ src/meson.build | 3 + tests/data_processors/test_json_processor.cc | 130 +++++ tests/data_processors/test_xml_processor.cc | 220 ++++++++ tests/data_processors/test_yaml_processor.cc | 134 +++++ tests/meson.build | 3 + 12 files changed, 2385 insertions(+) create mode 100644 include/gul17/DataTree.h create mode 100644 include/gul17/data_processors.h create mode 100644 src/data_processors/json_processor.cc create mode 100644 src/data_processors/xml_processor.cc create mode 100644 src/data_processors/yaml_processor.cc create mode 100644 tests/data_processors/test_json_processor.cc create mode 100644 tests/data_processors/test_xml_processor.cc create mode 100644 tests/data_processors/test_yaml_processor.cc diff --git a/include/gul17/DataTree.h b/include/gul17/DataTree.h new file mode 100644 index 0000000..7cf852f --- /dev/null +++ b/include/gul17/DataTree.h @@ -0,0 +1,323 @@ +/** + * \file DataTree.h + * \author Jan Behrens + * \date Created on 19 November 2025 + * \brief Declaration of the DataTree class. + * + * \copyright Copyright 2018-2025 Deutsches Elektronen-Synchrotron (DESY), Hamburg + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 2.1 of the license, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + */ + +#ifndef GUL17_DATA_TREE_H_ +#define GUL17_DATA_TREE_H_ + +#include +#include +#include +#include +#include + +namespace gul17 { + +class DataTree +{ +public: + using Object = std::unordered_map; + using Array = std::vector; + using Value = std::variant< + std::nullptr_t, // null + bool, // boolean + int, // integer + double, // float + std::string, // string + Array, // array + Object // object + >; + + // Constructors + DataTree() : value_(Object()) {} // default to an empty object + DataTree(std::nullptr_t) : value_(nullptr) {} + DataTree(bool b) : value_(b) {} + DataTree(int i) : value_(i) {} + DataTree(double d) : value_(d) {} + DataTree(const std::string& s) : value_(s) {} + DataTree(const char* s) : value_(std::string(s)) {} + DataTree(const Array& a) : value_(a) {} + DataTree(const Object& o) : value_(o) {} + + // Factory methods + static DataTree make_array() { return DataTree(Array{}); } + static DataTree make_object() { return DataTree(Object{}); } + + // Move constructors + DataTree(Array&& a) : value_(std::move(a)) {} + DataTree(Object&& o) : value_(std::move(o)) {} + DataTree(std::string&& s) : value_(std::move(s)) {} + + // Copy constructor + DataTree(const DataTree& other) = default; + + // Move constructor + DataTree(DataTree&& other) = default; + + // Assignment operators + DataTree& operator=(const DataTree& other) = default; + DataTree& operator=(DataTree&& other) = default; + + // Type checking + bool is_null() const { return std::holds_alternative(value_); } + bool is_boolean() const { return std::holds_alternative(value_); } + bool is_int() const { return 
std::holds_alternative(value_); } + bool is_double() const { return std::holds_alternative(value_); } + bool is_number() const { return is_int() || is_double(); } + bool is_string() const { return std::holds_alternative(value_); } + bool is_array() const { return std::holds_alternative(value_); } + bool is_object() const { return std::holds_alternative(value_); } + + bool is_empty() const + { + if (is_null()) return true; + if (is_string()) return std::get(value_).empty(); + if (is_array()) return std::get(value_).empty(); + if (is_object()) return std::get(value_).empty(); + return false; + } + + bool has_key(const std::string& key) const + { + if (!is_object()) + throw std::runtime_error("DataTree is not an object"); + const auto& obj = std::get(value_); + return obj.find(key) != obj.end(); + } + + size_t size() const + { + if (is_array()) + return std::get(value_).size(); + else if (is_object()) + return std::get(value_).size(); + else + throw std::runtime_error("DataTree is neither array nor object"); + } + + void push_back(const DataTree& val) + { + if (!is_array()) + throw std::runtime_error("DataTree is not an array"); + std::get(value_).push_back(val); + } + + void emplace_back(DataTree&& val) + { + if (!is_array()) + throw std::runtime_error("DataTree is not an array"); + std::get(value_).emplace_back(std::move(val)); + } + + void insert(const std::string& key, const DataTree& val) + { + if (!is_object()) + throw std::runtime_error("DataTree is not an object"); + std::get(value_)[key] = val; + } + + void insert(size_t index, const DataTree& val) + { + if (!is_array()) + throw std::runtime_error("DataTree is not an array"); + auto& arr = std::get(value_); + if (index > arr.size()) + throw std::out_of_range("Index out of range: " + std::to_string(index)); + arr.insert(arr.begin() + index, val); + } + + void clear() + { + if (is_array()) + std::get(value_).clear(); + else if (is_object()) + std::get(value_).clear(); + else + throw std::runtime_error("DataTree 
is neither array nor object"); + } + + // Iterator return types, only works for arrays + using iterator = DataTree*; + using const_iterator = const DataTree*; + + // Iterators + iterator begin() + { + if (!is_array()) + throw std::runtime_error("DataTree is not an array"); + auto& arr = std::get(value_); + return arr.data(); + } + + iterator end() + { + if (!is_array()) + throw std::runtime_error("DataTree is not an array"); + auto& arr = std::get(value_); + return arr.data() + arr.size(); + } + + const_iterator cbegin() const { return const_cast(this)->begin(); } + const_iterator cend() const { return const_cast(this)->end(); } + + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + + // Accessors with bounds checking + DataTree& at(const std::string& key) + { + if (!is_object()) + throw std::runtime_error("DataTree is not an object"); + const auto& obj = std::get(value_); + auto it = obj.find(key); + if (it == obj.end()) + throw std::out_of_range("Key not found in object: " + key); + return const_cast(it->second); + } + const DataTree& at(const std::string& key) const + { + return const_cast(this)->at(key); + } + DataTree& at(size_t index) + { + if (!is_array()) + throw std::runtime_error("DataTree is not an array"); + const auto& arr = std::get(value_); + if (index >= arr.size()) + throw std::out_of_range("Index out of range: " + std::to_string(index)); + return const_cast(arr[index]); + } + const DataTree& at(size_t index) const + { + return const_cast(this)->at(index); + } + + // Operator[] without bounds checking + DataTree& operator[](const std::string& key) + { + if (!is_object()) + throw std::runtime_error("DataTree is not an object"); + return std::get(value_)[key]; + } + const DataTree& operator[](const std::string& key) const + { + return (*const_cast(this))[key]; + } + DataTree& operator[](size_t index) + { + if (!is_array()) + throw std::runtime_error("DataTree is not an array"); + return 
std::get(value_)[index]; + } + const DataTree& operator[](size_t index) const + { + return (*const_cast(this))[index]; + } + + template + bool is() const + { + if constexpr (std::is_same_v) + { + return is_null(); + } + else if constexpr (std::is_same_v) + { + return is_boolean(); + } + else if constexpr (std::is_same_v) + { + return is_int(); + } + else if constexpr (std::is_same_v) + { + return is_double(); + } + else if constexpr (std::is_same_v) + { + return is_string(); + } + else if constexpr (std::is_same_v) + { + return is_array(); + } + else if constexpr (std::is_same_v) + { + return is_object(); + } + return false; + } + + // Conversion + template + T as() const + { + if constexpr (std::is_same_v) + { + if (is_null()) return std::get(value_); + } + else if constexpr (std::is_same_v) + { + if (is_boolean()) return std::get(value_); + } + else if constexpr (std::is_same_v) + { + if (is_int()) return std::get(value_); + if (is_double()) return static_cast(std::get(value_)); + if (is_boolean()) return static_cast(std::get(value_)); + } + else if constexpr (std::is_same_v) + { + if (is_double()) return std::get(value_); + if (is_int()) return static_cast(std::get(value_)); + } + else if constexpr (std::is_same_v) + { + if (is_string()) return std::get(value_); + if (is_int()) return std::to_string(std::get(value_)); + if (is_double()) return std::to_string(std::get(value_)); + if (is_boolean()) return std::get(value_) ? 
"true" : "false"; + if (is_null()) return "null"; + // Add conversion logic for other types to string if needed + } + else if constexpr (std::is_same_v) + { + if (is_array()) return std::get(value_); + } + else if constexpr (std::is_same_v) + { + if (is_object()) return std::get(value_); + } + + throw std::bad_variant_access(); + } + + // Get underlying value + Value& get_value() { return value_; } + const Value& get_value() const { return value_; } + +private: + Value value_; +}; + +} // namespace gul17 + +#endif // GUL17_DATA_TREE_H_ diff --git a/include/gul17/data_processors.h b/include/gul17/data_processors.h new file mode 100644 index 0000000..911c689 --- /dev/null +++ b/include/gul17/data_processors.h @@ -0,0 +1,55 @@ +/** + * \file YamlDataProcessor.h + * \author Jan Behrens + * \date Created on 20 November 2025 + * \brief Declaration of the YamlDataProcessor class. + * + * \copyright Copyright 2018-2025 Deutsches Elektronen-Synchrotron (DESY), Hamburg + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 2.1 of the license, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. 
If not, see + +namespace gul17 { + +GUL_EXPORT +DataTree from_json_string(const std::string& data); + +GUL_EXPORT +std::string to_json_string(const DataTree& value, size_t indent = 0); + +GUL_EXPORT +DataTree from_xml_string(const std::string& data); + +GUL_EXPORT +std::string to_xml_string(const DataTree& value, size_t indent = 0, const std::string& root_tag_name = "root"); + +GUL_EXPORT +DataTree from_yaml_string(const std::string& data); + +GUL_EXPORT +std::string to_yaml_string(const DataTree& value, size_t indent = 0); + +} // namespace gul17 + +#endif // GUL17_DATA_PROCESSORS_H_ diff --git a/include/gul17/gul.h b/include/gul17/gul.h index e8e2b58..19f90eb 100644 --- a/include/gul17/gul.h +++ b/include/gul17/gul.h @@ -41,6 +41,7 @@ #include "gul17/cat.h" // #include "gul17/catch.h" not included because it is only useful for unit tests // #include "gul17/date.h" not included by default to reduce compile times +#include "gul17/data_processors.h" #include "gul17/escape.h" #include "gul17/expected.h" #include "gul17/finalizer.h" diff --git a/include/gul17/meson.build b/include/gul17/meson.build index db7f45e..10f4594 100644 --- a/include/gul17/meson.build +++ b/include/gul17/meson.build @@ -4,6 +4,8 @@ standalone_headers = [ 'case_ascii.h', 'cat.h', 'date.h', + 'data_processors.h', + 'DataTree.h', 'escape.h', 'expected.h', 'finalizer.h', diff --git a/src/data_processors/json_processor.cc b/src/data_processors/json_processor.cc new file mode 100644 index 0000000..19c11a4 --- /dev/null +++ b/src/data_processors/json_processor.cc @@ -0,0 +1,473 @@ +#include "gul17/data_processors.h" + +#include +#include + +using gul17::DataTree; + +struct JsonDataProcessorParser +{ + JsonDataProcessorParser(const std::string_view& json_str) : data_(json_str) + {} + + DataTree parse() { return parse_value(); } + +private: + DataTree parse_value() + { + skip_comment(); + skip_whitespace(); + char c = current_char(); + + switch (c) { + case '{': return parse_object(); + case '[': 
return parse_array(); + case '"': return parse_string(); + case 't': case 'f': return parse_boolean(); + case 'n': return parse_null(); + case '-': case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return parse_number(); + default: + throw std::runtime_error("Unexpected character"); + } + } + + DataTree parse_object() + { + expect('{'); + DataTree::Object obj; + + skip_comment(); + skip_whitespace(); + if (current_char() == '}') + { + advance(); + return DataTree(obj); + } + + while (true) + { + skip_whitespace(); + std::string key = parse_string().as(); + + skip_whitespace(); + expect(':'); + + DataTree value = parse_value(); + obj.emplace(std::move(key), std::move(value)); + + skip_whitespace(); + if (current_char() == '}') + { + advance(); + break; + } + expect(','); + } + + return DataTree(obj); + } + + DataTree parse_array() + { + expect('['); + DataTree::Array arr; + + skip_comment(); + skip_whitespace(); + if (current_char() == ']') + { + advance(); + return DataTree(arr); + } + + while (true) + { + arr.push_back(parse_value()); + + skip_whitespace(); + if (current_char() == ']') + { + advance(); + break; + } + expect(','); + } + + return DataTree(arr); + } + + DataTree parse_string() + { + expect('"'); + std::string result; + + while (true) + { + char c = current_char(); + if (c == '"') + { + advance(); + break; + } + else if (c == '\\') + { + // TODO - Implement full JSON string unescaping + + advance(); + char esc = current_char(); + switch (esc) + { + case '"': result += '"'; break; + case '\\': result += '\\'; break; + case '/': result += '/'; break; + case 'a': result += '\a'; break; + case 'b': result += '\b'; break; + case 'f': result += '\f'; break; + case 'n': result += '\n'; break; + case 'r': result += '\r'; break; + case 't': result += '\t'; break; + case 'v': result += '\v'; break; + + case 'u': + // Unicode escape sequence (e.g., \uXXXX) + if (pos_ + 5 < data_.length()) + { + auto num = 
data_.substr(pos_ + 1, 4); + try { + unsigned ch = std::stoi(std::string(num), nullptr, 16); // \uXXXX carries four hexadecimal digits, not decimal ones + if (ch < 0x80) + { + result += static_cast(ch); + } + else if (ch < 0x800) + { + result += static_cast(0xC0 | (ch >> 6)); + result += static_cast(0x80 | (ch & 0x3F)); + } + else if (ch < 0x10000) + { + result += static_cast(0xE0 | (ch >> 12)); + result += static_cast(0x80 | ((ch >> 6) & 0x3F)); + result += static_cast(0x80 | (ch & 0x3F)); + } + else + { + result += static_cast(0xF0 | (ch >> 18)); + result += static_cast(0x80 | ((ch >> 12) & 0x3F)); + result += static_cast(0x80 | ((ch >> 6) & 0x3F)); + result += static_cast(0x80 | (ch & 0x3F)); + } + pos_ += 4; + } + catch (...) { + result += data_[pos_ + 1]; // Invalid number, treat as literal + pos_ += 1; + } + } + break; + + case 'U': + // Unicode escape sequence (e.g., \UXXXXXXXX) + // FIXME - Unicode escape sequences not implemented yet + throw std::runtime_error("Unicode escape sequences not supported"); + + default: + throw std::runtime_error("Invalid escape sequence"); + } + advance(); + } + else + { + if (!has_remaining_chars()) throw std::runtime_error("Unterminated string"); // current_char() yields '\0' past the end, so without this check the loop never terminates + result += c; advance(); + } + } + + return DataTree(result); + } + + DataTree parse_boolean() + { + if (data_.compare(pos_, 4, "true") == 0) + { + pos_ += 4; + return DataTree(true); + } + else if (data_.compare(pos_, 5, "false") == 0) + { + pos_ += 5; + return DataTree(false); + } + else + { + throw std::runtime_error("Invalid boolean value"); + } + } + + DataTree parse_null() + { + if (data_.compare(pos_, 4, "null") == 0) + { + pos_ += 4; + return DataTree(nullptr); + } + else + { + throw std::runtime_error("Invalid null value"); + } + } + + DataTree parse_number() + { + auto start_pos = pos_; + if (current_char() == '-') + { + advance(); + } + + while (std::isdigit(current_char())) + { + advance(); + } + + if (current_char() == '.') + { + advance(); + while (std::isdigit(current_char())) + { + advance(); + } + double value = std::stod(std::string(data_.substr(start_pos, pos_ - start_pos))); + return DataTree(value);
+ } + else + { + int value = std::stoi(std::string(data_.substr(start_pos, pos_ - start_pos))); + return DataTree(value); + } + } + + void skip_whitespace() + { + while (pos_ < data_.size() && std::isspace(data_[pos_])) + { + advance(); + } + } + + void skip_comment() + { + skip_whitespace(); + + if (current_char() == '/') + { + // Skip comments + if (next_char() == '/') + { + // Single-line comment + while (has_remaining_chars() && current_char() != '\n') + { + advance(); + } + } + else if (next_char() == '*') + { + // Multi-line comment + advance(2); + while (has_remaining_chars()) + { + if (current_char() == '*' && next_char() == '/') + { + advance(2); + break; + } + advance(); + } + } + else + { + throw std::runtime_error("Invalid comment syntax"); + } + } + } + + char current_char() const + { + return pos_ < data_.size() ? data_[pos_] : '\0'; + } + + char next_char() const + { + return pos_ + 1 < data_.size() ? data_[pos_+1] : '\0'; + } + + bool has_remaining_chars() const + { + return pos_ < data_.size(); + } + + void advance(size_t n = 1) + { + pos_ += n; + } + + void expect(char expected) + { + if (current_char() != expected) + { + //fprintf(stderr, "Expected '%c' but found '%c' at position %d\n", expected, current_char(), pos_); + throw std::runtime_error("Expected character not found"); + } + advance(); + } + +private: + std::string_view data_; + size_t pos_{0}; +}; + +struct JsonDataProcessorSerializer +{ + static std::string serialize( + const DataTree& value, size_t indent) + { + std::ostringstream oss; + serialize_value(oss, value, indent); + return oss.str(); + } + +private: + static void serialize_value( + std::ostringstream& oss, const DataTree& value, size_t indent, size_t current_indent = 0) + { + if (value.is_null()) + { + oss << "null"; + } + else if (value.is_boolean()) + { + oss << (value.as() ? 
"true" : "false"); + } + else if (value.is_int()) + { + oss << std::to_string(value.as()); + } + else if (value.is_double()) + { + oss << std::to_string(value.as()); + } + else if (value.is_string()) + { + oss << "\"" << escape_string(value.as()) << "\""; + } + else if (value.is_array()) + { + serialize_array(oss, value.as(), indent, current_indent); + } + else if (value.is_object()) + { + serialize_object(oss, value.as(), indent, current_indent); + } + } + + static void serialize_array( + std::ostringstream& oss, const DataTree::Array& arr, size_t indent, size_t current_indent) + { + oss << "["; + if (!arr.empty()) + { + oss << "\n"; + for (size_t i = 0; i < arr.size(); ++i) + { + oss << std::string(current_indent + indent, ' '); + serialize_value(oss, arr[i], indent, current_indent + indent); + + if (i < arr.size() - 1) + oss << ","; + oss << "\n"; + } + oss << std::string(current_indent, ' '); + } + oss << "]"; + } + + static void serialize_object( + std::ostringstream& oss, const DataTree::Object& obj, size_t indent, size_t current_indent) + { + oss << "{"; + if (!obj.empty()) + { + // Sort keys for consistent output + std::vector keys; + std::transform(obj.begin(), obj.end(), std::back_inserter(keys), + [](const auto& pair) { return pair.first; }); + std::sort(keys.begin(), keys.end()); + + oss << "\n"; + for (size_t i = 0; i < keys.size(); ++i) + { + const auto& key = keys[i]; + const auto& val = obj.at(key); + + oss << std::string(current_indent + indent, ' '); + oss << "\"" << escape_string(key) << "\": "; + serialize_value(oss, val, indent, current_indent + indent); + + if (i < keys.size() - 1) + oss << ","; + oss << "\n"; + } + oss << std::string(current_indent, ' '); + } + oss << "}"; + } + + static std::string escape_string(const std::string& str) + { + std::string result; + result.reserve(str.size() + 2); // Reserve space for efficiency + for (char c : str) + { + switch (c) + { + case '"': result += "\\\""; break; + case '\\': result += "\\\\"; break; 
+ case '\a': result += "\\a"; break; + case '\b': result += "\\b"; break; + case '\f': result += "\\f"; break; + case '\n': result += "\\n"; break; + case '\r': result += "\\r"; break; + case '\t': result += "\\t"; break; + case '\v': result += "\\v"; break; + + default: + // escape control characters + if (static_cast(c) < 0x20) + { + char buf[7]; + snprintf(buf, sizeof(buf), "\\u%04x", static_cast(c)); + result += buf; + } + else + { + result += c; + } + } + } + + return result; + } +}; + +DataTree gul17::from_json_string(const std::string& data) // qualified: defines the function declared GUL_EXPORT in namespace gul17 rather than a new, unrelated global function +{ + JsonDataProcessorParser parser(data); + return parser.parse(); +} + +std::string gul17::to_json_string(const DataTree& value, size_t indent) // qualified for the same reason as from_json_string +{ + return JsonDataProcessorSerializer::serialize(value, indent); +} diff --git a/src/data_processors/xml_processor.cc b/src/data_processors/xml_processor.cc new file mode 100644 index 0000000..c4e346b --- /dev/null +++ b/src/data_processors/xml_processor.cc @@ -0,0 +1,511 @@ +#include "gul17/data_processors.h" +#include "gul17/cat.h" + +#include +#include + +using gul17::DataTree; + +struct XmlDataProcessorParser +{ + XmlDataProcessorParser(const std::string_view& xml_str) : data_(xml_str) + {} + + DataTree parse() { return parse_xml_element().second; } + +private: + using KeyValuePair = std::pair; + using AttributesList = std::vector; + using ChildrenList = std::vector; + + KeyValuePair parse_xml_element() + { + // Parse content + DataTree result; + skip_whitespace(); + + expect('<'); + + if (current_char() == '!') + { + // Skip comments or DOCTYPE + while (has_remaining_chars() && !(current_char() == '>' )) + { + advance(); + } + expect('>'); + skip_whitespace(); + return parse_xml_element(); + } + + // Parse tag name + auto tag_name = std::string(parse_tag_name()); + if (root_name_.empty()) + { + root_name_ = tag_name; + } + + // Parse attributes + AttributesList attributes; + + while (has_remaining_chars() && current_char() != '>' && current_char() != '/') + { + skip_whitespace(); + +
// Parse attribute name + auto attr_name = parse_attribute_name(); + + skip_whitespace(); + expect('='); + skip_whitespace(); + + // Parse attribute value (assuming it's quoted) + auto attr_value = parse_attribute_value(); + + if (attr_value.empty()) + { + attributes.emplace_back(attr_name, DataTree(nullptr)); + } + else + { + attributes.emplace_back(attr_name, convert_string_to_value(attr_value)); + } + } + + // Parse children or text content + ChildrenList children; + std::string text_content; + + if (current_char() == '/') + { + // Self-closing tag + advance(); + expect('>'); + } + else + { + expect('>'); + + // Check for nested elements vs text content + while (has_remaining_chars() && !(current_char() == '<' && next_char() == '/')) + { + if (current_char() == '<') + { + // Nested element + children.push_back(parse_xml_element()); + } + else + { + // Text content + text_content += parse_text_content(); + } + skip_whitespace(); + } + + // Parse closing tag + expect('<'); + expect('/'); + auto closing_tag = parse_tag_name(); + expect('>'); + + if (closing_tag != tag_name) + { + throw std::runtime_error(gul17::cat("Mismatched tags: ", tag_name, " vs ", closing_tag)); + } + } + + // Determine how to represent this element - as object, array or simple value + if (!attributes.empty() || !children.empty()) + { + // Handle arrays for multiple same-tag children / attributes + std::unordered_map obj; + std::unordered_map> child_groups; + + for (const auto& [child_tag, child_value] : children) + { + // For simplicity, assume each child is an object with its tag name + child_groups[child_tag].push_back(child_value); + } + + for (auto& [child_tag, values] : child_groups) + { + if (values.size() == 1) + { + obj[child_tag] = values[0]; + } + else + { + obj[child_tag] = DataTree(values); + } + } + + for (const auto& [attr_name, attr_value] : attributes) + { + auto key = "@" + attr_name; + if (obj.find(key) != obj.end()) + { + throw std::runtime_error("Duplicate attribute name: 
" + attr_name); + } + obj[key] = attr_value; + } + + // Add text content if any + if (!text_content.empty()) + { + obj["#text"] = DataTree(text_content); + } + + return std::make_pair(tag_name, DataTree(obj)); + } + else if (!text_content.empty()) + { + // Simple element with text content + // Try to convert to appropriate type + return std::make_pair(tag_name, convert_string_to_value(text_content)); + } + else + { + // Empty element + return std::make_pair(tag_name, DataTree(nullptr)); + } + } + + std::string_view parse_attribute_name() + { + auto start_pos = pos_; + while (has_remaining_chars() && !std::isspace(current_char()) && + current_char() != '=' && current_char() != '>' && current_char() != '/') + { + ++pos_; + } + + return data_.substr(start_pos, pos_ - start_pos); + } + + std::string_view parse_attribute_value() + { + char quote_char = current_char(); + if (quote_char != '"' && quote_char != '\'') + { + throw std::runtime_error("Expected quote for attribute value"); + } + advance(); // skip opening quote + + auto start_pos = pos_; + while (has_remaining_chars() && current_char() != quote_char) + { + ++pos_; + } + auto value = data_.substr(start_pos, pos_ - start_pos); + expect(quote_char); // skip closing quote + + return value; + } + + std::string_view parse_tag_name() + { + auto start_pos = pos_; + while (has_remaining_chars() && !std::isspace(current_char()) && + current_char() != '>' && current_char() != '/') + { + ++pos_; + } + + return data_.substr(start_pos, pos_ - start_pos); + } + + std::string_view parse_text_content() + { + size_t start_pos = pos_; + while (has_remaining_chars() && current_char() != '<') + { + ++pos_; + } + auto text = data_.substr(start_pos, pos_ - start_pos); + + // Trim whitespace + auto first = text.find_first_not_of(" \t\n\r"); + auto last = text.find_last_not_of(" \t\n\r"); + + if (first == std::string::npos) + { + return ""; + } + + return text.substr(first, last - first + 1); + } + + DataTree 
convert_string_to_value(const std::string_view& str) + { + // Try to convert to int + try + { + size_t idx; + int int_val = std::stoi(std::string(str), &idx); + if (idx == str.size()) + { + return DataTree(int_val); + } + } + catch (...) {} + + // Try to convert to double + try + { + size_t idx; + double double_val = std::stod(std::string(str), &idx); + if (idx == str.size()) + { + return DataTree(double_val); + } + } + catch (...) {} + + // Otherwise, return as string + return DataTree(unescape_xml(str)); + } + + void skip_whitespace() + { + while (pos_ < data_.size() && std::isspace(data_[pos_])) + { + ++pos_; + } + } + + char current_char() const + { + return pos_ < data_.size() ? data_[pos_] : '\0'; + } + + char next_char() const + { + return pos_ + 1 < data_.size() ? data_[pos_ + 1] : '\0'; + } + + bool has_remaining_chars() const + { + return pos_ < data_.size(); + } + + void advance(size_t n = 1) + { + pos_ += n; + } + + void expect(char expected) + { + if (current_char() != expected) + { + //fprintf(stderr, "Expected '%c' but found '%c' at position %d\n", expected, current_char(), pos_); + throw std::runtime_error("Expected character not found"); + } + advance(); + } + + static std::string unescape_xml(const std::string_view& str) + { + std::string result; + + size_t i = 0; + while (i < str.length()) + { + if (str[i] == '&') + { + if (str.compare(i, 5, "&") == 0) + { + result += '&'; + i += 5; + } + else if (str.compare(i, 4, "<") == 0) + { + result += '<'; + i += 4; + } + else if (str.compare(i, 4, ">") == 0) + { + result += '>'; + i += 4; + } + else if (str.compare(i, 6, """) == 0) + { + result += '"'; + i += 6; + } + else if (str.compare(i, 6, "'") == 0) + { + result += '\''; + i += 6; + } + else + { + result += '&'; + ++i; + } + } + else + { + result += str[i]; + ++i; + } + } + + return result; + } + +private: + std::string_view data_; + size_t pos_{0}; + std::string root_name_; +}; + +struct XmlDataProcessorSerializer +{ + static std::string serialize( 
+ const DataTree& value, size_t indent, const std::string& root_tag_name) + { + std::ostringstream oss; + if (value.is_object()) + serialize_value(oss, value, root_tag_name, indent, 0); + else + throw std::runtime_error("Root value must be an object for XML serialization"); + return oss.str(); + } + +private: + static void serialize_value( + std::ostringstream& oss, const DataTree& value, const std::string& tag_name, size_t indent, size_t current_indent = 0) + { + std::string newline = indent > 0 ? "\n" : ""; // Add newlines if indenting + std::string indent_str = std::string(current_indent, ' '); + + std::string opening_tag = "<" + tag_name; + std::string closing_tag = "" + newline; + + if (value.is_null()) + { + oss << indent_str << opening_tag << "/>" << newline; + } + else if (value.is_boolean()) + { + oss << indent_str << opening_tag << ">" + << (value.as() ? "true" : "false") + << closing_tag; + } + else if (value.is_int()) + { + oss << indent_str << opening_tag << ">" + << std::to_string(value.as()) + << closing_tag; + } + else if (value.is_double()) + { + oss << indent_str << opening_tag << ">" + << std::to_string(value.as()) + << closing_tag; + } + else if (value.is_string()) + { + oss << indent_str << opening_tag << ">" + << escape_xml(value.as()) + << closing_tag; + } + else if (value.is_array()) + { + const auto& array = value.as(); + + for (const auto& item : array) + { + serialize_value(oss, item, tag_name, indent, current_indent); + } + } + else if (value.is_object()) + { + const auto& obj = value.as(); + + // Sort keys for consistent output + std::vector keys; + std::transform(obj.begin(), obj.end(), std::back_inserter(keys), + [](const auto& pair) { return pair.first; }); + std::sort(keys.begin(), keys.end()); + + // Opening tag with attributes + oss << indent_str << opening_tag; + for (size_t i = 0; i < keys.size(); ++i) + { + const auto& key = keys[i]; + const auto& val = obj.at(key); + + if (key.rfind("@", 0) == 0) + { + // Attribute + 
std::string attr_name = key.substr(1); // Strip '@' + oss << " " << attr_name << "=\""; + if (val.is_null()) + { + oss << "\""; + } + else + { + oss << escape_xml(val.as()) << "\""; + } + } + } + oss << ">"; + oss << newline; + + // Child elements and text content + for (size_t i = 0; i < keys.size(); ++i) + { + const auto& key = keys[i]; + const auto& val = obj.at(key); + + // Skip already handled attributes and text content handled later + if (key.rfind("@", 0) == 0 || key == "#text") + continue; + + serialize_value(oss, val, key, indent, current_indent + indent); + } + + auto it = obj.find("#text"); + if (it != obj.end() && it->second.is_string()) + { + // Text content + std::string next_indent_str = std::string(current_indent + indent, ' '); + oss << next_indent_str << escape_xml(it->second.as()) << newline; + } + + oss << indent_str; + oss << closing_tag; + } + } + + static std::string escape_xml(const std::string_view& str) // replaces the five XML special characters with their predefined entities + { + std::string result; + for (char c : str) + { + switch (c) + { + case '&': result += "&amp;"; break; + case '<': result += "&lt;"; break; + case '>': result += "&gt;"; break; + case '"': result += "&quot;"; break; + case '\'': result += "&apos;"; break; + default: result += c; break; + } + } + return result; + } +}; + +DataTree gul17::from_xml_string(const std::string& data) // qualified: defines the function declared GUL_EXPORT in namespace gul17 rather than a new, unrelated global function +{ + XmlDataProcessorParser parser(data); + return parser.parse(); +} + +std::string gul17::to_xml_string(const DataTree& value, size_t indent, const std::string& root_tag_name) // qualified for the same reason as from_xml_string +{ + return XmlDataProcessorSerializer::serialize(value, indent, root_tag_name); +} diff --git a/src/data_processors/yaml_processor.cc b/src/data_processors/yaml_processor.cc new file mode 100644 index 0000000..bef7eb0 --- /dev/null +++ b/src/data_processors/yaml_processor.cc @@ -0,0 +1,530 @@ +#include "gul17/data_processors.h" +#include "gul17/join_split.h" + +#include +#include + +using gul17::DataTree; + +struct YamlDataProcessorParser +{ + YamlDataProcessorParser(const std::string_view& yaml_str) : data_(yaml_str) +
{} + + DataTree parse() { return parse_document(); } + +private: + DataTree parse_document() + { + // Split into lines and reset state + lines_.clear(); + current_line_ = 0; + +#if 1 + for (const auto & line : gul17::split_sv(data_, "\n")) + { + // Remove comments and skip empty lines + auto stripped = strip_comment(line); + if (!trim(stripped).empty()) + { + lines_.emplace_back(stripped); + } + } +#else + std::istringstream stream(data_); + std::string line; + + // TODO: use string_view for efficiency + while (std::getline(stream, line)) + { + // Remove comments and skip empty lines + line = strip_comment(line); + if (!trim(line).empty()) + { + lines_.push_back(line); + } + } +#endif + if (lines_.empty()) + { + return DataTree(nullptr); + } + + return parse_node(); + } + + DataTree parse_node(size_t current_indent = 0) + { + if (current_line_ >= lines_.size()) + { + return DataTree(nullptr); + } + + auto line = lines_[current_line_]; + auto line_indent = get_indentation(line); + auto content = trim(line.substr(line_indent)); + + // Check if we're at the wrong indentation level + if (line_indent < current_indent) + { + return DataTree(nullptr); // Signal to go back + } + + // Determine node type + if (is_sequence_item(content)) + { + return parse_sequence(current_indent); + } + else if (is_mapping_item(content)) + { + return parse_mapping(current_indent); + } + else + { + // Simple scalar value + current_line_++; + return parse_scalar(content); + } + } + + DataTree parse_sequence(size_t current_indent) + { + DataTree::Array sequence; + + while (current_line_ < lines_.size()) + { + auto line = lines_[current_line_]; + auto line_indent = get_indentation(line); + + if (line_indent < current_indent) + break; // End of this sequence + + auto content = trim(line.substr(line_indent)); + + if (is_sequence_item(content)) // Starts with '-' + { + // Remove the sequence marker and parse the value + auto item_content = trim(content.substr(1)); // Remove '-' + current_line_++; 
+ + // Check if this is a complex item (object or nested sequence) + if (current_line_ < lines_.size()) + { + auto next_indent = get_indentation(lines_[current_line_]); + if (next_indent > line_indent) + { + // Nested structure + sequence.push_back(parse_node(next_indent)); + } + else + { + // Simple scalar + sequence.push_back(parse_scalar(item_content)); + } + } + else + { + sequence.push_back(parse_scalar(item_content)); + } + } + else + { + break; // Not a sequence item anymore + } + } + + return DataTree(sequence); + } + + DataTree parse_mapping(size_t current_indent) + { + DataTree::Object mapping; + + while (current_line_ < lines_.size()) + { + auto line = lines_[current_line_]; + auto line_indent = get_indentation(line); + + if (line_indent < current_indent) + break; // End of this mapping + + auto content = trim(line.substr(line_indent)); + + if (is_mapping_item(content)) // Contains ':' + { + // Parse key-value pair + auto colon_pos = content.find(':'); + auto key = trim(content.substr(0, colon_pos)); + auto value_str = trim(content.substr(colon_pos + 1)); + + current_line_++; + + DataTree value; + + if (value_str.empty()) + { + // Value might be on next lines (complex value) + if (current_line_ < lines_.size()) + { + auto next_indent = get_indentation(lines_[current_line_]); + if (next_indent > line_indent) + { + value = parse_node(next_indent); + } + else + { + value = DataTree(nullptr); // null for empty value + } + } + else + { + value = DataTree(nullptr); // null for empty value + } + } + else + { + // Simple scalar value + value = parse_scalar(value_str); + } + + mapping[std::string(key)] = value; + } + else + { + break; // Not a mapping item + } + } + + return DataTree(mapping); + } + + DataTree parse_scalar(const std::string_view& value) + { + auto trimmed = trim(value); + + // Check for null + if (trimmed == "null" || trimmed == "~" || trimmed.empty()) + { + return DataTree(nullptr); + } + + // Check for boolean + if (trimmed == "true") return 
DataTree(true); + if (trimmed == "false") return DataTree(false); + + // Check for number (integer) + if (trimmed[0] == '-' || std::isdigit(trimmed[0])) + { + try + { + if (trimmed.find('.') == std::string::npos) + { + size_t pos; + auto int_val = std::stoi(std::string(trimmed), &pos); + if (pos == trimmed.length()) // Entire string was converted + { + return DataTree(int_val); + } + } + } + catch (...) + { + // Not an integer, try float + } + + // Check for number (float) + try + { + size_t pos; + auto double_val = std::stod(std::string(trimmed), &pos); + if (pos == trimmed.length()) // Entire string was converted + { + return DataTree(double_val); + } + } + catch (...) + { + // Not a number + } + } + + // Remove quotes if present and unescape + if ((trimmed.front() == '"' && trimmed.back() == '"') || + (trimmed.front() == '\'' && trimmed.back() == '\'')) + { + auto unquoted = trimmed.substr(1, trimmed.length() - 2); + return DataTree(unescape_yaml_string(unquoted)); + } + + // Default to string + return DataTree(std::string(trimmed)); + } + + size_t get_indentation(const std::string_view& line) + { + size_t i = 0; + while (i < line.length() && (line[i] == ' ' || line[i] == '\t')) + { + i++; + } + return i; + } + + std::string_view strip_comment(const std::string_view& line) + { + auto comment_pos = line.find('#'); + if (comment_pos != std::string::npos) + { + return line.substr(0, comment_pos); + } + return line; + } + + std::string_view trim(const std::string_view& str) + { + auto start = str.find_first_not_of(" \t\n\r"); + if (start == std::string::npos) + return ""; + + auto end = str.find_last_not_of(" \t\n\r"); + return str.substr(start, end - start + 1); + } + + bool is_sequence_item(const std::string_view& line) + { + auto trimmed = trim(line); + return !trimmed.empty() && trimmed[0] == '-'; + } + + bool is_mapping_item(const std::string_view& line) + { + return line.find(':') != std::string::npos; + } + + std::string unescape_yaml_string(const 
std::string_view& str) + { + // TODO - Implement full YAML string unescaping + + std::string result; + for (size_t i = 0; i < str.length(); ++i) + { + if (str[i] == '\\' && i + 1 < str.length()) + { + switch (str[i + 1]) + { + case '"': result += '\"'; break; + case '\'': result += '\''; break; + case '\\': result += '\\'; break; + case '/': result += '/'; break; + case 'a': result += '\a'; break; + case 'b': result += '\b'; break; + case 'f': result += '\f'; break; + case 'n': result += '\n'; break; + case 'r': result += '\r'; break; + case 't': result += '\t'; break; + case 'v': result += '\v'; break; + case ' ': result += ' '; break; + + // YAML-specific escapes + case '_': result += "\xC2\xA0"; break; // U+00A0 + case 'N': result += "\xC2\x85"; break; // U+0085 + case 'L': result += "\xE2\x80\xA8"; break; // U+2028 + case 'P': result += "\xE2\x80\xA9"; break; // U+2029 + + // Hexcode and Unicode escapes + case 'x': + if (i + 3 < str.length()) + { + auto hex = str.substr(i + 2, 2); + try { + auto ch = std::stoi(std::string(hex), nullptr, 16); + result += static_cast(ch); + i += 2; + } + catch (...) { + result += str[i + 1]; // Invalid hex, treat as literal + } + } + break; + + case 'u': + // Unicode escape sequence (e.g., \uXXXX) + if (i + 5 < str.length()) + { + auto num = str.substr(i + 4, 4); + try { + auto ch = std::stoi(std::string(num), nullptr); + if (ch < 0x80) + { + result += static_cast(ch); + } + else if (ch < 0x800) + { + result += static_cast(0xC0 | (ch >> 6)); + result += static_cast(0x80 | (ch & 0x3F)); + } + else if (ch < 0x10000) + { + result += static_cast(0xE0 | (ch >> 12)); + result += static_cast(0x80 | ((ch >> 6) & 0x3F)); + result += static_cast(0x80 | (ch & 0x3F)); + } + else + { + result += static_cast(0xF0 | (ch >> 18)); + result += static_cast(0x80 | ((ch >> 12) & 0x3F)); + result += static_cast(0x80 | ((ch >> 6) & 0x3F)); + result += static_cast(0x80 | (ch & 0x3F)); + } + i += 4; + } + catch (...) 
{ + result += str[i + 1]; // Invalid number, treat as literal + } + } + break; + + case 'U': + // FIXME - Unicode escape sequences not implemented yet + throw std::runtime_error("Unicode escape sequences not supported"); + + default: + // Unknown escape - treat as literal character + result += str[i + 1]; + } + ++i; // Skip next character after escape `\` + } + else + { + result += str[i]; + } + } + return result; + } + +private: + std::string_view data_; + std::vector lines_; + size_t current_line_{0}; +}; + +struct YamlDataProcessorSerializer +{ + static std::string serialize(const DataTree& value, size_t indent) + { + std::ostringstream oss; + serialize_yaml(oss, value, indent); + return oss.str(); + } + +private: + static void serialize_yaml( + std::ostringstream& oss, const DataTree& value, size_t indent, size_t current_indent = 0) + { + if (value.is_object()) + { + serialize_mapping(oss, value.as(), indent, current_indent); + } + else if (value.is_array()) + { + serialize_sequence(oss, value.as(), indent, current_indent); + } + else { + serialize_scalar(oss, value); + } + } + + static void serialize_scalar( + std::ostringstream& oss, const DataTree& value) + { + if (value.is_null()) + { + oss << "null"; + } + else if (value.is_boolean()) + { + oss << (value.as() ? 
"true" : "false"); + } + else if (value.is_int()) + { + oss << value.as(); + } + else if (value.is_double()) + { + oss << value.as(); + } + else if (value.is_string()) + { + std::string str = value.as(); + // Quote strings if they contain special characters + if (str.empty() || str.find_first_of(":#{}[]&*!|>\"'%") != std::string::npos) + { + oss << "\"" << str << "\""; + } + else + { + oss << str; + } + } + } + + static void serialize_sequence( + std::ostringstream& oss, const DataTree::Array& arr, size_t indent, size_t current_indent) + { + for (const auto& item : arr) + { + oss << std::string(current_indent, ' ') << "- "; + if (item.is_object() || item.is_array()) + { + oss << "\n"; + serialize_yaml(oss, item, indent, current_indent + indent); + } + else + { + serialize_scalar(oss, item); + oss << "\n"; + } + } + } + + static void serialize_mapping( + std::ostringstream& oss, const DataTree::Object& obj, size_t indent, size_t current_indent) + { + // Sort keys for consistent output + std::vector keys; + std::transform(obj.begin(), obj.end(), std::back_inserter(keys), + [](const auto& pair) { return pair.first; }); + std::sort(keys.begin(), keys.end()); + + for (size_t i = 0; i < keys.size(); ++i) + { + const auto& key = keys[i]; + const auto& val = obj.at(key); + + oss << std::string(current_indent, ' ') << key << ":"; + + if (val.is_object() || val.is_array()) + { + oss << "\n"; + serialize_yaml(oss, val, indent, current_indent + indent); + } + else + { + oss << " "; + serialize_scalar(oss, val); + oss << "\n"; + } + } + } +}; + +DataTree from_yaml_string(const std::string& data) +{ + YamlDataProcessorParser parser(data); + return parser.parse(); +} + +std::string to_yaml_string(const DataTree& value, size_t indent) +{ + return YamlDataProcessorSerializer::serialize(value, indent); +} diff --git a/src/meson.build b/src/meson.build index 39dd8c3..dc97a0f 100644 --- a/src/meson.build +++ b/src/meson.build @@ -10,6 +10,9 @@ libgul_src = files([ 'to_number.cc', 
'Trigger.cc', 'trim.cc', + 'data_processors/json_processor.cc', + 'data_processors/xml_processor.cc', + 'data_processors/yaml_processor.cc', ]) inc += include_directories('.') diff --git a/tests/data_processors/test_json_processor.cc b/tests/data_processors/test_json_processor.cc new file mode 100644 index 0000000..d0a9c7d --- /dev/null +++ b/tests/data_processors/test_json_processor.cc @@ -0,0 +1,130 @@ +/** + * \file test_JsonDataProcessor.cc + * \author Jan Behrens + * \date Created on November 19, 2025 + * \brief Test suite for the JsonDataProcessor class. + * + * \copyright Copyright 2019-2025 Deutsches Elektronen-Synchrotron (DESY), Hamburg + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 2.1 of the license, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . 
+ */ + +#include "gul17/data_processors.h" + +#include +#include +#include + +using gul17::DataTree; +using gul17::from_json_string; +using gul17::to_json_string; + +TEST_CASE("JsonDataProcessor: JSON parsing", "[JsonDataProcessor]") +{ + auto tree = from_json_string( + R"({"key1": "value1", "key2": 42, "key3": [1, 2, 3], "key4": {"nestedKey": 3.1415}, "key5": null})"); + + REQUIRE(tree["key1"].is_string()); + REQUIRE(tree["key1"].as() == "value1"); + + REQUIRE(tree["key2"].is_number()); + REQUIRE(tree["key2"].as() == 42); + + REQUIRE(tree["key3"].is_array()); + REQUIRE(tree["key3"].size() == 3); + REQUIRE(tree["key3"][0].as() == 1); + REQUIRE(tree["key3"][1].as() == 2); + REQUIRE(tree["key3"][2].as() == 3); + + REQUIRE(tree["key4"].is_object()); + REQUIRE(tree["key4"]["nestedKey"].is_double()); + REQUIRE(tree["key4"]["nestedKey"].as() == Catch::Approx(3.1415)); + + REQUIRE(tree["key5"].is_null()); + + REQUIRE(tree.has_key("invalid") == false); + REQUIRE(tree["invalid"].is_empty()); +} + +TEST_CASE("JsonDataProcessor: JSON parsing with comments", "[JsonDataProcessor]") +{ + auto tree = from_json_string( +R"({ + /* ignored comment */ + "key1": "value1", + "key2": 42 +})"); + + REQUIRE(tree["key1"].is_string()); + REQUIRE(tree["key1"].as() == "value1"); + + REQUIRE(tree["key2"].is_number()); + REQUIRE(tree["key2"].as() == 42); +} + +TEST_CASE("JsonDataProcessor: JSON parsing with escape sequences", "[JsonDataProcessor]") +{ + auto tree = from_json_string( +R"({ + "key1": "\nvalue1\t", + "key2": "\"value\\2\"", + "key3": "\u0032\u0034" +})"); + + REQUIRE(tree["key1"].is_string()); + REQUIRE(tree["key1"].as() == "\nvalue1\t"); + + REQUIRE(tree["key2"].is_string()); + REQUIRE(tree["key2"].as() == "\"value\\2\""); + + REQUIRE(tree["key3"].is_string()); + REQUIRE(tree["key3"].as() == " \""); +} + +TEST_CASE("JsonDataProcessor: JSON parsing with errors", "[JsonDataProcessor]") +{ + REQUIRE_THROWS(from_json_string(R"({"key1": "value1", "key2": 42, )")); // Trailing comma + 
REQUIRE_THROWS(from_json_string(R"({"key1": "value1" "key2": 42})")); // Missing comma + REQUIRE_THROWS(from_json_string(R"({"key1": "value1", "key2": [1, 2, })")); // Trailing comma in array + REQUIRE_THROWS(from_json_string(R"({"key1": "value1", "key2": 42)")); // Missing closing brace +} + +TEST_CASE("JsonDataProcessor: JSON serialization", "[JsonDataProcessor]") +{ + auto tree = DataTree::make_object(); + + tree["key1"] = "value1"; + tree["key2"] = 42; + tree["key3"] = DataTree::Array{1, 2, 3}; + tree["key4"] = DataTree::Object{{"nestedKey", nullptr}}; + tree["key5"] = nullptr; + + std::string expected_json = +R"({ + "key1": "value1", + "key2": 42, + "key3": [ + 1, + 2, + 3 + ], + "key4": { + "nestedKey": null + }, + "key5": null +})"; + + auto json_str = to_json_string(tree, 2); + REQUIRE(json_str == expected_json); +} diff --git a/tests/data_processors/test_xml_processor.cc b/tests/data_processors/test_xml_processor.cc new file mode 100644 index 0000000..3d96480 --- /dev/null +++ b/tests/data_processors/test_xml_processor.cc @@ -0,0 +1,220 @@ +/** + * \file test_XmlDataProcessor.cc + * \author Jan Behrens + * \date Created on November 19, 2025 + * \brief Test suite for the XmlDataProcessor class. + * + * \copyright Copyright 2019-2025 Deutsches Elektronen-Synchrotron (DESY), Hamburg + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 2.1 of the license, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . 
+ */ + +#include "gul17/data_processors.h" + +#include +#include +#include + +using gul17::DataTree; +using gul17::from_xml_string; +using gul17::to_xml_string; + +TEST_CASE("XmlDataProcessor: XML parsing", "[XmlDataProcessor]") +{ + auto tree = from_xml_string( +R"( + value1 + 42 + 1 + 2 + 3 + + 3.1415 + + +)"); + + REQUIRE(tree["key1"].is_string()); + REQUIRE(tree["key1"].as() == "value1"); + + REQUIRE(tree["key2"].is_number()); + REQUIRE(tree["key2"].as() == 42); + + REQUIRE(tree["key3"].is_array()); + REQUIRE(tree["key3"].size() == 3); + REQUIRE(tree["key3"][0].as() == 1); + REQUIRE(tree["key3"][1].as() == 2); + REQUIRE(tree["key3"][2].as() == 3); + + REQUIRE(tree["key4"].is_object()); + REQUIRE(tree["key4"]["nestedKey"].is_double()); + REQUIRE(tree["key4"]["nestedKey"].as() == Catch::Approx(3.1415)); + + REQUIRE(tree["key5"].is_null()); + + REQUIRE(tree.has_key("invalid") == false); + REQUIRE(tree["invalid"].is_empty()); +} + +TEST_CASE("XmlDataProcessor: XML parsing with attributes and comments", "[XmlDataProcessor]") +{ + auto tree = from_xml_string( +R"( + + value1 + + TEXT CONTENT +)"); + + REQUIRE(tree["key1"].is_object()); + REQUIRE(tree["key1"]["#text"].is_string()); + REQUIRE(tree["key1"]["#text"].as() == "value1"); + REQUIRE(tree["key1"]["@attr1"].is_string()); + REQUIRE(tree["key1"]["@attr1"].as() == "k1a1"); + + REQUIRE(tree["key2"].is_object()); + REQUIRE(tree["key2"]["#text"].is_empty()); + REQUIRE(tree["key2"]["@attr1"].is_string()); + REQUIRE(tree["key2"]["@attr1"].as() == "k2a1"); + REQUIRE(tree["key2"]["@attr2"].is_string()); + REQUIRE(tree["key2"]["@attr2"].as() == "k2a2"); + REQUIRE(tree["key2"]["@attr3"].is_null()); + + REQUIRE(tree["#text"].is_string()); + REQUIRE(tree["#text"].as() == "TEXT CONTENT"); +} + +TEST_CASE("XmlDataProcessor: XML parsing with escape sequences", "[XmlDataProcessor]") +{ + auto tree = from_xml_string( +R"( + + ><&"' + +)"); + + REQUIRE(tree["key1"].is_string()); + REQUIRE(tree["key1"].as() == "><&\"'"); +} + 
+TEST_CASE("XmlDataProcessor: XML parsing with errors", "[XmlDataProcessor]") +{ + REQUIRE_THROWS(from_xml_string(R"()")); // Missing closing tag + REQUIRE_THROWS(from_xml_string(R"()")); // Mismatched closing tag + REQUIRE_THROWS(from_xml_string(R"()")); // Malformed attribute + REQUIRE_THROWS(from_xml_string(R"()")); // Duplicate attribute +} + +TEST_CASE("XmlDataProcessor: XML serialization", "[XmlDataProcessor]") +{ + auto tree = DataTree::make_object(); + + tree["key1"] = "value1"; + tree["key2"] = 42; + tree["key3"] = DataTree::Array{1, 2, 3}; + tree["key4"] = DataTree::Object{{"nestedKey", nullptr}}; + tree["key5"] = nullptr; + + std::string expected_xml = +R"( + value1 + 42 + 1 + 2 + 3 + + + + + +)"; + + auto xml_str = to_xml_string(tree, 4); + REQUIRE(xml_str == expected_xml); +} + +TEST_CASE("XmlDataProcessor: XML serialization with attributes", "[XmlDataProcessor]") +{ + auto tree = DataTree::make_object(); + + tree["key1"]["#text"] = "value1"; + tree["key1"]["@attr1"] = "k1a1"; + tree["key2"]["#text"] = nullptr; + tree["key2"]["@attr1"] = "k2a1"; + tree["key2"]["@attr2"] = "k2a2"; + tree["key2"]["@attr3"] = nullptr; + tree["#text"] = "TEXT CONTENT"; + + std::string expected_xml = +R"( + + value1 + + + + TEXT CONTENT + +)"; + + auto xml_str = to_xml_string(tree, 4); + REQUIRE(xml_str == expected_xml); +} + +TEST_CASE("XmlDataProcessor: XML parsing of SVR.AUTH string", "[XmlDataProcessor]") +{ + auto tree = from_xml_string( +R"( + + uid # set operator User ID + gid # set operator Group ID + uid # set expert User ID + gid # set expert Group ID + uid # set customer User ID + gid # set customer Group ID + + uid # set user 0 User ID + uid # set user 1 User ID + uid # set user 2 User ID + uid # set user 3 User ID + uid # set user 4 User ID + uid # set user 5 User ID + uid # set user 6 User ID + uid # set user 7 User ID + uid # set user 8 User ID + uid # set user 9 User ID + uid # set user 10 User ID + uid # set user 11 User ID + uid # set user 12 User ID + uid 
# set user 13 User ID + uid # set user 14 User ID + uid # set user 15 User ID + + + mask + name mask + name mask + + +)"); + + REQUIRE(tree["OPER"].as() == "uid"); + REQUIRE(tree["OPER_GROUP"].as() == "gid"); + // Other user/group IDs omitted for brevity + + REQUIRE(tree["LIST"]["PERM"].is_array()); + REQUIRE(tree["LIST"]["PERM"].size() == 2); + REQUIRE(tree["LIST"]["PERM"][0]["NAME"].as() == "name"); + REQUIRE(tree["LIST"]["PERM"][0]["MASK"].as() == "mask"); + // Second PERM omitted for brevity +} diff --git a/tests/data_processors/test_yaml_processor.cc b/tests/data_processors/test_yaml_processor.cc new file mode 100644 index 0000000..5619d0b --- /dev/null +++ b/tests/data_processors/test_yaml_processor.cc @@ -0,0 +1,134 @@ +/** + * \file test_YamlDataProcessor.cc + * \author Jan Behrens + * \date Created on November 20, 2025 + * \brief Test suite for the YamlDataProcessor class. + * + * \copyright Copyright 2019-2025 Deutsches Elektronen-Synchrotron (DESY), Hamburg + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 2.1 of the license, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . 
+ */ + +#include "gul17/data_processors.h" + +#include +#include +#include + +using gul17::DataTree; +using gul17::from_yaml_string; +using gul17::to_yaml_string; + +TEST_CASE("YamlDataProcessor: YAML parsing", "[YamlDataProcessor]") +{ + auto tree = from_yaml_string( +R"( +key1: value1 +key2: 42 +key3: + - 1 + - 2 + - 3 +key4: + nestedKey: 3.1415 +key5: null +)"); + + REQUIRE(tree["key1"].is_string()); + REQUIRE(tree["key1"].as() == "value1"); + + REQUIRE(tree["key2"].is_number()); + REQUIRE(tree["key2"].as() == 42); + + REQUIRE(tree["key3"].is_array()); + REQUIRE(tree["key3"].size() == 3); + REQUIRE(tree["key3"][0].as() == 1); + REQUIRE(tree["key3"][1].as() == 2); + REQUIRE(tree["key3"][2].as() == 3); + + REQUIRE(tree["key4"].is_object()); + REQUIRE(tree["key4"]["nestedKey"].is_double()); + REQUIRE(tree["key4"]["nestedKey"].as() == Catch::Approx(3.1415)); + + REQUIRE(tree["key5"].is_null()); + + REQUIRE(tree.has_key("invalid") == false); + REQUIRE(tree["invalid"].is_empty()); +} + +TEST_CASE("YamlDataProcessor: YAML parsing with comments", "[YamlDataProcessor]") +{ + auto tree = from_yaml_string( +R"( +# ignored comment +key1: value1 +key2: 42 # another comment +)"); + + REQUIRE(tree["key1"].is_string()); + REQUIRE(tree["key1"].as() == "value1"); + + REQUIRE(tree["key2"].is_number()); + REQUIRE(tree["key2"].as() == 42); +} + +TEST_CASE("YamlDataProcessor: YAML parsing with escape sequences", "[YamlDataProcessor]") +{ + auto tree = from_yaml_string( +R"( +key1: "\nvalue1\t" +key2: "\"value\\2\"" +key3: "\u0032\u0034" +)"); + + REQUIRE(tree["key1"].is_string()); + REQUIRE(tree["key1"].as() == "\nvalue1\t"); + + REQUIRE(tree["key2"].is_string()); + REQUIRE(tree["key2"].as() == "\"value\\2\""); + + REQUIRE(tree["key3"].is_string()); + REQUIRE(tree["key3"].as() == " \""); +} + +TEST_CASE("YamlDataProcessor: YAML parsing with errors", "[YamlDataProcessor]") +{ + // Currently, the parser does not throw exceptions for malformed YAML. 
+} + +TEST_CASE("YamlDataProcessor: YAML serialization", "[YamlDataProcessor]") +{ + auto tree = DataTree::make_object(); + + tree["key1"] = "value1"; + tree["key2"] = 42; + tree["key3"] = DataTree::Array{1, 2, 3}; + tree["key4"] = DataTree::Object{{"nestedKey", 3.1415}}; + tree["key5"] = nullptr; + + std::string expected_yaml = +R"(key1: value1 +key2: 42 +key3: + - 1 + - 2 + - 3 +key4: + nestedKey: 3.1415 +key5: null +)"; + + auto yaml_str = to_yaml_string(tree, 2); + REQUIRE(yaml_str == expected_yaml); +} diff --git a/tests/meson.build b/tests/meson.build index 70a8794..44549e1 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -44,6 +44,9 @@ tests = [ 'test_Trigger.cc', 'test_trim.cc', 'test_type_name.cc', + 'data_processors/test_json_processor.cc', + 'data_processors/test_xml_processor.cc', + 'data_processors/test_yaml_processor.cc', ] test('all', From 427528033f3251bfaf5c45ff0af9927b6b79d41e Mon Sep 17 00:00:00 2001 From: zykure <54305315+zykure@users.noreply.github.com> Date: Wed, 26 Nov 2025 11:26:00 +0100 Subject: [PATCH 02/12] Add documentation --- include/gul17/DataTree.h | 343 ++++++++++++++++++- include/gul17/data_processors.h | 128 ++++++- src/data_processors/json_processor.cc | 43 ++- src/data_processors/xml_processor.cc | 49 ++- src/data_processors/yaml_processor.cc | 67 ++-- tests/data_processors/test_json_processor.cc | 4 + tests/data_processors/test_xml_processor.cc | 4 + tests/data_processors/test_yaml_processor.cc | 4 + 8 files changed, 580 insertions(+), 62 deletions(-) diff --git a/include/gul17/DataTree.h b/include/gul17/DataTree.h index 7cf852f..f65c646 100644 --- a/include/gul17/DataTree.h +++ b/include/gul17/DataTree.h @@ -31,11 +31,47 @@ namespace gul17 { +/** + * \addtogroup DataTree_h gul17/DataTree.h + * \brief A hierarchical data structure for representing various data types. + * @{ + */ + +/** + * A hierarchical data structure that can represent various data types including + * null, boolean, number, string, array, and object. 
+ * + * The DataTree class can be used to create, manipulate, and access data in a + * tree-like structure. It supports dynamic typing and can hold different types + * of data at each node. + * It can be used to represent data formats such as JSON, YAML, or XML. + * + * \code + * // Create a data tree object + * DataTree tree; + * + * tree["foo"] = "bar"; // String + * tree["answer"] = 42; // Number + * tree["is_valid"] = true; // Boolean + * tree["items"] = DataTree::Array{1, 2, 3}; // Array + * tree["config"] = DataTree::Object{ {"key1", "value1"}, {"key2", 42} }; // Object + * + * tree["items"].push_back(4); // Add an element to the array + * tree["config"]["key3"] = 3.14; // Add a key-value pair to the object + * \endcode + * + * \since GUL version x.y.z + */ class DataTree { public: + // Type definitions + + /// Type of an object (key-value pairs) using Object = std::unordered_map; + /// Type of an array (list of values) using Array = std::vector; + /// Underlying variant type to hold different data types using Value = std::variant< std::nullptr_t, // null bool, // boolean @@ -47,45 +83,167 @@ class DataTree >; // Constructors + + /** + * Create an empty DataTree object (default to an empty object). + */ DataTree() : value_(Object()) {} // default to an empty object + + /** + * Create a DataTree object holding a null value. + */ DataTree(std::nullptr_t) : value_(nullptr) {} + + /** + * Create a DataTree object holding a boolean value. + */ DataTree(bool b) : value_(b) {} + + /** + * Create a DataTree object holding an integer value. + */ DataTree(int i) : value_(i) {} + + /** + * Create a DataTree object holding a floating-point value. + */ DataTree(double d) : value_(d) {} + + /** + * Create a DataTree object holding a string value. + */ DataTree(const std::string& s) : value_(s) {} + + /** + * Create a DataTree object holding a C-style string value. 
+ */ DataTree(const char* s) : value_(std::string(s)) {} + + /** + * Create a DataTree object holding an array value. + */ DataTree(const Array& a) : value_(a) {} + + /** + * Create a DataTree object holding an object value. + */ DataTree(const Object& o) : value_(o) {} // Factory methods + + /** + * Create a DataTree object representing a null value. + */ + static DataTree make_null() { return DataTree(nullptr); } + + /** + * Create a DataTree object representing an empty array. + */ static DataTree make_array() { return DataTree(Array{}); } + + /** + * Create a DataTree object representing an empty object. + */ static DataTree make_object() { return DataTree(Object{}); } // Move constructors + + /** + * Create a DataTree object by moving an array into it. + */ DataTree(Array&& a) : value_(std::move(a)) {} + + /** + * Create a DataTree object by moving an object into it. + */ DataTree(Object&& o) : value_(std::move(o)) {} + + /** + * Create a DataTree object by moving a string into it. + */ DataTree(std::string&& s) : value_(std::move(s)) {} + /** + * Move another DataTree object into this one. + */ + DataTree(DataTree&& other) = default; + // Copy constructor - DataTree(const DataTree& other) = default; - // Move constructor - DataTree(DataTree&& other) = default; + /** + * Create a copy of another DataTree object. + */ + DataTree(const DataTree& other) = default; // Assignment operators + + /** + * Assign another DataTree object to this one. + */ DataTree& operator=(const DataTree& other) = default; + + /** + * Move-assign another DataTree object to this one. + */ DataTree& operator=(DataTree&& other) = default; // Type checking + + /** + * Check if the DataTree holds a null value. + * \return True if the DataTree is null, false otherwise. + */ bool is_null() const { return std::holds_alternative(value_); } + + /** + * Check if the DataTree holds a boolean value. + * \return True if the DataTree is boolean, false otherwise. 
+ */ bool is_boolean() const { return std::holds_alternative(value_); } + + /** + * Check if the DataTree holds an integer value. + * \return True if the DataTree is integer, false otherwise. + */ bool is_int() const { return std::holds_alternative(value_); } + + /** + * Check if the DataTree holds a floating-point value. + * \return True if the DataTree is floating-point, false otherwise. + */ bool is_double() const { return std::holds_alternative(value_); } + + /** + * Check if the DataTree holds a numeric value. + * \return True if the DataTree is numeric (integer or floating-point), false otherwise. + */ bool is_number() const { return is_int() || is_double(); } + + /** + * Check if the DataTree holds a string value. + * \return True if the DataTree is string, false otherwise. + */ bool is_string() const { return std::holds_alternative(value_); } + + /** + * Check if the DataTree holds an array value. + * \return True if the DataTree is array, false otherwise. + */ bool is_array() const { return std::holds_alternative(value_); } + + /** + * Check if the DataTree holds an object value. + * \return True if the DataTree is object, false otherwise. + */ bool is_object() const { return std::holds_alternative(value_); } + /** + * Check if the DataTree is empty. + * A DataTree is considered empty if it is null, or if it is a string, array, + * or object that contains no elements. + * + * \return True if the DataTree is empty, false otherwise. + */ bool is_empty() const { if (is_null()) return true; @@ -95,6 +253,12 @@ class DataTree return false; } + /** + * Check if the object contains the specified key. + * Throws a std::runtime_error if the DataTree is not an object. + * \param key The key to check for. + * \return True if the key exists, false otherwise. + */ bool has_key(const std::string& key) const { if (!is_object()) @@ -103,6 +267,11 @@ class DataTree return obj.find(key) != obj.end(); } + /** + * Get the size of the array or object. 
+ * Throws a std::runtime_error if the DataTree is neither an array nor an object. + * \return The number of elements in the array or object. + */ size_t size() const { if (is_array()) @@ -113,6 +282,11 @@ class DataTree throw std::runtime_error("DataTree is neither array nor object"); } + /** + * Add an element to the end of the array. + * Throws a std::runtime_error if the DataTree is not an array. + * \param val The value to add. + */ void push_back(const DataTree& val) { if (!is_array()) @@ -120,6 +294,11 @@ class DataTree std::get(value_).push_back(val); } + /** + * Add an element to the end of the array by moving it. + * Throws a std::runtime_error if the DataTree is not an array. + * \param val The value to add. + */ void emplace_back(DataTree&& val) { if (!is_array()) @@ -127,6 +306,12 @@ class DataTree std::get(value_).emplace_back(std::move(val)); } + /** + * Insert a key-value pair into the object. + * Throws a std::runtime_error if the DataTree is not an object. + * \param key The key to insert. + * \param val The value to insert. + */ void insert(const std::string& key, const DataTree& val) { if (!is_object()) @@ -134,6 +319,13 @@ class DataTree std::get(value_)[key] = val; } + /** + * Insert a value into the array at the specified index. + * Throws a std::runtime_error if the DataTree is not an array. + * Throws a std::out_of_range if the index is out of bounds. + * \param index The index at which to insert the value. + * \param val The value to insert. + */ void insert(size_t index, const DataTree& val) { if (!is_array()) @@ -144,6 +336,10 @@ class DataTree arr.insert(arr.begin() + index, val); } + /** + * Clear all elements from the array or object. + * Throws a std::runtime_error if the DataTree is neither an array nor an object. + */ void clear() { if (is_array()) @@ -159,6 +355,12 @@ class DataTree using const_iterator = const DataTree*; // Iterators + + /** + * Return an iterator to the beginning of the array. 
+ * Throws a std::runtime_error if the DataTree is not an array. + * \return An iterator to the first element. + */ iterator begin() { if (!is_array()) @@ -167,6 +369,11 @@ class DataTree return arr.data(); } + /** + * Return an iterator to the end of the array. + * Throws a std::runtime_error if the DataTree is not an array. + * \return An iterator to the last element. + */ iterator end() { if (!is_array()) @@ -175,13 +382,45 @@ class DataTree return arr.data() + arr.size(); } + /** + * Return a const iterator to the beginning of the array. + * Throws a std::runtime_error if the DataTree is not an array. + * \return A const iterator to the first element. + */ const_iterator cbegin() const { return const_cast(this)->begin(); } + + /** + * Return a const iterator to the end of the array. + * Throws a std::runtime_error if the DataTree is not an array. + * \return A const iterator to the last element. + */ const_iterator cend() const { return const_cast(this)->end(); } + /** + * Return a const iterator to the beginning of the array. + * Throws a std::runtime_error if the DataTree is not an array. + * \return A const iterator to the first element. + */ const_iterator begin() const { return cbegin(); } + + /** + * Return a const iterator to the end of the array. + * Throws a std::runtime_error if the DataTree is not an array. + * \return A const iterator to the last element. + */ const_iterator end() const { return cend(); } // Accessors with bounds checking + + /** + * Get a reference to the value associated with the specified key in the object, + * or the value at the specified index in the array. + * Throws a std::runtime_error if the DataTree is not an object/array. + * Throws a std::out_of_range if the key/index does not exist. + * \param key The key to look up in the object. + * \param index The index to look up in the array. + * \return A reference to the corresponding DataTree value. 
+ */ DataTree& at(const std::string& key) { if (!is_object()) @@ -192,10 +431,26 @@ class DataTree throw std::out_of_range("Key not found in object: " + key); return const_cast(it->second); } + + /** + * Get a const reference to the value associated with the specified key in the object. + * Throws a std::runtime_error if the DataTree is not an object. + * Throws a std::out_of_range if the key does not exist. + * \param key The key to look up in the object. + * \return A const reference to the corresponding DataTree value. + */ const DataTree& at(const std::string& key) const { return const_cast(this)->at(key); } + + /** + * Get a reference to the value at the specified index in the array. + * Throws a std::runtime_error if the DataTree is not an array. + * Throws a std::out_of_range if the index is out of bounds. + * \param index The index to look up in the array. + * \return A reference to the corresponding DataTree value. + */ DataTree& at(size_t index) { if (!is_array()) @@ -205,33 +460,80 @@ class DataTree throw std::out_of_range("Index out of range: " + std::to_string(index)); return const_cast(arr[index]); } + + /** + * Get a const reference to the value at the specified index in the array. + * Throws a std::runtime_error if the DataTree is not an array. + * Throws a std::out_of_range if the index is out of bounds. + * \param index The index to look up in the array. + * \return A const reference to the corresponding DataTree value. + */ const DataTree& at(size_t index) const { return const_cast(this)->at(index); } // Operator[] without bounds checking + + /** + * Get a reference to the value associated with the specified key in the object, + * or the value at the specified index in the array. + * Throws a std::runtime_error if the DataTree is not an object/array. + * \param key The key to look up in the object. + * \param index The index to look up in the array. + * \return A reference to the corresponding DataTree value. 
+ */ DataTree& operator[](const std::string& key) { if (!is_object()) throw std::runtime_error("DataTree is not an object"); return std::get(value_)[key]; } + + /** + * Get a const reference to the value associated with the specified key in the object, + * or the value at the specified index in the array. + * Throws a std::runtime_error if the DataTree is not an object/array. + * \param key The key to look up in the object. + * \param index The index to look up in the array. + * \return A const reference to the corresponding DataTree value. + */ const DataTree& operator[](const std::string& key) const { return (*const_cast(this))[key]; } + + /** + * Get a reference to the value at the specified index in the array. + * Throws a std::runtime_error if the DataTree is not an array. + * \param index The index to look up in the array. + * \return A reference to the corresponding DataTree value. + */ DataTree& operator[](size_t index) { if (!is_array()) throw std::runtime_error("DataTree is not an array"); return std::get(value_)[index]; } + + /** + * Get a const reference to the value at the specified index in the array. + * Throws a std::runtime_error if the DataTree is not an array. + * \param index The index to look up in the array. + * \return A reference to the corresponding DataTree value. + */ const DataTree& operator[](size_t index) const { return (*const_cast(this))[index]; } + // Type checking for template types + + /** + * Check if the DataTree holds a value of type T. + * \tparam T The type to check against. + * \return True if the DataTree holds a value of type T, false otherwise. + */ template bool is() const { @@ -267,6 +569,23 @@ class DataTree } // Conversion + + /** + * Convert the DataTree to the specified type T. + * Throws a std::bad_variant_access if the conversion is not possible. + * \tparam T The type to convert to. + * \return The converted value. 
+ * + * \code + * DataTree tree = 42; + * int value = tree.as(); // value == 42 + * \endcode + * + * Note that implicit conversions are supported for some types: + * - int to double + * - boolean to int + * - int/double/boolean/null to string + */ template T as() const { @@ -277,11 +596,11 @@ class DataTree else if constexpr (std::is_same_v) { if (is_boolean()) return std::get(value_); + if (is_int()) return std::get(value_) != 0; } else if constexpr (std::is_same_v) { if (is_int()) return std::get(value_); - if (is_double()) return static_cast(std::get(value_)); if (is_boolean()) return static_cast(std::get(value_)); } else if constexpr (std::is_same_v) @@ -296,7 +615,7 @@ class DataTree if (is_double()) return std::to_string(std::get(value_)); if (is_boolean()) return std::get(value_) ? "true" : "false"; if (is_null()) return "null"; - // Add conversion logic for other types to string if needed + // TODO: Add conversion logic for other types to string if needed } else if constexpr (std::is_same_v) { @@ -311,13 +630,27 @@ class DataTree } // Get underlying value + + /** + * Get a reference to the underlying value variant. + * \return A reference to the underlying Value variant. + */ Value& get_value() { return value_; } + + /** + * Get a const reference to the underlying value variant. + * \return A const reference to the underlying Value variant. + */ const Value& get_value() const { return value_; } private: Value value_; }; +/// @} + } // namespace gul17 #endif // GUL17_DATA_TREE_H_ + +// vi:ts=4:sw=4:sts=4:et diff --git a/include/gul17/data_processors.h b/include/gul17/data_processors.h index 911c689..585fe28 100644 --- a/include/gul17/data_processors.h +++ b/include/gul17/data_processors.h @@ -1,8 +1,8 @@ /** - * \file YamlDataProcessor.h + * \file data_processors.h * \author Jan Behrens * \date Created on 20 November 2025 - * \brief Declaration of the YamlDataProcessor class. + * \brief Declaration of the data processor utility functions. 
* * \copyright Copyright 2018-2025 Deutsches Elektronen-Synchrotron (DESY), Hamburg * @@ -26,30 +26,142 @@ #include "DataTree.h" #include "gul17/internal.h" -#include "gul17/traits.h" #include namespace gul17 { +/** + * \addtogroup data_processors_h gul17/data_processors.h + * \brief Various data processor utility functions. + * @{ + */ + +/** + * Parse a JSON string and return the corresponding DataTree representation. + * + * The function parses the input JSON string and constructs a DataTree object + * representing the hierarchical structure and data contained in the JSON. + * Throws a std::runtime_error if the input string is not valid JSON. + * + * \code + * auto a = from_json_string(R"({"foo": "bar"})"); // a == DataTree{"foo": "bar"} + * \endcode + * + * \param data The JSON string to be parsed. + * + * \see to_json_string() + * + * \since GUL version x.y.z + */ GUL_EXPORT -DataTree from_json_string(const std::string& data); +DataTree from_json_string(const std::string_view& data); +/** + * Serialize a DataTree object to a JSON string. + * + * The function serializes the given DataTree object into a JSON-formatted string. + * The optional \c indent parameter specifies the number of spaces to use for + * indentation in the output string (default is 0, meaning no pretty-printing). + * + * \code + * auto a = to_json_string(DataTree{"foo": "bar"}); // a == "{\"foo\": \"bar\"}" + * \endcode + * + * \param value The DataTree object to be serialized. + * + * \see from_json_string() + * + * \since GUL version x.y.z + */ GUL_EXPORT std::string to_json_string(const DataTree& value, size_t indent = 0); +/** + * Parse an XML string and return the corresponding DataTree representation. + * + * The function parses the input XML string and constructs a DataTree object + * representing the hierarchical structure and data contained in the XML. + * Throws a std::runtime_error if the input string is not valid XML. 
+ * + * \code + * auto a = from_xml_string(R"(bar)"); // a == DataTree{"foo": "bar"} + * \endcode + * + * \param data The XML string to be parsed. + * + * \see to_xml_string() + * + * \since GUL version x.y.z + */ GUL_EXPORT -DataTree from_xml_string(const std::string& data); +DataTree from_xml_string(const std::string_view& data); +/** + * Serialize a DataTree object to an XML string. + * + * The function serializes the given DataTree object into a XML-formatted string. + * The optional \c indent parameter specifies the number of spaces to use for + * indentation in the output string (default is 0, meaning no pretty-printing). + * + * \code + * auto a = to_xml_string(DataTree{"foo": "bar"}); // a == "bar" + * \endcode + * + * \param value The DataTree object to be serialized. + * + * \see from_xml_string() + * + * \since GUL version x.y.z + */ GUL_EXPORT -std::string to_xml_string(const DataTree& value, size_t indent = 0, const std::string& root_tag_name = "root"); +std::string to_xml_string(const DataTree& value, size_t indent = 0, + const std::string& root_tag_name = "root"); +/** + * Parse an YAML string and return the corresponding DataTree representation. + * + * The function parses the input YAML string and constructs a DataTree object + * representing the hierarchical structure and data contained in the YAML. + * Throws a std::runtime_error if the input string is not valid YAML. + * + * \code + * auto a = from_yaml_string(R"(foo: bar)"); // a == DataTree{"foo": "bar"} + * \endcode + * + * \param data The YAML string to be parsed. + * + * \see to_yaml_string() + * + * \since GUL version x.y.z + */ GUL_EXPORT -DataTree from_yaml_string(const std::string& data); +DataTree from_yaml_string(const std::string_view& data); +/** + * Serialize a DataTree object to a YAML string. + * + * The function serializes the given DataTree object into a YAML-formatted string. 
+ * The optional \c indent parameter specifies the number of spaces to use for + * indentation in the output string (default is 2). + * + * \code + * auto a = to_yaml_string(DataTree{"foo": "bar"}); // a == "foo: bar\n" + * \endcode + * + * \param value The DataTree object to be serialized. + * + * \see from_yaml_string() + * + * \since GUL version x.y.z + */ GUL_EXPORT -std::string to_yaml_string(const DataTree& value, size_t indent = 0); +std::string to_yaml_string(const DataTree& value, size_t indent = 2); + +/// @} } // namespace gul17 #endif // GUL17_DATA_PROCESSORS_H_ + +// vi:ts=4:sw=4:sts=4:et diff --git a/src/data_processors/json_processor.cc b/src/data_processors/json_processor.cc index 19c11a4..7571068 100644 --- a/src/data_processors/json_processor.cc +++ b/src/data_processors/json_processor.cc @@ -1,4 +1,26 @@ -#include "gul17/data_processors.h" +/** + * \file json_processor.cc + * \author Jan Behrens + * \date Created on 20 November 2025 + * \brief Implementation of the JSON data processor functions. + * + * \copyright Copyright 2018-2025 Deutsches Elektronen-Synchrotron (DESY), Hamburg + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 2.1 of the license, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . 
+ */ + + #include "gul17/data_processors.h" #include #include @@ -7,10 +29,14 @@ using gul17::DataTree; struct JsonDataProcessorParser { - JsonDataProcessorParser(const std::string_view& json_str) : data_(json_str) + JsonDataProcessorParser(const std::string_view& json_str) + : data_(json_str) {} - DataTree parse() { return parse_value(); } + DataTree parse() + { + return parse_value(); + } private: DataTree parse_value() @@ -19,15 +45,18 @@ struct JsonDataProcessorParser skip_whitespace(); char c = current_char(); - switch (c) { + switch (c) + { case '{': return parse_object(); case '[': return parse_array(); case '"': return parse_string(); case 't': case 'f': return parse_boolean(); case 'n': return parse_null(); - case '-': case '0': case '1': case '2': case '3': case '4': + case '-': + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return parse_number(); + default: throw std::runtime_error("Unexpected character"); } @@ -461,7 +490,7 @@ struct JsonDataProcessorSerializer } }; -DataTree from_json_string(const std::string& data) +DataTree from_json_string(const std::string_view& data) { JsonDataProcessorParser parser(data); return parser.parse(); @@ -471,3 +500,5 @@ std::string to_json_string(const DataTree& value, size_t indent) { return JsonDataProcessorSerializer::serialize(value, indent); } + +// vi:ts=4:sw=4:sts=4:et diff --git a/src/data_processors/xml_processor.cc b/src/data_processors/xml_processor.cc index c4e346b..1c39745 100644 --- a/src/data_processors/xml_processor.cc +++ b/src/data_processors/xml_processor.cc @@ -1,4 +1,26 @@ -#include "gul17/data_processors.h" +/** + * \file xml_processor.cc + * \author Jan Behrens + * \date Created on 20 November 2025 + * \brief Implementation of the XML data processor functions. 
+ * + * \copyright Copyright 2018-2025 Deutsches Elektronen-Synchrotron (DESY), Hamburg + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 2.1 of the license, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + */ + + #include "gul17/data_processors.h" #include "gul17/cat.h" #include @@ -8,10 +30,14 @@ using gul17::DataTree; struct XmlDataProcessorParser { - XmlDataProcessorParser(const std::string_view& xml_str) : data_(xml_str) + XmlDataProcessorParser(const std::string_view& xml_str) + : data_(xml_str) {} - DataTree parse() { return parse_xml_element().second; } + DataTree parse() + { + return parse_xml_element().second; + } private: using KeyValuePair = std::pair; @@ -41,9 +67,7 @@ struct XmlDataProcessorParser // Parse tag name auto tag_name = std::string(parse_tag_name()); if (root_name_.empty()) - { root_name_ = tag_name; - } // Parse attributes AttributesList attributes; @@ -127,6 +151,7 @@ struct XmlDataProcessorParser child_groups[child_tag].push_back(child_value); } + // Add grouped children to object for (auto& [child_tag, values] : child_groups) { if (values.size() == 1) @@ -139,6 +164,7 @@ struct XmlDataProcessorParser } } + // Add attributes for (const auto& [attr_name, attr_value] : attributes) { auto key = "@" + attr_name; @@ -186,9 +212,8 @@ struct XmlDataProcessorParser { char quote_char = current_char(); if (quote_char != '"' && quote_char != '\'') - { throw std::runtime_error("Expected quote for attribute value"); - } + 
advance(); // skip opening quote auto start_pos = pos_; @@ -228,9 +253,7 @@ struct XmlDataProcessorParser auto last = text.find_last_not_of(" \t\n\r"); if (first == std::string::npos) - { return ""; - } return text.substr(first, last - first + 1); } @@ -243,9 +266,7 @@ struct XmlDataProcessorParser size_t idx; int int_val = std::stoi(std::string(str), &idx); if (idx == str.size()) - { return DataTree(int_val); - } } catch (...) {} @@ -255,9 +276,7 @@ struct XmlDataProcessorParser size_t idx; double double_val = std::stod(std::string(str), &idx); if (idx == str.size()) - { return DataTree(double_val); - } } catch (...) {} @@ -499,7 +518,7 @@ struct XmlDataProcessorSerializer } }; -DataTree from_xml_string(const std::string& data) +DataTree from_xml_string(const std::string_view& data) { XmlDataProcessorParser parser(data); return parser.parse(); @@ -509,3 +528,5 @@ std::string to_xml_string(const DataTree& value, size_t indent, const std::strin { return XmlDataProcessorSerializer::serialize(value, indent, root_tag_name); } + +// vi:ts=4:sw=4:sts=4:et diff --git a/src/data_processors/yaml_processor.cc b/src/data_processors/yaml_processor.cc index bef7eb0..9ce3e7d 100644 --- a/src/data_processors/yaml_processor.cc +++ b/src/data_processors/yaml_processor.cc @@ -1,3 +1,25 @@ +/** + * \file yaml_processor.cc + * \author Jan Behrens + * \date Created on 20 November 2025 + * \brief Implementation of the YAML data processor functions. + * + * \copyright Copyright 2018-2025 Deutsches Elektronen-Synchrotron (DESY), Hamburg + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 2.1 of the license, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + */ + #include "gul17/data_processors.h" #include "gul17/join_split.h" @@ -8,10 +30,14 @@ using gul17::DataTree; struct YamlDataProcessorParser { - YamlDataProcessorParser(const std::string_view& yaml_str) : data_(yaml_str) + YamlDataProcessorParser(const std::string_view& yaml_str) + : data_(yaml_str) {} - DataTree parse() { return parse_document(); } + DataTree parse() + { + return parse_document(); + } private: DataTree parse_document() @@ -20,7 +46,6 @@ struct YamlDataProcessorParser lines_.clear(); current_line_ = 0; -#if 1 for (const auto & line : gul17::split_sv(data_, "\n")) { // Remove comments and skip empty lines @@ -30,25 +55,9 @@ struct YamlDataProcessorParser lines_.emplace_back(stripped); } } -#else - std::istringstream stream(data_); - std::string line; - // TODO: use string_view for efficiency - while (std::getline(stream, line)) - { - // Remove comments and skip empty lines - line = strip_comment(line); - if (!trim(line).empty()) - { - lines_.push_back(line); - } - } -#endif if (lines_.empty()) - { return DataTree(nullptr); - } return parse_node(); } @@ -56,9 +65,7 @@ struct YamlDataProcessorParser DataTree parse_node(size_t current_indent = 0) { if (current_line_ >= lines_.size()) - { return DataTree(nullptr); - } auto line = lines_[current_line_]; auto line_indent = get_indentation(line); @@ -66,9 +73,7 @@ struct YamlDataProcessorParser // Check if we're at the wrong indentation level if (line_indent < current_indent) - { return DataTree(nullptr); // Signal to go back - } // Determine node type if (is_sequence_item(content)) @@ -204,9 +209,7 @@ struct 
YamlDataProcessorParser // Check for null if (trimmed == "null" || trimmed == "~" || trimmed.empty()) - { return DataTree(nullptr); - } // Check for boolean if (trimmed == "true") return DataTree(true); @@ -267,6 +270,7 @@ struct YamlDataProcessorParser { i++; } + return i; } @@ -274,9 +278,8 @@ struct YamlDataProcessorParser { auto comment_pos = line.find('#'); if (comment_pos != std::string::npos) - { return line.substr(0, comment_pos); - } + return line; } @@ -399,6 +402,7 @@ struct YamlDataProcessorParser result += str[i]; } } + return result; } @@ -412,6 +416,8 @@ struct YamlDataProcessorSerializer { static std::string serialize(const DataTree& value, size_t indent) { + if (indent == 0) + throw std::runtime_error("Indentation must be greater than zero for YAML serialization"); std::ostringstream oss; serialize_yaml(oss, value, indent); return oss.str(); @@ -429,7 +435,8 @@ struct YamlDataProcessorSerializer { serialize_sequence(oss, value.as(), indent, current_indent); } - else { + else + { serialize_scalar(oss, value); } } @@ -518,7 +525,7 @@ struct YamlDataProcessorSerializer } }; -DataTree from_yaml_string(const std::string& data) +DataTree from_yaml_string(const std::string_view& data) { YamlDataProcessorParser parser(data); return parser.parse(); @@ -528,3 +535,5 @@ std::string to_yaml_string(const DataTree& value, size_t indent) { return YamlDataProcessorSerializer::serialize(value, indent); } + +// vi:ts=4:sw=4:sts=4:et diff --git a/tests/data_processors/test_json_processor.cc b/tests/data_processors/test_json_processor.cc index d0a9c7d..37cf46c 100644 --- a/tests/data_processors/test_json_processor.cc +++ b/tests/data_processors/test_json_processor.cc @@ -26,6 +26,8 @@ #include #include +#if 0 // Temporarily disable all tests to focus on test_func() + using gul17::DataTree; using gul17::from_json_string; using gul17::to_json_string; @@ -128,3 +130,5 @@ R"({ auto json_str = to_json_string(tree, 2); REQUIRE(json_str == expected_json); } + +#endif diff 
--git a/tests/data_processors/test_xml_processor.cc b/tests/data_processors/test_xml_processor.cc index 3d96480..9b9e592 100644 --- a/tests/data_processors/test_xml_processor.cc +++ b/tests/data_processors/test_xml_processor.cc @@ -26,6 +26,8 @@ #include #include +#if 0 // Temporarily disable all tests to focus on test_func() + using gul17::DataTree; using gul17::from_xml_string; using gul17::to_xml_string; @@ -218,3 +220,5 @@ R"( REQUIRE(tree["LIST"]["PERM"][0]["MASK"].as() == "mask"); // Second PERM omitted for brevity } + +#endif diff --git a/tests/data_processors/test_yaml_processor.cc b/tests/data_processors/test_yaml_processor.cc index 5619d0b..6d873a2 100644 --- a/tests/data_processors/test_yaml_processor.cc +++ b/tests/data_processors/test_yaml_processor.cc @@ -26,6 +26,8 @@ #include #include +#if 0 // Temporarily disable all tests to focus on test_func() + using gul17::DataTree; using gul17::from_yaml_string; using gul17::to_yaml_string; @@ -132,3 +134,5 @@ key5: null auto yaml_str = to_yaml_string(tree, 2); REQUIRE(yaml_str == expected_yaml); } + +#endif From 0a2c03c2235d16bbd69d029951023d87f67c3230 Mon Sep 17 00:00:00 2001 From: zykure <54305315+zykure@users.noreply.github.com> Date: Wed, 26 Nov 2025 15:28:49 +0100 Subject: [PATCH 03/12] Some cleanup --- src/data_processors/json_processor.cc | 90 +++++++++++--------- src/data_processors/xml_processor.cc | 71 ++++++++------- src/data_processors/yaml_processor.cc | 84 +++++++++--------- tests/data_processors/test_json_processor.cc | 4 - tests/data_processors/test_xml_processor.cc | 4 - tests/data_processors/test_yaml_processor.cc | 4 - 6 files changed, 135 insertions(+), 122 deletions(-) diff --git a/src/data_processors/json_processor.cc b/src/data_processors/json_processor.cc index 7571068..3957bcc 100644 --- a/src/data_processors/json_processor.cc +++ b/src/data_processors/json_processor.cc @@ -20,16 +20,16 @@ * along with this program. If not, see . 
*/ - #include "gul17/data_processors.h" +#include "gul17/data_processors.h" #include #include using gul17::DataTree; -struct JsonDataProcessorParser +struct JsonDataParser { - JsonDataProcessorParser(const std::string_view& json_str) + JsonDataParser(const std::string_view& json_str) : data_(json_str) {} @@ -357,75 +357,74 @@ struct JsonDataProcessorParser size_t pos_{0}; }; -struct JsonDataProcessorSerializer +struct JsonDataSerializer { - static std::string serialize( - const DataTree& value, size_t indent) + JsonDataSerializer(const DataTree& tree_root) + : tree_root_(tree_root) + {} + + std::string serialize(size_t indent) { - std::ostringstream oss; - serialize_value(oss, value, indent); - return oss.str(); + serialize_value(tree_root_, indent); + return output_.str(); } private: - static void serialize_value( - std::ostringstream& oss, const DataTree& value, size_t indent, size_t current_indent = 0) + void serialize_value(const DataTree& value, size_t indent, size_t current_indent = 0) { if (value.is_null()) { - oss << "null"; + output_ << "null"; } else if (value.is_boolean()) { - oss << (value.as() ? "true" : "false"); + output_ << (value.as() ? 
"true" : "false"); } else if (value.is_int()) { - oss << std::to_string(value.as()); + output_ << std::to_string(value.as()); } else if (value.is_double()) { - oss << std::to_string(value.as()); + output_ << std::to_string(value.as()); } else if (value.is_string()) { - oss << "\"" << escape_string(value.as()) << "\""; + output_ << "\"" << escape_string(value.as()) << "\""; } else if (value.is_array()) { - serialize_array(oss, value.as(), indent, current_indent); + serialize_array(value.as(), indent, current_indent); } else if (value.is_object()) { - serialize_object(oss, value.as(), indent, current_indent); + serialize_object(value.as(), indent, current_indent); } } - static void serialize_array( - std::ostringstream& oss, const DataTree::Array& arr, size_t indent, size_t current_indent) + void serialize_array(const DataTree::Array& arr, size_t indent, size_t current_indent) { - oss << "["; + output_ << "["; if (!arr.empty()) { - oss << "\n"; + output_ << "\n"; for (size_t i = 0; i < arr.size(); ++i) { - oss << std::string(current_indent + indent, ' '); - serialize_value(oss, arr[i], indent, current_indent + indent); + output_ << std::string(current_indent + indent, ' '); + serialize_value(arr[i], indent, current_indent + indent); if (i < arr.size() - 1) - oss << ","; - oss << "\n"; + output_ << ","; + output_ << "\n"; } - oss << std::string(current_indent, ' '); + output_ << std::string(current_indent, ' '); } - oss << "]"; + output_ << "]"; } - static void serialize_object( - std::ostringstream& oss, const DataTree::Object& obj, size_t indent, size_t current_indent) + void serialize_object(const DataTree::Object& obj, size_t indent, size_t current_indent) { - oss << "{"; + output_ << "{"; if (!obj.empty()) { // Sort keys for consistent output @@ -434,23 +433,23 @@ struct JsonDataProcessorSerializer [](const auto& pair) { return pair.first; }); std::sort(keys.begin(), keys.end()); - oss << "\n"; + output_ << "\n"; for (size_t i = 0; i < keys.size(); ++i) { const 
auto& key = keys[i]; const auto& val = obj.at(key); - oss << std::string(current_indent + indent, ' '); - oss << "\"" << escape_string(key) << "\": "; - serialize_value(oss, val, indent, current_indent + indent); + output_ << std::string(current_indent + indent, ' '); + output_ << "\"" << escape_string(key) << "\": "; + serialize_value(val, indent, current_indent + indent); if (i < keys.size() - 1) - oss << ","; - oss << "\n"; + output_ << ","; + output_ << "\n"; } - oss << std::string(current_indent, ' '); + output_ << std::string(current_indent, ' '); } - oss << "}"; + output_ << "}"; } static std::string escape_string(const std::string& str) @@ -488,17 +487,26 @@ struct JsonDataProcessorSerializer return result; } + +private: + const DataTree& tree_root_; + std::ostringstream output_; }; +namespace gul17 { + DataTree from_json_string(const std::string_view& data) { - JsonDataProcessorParser parser(data); + JsonDataParser parser(data); return parser.parse(); } std::string to_json_string(const DataTree& value, size_t indent) { - return JsonDataProcessorSerializer::serialize(value, indent); + JsonDataSerializer serializer(value); + return serializer.serialize(indent); } +} // namespace gul17 + // vi:ts=4:sw=4:sts=4:et diff --git a/src/data_processors/xml_processor.cc b/src/data_processors/xml_processor.cc index 1c39745..86c21c4 100644 --- a/src/data_processors/xml_processor.cc +++ b/src/data_processors/xml_processor.cc @@ -20,7 +20,7 @@ * along with this program. If not, see . 
*/ - #include "gul17/data_processors.h" +#include "gul17/data_processors.h" #include "gul17/cat.h" #include @@ -28,9 +28,9 @@ using gul17::DataTree; -struct XmlDataProcessorParser +struct XmlDataParser { - XmlDataProcessorParser(const std::string_view& xml_str) + XmlDataParser(const std::string_view& xml_str) : data_(xml_str) {} @@ -378,22 +378,22 @@ struct XmlDataProcessorParser std::string root_name_; }; -struct XmlDataProcessorSerializer +struct XmlDataSerializer { - static std::string serialize( - const DataTree& value, size_t indent, const std::string& root_tag_name) + XmlDataSerializer(const DataTree& tree_root) + : tree_root_(tree_root) + {} + + std::string serialize(size_t indent, const std::string& root_tag_name) { - std::ostringstream oss; - if (value.is_object()) - serialize_value(oss, value, root_tag_name, indent, 0); - else + if (!tree_root_.is_object()) throw std::runtime_error("Root value must be an object for XML serialization"); - return oss.str(); + serialize_value(tree_root_, root_tag_name, indent, 0); + return output_.str(); } private: - static void serialize_value( - std::ostringstream& oss, const DataTree& value, const std::string& tag_name, size_t indent, size_t current_indent = 0) + void serialize_value(const DataTree& value, const std::string& tag_name, size_t indent, size_t current_indent = 0) { std::string newline = indent > 0 ? "\n" : ""; // Add newlines if indenting std::string indent_str = std::string(current_indent, ' '); @@ -403,29 +403,29 @@ struct XmlDataProcessorSerializer if (value.is_null()) { - oss << indent_str << opening_tag << "/>" << newline; + output_ << indent_str << opening_tag << "/>" << newline; } else if (value.is_boolean()) { - oss << indent_str << opening_tag << ">" + output_ << indent_str << opening_tag << ">" << (value.as() ? 
"true" : "false") << closing_tag; } else if (value.is_int()) { - oss << indent_str << opening_tag << ">" + output_ << indent_str << opening_tag << ">" << std::to_string(value.as()) << closing_tag; } else if (value.is_double()) { - oss << indent_str << opening_tag << ">" + output_ << indent_str << opening_tag << ">" << std::to_string(value.as()) << closing_tag; } else if (value.is_string()) { - oss << indent_str << opening_tag << ">" + output_ << indent_str << opening_tag << ">" << escape_xml(value.as()) << closing_tag; } @@ -435,7 +435,7 @@ struct XmlDataProcessorSerializer for (const auto& item : array) { - serialize_value(oss, item, tag_name, indent, current_indent); + serialize_value(item, tag_name, indent, current_indent); } } else if (value.is_object()) @@ -449,7 +449,7 @@ struct XmlDataProcessorSerializer std::sort(keys.begin(), keys.end()); // Opening tag with attributes - oss << indent_str << opening_tag; + output_ << indent_str << opening_tag; for (size_t i = 0; i < keys.size(); ++i) { const auto& key = keys[i]; @@ -459,19 +459,19 @@ struct XmlDataProcessorSerializer { // Attribute std::string attr_name = key.substr(1); // Strip '@' - oss << " " << attr_name << "=\""; + output_ << " " << attr_name << "=\""; if (val.is_null()) { - oss << "\""; + output_ << "\""; } else { - oss << escape_xml(val.as()) << "\""; + output_ << escape_xml(val.as()) << "\""; } } } - oss << ">"; - oss << newline; + output_ << ">"; + output_ << newline; // Child elements and text content for (size_t i = 0; i < keys.size(); ++i) @@ -483,7 +483,7 @@ struct XmlDataProcessorSerializer if (key.rfind("@", 0) == 0 || key == "#text") continue; - serialize_value(oss, val, key, indent, current_indent + indent); + serialize_value(val, key, indent, current_indent + indent); } auto it = obj.find("#text");; @@ -491,11 +491,11 @@ struct XmlDataProcessorSerializer { // Text content std::string next_indent_str = std::string(current_indent + indent, ' '); - oss << next_indent_str << 
escape_xml(it->second.as()) << newline; + output_ << next_indent_str << escape_xml(it->second.as()) << newline; } - oss << indent_str; - oss << closing_tag; + output_ << indent_str; + output_ << closing_tag; } } @@ -516,17 +516,26 @@ struct XmlDataProcessorSerializer } return result; } + +private: + const DataTree& tree_root_; + std::ostringstream output_; }; +namespace gul17 { + DataTree from_xml_string(const std::string_view& data) { - XmlDataProcessorParser parser(data); + XmlDataParser parser(data); return parser.parse(); } std::string to_xml_string(const DataTree& value, size_t indent, const std::string& root_tag_name) { - return XmlDataProcessorSerializer::serialize(value, indent, root_tag_name); + XmlDataSerializer serializer(value); + return serializer.serialize(indent, root_tag_name); } +} // namespace gul17 + // vi:ts=4:sw=4:sts=4:et diff --git a/src/data_processors/yaml_processor.cc b/src/data_processors/yaml_processor.cc index 9ce3e7d..c07584f 100644 --- a/src/data_processors/yaml_processor.cc +++ b/src/data_processors/yaml_processor.cc @@ -28,9 +28,9 @@ using gul17::DataTree; -struct YamlDataProcessorParser +struct YamlDataParser { - YamlDataProcessorParser(const std::string_view& yaml_str) + YamlDataParser(const std::string_view& yaml_str) : data_(yaml_str) {} @@ -304,7 +304,7 @@ struct YamlDataProcessorParser return line.find(':') != std::string::npos; } - std::string unescape_yaml_string(const std::string_view& str) + static std::string unescape_yaml_string(const std::string_view& str) { // TODO - Implement full YAML string unescaping @@ -412,53 +412,54 @@ struct YamlDataProcessorParser size_t current_line_{0}; }; -struct YamlDataProcessorSerializer +struct YamlDataSerializer { - static std::string serialize(const DataTree& value, size_t indent) + YamlDataSerializer(const DataTree& tree_root) + : tree_root_(tree_root) + {} + + std::string serialize(size_t indent) { if (indent == 0) throw std::runtime_error("Indentation must be greater than zero for 
YAML serialization"); - std::ostringstream oss; - serialize_yaml(oss, value, indent); - return oss.str(); + serialize_yaml(tree_root_, indent); + return output_.str(); } private: - static void serialize_yaml( - std::ostringstream& oss, const DataTree& value, size_t indent, size_t current_indent = 0) + void serialize_yaml(const DataTree& value, size_t indent, size_t current_indent = 0) { if (value.is_object()) { - serialize_mapping(oss, value.as(), indent, current_indent); + serialize_mapping(value.as(), indent, current_indent); } else if (value.is_array()) { - serialize_sequence(oss, value.as(), indent, current_indent); + serialize_sequence(value.as(), indent, current_indent); } else { - serialize_scalar(oss, value); + serialize_scalar(value); } } - static void serialize_scalar( - std::ostringstream& oss, const DataTree& value) + void serialize_scalar(const DataTree& value) { if (value.is_null()) { - oss << "null"; + output_ << "null"; } else if (value.is_boolean()) { - oss << (value.as() ? "true" : "false"); + output_ << (value.as() ? 
"true" : "false"); } else if (value.is_int()) { - oss << value.as(); + output_ << value.as(); } else if (value.is_double()) { - oss << value.as(); + output_ << value.as(); } else if (value.is_string()) { @@ -466,36 +467,34 @@ struct YamlDataProcessorSerializer // Quote strings if they contain special characters if (str.empty() || str.find_first_of(":#{}[]&*!|>\"'%") != std::string::npos) { - oss << "\"" << str << "\""; + output_ << "\"" << str << "\""; } else { - oss << str; + output_ << str; } } } - static void serialize_sequence( - std::ostringstream& oss, const DataTree::Array& arr, size_t indent, size_t current_indent) + void serialize_sequence(const DataTree::Array& arr, size_t indent, size_t current_indent) { for (const auto& item : arr) { - oss << std::string(current_indent, ' ') << "- "; + output_ << std::string(current_indent, ' ') << "- "; if (item.is_object() || item.is_array()) { - oss << "\n"; - serialize_yaml(oss, item, indent, current_indent + indent); + output_ << "\n"; + serialize_yaml(item, indent, current_indent + indent); } else { - serialize_scalar(oss, item); - oss << "\n"; + serialize_scalar(item); + output_ << "\n"; } } } - static void serialize_mapping( - std::ostringstream& oss, const DataTree::Object& obj, size_t indent, size_t current_indent) + void serialize_mapping(const DataTree::Object& obj, size_t indent, size_t current_indent) { // Sort keys for consistent output std::vector keys; @@ -508,32 +507,41 @@ struct YamlDataProcessorSerializer const auto& key = keys[i]; const auto& val = obj.at(key); - oss << std::string(current_indent, ' ') << key << ":"; + output_ << std::string(current_indent, ' ') << key << ":"; if (val.is_object() || val.is_array()) { - oss << "\n"; - serialize_yaml(oss, val, indent, current_indent + indent); + output_ << "\n"; + serialize_yaml(val, indent, current_indent + indent); } else { - oss << " "; - serialize_scalar(oss, val); - oss << "\n"; + output_ << " "; + serialize_scalar(val); + output_ << "\n"; } } } 
+ +private: + const DataTree& tree_root_; + std::ostringstream output_; }; +namespace gul17 { + DataTree from_yaml_string(const std::string_view& data) { - YamlDataProcessorParser parser(data); + YamlDataParser parser(data); return parser.parse(); } std::string to_yaml_string(const DataTree& value, size_t indent) { - return YamlDataProcessorSerializer::serialize(value, indent); + YamlDataSerializer serializer(value); + return serializer.serialize(indent); } +} // namespace gul17 + // vi:ts=4:sw=4:sts=4:et diff --git a/tests/data_processors/test_json_processor.cc b/tests/data_processors/test_json_processor.cc index 37cf46c..d0a9c7d 100644 --- a/tests/data_processors/test_json_processor.cc +++ b/tests/data_processors/test_json_processor.cc @@ -26,8 +26,6 @@ #include #include -#if 0 // Temporarily disable all tests to focus on test_func() - using gul17::DataTree; using gul17::from_json_string; using gul17::to_json_string; @@ -130,5 +128,3 @@ R"({ auto json_str = to_json_string(tree, 2); REQUIRE(json_str == expected_json); } - -#endif diff --git a/tests/data_processors/test_xml_processor.cc b/tests/data_processors/test_xml_processor.cc index 9b9e592..3d96480 100644 --- a/tests/data_processors/test_xml_processor.cc +++ b/tests/data_processors/test_xml_processor.cc @@ -26,8 +26,6 @@ #include #include -#if 0 // Temporarily disable all tests to focus on test_func() - using gul17::DataTree; using gul17::from_xml_string; using gul17::to_xml_string; @@ -220,5 +218,3 @@ R"( REQUIRE(tree["LIST"]["PERM"][0]["MASK"].as() == "mask"); // Second PERM omitted for brevity } - -#endif diff --git a/tests/data_processors/test_yaml_processor.cc b/tests/data_processors/test_yaml_processor.cc index 6d873a2..5619d0b 100644 --- a/tests/data_processors/test_yaml_processor.cc +++ b/tests/data_processors/test_yaml_processor.cc @@ -26,8 +26,6 @@ #include #include -#if 0 // Temporarily disable all tests to focus on test_func() - using gul17::DataTree; using gul17::from_yaml_string; using 
gul17::to_yaml_string; @@ -134,5 +132,3 @@ key5: null auto yaml_str = to_yaml_string(tree, 2); REQUIRE(yaml_str == expected_yaml); } - -#endif From fa313692e09367099a2dccb24c00f83ab02a16c4 Mon Sep 17 00:00:00 2001 From: zykure <54305315+zykure@users.noreply.github.com> Date: Wed, 26 Nov 2025 18:07:31 +0100 Subject: [PATCH 04/12] Properly parse XML text content --- src/data_processors/json_processor.cc | 14 ++-- src/data_processors/xml_processor.cc | 72 +++++++++++++++------ tests/data_processors/test_xml_processor.cc | 11 +++- 3 files changed, 68 insertions(+), 29 deletions(-) diff --git a/src/data_processors/json_processor.cc b/src/data_processors/json_processor.cc index 3957bcc..30a5cd2 100644 --- a/src/data_processors/json_processor.cc +++ b/src/data_processors/json_processor.cc @@ -21,6 +21,7 @@ */ #include "gul17/data_processors.h" +#include "gul17/cat.h" #include #include @@ -58,7 +59,7 @@ struct JsonDataParser return parse_number(); default: - throw std::runtime_error("Unexpected character"); + throw std::runtime_error(gul17::cat("Unexpected character: ", c, " at position ", pos_)); } } @@ -203,7 +204,7 @@ struct JsonDataParser throw std::runtime_error("Unicode escape sequences not supported"); default: - throw std::runtime_error("Invalid escape sequence"); + throw std::runtime_error(gul17::cat("Invalid escape sequence: ", esc, " at position ", pos_)); } advance(); } @@ -231,7 +232,7 @@ struct JsonDataParser } else { - throw std::runtime_error("Invalid boolean value"); + throw std::runtime_error(gul17::cat("Invalid boolean value at position ", pos_)); } } @@ -244,7 +245,7 @@ struct JsonDataParser } else { - throw std::runtime_error("Invalid null value"); + throw std::runtime_error(gul17::cat("Invalid null value at position ", pos_)); } } @@ -317,7 +318,7 @@ struct JsonDataParser } else { - throw std::runtime_error("Invalid comment syntax"); + throw std::runtime_error(gul17::cat("Invalid comment syntax at position ", pos_)); } } } @@ -346,8 +347,7 @@ struct 
JsonDataParser { if (current_char() != expected) { - //fprintf(stderr, "Expected '%c' but found '%c' at position %d\n", expected, current_char(), pos_); - throw std::runtime_error("Expected character not found"); + throw std::runtime_error(gul17::cat("Expected character not found: ", expected, " at position ", pos_)); } advance(); } diff --git a/src/data_processors/xml_processor.cc b/src/data_processors/xml_processor.cc index 86c21c4..236ae82 100644 --- a/src/data_processors/xml_processor.cc +++ b/src/data_processors/xml_processor.cc @@ -25,6 +25,8 @@ #include #include +#include +#include using gul17::DataTree; @@ -48,21 +50,12 @@ struct XmlDataParser { // Parse content DataTree result; - skip_whitespace(); + std::vector text_content; + skip_whitespace(); expect('<'); - if (current_char() == '!') - { - // Skip comments or DOCTYPE - while (has_remaining_chars() && !(current_char() == '>' )) - { - advance(); - } - expect('>'); - skip_whitespace(); - return parse_xml_element(); - } + strip_comment(); // Parse tag name auto tag_name = std::string(parse_tag_name()); @@ -98,7 +91,6 @@ struct XmlDataParser // Parse children or text content ChildrenList children; - std::string text_content; if (current_char() == '/') { @@ -113,6 +105,7 @@ struct XmlDataParser // Check for nested elements vs text content while (has_remaining_chars() && !(current_char() == '<' && next_char() == '/')) { + strip_comment(); if (current_char() == '<') { // Nested element @@ -121,7 +114,7 @@ struct XmlDataParser else { // Text content - text_content += parse_text_content(); + text_content.push_back(parse_text_content()); } skip_whitespace(); } @@ -170,7 +163,7 @@ struct XmlDataParser auto key = "@" + attr_name; if (obj.find(key) != obj.end()) { - throw std::runtime_error("Duplicate attribute name: " + attr_name); + throw std::runtime_error(gul17::cat("Duplicate attribute name: ", attr_name)); } obj[key] = attr_value; } @@ -178,7 +171,21 @@ struct XmlDataParser // Add text content if any if 
(!text_content.empty()) { - obj["#text"] = DataTree(text_content); + if (text_content.size() == 1) + { + obj["#text"] = DataTree(std::string(text_content[0])); // Single text content + } + else + { + /// FIXME: Better to pass string_views directly to DataTree? + DataTree::Array text_array; + std::transform( + text_content.begin(), text_content.end(), + std::back_inserter(text_array), + [](const std::string_view& txt) { return DataTree(std::string(txt)); }); + + obj["#text"] = DataTree(text_array); // Multiple text contents as array + } } return std::make_pair(tag_name, DataTree(obj)); @@ -187,7 +194,14 @@ struct XmlDataParser { // Simple element with text content // Try to convert to appropriate type - return std::make_pair(tag_name, convert_string_to_value(text_content)); + if (text_content.size() == 1) + { + return std::make_pair(tag_name, convert_string_to_value(text_content[0])); + } + else + { + throw std::runtime_error(gul17::cat("Multiple text contents in simple element: ", tag_name)); + } } else { @@ -196,6 +210,27 @@ struct XmlDataParser } } + void strip_comment() + { + skip_whitespace(); + + if (current_char() == '<' && next_char() == '!' 
&& + data_.compare(pos_, 4, "") != 0) + { + ++pos_; + } + if (data_.compare(pos_, 3, "-->") == 0) + { + pos_ += 3; // skip '-->' + } + skip_whitespace(); + } + } + std::string_view parse_attribute_name() { auto start_pos = pos_; @@ -316,8 +351,7 @@ struct XmlDataParser { if (current_char() != expected) { - //fprintf(stderr, "Expected '%c' but found '%c' at position %d\n", expected, current_char(), pos_); - throw std::runtime_error("Expected character not found"); + throw std::runtime_error(gul17::cat("Expected character not found: ", expected, " at position ", pos_)); } advance(); } diff --git a/tests/data_processors/test_xml_processor.cc b/tests/data_processors/test_xml_processor.cc index 3d96480..fcc56e6 100644 --- a/tests/data_processors/test_xml_processor.cc +++ b/tests/data_processors/test_xml_processor.cc @@ -71,10 +71,12 @@ TEST_CASE("XmlDataProcessor: XML parsing with attributes and comments", "[XmlDat { auto tree = from_xml_string( R"( + TEXT CONTENT 1 + TEXT CONTENT 2 value1 - TEXT CONTENT + TEXT CONTENT 3 )"); REQUIRE(tree["key1"].is_object()); @@ -91,8 +93,11 @@ R"( REQUIRE(tree["key2"]["@attr2"].as() == "k2a2"); REQUIRE(tree["key2"]["@attr3"].is_null()); - REQUIRE(tree["#text"].is_string()); - REQUIRE(tree["#text"].as() == "TEXT CONTENT"); + REQUIRE(tree["#text"].is_array()); + REQUIRE(tree["#text"].size() == 3); + REQUIRE(tree["#text"][0].as() == "TEXT CONTENT 1"); + REQUIRE(tree["#text"][1].as() == "TEXT CONTENT 2"); + REQUIRE(tree["#text"][2].as() == "TEXT CONTENT 3"); } TEST_CASE("XmlDataProcessor: XML parsing with escape sequences", "[XmlDataProcessor]") From 13f0369ec1e8740bb6b41ac34f5ff72f3bca0a4e Mon Sep 17 00:00:00 2001 From: J B <54305315+zykure@users.noreply.github.com> Date: Thu, 27 Nov 2025 15:30:00 +0100 Subject: [PATCH 05/12] Fix copyright notice in data_processors.h Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- include/gul17/data_processors.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/include/gul17/data_processors.h b/include/gul17/data_processors.h index 585fe28..3673422 100644 --- a/include/gul17/data_processors.h +++ b/include/gul17/data_processors.h @@ -17,7 +17,7 @@ * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see */ #ifndef GUL17_DATA_PROCESSORS_H_ From e472091dc0bfa3defb414aa061afec7058f1ae8a Mon Sep 17 00:00:00 2001 From: J B <54305315+zykure@users.noreply.github.com> Date: Thu, 27 Nov 2025 15:34:51 +0100 Subject: [PATCH 06/12] [Copilot] Fix JSON parser issues Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/data_processors/json_processor.cc | 4 +--- tests/data_processors/test_json_processor.cc | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/data_processors/json_processor.cc b/src/data_processors/json_processor.cc index 30a5cd2..4119a32 100644 --- a/src/data_processors/json_processor.cc +++ b/src/data_processors/json_processor.cc @@ -152,13 +152,11 @@ struct JsonDataParser case '"': result += '"'; break; case '\\': result += '\\'; break; case '/': result += '/'; break; - case 'a': result += '\a'; break; case 'b': result += '\b'; break; case 'f': result += '\f'; break; case 'n': result += '\n'; break; case 'r': result += '\r'; break; case 't': result += '\t'; break; - case 'v': result += '\v'; break; case 'u': // Unicode escape sequence (e.g., \uXXXX) @@ -166,7 +164,7 @@ struct JsonDataParser { auto num = data_.substr(pos_ + 1, 4); try { - unsigned ch = std::stoi(std::string(num), nullptr); + unsigned ch = std::stoi(std::string(num), nullptr, 16); if (ch < 0x80) { result += static_cast(ch); diff --git a/tests/data_processors/test_json_processor.cc b/tests/data_processors/test_json_processor.cc index d0a9c7d..017f9b0 100644 --- a/tests/data_processors/test_json_processor.cc +++ b/tests/data_processors/test_json_processor.cc @@ -89,7 +89,7 @@ R"({ 
REQUIRE(tree["key2"].as() == "\"value\\2\""); REQUIRE(tree["key3"].is_string()); - REQUIRE(tree["key3"].as() == " \""); + REQUIRE(tree["key3"].as() == "24"); } TEST_CASE("JsonDataProcessor: JSON parsing with errors", "[JsonDataProcessor]") From 56a7fe223d505516da8eb9bbecef01acd54f609f Mon Sep 17 00:00:00 2001 From: J B <54305315+zykure@users.noreply.github.com> Date: Thu, 27 Nov 2025 15:35:43 +0100 Subject: [PATCH 07/12] [Copilot] Fix YAML parser issues Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/data_processors/yaml_processor.cc | 4 ++-- tests/data_processors/test_yaml_processor.cc | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/data_processors/yaml_processor.cc b/src/data_processors/yaml_processor.cc index c07584f..7eda0d8 100644 --- a/src/data_processors/yaml_processor.cc +++ b/src/data_processors/yaml_processor.cc @@ -354,9 +354,9 @@ struct YamlDataParser // Unicode escape sequence (e.g., \uXXXX) if (i + 5 < str.length()) { - auto num = str.substr(i + 4, 4); + auto num = str.substr(i + 2, 4); try { - auto ch = std::stoi(std::string(num), nullptr); + auto ch = std::stoi(std::string(num), nullptr, 16); if (ch < 0x80) { result += static_cast(ch); diff --git a/tests/data_processors/test_yaml_processor.cc b/tests/data_processors/test_yaml_processor.cc index 5619d0b..19cd098 100644 --- a/tests/data_processors/test_yaml_processor.cc +++ b/tests/data_processors/test_yaml_processor.cc @@ -99,7 +99,7 @@ key3: "\u0032\u0034" REQUIRE(tree["key2"].as() == "\"value\\2\""); REQUIRE(tree["key3"].is_string()); - REQUIRE(tree["key3"].as() == " \""); + REQUIRE(tree["key3"].as() == "24"); } TEST_CASE("YamlDataProcessor: YAML parsing with errors", "[YamlDataProcessor]") From b59ef66dc27d134e50082ebdcec7bf55dfaf44ef Mon Sep 17 00:00:00 2001 From: Jan Behrens <54305315+zykure@users.noreply.github.com> Date: Thu, 27 Nov 2025 15:43:58 +0100 Subject: [PATCH 08/12] Fix more issues from Copilot review --- 
src/data_processors/json_processor.cc | 2 +- src/data_processors/xml_processor.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data_processors/json_processor.cc b/src/data_processors/json_processor.cc index 4119a32..2169aa7 100644 --- a/src/data_processors/json_processor.cc +++ b/src/data_processors/json_processor.cc @@ -164,7 +164,7 @@ struct JsonDataParser { auto num = data_.substr(pos_ + 1, 4); try { - unsigned ch = std::stoi(std::string(num), nullptr, 16); + auto ch = std::stoi(std::string(num), nullptr, 16); if (ch < 0x80) { result += static_cast(ch); diff --git a/src/data_processors/xml_processor.cc b/src/data_processors/xml_processor.cc index 236ae82..c987f02 100644 --- a/src/data_processors/xml_processor.cc +++ b/src/data_processors/xml_processor.cc @@ -520,7 +520,7 @@ struct XmlDataSerializer serialize_value(val, key, indent, current_indent + indent); } - auto it = obj.find("#text");; + auto it = obj.find("#text"); if (it != obj.end() && it->second.is_string()) { // Text content From 27fde8c6d815c7bdd173fccf5c1b8f1492104637 Mon Sep 17 00:00:00 2001 From: J B <54305315+zykure@users.noreply.github.com> Date: Thu, 27 Nov 2025 16:32:57 +0100 Subject: [PATCH 09/12] [Copilot] Minor changes from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- include/gul17/DataTree.h | 4 ++-- include/gul17/data_processors.h | 2 +- src/data_processors/json_processor.cc | 4 +--- src/data_processors/yaml_processor.cc | 8 ++++---- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/include/gul17/DataTree.h b/include/gul17/DataTree.h index f65c646..1e1cad7 100644 --- a/include/gul17/DataTree.h +++ b/include/gul17/DataTree.h @@ -53,8 +53,8 @@ namespace gul17 { * tree["foo"] = "bar"; // String * tree["answer"] = 42; // Number * tree["is_valid"] = true; // Boolean - * tree["items"] = DataTree::array{1, 2, 3}; // Array - * tree["config"] = DataTree::object{ {"key1", "value1"}, {"key2", 42} }; // Object + * 
tree["items"] = DataTree::Array{1, 2, 3}; // Array + * tree["config"] = DataTree::Object{ {"key1", "value1"}, {"key2", 42} }; // Object * * tree["items"].push_back(4); // Add an element to the array * tree["config"]["key3"] = 3.14; // Add a key-value pair to the object diff --git a/include/gul17/data_processors.h b/include/gul17/data_processors.h index 3673422..c507d54 100644 --- a/include/gul17/data_processors.h +++ b/include/gul17/data_processors.h @@ -17,7 +17,7 @@ * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see + * along with this program. If not, see . */ #ifndef GUL17_DATA_PROCESSORS_H_ diff --git a/src/data_processors/json_processor.cc b/src/data_processors/json_processor.cc index 2169aa7..efce0a4 100644 --- a/src/data_processors/json_processor.cc +++ b/src/data_processors/json_processor.cc @@ -328,7 +328,7 @@ struct JsonDataParser char next_char() const { - return pos_ + 1 < data_.size() ? data_[pos_+1] : '\0'; + return pos_ + 1 < data_.size() ? 
data_[pos_ + 1] : '\0'; } bool has_remaining_chars() const @@ -460,13 +460,11 @@ struct JsonDataSerializer { case '"': result += "\\\""; break; case '\\': result += "\\\\"; break; - case '\a': result += "\\a"; break; case '\b': result += "\\b"; break; case '\f': result += "\\f"; break; case '\n': result += "\\n"; break; case '\r': result += "\\r"; break; case '\t': result += "\\t"; break; - case '\v': result += "\\v"; break; default: // escape control characters diff --git a/src/data_processors/yaml_processor.cc b/src/data_processors/yaml_processor.cc index 7eda0d8..4c2d92f 100644 --- a/src/data_processors/yaml_processor.cc +++ b/src/data_processors/yaml_processor.cc @@ -329,10 +329,10 @@ struct YamlDataParser case ' ': result += ' '; break; // YAML-specific escapes - case '_': result += "\xC2\xA0"; break; // U+00A0 - case 'N': result += "\xC2\x85"; break; // U+0085 - case 'L': result += "\xE2\x80\xA8"; break; // U+2028 - case 'P': result += "\xE2\x80\xA9"; break; // U+2029 + case '_': result += u8"\u00A0"; break; // U+00A0 (non-breaking space) + case 'N': result += u8"\u0085"; break; // U+0085 (next line) + case 'L': result += u8"\u2028"; break; // U+2028 (line separator) + case 'P': result += u8"\u2029"; break; // U+2029 (paragraph separator) // Hexcode and Unicode escapes case 'x': From a9477a29df8cdc8cd9c2ed514ec188885cf92feb Mon Sep 17 00:00:00 2001 From: zykure <54305315+zykure@users.noreply.github.com> Date: Thu, 27 Nov 2025 17:39:38 +0100 Subject: [PATCH 10/12] Fix more issues from Copilot review --- include/gul17/DataTree.h | 11 +- include/gul17/data_processors.h | 4 +- src/data_processors/json_processor.cc | 98 ++++++++++-------- src/data_processors/xml_processor.cc | 12 ++- src/data_processors/yaml_processor.cc | 139 ++++++++++++++++++-------- 5 files changed, 170 insertions(+), 94 deletions(-) diff --git a/include/gul17/DataTree.h b/include/gul17/DataTree.h index 1e1cad7..d84f2d5 100644 --- a/include/gul17/DataTree.h +++ b/include/gul17/DataTree.h @@ 
-351,8 +351,8 @@ class DataTree } // Iterator return types, only works for arrays - using iterator = DataTree*; - using const_iterator = const DataTree*; + using iterator = std::vector::iterator; + using const_iterator = const std::vector::iterator; // Iterators @@ -366,7 +366,7 @@ class DataTree if (!is_array()) throw std::runtime_error("DataTree is not an array"); auto& arr = std::get(value_); - return arr.data(); + return arr.begin(); } /** @@ -379,7 +379,7 @@ class DataTree if (!is_array()) throw std::runtime_error("DataTree is not an array"); auto& arr = std::get(value_); - return arr.data() + arr.size(); + return arr.end(); } /** @@ -585,6 +585,8 @@ class DataTree * - int to double * - boolean to int * - int/double/boolean/null to string + * + * It is not possible to convert complex types (array/object) to primitive types. */ template T as() const @@ -615,7 +617,6 @@ class DataTree if (is_double()) return std::to_string(std::get(value_)); if (is_boolean()) return std::get(value_) ? "true" : "false"; if (is_null()) return "null"; - // TODO: Add conversion logic for other types to string if needed } else if constexpr (std::is_same_v) { diff --git a/include/gul17/data_processors.h b/include/gul17/data_processors.h index c507d54..63268c7 100644 --- a/include/gul17/data_processors.h +++ b/include/gul17/data_processors.h @@ -100,7 +100,7 @@ DataTree from_xml_string(const std::string_view& data); /** * Serialize a DataTree object to an XML string. * - * The function serializes the given DataTree object into a XML-formatted string. + * The function serializes the given DataTree object into an XML-formatted string. * The optional \c indent parameter specifies the number of spaces to use for * indentation in the output string (default is 0, meaning no pretty-printing). 
* @@ -119,7 +119,7 @@ std::string to_xml_string(const DataTree& value, size_t indent = 0, const std::string& root_tag_name = "root"); /** - * Parse an YAML string and return the corresponding DataTree representation. + * Parse a YAML string and return the corresponding DataTree representation. * * The function parses the input YAML string and constructs a DataTree object * representing the hierarchical structure and data contained in the YAML. diff --git a/src/data_processors/json_processor.cc b/src/data_processors/json_processor.cc index efce0a4..47f54d9 100644 --- a/src/data_processors/json_processor.cc +++ b/src/data_processors/json_processor.cc @@ -143,8 +143,6 @@ struct JsonDataParser } else if (c == '\\') { - // TODO - Implement full JSON string unescaping - advance(); char esc = current_char(); switch (esc) @@ -163,44 +161,42 @@ struct JsonDataParser if (pos_ + 5 < data_.length()) { auto num = data_.substr(pos_ + 1, 4); - try { - auto ch = std::stoi(std::string(num), nullptr, 16); - if (ch < 0x80) - { - result += static_cast(ch); - } - else if (ch < 0x800) - { - result += static_cast(0xC0 | (ch >> 6)); - result += static_cast(0x80 | (ch & 0x3F)); - } - else if (ch < 0x10000) - { - result += static_cast(0xE0 | (ch >> 12)); - result += static_cast(0x80 | ((ch >> 6) & 0x3F)); - result += static_cast(0x80 | (ch & 0x3F)); - } - else - { - result += static_cast(0xF0 | (ch >> 18)); - result += static_cast(0x80 | ((ch >> 12) & 0x3F)); - result += static_cast(0x80 | ((ch >> 6) & 0x3F)); - result += static_cast(0x80 | (ch & 0x3F)); - } - pos_ += 4; + unsigned int ch; + try + { + ch = std::stoi(std::string(num), nullptr, 16); + } + catch (...) + { + throw std::runtime_error(gul17::cat("Invalid number format in Unicode escape at position ", pos_)); + } + + if (ch < 0x80) + { + result += static_cast(ch); + } + else if (ch < 0x800) + { + result += static_cast(0xC0 | (ch >> 6)); + result += static_cast(0x80 | (ch & 0x3F)); } - catch (...) 
{ - result += data_[pos_ + 1]; // Invalid number, treat as literal - pos_ += 1; + else if (ch < 0x10000) + { + result += static_cast(0xE0 | (ch >> 12)); + result += static_cast(0x80 | ((ch >> 6) & 0x3F)); + result += static_cast(0x80 | (ch & 0x3F)); } + else + { + // Note: JSON \uXXXX escapes only support BMP (<= 0xFFFF). + throw std::runtime_error(gul17::cat("Invalid Unicode code point at position ", pos_)); + } + pos_ += 4; } break; case 'U': - // Unicode escape sequence (e.g., \UXXXXXXXX) - // FIXME - Unicode escape sequences not implemented yet - throw std::runtime_error("Unicode escape sequences not supported"); - + // TODO - Unicode escape sequence (e.g., \UXXXXXXXX) not implemented yet default: throw std::runtime_error(gul17::cat("Invalid escape sequence: ", esc, " at position ", pos_)); } @@ -267,13 +263,27 @@ struct JsonDataParser { advance(); } - double value = std::stod(std::string(data_.substr(start_pos, pos_ - start_pos))); - return DataTree(value); + try + { + double value = std::stod(std::string(data_.substr(start_pos, pos_ - start_pos))); + return DataTree(value); + } + catch (...) + { + throw std::runtime_error(gul17::cat("Invalid number format at position ", start_pos)); + } } else { - int value = std::stoi(std::string(data_.substr(start_pos, pos_ - start_pos))); - return DataTree(value); + try + { + int value = std::stoi(std::string(data_.substr(start_pos, pos_ - start_pos))); + return DataTree(value); + } + catch (...) + { + throw std::runtime_error(gul17::cat("Invalid number format at position ", start_pos)); + } } } @@ -402,10 +412,12 @@ struct JsonDataSerializer void serialize_array(const DataTree::Array& arr, size_t indent, size_t current_indent) { + std::string newline = indent > 0 ? 
"\n" : ""; // Add newlines if indenting + output_ << "["; if (!arr.empty()) { - output_ << "\n"; + output_ << newline; for (size_t i = 0; i < arr.size(); ++i) { output_ << std::string(current_indent + indent, ' '); @@ -413,7 +425,7 @@ struct JsonDataSerializer if (i < arr.size() - 1) output_ << ","; - output_ << "\n"; + output_ << newline; } output_ << std::string(current_indent, ' '); } @@ -422,6 +434,8 @@ struct JsonDataSerializer void serialize_object(const DataTree::Object& obj, size_t indent, size_t current_indent) { + std::string newline = indent > 0 ? "\n" : ""; // Add newlines if indenting + output_ << "{"; if (!obj.empty()) { @@ -431,7 +445,7 @@ struct JsonDataSerializer [](const auto& pair) { return pair.first; }); std::sort(keys.begin(), keys.end()); - output_ << "\n"; + output_ << newline; for (size_t i = 0; i < keys.size(); ++i) { const auto& key = keys[i]; @@ -443,7 +457,7 @@ struct JsonDataSerializer if (i < keys.size() - 1) output_ << ","; - output_ << "\n"; + output_ << newline; } output_ << std::string(current_indent, ' '); } diff --git a/src/data_processors/xml_processor.cc b/src/data_processors/xml_processor.cc index c987f02..a8f08f3 100644 --- a/src/data_processors/xml_processor.cc +++ b/src/data_processors/xml_processor.cc @@ -71,6 +71,10 @@ struct XmlDataParser // Parse attribute name auto attr_name = parse_attribute_name(); + if (attr_name.empty()) + { + throw std::runtime_error(gul17::cat("Malformed XML: attribute name cannot be empty at position ", pos_)); + } skip_whitespace(); expect('='); @@ -127,7 +131,7 @@ struct XmlDataParser if (closing_tag != tag_name) { - throw std::runtime_error(gul17::cat("Mismatched tags: ", tag_name, " vs ", closing_tag)); + throw std::runtime_error(gul17::cat("Mismatched tags: ", tag_name, " vs ", closing_tag, " at position ", pos_)); } } @@ -163,7 +167,7 @@ struct XmlDataParser auto key = "@" + attr_name; if (obj.find(key) != obj.end()) { - throw std::runtime_error(gul17::cat("Duplicate attribute name: ", 
attr_name)); + throw std::runtime_error(gul17::cat("Duplicate attribute name: ", attr_name, " at position ", pos_)); } obj[key] = attr_value; } @@ -200,7 +204,7 @@ struct XmlDataParser } else { - throw std::runtime_error(gul17::cat("Multiple text contents in simple element: ", tag_name)); + throw std::runtime_error(gul17::cat("Multiple text contents in simple element at position ", pos_)); } } else @@ -247,7 +251,7 @@ struct XmlDataParser { char quote_char = current_char(); if (quote_char != '"' && quote_char != '\'') - throw std::runtime_error("Expected quote for attribute value"); + throw std::runtime_error(gul17::cat("Expected quote for attribute value at position ", pos_)); advance(); // skip opening quote diff --git a/src/data_processors/yaml_processor.cc b/src/data_processors/yaml_processor.cc index 4c2d92f..7b68187 100644 --- a/src/data_processors/yaml_processor.cc +++ b/src/data_processors/yaml_processor.cc @@ -21,6 +21,7 @@ */ #include "gul17/data_processors.h" +#include "gul17/cat.h" #include "gul17/join_split.h" #include @@ -110,6 +111,14 @@ struct YamlDataParser { // Remove the sequence marker and parse the value auto item_content = trim(content.substr(1)); // Remove '-' + if (!item_content.empty()) + { + // Simple scalar on same line + sequence.push_back(parse_scalar(item_content)); + current_line_++; + continue; + } + current_line_++; // Check if this is a complex item (object or nested sequence) @@ -268,6 +277,8 @@ struct YamlDataParser size_t i = 0; while (i < line.length() && (line[i] == ' ' || line[i] == '\t')) { + if (line[i] == '\t') + throw std::runtime_error(gul17::cat("Tabs are not allowed for indentation in YAML at line ", current_line_ + 1)); i++; } @@ -276,9 +287,26 @@ struct YamlDataParser std::string_view strip_comment(const std::string_view& line) { - auto comment_pos = line.find('#'); - if (comment_pos != std::string::npos) - return line.substr(0, comment_pos); + bool in_single_quote = false; + bool in_double_quote = false; + for 
(size_t i = 0; i < line.size(); ++i) + { + char c = line[i]; + if (c == '\'' && !in_double_quote) + { + in_single_quote = !in_single_quote; + } + else if (c == '"' && !in_single_quote) + { + // Only toggle if not escaped + if (i == 0 || line[i-1] != '\\') + in_double_quote = !in_double_quote; + } + else if (c == '#' && !in_single_quote && !in_double_quote) + { + return line.substr(0, i); + } + } return line; } @@ -301,10 +329,29 @@ struct YamlDataParser bool is_mapping_item(const std::string_view& line) { - return line.find(':') != std::string::npos; + // Check for a colon outside of single or double quotes + bool in_single_quote = false; + bool in_double_quote = false; + for (size_t i = 0; i < line.length(); ++i) + { + char c = line[i]; + if (c == '\'' && !in_double_quote) + { + in_single_quote = !in_single_quote; + } + else if (c == '"' && !in_single_quote) + { + in_double_quote = !in_double_quote; + } + else if (c == ':' && !in_single_quote && !in_double_quote) + { + return true; + } + } + return false; } - static std::string unescape_yaml_string(const std::string_view& str) + std::string unescape_yaml_string(const std::string_view& str) { // TODO - Implement full YAML string unescaping @@ -313,7 +360,8 @@ struct YamlDataParser { if (str[i] == '\\' && i + 1 < str.length()) { - switch (str[i + 1]) + auto esc = str[i + 1]; + switch (esc) { case '"': result += '\"'; break; case '\'': result += '\''; break; @@ -339,12 +387,14 @@ struct YamlDataParser if (i + 3 < str.length()) { auto hex = str.substr(i + 2, 2); - try { + try + { auto ch = std::stoi(std::string(hex), nullptr, 16); result += static_cast(ch); i += 2; } - catch (...) { + catch (...) 
+ { result += str[i + 1]; // Invalid hex, treat as literal } } @@ -355,45 +405,44 @@ struct YamlDataParser if (i + 5 < str.length()) { auto num = str.substr(i + 2, 4); - try { - auto ch = std::stoi(std::string(num), nullptr, 16); - if (ch < 0x80) - { - result += static_cast(ch); - } - else if (ch < 0x800) - { - result += static_cast(0xC0 | (ch >> 6)); - result += static_cast(0x80 | (ch & 0x3F)); - } - else if (ch < 0x10000) - { - result += static_cast(0xE0 | (ch >> 12)); - result += static_cast(0x80 | ((ch >> 6) & 0x3F)); - result += static_cast(0x80 | (ch & 0x3F)); - } - else - { - result += static_cast(0xF0 | (ch >> 18)); - result += static_cast(0x80 | ((ch >> 12) & 0x3F)); - result += static_cast(0x80 | ((ch >> 6) & 0x3F)); - result += static_cast(0x80 | (ch & 0x3F)); - } - i += 4; + unsigned int ch; + try + { + ch = std::stoi(std::string(num), nullptr, 16); + } + catch (...) + { + throw std::runtime_error(gul17::cat("Invalid number format in Unicode escape at line ", current_line_)); } - catch (...) { - result += str[i + 1]; // Invalid number, treat as literal + + if (ch < 0x80) + { + result += static_cast(ch); } + else if (ch < 0x800) + { + result += static_cast(0xC0 | (ch >> 6)); + result += static_cast(0x80 | (ch & 0x3F)); + } + else if (ch < 0x10000) + { + result += static_cast(0xE0 | (ch >> 12)); + result += static_cast(0x80 | ((ch >> 6) & 0x3F)); + result += static_cast(0x80 | (ch & 0x3F)); + } + else + { + // Note: JSON \uXXXX escapes only support BMP (<= 0xFFFF). 
+ throw std::runtime_error(gul17::cat("Invalid Unicode code point at line ", current_line_)); + } + i += 4; } break; case 'U': - // FIXME - Unicode escape sequences not implemented yet - throw std::runtime_error("Unicode escape sequences not supported"); - + // TODO - Unicode escape sequence (e.g., \UXXXXXXXX) not implemented yet default: - // Unknown escape - treat as literal character - result += str[i + 1]; + throw std::runtime_error(gul17::cat("Invalid escape sequence: ", esc, " at line ", current_line_)); } ++i; // Skip next character after escape `\` } @@ -467,7 +516,15 @@ struct YamlDataSerializer // Quote strings if they contain special characters if (str.empty() || str.find_first_of(":#{}[]&*!|>\"'%") != std::string::npos) { - output_ << "\"" << str << "\""; + output_ << "\""; + for (char c : str) + { + if (c == '"') + output_ << "\\\""; + else + output_ << c; + } + output_ << "\""; } else { From 7f29d064b7afc86d61189544942e79f7e31e7a81 Mon Sep 17 00:00:00 2001 From: zykure <54305315+zykure@users.noreply.github.com> Date: Thu, 27 Nov 2025 17:44:44 +0100 Subject: [PATCH 11/12] Use ordered map in DataTree to avoid extra sorting --- include/gul17/DataTree.h | 4 ++-- src/data_processors/json_processor.cc | 14 +++----------- src/data_processors/xml_processor.cc | 18 +++--------------- src/data_processors/yaml_processor.cc | 11 +---------- 4 files changed, 9 insertions(+), 38 deletions(-) diff --git a/include/gul17/DataTree.h b/include/gul17/DataTree.h index d84f2d5..bb9a27b 100644 --- a/include/gul17/DataTree.h +++ b/include/gul17/DataTree.h @@ -23,9 +23,9 @@ #ifndef GUL17_DATA_TREE_H_ #define GUL17_DATA_TREE_H_ +#include #include #include -#include #include #include @@ -68,7 +68,7 @@ class DataTree // Type definitions /// Type of an object (key-value pairs) - using Object = std::unordered_map; + using Object = std::map; /// Type of an array (list of values) using Array = std::vector; /// Underlying variant type to hold different data types diff --git 
a/src/data_processors/json_processor.cc b/src/data_processors/json_processor.cc index 47f54d9..b9e8249 100644 --- a/src/data_processors/json_processor.cc +++ b/src/data_processors/json_processor.cc @@ -439,23 +439,15 @@ struct JsonDataSerializer output_ << "{"; if (!obj.empty()) { - // Sort keys for consistent output - std::vector keys; - std::transform(obj.begin(), obj.end(), std::back_inserter(keys), - [](const auto& pair) { return pair.first; }); - std::sort(keys.begin(), keys.end()); - output_ << newline; - for (size_t i = 0; i < keys.size(); ++i) + size_t i = 0; + for (const auto & [key, val] : obj) { - const auto& key = keys[i]; - const auto& val = obj.at(key); - output_ << std::string(current_indent + indent, ' '); output_ << "\"" << escape_string(key) << "\": "; serialize_value(val, indent, current_indent + indent); - if (i < keys.size() - 1) + if (i++ < obj.size() - 1) output_ << ","; output_ << newline; } diff --git a/src/data_processors/xml_processor.cc b/src/data_processors/xml_processor.cc index a8f08f3..ef58ee1 100644 --- a/src/data_processors/xml_processor.cc +++ b/src/data_processors/xml_processor.cc @@ -139,7 +139,7 @@ struct XmlDataParser if (!attributes.empty() || !children.empty()) { // Handle arrays for multiple same-tag children / attributes - std::unordered_map obj; + std::map obj; std::unordered_map> child_groups; for (const auto& [child_tag, child_value] : children) @@ -480,19 +480,10 @@ struct XmlDataSerializer { const auto& obj = value.as(); - // Sort keys for consistent output - std::vector keys; - std::transform(obj.begin(), obj.end(), std::back_inserter(keys), - [](const auto& pair) { return pair.first; }); - std::sort(keys.begin(), keys.end()); - // Opening tag with attributes output_ << indent_str << opening_tag; - for (size_t i = 0; i < keys.size(); ++i) + for (const auto & [key, val] : obj) { - const auto& key = keys[i]; - const auto& val = obj.at(key); - if (key.rfind("@", 0) == 0) { // Attribute @@ -512,11 +503,8 @@ struct 
XmlDataSerializer output_ << newline; // Child elements and text content - for (size_t i = 0; i < keys.size(); ++i) + for (const auto & [key, val] : obj) { - const auto& key = keys[i]; - const auto& val = obj.at(key); - // Skip already handled attributes and text content handled later if (key.rfind("@", 0) == 0 || key == "#text") continue; diff --git a/src/data_processors/yaml_processor.cc b/src/data_processors/yaml_processor.cc index 7b68187..a4330b0 100644 --- a/src/data_processors/yaml_processor.cc +++ b/src/data_processors/yaml_processor.cc @@ -553,17 +553,8 @@ struct YamlDataSerializer void serialize_mapping(const DataTree::Object& obj, size_t indent, size_t current_indent) { - // Sort keys for consistent output - std::vector keys; - std::transform(obj.begin(), obj.end(), std::back_inserter(keys), - [](const auto& pair) { return pair.first; }); - std::sort(keys.begin(), keys.end()); - - for (size_t i = 0; i < keys.size(); ++i) + for (const auto & [key, val] : obj) { - const auto& key = keys[i]; - const auto& val = obj.at(key); - output_ << std::string(current_indent, ' ') << key << ":"; if (val.is_object() || val.is_array()) From 758341462dd3ce7a91c1f034f85ab459b1f130c3 Mon Sep 17 00:00:00 2001 From: zykure <54305315+zykure@users.noreply.github.com> Date: Thu, 27 Nov 2025 18:01:46 +0100 Subject: [PATCH 12/12] Fix more issues from Copilot review --- src/data_processors/json_processor.cc | 6 +++--- src/data_processors/xml_processor.cc | 2 +- src/data_processors/yaml_processor.cc | 8 +++----- tests/data_processors/test_json_processor.cc | 14 +++++++------- tests/data_processors/test_xml_processor.cc | 18 +++++++++--------- tests/data_processors/test_yaml_processor.cc | 14 +++++++------- 6 files changed, 30 insertions(+), 32 deletions(-) diff --git a/src/data_processors/json_processor.cc b/src/data_processors/json_processor.cc index b9e8249..fb02b1b 100644 --- a/src/data_processors/json_processor.cc +++ b/src/data_processors/json_processor.cc @@ -158,7 +158,7 @@ 
struct JsonDataParser case 'u': // Unicode escape sequence (e.g., \uXXXX) - if (pos_ + 5 < data_.length()) + if (pos_ + 5 <= data_.length()) { auto num = data_.substr(pos_ + 1, 4); unsigned int ch; @@ -196,7 +196,7 @@ struct JsonDataParser break; case 'U': - // TODO - Unicode escape sequence (e.g., \UXXXXXXXX) not implemented yet + throw std::runtime_error(gul17::cat("Unicode escape sequence (\\UXXXXXXXX) not supported at position ", pos_)); default: throw std::runtime_error(gul17::cat("Invalid escape sequence: ", esc, " at position ", pos_)); } @@ -476,7 +476,7 @@ struct JsonDataSerializer // escape control characters if (static_cast(c) < 0x20) { - char buf[7]; + char buf[12]; snprintf(buf, sizeof(buf), "\\u%04x", static_cast(c)); result += buf; } diff --git a/src/data_processors/xml_processor.cc b/src/data_processors/xml_processor.cc index ef58ee1..9f006bd 100644 --- a/src/data_processors/xml_processor.cc +++ b/src/data_processors/xml_processor.cc @@ -181,7 +181,7 @@ struct XmlDataParser } else { - /// FIXME: Better to pass string_views directly to DataTree? + /// TODO: Better to pass string_views directly to DataTree? 
DataTree::Array text_array; std::transform( text_content.begin(), text_content.end(), diff --git a/src/data_processors/yaml_processor.cc b/src/data_processors/yaml_processor.cc index a4330b0..689bee4 100644 --- a/src/data_processors/yaml_processor.cc +++ b/src/data_processors/yaml_processor.cc @@ -353,8 +353,6 @@ struct YamlDataParser std::string unescape_yaml_string(const std::string_view& str) { - // TODO - Implement full YAML string unescaping - std::string result; for (size_t i = 0; i < str.length(); ++i) { @@ -384,7 +382,7 @@ struct YamlDataParser // Hexcode and Unicode escapes case 'x': - if (i + 3 < str.length()) + if (i + 3 <= str.length()) { auto hex = str.substr(i + 2, 2); try @@ -402,7 +400,7 @@ struct YamlDataParser case 'u': // Unicode escape sequence (e.g., \uXXXX) - if (i + 5 < str.length()) + if (i + 5 <= str.length()) { auto num = str.substr(i + 2, 4); unsigned int ch; @@ -440,7 +438,7 @@ struct YamlDataParser break; case 'U': - // TODO - Unicode escape sequence (e.g., \UXXXXXXXX) not implemented yet + throw std::runtime_error(gul17::cat("Unicode escape sequence (\\UXXXXXXXX) not supported at line ", current_line_)); default: throw std::runtime_error(gul17::cat("Invalid escape sequence: ", esc, " at line ", current_line_)); } diff --git a/tests/data_processors/test_json_processor.cc b/tests/data_processors/test_json_processor.cc index 017f9b0..8ef80e2 100644 --- a/tests/data_processors/test_json_processor.cc +++ b/tests/data_processors/test_json_processor.cc @@ -1,8 +1,8 @@ /** - * \file test_JsonDataProcessor.cc + * \file test_json_processor.cc * \author Jan Behrens * \date Created on November 19, 2025 - * \brief Test suite for the JsonDataProcessor class. + * \brief Test suite for the JsonProcessor class. 
* * \copyright Copyright 2019-2025 Deutsches Elektronen-Synchrotron (DESY), Hamburg * @@ -30,7 +30,7 @@ using gul17::DataTree; using gul17::from_json_string; using gul17::to_json_string; -TEST_CASE("JsonDataProcessor: JSON parsing", "[JsonDataProcessor]") +TEST_CASE("JsonProcessor: JSON parsing", "[JsonProcessor]") { auto tree = from_json_string( R"({"key1": "value1", "key2": 42, "key3": [1, 2, 3], "key4": {"nestedKey": 3.1415}, "key5": null})"); @@ -57,7 +57,7 @@ TEST_CASE("JsonDataProcessor: JSON parsing", "[JsonDataProcessor]") REQUIRE(tree["invalid"].is_empty()); } -TEST_CASE("JsonDataProcessor: JSON parsing with comments", "[JsonDataProcessor]") +TEST_CASE("JsonProcessor: JSON parsing with comments", "[JsonProcessor]") { auto tree = from_json_string( R"({ @@ -73,7 +73,7 @@ R"({ REQUIRE(tree["key2"].as() == 42); } -TEST_CASE("JsonDataProcessor: JSON parsing with escape sequences", "[JsonDataProcessor]") +TEST_CASE("JsonProcessor: JSON parsing with escape sequences", "[JsonProcessor]") { auto tree = from_json_string( R"({ @@ -92,7 +92,7 @@ R"({ REQUIRE(tree["key3"].as() == "24"); } -TEST_CASE("JsonDataProcessor: JSON parsing with errors", "[JsonDataProcessor]") +TEST_CASE("JsonProcessor: JSON parsing with errors", "[JsonProcessor]") { REQUIRE_THROWS(from_json_string(R"({"key1": "value1", "key2": 42, )")); // Trailing comma REQUIRE_THROWS(from_json_string(R"({"key1": "value1" "key2": 42})")); // Missing comma @@ -100,7 +100,7 @@ TEST_CASE("JsonDataProcessor: JSON parsing with errors", "[JsonDataProcessor]") REQUIRE_THROWS(from_json_string(R"({"key1": "value1", "key2": 42)")); // Missing closing brace } -TEST_CASE("JsonDataProcessor: JSON serialization", "[JsonDataProcessor]") +TEST_CASE("JsonProcessor: JSON serialization", "[JsonProcessor]") { auto tree = DataTree::make_object(); diff --git a/tests/data_processors/test_xml_processor.cc b/tests/data_processors/test_xml_processor.cc index fcc56e6..76f5b9d 100644 --- a/tests/data_processors/test_xml_processor.cc +++ 
b/tests/data_processors/test_xml_processor.cc @@ -1,8 +1,8 @@ /** - * \file test_XmlDataProcessor.cc + * \file test_xml_processor.cc * \author Jan Behrens * \date Created on November 19, 2025 - * \brief Test suite for the XmlDataProcessor class. + * \brief Test suite for the XmlProcessor class. * * \copyright Copyright 2019-2025 Deutsches Elektronen-Synchrotron (DESY), Hamburg * @@ -30,7 +30,7 @@ using gul17::DataTree; using gul17::from_xml_string; using gul17::to_xml_string; -TEST_CASE("XmlDataProcessor: XML parsing", "[XmlDataProcessor]") +TEST_CASE("XmlProcessor: XML parsing", "[XmlProcessor]") { auto tree = from_xml_string( R"( @@ -67,7 +67,7 @@ R"( REQUIRE(tree["invalid"].is_empty()); } -TEST_CASE("XmlDataProcessor: XML parsing with attributes and comments", "[XmlDataProcessor]") +TEST_CASE("XmlProcessor: XML parsing with attributes and comments", "[XmlProcessor]") { auto tree = from_xml_string( R"( @@ -100,7 +100,7 @@ R"( REQUIRE(tree["#text"][2].as() == "TEXT CONTENT 3"); } -TEST_CASE("XmlDataProcessor: XML parsing with escape sequences", "[XmlDataProcessor]") +TEST_CASE("XmlProcessor: XML parsing with escape sequences", "[XmlProcessor]") { auto tree = from_xml_string( R"( @@ -113,7 +113,7 @@ R"( REQUIRE(tree["key1"].as() == "><&\"'"); } -TEST_CASE("XmlDataProcessor: XML parsing with errors", "[XmlDataProcessor]") +TEST_CASE("XmlProcessor: XML parsing with errors", "[XmlProcessor]") { REQUIRE_THROWS(from_xml_string(R"()")); // Missing closing tag REQUIRE_THROWS(from_xml_string(R"()")); // Mismatched closing tag @@ -121,7 +121,7 @@ TEST_CASE("XmlDataProcessor: XML parsing with errors", "[XmlDataProcessor]") REQUIRE_THROWS(from_xml_string(R"()")); // Duplicate attribute } -TEST_CASE("XmlDataProcessor: XML serialization", "[XmlDataProcessor]") +TEST_CASE("XmlProcessor: XML serialization", "[XmlProcessor]") { auto tree = DataTree::make_object(); @@ -149,7 +149,7 @@ R"( REQUIRE(xml_str == expected_xml); } -TEST_CASE("XmlDataProcessor: XML serialization with 
attributes", "[XmlDataProcessor]") +TEST_CASE("XmlProcessor: XML serialization with attributes", "[XmlProcessor]") { auto tree = DataTree::make_object(); @@ -176,7 +176,7 @@ R"( REQUIRE(xml_str == expected_xml); } -TEST_CASE("XmlDataProcessor: XML parsing of SVR.AUTH string", "[XmlDataProcessor]") +TEST_CASE("XmlProcessor: XML parsing of SVR.AUTH string", "[XmlProcessor]") { auto tree = from_xml_string( R"( diff --git a/tests/data_processors/test_yaml_processor.cc b/tests/data_processors/test_yaml_processor.cc index 19cd098..39e2a17 100644 --- a/tests/data_processors/test_yaml_processor.cc +++ b/tests/data_processors/test_yaml_processor.cc @@ -1,8 +1,8 @@ /** - * \file test_YamlDataProcessor.cc + * \file test_yaml_processor.cc * \author Jan Behrens * \date Created on November 20, 2025 - * \brief Test suite for the YamlDataProcessor class. + * \brief Test suite for the YamlProcessor class. * * \copyright Copyright 2019-2025 Deutsches Elektronen-Synchrotron (DESY), Hamburg * @@ -30,7 +30,7 @@ using gul17::DataTree; using gul17::from_yaml_string; using gul17::to_yaml_string; -TEST_CASE("YamlDataProcessor: YAML parsing", "[YamlDataProcessor]") +TEST_CASE("YamlProcessor: YAML parsing", "[YamlProcessor]") { auto tree = from_yaml_string( R"( @@ -67,7 +67,7 @@ key5: null REQUIRE(tree["invalid"].is_empty()); } -TEST_CASE("YamlDataProcessor: YAML parsing with comments", "[YamlDataProcessor]") +TEST_CASE("YamlProcessor: YAML parsing with comments", "[YamlProcessor]") { auto tree = from_yaml_string( R"( @@ -83,7 +83,7 @@ key2: 42 # another comment REQUIRE(tree["key2"].as() == 42); } -TEST_CASE("YamlDataProcessor: YAML parsing with escape sequences", "[YamlDataProcessor]") +TEST_CASE("YamlProcessor: YAML parsing with escape sequences", "[YamlProcessor]") { auto tree = from_yaml_string( R"( @@ -102,12 +102,12 @@ key3: "\u0032\u0034" REQUIRE(tree["key3"].as() == "24"); } -TEST_CASE("YamlDataProcessor: YAML parsing with errors", "[YamlDataProcessor]") +TEST_CASE("YamlProcessor: 
YAML parsing with errors", "[YamlProcessor]") { // Currently, the parser does not throw exceptions for malformed YAML. } -TEST_CASE("YamlDataProcessor: YAML serialization", "[YamlDataProcessor]") +TEST_CASE("YamlProcessor: YAML serialization", "[YamlProcessor]") { auto tree = DataTree::make_object();