diff --git a/CMakeLists.txt b/CMakeLists.txt index 7f039b7..148ae30 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -494,6 +494,7 @@ set(binsrv_source_files src/binsrv/storage_config_fwd.hpp src/binsrv/storage_config.hpp + src/binsrv/storage_config.cpp src/binsrv/storage_metadata_fwd.hpp src/binsrv/storage_metadata.hpp diff --git a/README.md b/README.md index 8a5f288..9a4226b 100644 --- a/README.md +++ b/README.md @@ -175,7 +175,7 @@ For instance, ``` may print ``` -0.1.0 +0.2.3 ``` #### 'search_by_timestamp' operation mode @@ -197,7 +197,7 @@ may print "uri": "s3://binsrv-bucket/storage/binlog.000001", "min_timestamp": "2026-02-09T17:22:01", "max_timestamp": "2026-02-09T17:22:08", - "initial_gtids": "", + "previous_gtids": "", "added_gtids": "11111111-aaaa-1111-aaaa-111111111111:1-123456" }, { @@ -206,7 +206,7 @@ may print "uri": "s3://binsrv-bucket/storage/binlog.000002", "min_timestamp": "2026-02-09T17:22:08", "max_timestamp": "2026-02-09T17:22:09", - "initial_gtids": "11111111-aaaa-1111-aaaa-111111111111:1-123456", + "previous_gtids": "11111111-aaaa-1111-aaaa-111111111111:1-123456", "added_gtids": "11111111-aaaa-1111-aaaa-111111111111:123457-246912" } ] @@ -243,7 +243,7 @@ may print "uri": "s3://binsrv-bucket/storage/binlog.000001", "min_timestamp": "2026-02-09T17:22:01", "max_timestamp": "2026-02-09T17:22:08", - "initial_gtids": "", + "previous_gtids": "", "added_gtids": "11111111-aaaa-1111-aaaa-111111111111:1-123456" } ] @@ -264,7 +264,7 @@ may print "uri": "s3://binsrv-bucket/storage/binlog.000001", "min_timestamp": "2026-02-09T17:22:01", "max_timestamp": "2026-02-09T17:22:08", - "initial_gtids": "", + "previous_gtids": "", "added_gtids": "11111111-aaaa-1111-aaaa-111111111111:1-123456", }, { @@ -273,7 +273,7 @@ may print "uri": "s3://binsrv-bucket/storage/binlog.000002", "min_timestamp": "2026-02-09T17:22:08", "max_timestamp": "2026-02-09T17:22:09", - "initial_gtids": "11111111-aaaa-1111-aaaa-111111111111:1-123456", + "previous_gtids": "11111111-aaaa-1111-aaaa-111111111111:1-123456", "added_gtids": "11111111-aaaa-1111-aaaa-111111111111:123457-246912" } ] diff --git a/mtr/binlog_streaming/r/gtid_purged.result b/mtr/binlog_streaming/r/gtid_purged.result new file mode 100644 index 0000000..91d74d5 --- /dev/null +++ b/mtr/binlog_streaming/r/gtid_purged.result @@ -0,0 +1,50 @@ +*** Resetting replication at the very beginning of the test. + +*** Determining the first binary log name. + +*** Generating a configuration file in JSON format for the Binlog +*** Server utility. + +*** Determining binlog file directory from the server. + +*** Creating a temporary directory for storing +*** binlog files downloaded via the Binlog Server utility. + +*** Creating a simple table. +CREATE TABLE t1(id INT UNSIGNED NOT NULL AUTO_INCREMENT, PRIMARY KEY(id)) ENGINE=InnoDB; + +*** Filling the table with some data. +INSERT INTO t1 VALUES(); + +*** Flushing the first binary log and switching to the second one. +FLUSH BINARY LOGS; + +*** Determining the second binary log name. + +*** Filling the table with more data. +INSERT INTO t1 VALUES(); + +*** Filling the table with more data. +PURGE BINARY LOGS TO ''; + +*** Executing the Binlog Server utility and fetching all events. + +*** Executing the Binlog Server utility in the 'search_by_gtid_set' +include/read_file_to_var.inc + +*** Executing the Binlog Server utility one more time (fetching nothing) + +*** Executing the Binlog Server utility in the 'search_by_gtid_set' +*** one more time (expecting the same results) +include/read_file_to_var.inc + +*** Removing the search result file. + +*** Dropping the table. +DROP TABLE t1; + +*** Removing the Binlog Server utility storage directory. + +*** Removing the Binlog Server utility log file. + +*** Removing the Binlog Server utility configuration file. diff --git a/mtr/binlog_streaming/r/thousand_binlogs.result b/mtr/binlog_streaming/r/thousand_binlogs.result new file mode 100644 index 0000000..03449e2 --- /dev/null +++ b/mtr/binlog_streaming/r/thousand_binlogs.result @@ -0,0 +1,36 @@ +*** Resetting replication at the very beginning of the test. + +*** Generating a configuration file in JSON format for the Binlog +*** Server utility. + +*** Determining binlog file directory from the server. + +*** Creating a temporary directory for storing +*** binlog files downloaded via the Binlog Server utility. + +*** Creating a simple table. +CREATE TABLE t1(id INT UNSIGNED NOT NULL AUTO_INCREMENT, PRIMARY KEY(id)) ENGINE=InnoDB; + +*** Generating 1000 binlogs + +*** Executing the Binlog Server utility and fetching all events. + +*** Executing the Binlog Server utility in the 'search_by_timestamp' +include/read_file_to_var.inc + +*** Executing the Binlog Server utility one more time (fetching nothing) + +*** Executing the Binlog Server utility in the 'search_by_timestamp' +*** one more time (expecting the same results) +include/read_file_to_var.inc + +*** Removing the search result file. + +*** Dropping the table. +DROP TABLE t1; + +*** Removing the Binlog Server utility storage directory. + +*** Removing the Binlog Server utility log file. + +*** Removing the Binlog Server utility configuration file. diff --git a/mtr/binlog_streaming/t/data_directory_8_0_to_8_4_upgrade.test b/mtr/binlog_streaming/t/data_directory_8_0_to_8_4_upgrade.test index 71280da..153181d 100644 --- a/mtr/binlog_streaming/t/data_directory_8_0_to_8_4_upgrade.test +++ b/mtr/binlog_streaming/t/data_directory_8_0_to_8_4_upgrade.test @@ -42,7 +42,7 @@ if ($lts_series != v84) --echo *** Running 8.4 binaries on a 8.0 data directory --let $MYSQLD_LOG = $MYSQLTEST_VARDIR/log/upgrade.log --let $do_not_echo_parameters = 1 ---let $restart_parameters = restart: --datadir=$CUSTOM_MYSQLD_DATADIR --log-error=$MYSQLD_LOG +--let $restart_parameters = restart: --datadir=$CUSTOM_MYSQLD_DATADIR --log-error=$MYSQLD_LOG --binlog_expire_logs_auto_purge=OFF --source include/start_mysqld.inc --echo diff --git a/mtr/binlog_streaming/t/gtid_purged-master.opt b/mtr/binlog_streaming/t/gtid_purged-master.opt new file mode 100644 index 0000000..c7529f0 --- /dev/null +++ b/mtr/binlog_streaming/t/gtid_purged-master.opt @@ -0,0 +1,2 @@ +--gtid-mode=on +--enforce-gtid-consistency diff --git a/mtr/binlog_streaming/t/gtid_purged.test b/mtr/binlog_streaming/t/gtid_purged.test new file mode 100644 index 0000000..242950f --- /dev/null +++ b/mtr/binlog_streaming/t/gtid_purged.test @@ -0,0 +1,98 @@ +--source ../include/have_binsrv.inc + +--source ../include/v80_v84_compatibility_defines.inc + +# in case of --repeat=N, we need to start from a fresh binary log to make +# this test deterministic +--echo *** Resetting replication at the very beginning of the test. +--disable_query_log +eval $stmt_reset_binary_logs_and_gtids; +--enable_query_log + +--echo +--echo *** Determining the first binary log name. +--let $first_binlog = query_get_value($stmt_show_binary_log_status, File, 1) + +# identifying backend storage type ('file' or 's3') +--source ../include/identify_storage_backend.inc + +# creating data directory, configuration file, etc. +--let $binsrv_connect_timeout = 20 +--let $binsrv_read_timeout = 60 +--let $binsrv_idle_time = 10 +--let $binsrv_verify_checksum = TRUE +--let $binsrv_replication_mode = gtid +--let $binsrv_checkpoint_size = 1 +--source ../include/set_up_binsrv_environment.inc + +--let $read_from_file = $MYSQL_TMP_DIR/search_result.json + +--echo +--echo *** Creating a simple table. +CREATE TABLE t1(id INT UNSIGNED NOT NULL AUTO_INCREMENT, PRIMARY KEY(id)) ENGINE=InnoDB; + +--echo +--echo *** Filling the table with some data. +INSERT INTO t1 VALUES(); + +--echo +--echo *** Flushing the first binary log and switching to the second one. +FLUSH BINARY LOGS; + +--echo +--echo *** Determining the second binary log name. +--let $second_binlog = query_get_value($stmt_show_binary_log_status, File, 1) + +--echo +--echo *** Filling the table with more data. +INSERT INTO t1 VALUES(); + +--echo +--echo *** Filling the table with more data. +--replace_result $second_binlog +eval PURGE BINARY LOGS TO '$second_binlog'; + +--let $captured_gtid_purged = `SELECT @@global.gtid_purged` +--let $captured_second_insert_gtid = `SELECT GTID_SUBTRACT(@@global.gtid_executed, @@global.gtid_purged)` + +--echo +--echo *** Executing the Binlog Server utility and fetching all events. +--exec $BINSRV fetch $binsrv_config_file_path > /dev/null + +--echo +--echo *** Executing the Binlog Server utility in the 'search_by_gtid_set' +--exec $BINSRV search_by_gtid_set $binsrv_config_file_path $captured_second_insert_gtid > $read_from_file + +--source include/read_file_to_var.inc +--assert(`SELECT JSON_EXTRACT('$result', '$.status') = 'success'`) +--assert(`SELECT JSON_LENGTH(JSON_EXTRACT('$result', '$.result')) = 1`) +--assert(`SELECT JSON_EXTRACT('$result', '$.result[0].name') = '$second_binlog'`) +--assert(`SELECT JSON_EXTRACT('$result', '$.result[0].previous_gtids') = '$captured_gtid_purged'`) +--assert(`SELECT JSON_EXTRACT('$result', '$.result[0].added_gtids') = '$captured_second_insert_gtid'`) + +--echo +--echo *** Executing the Binlog Server utility one more time (fetching nothing) +--exec $BINSRV fetch $binsrv_config_file_path > /dev/null + +--echo +--echo *** Executing the Binlog Server utility in the 'search_by_gtid_set' +--echo *** one more time (expecting the same results) +--exec $BINSRV search_by_gtid_set $binsrv_config_file_path $captured_second_insert_gtid > $read_from_file + +--source include/read_file_to_var.inc +--assert(`SELECT JSON_EXTRACT('$result', '$.status') = 'success'`) +--assert(`SELECT JSON_LENGTH(JSON_EXTRACT('$result', '$.result')) = 1`) +--assert(`SELECT JSON_EXTRACT('$result', '$.result[0].name') = '$second_binlog'`) +--assert(`SELECT JSON_EXTRACT('$result', '$.result[0].previous_gtids') = '$captured_gtid_purged'`) +--assert(`SELECT JSON_EXTRACT('$result', '$.result[0].added_gtids') = '$captured_second_insert_gtid'`) + +--echo +--echo *** Removing the search result file. +--remove_file $read_from_file + +--echo +--echo *** Dropping the table. +DROP TABLE t1; + +# cleaning up +--source ../include/tear_down_binsrv_environment.inc diff --git a/mtr/binlog_streaming/t/thousand_binlogs.test b/mtr/binlog_streaming/t/thousand_binlogs.test new file mode 100644 index 0000000..a40dda7 --- /dev/null +++ b/mtr/binlog_streaming/t/thousand_binlogs.test @@ -0,0 +1,90 @@ +--source ../include/have_binsrv.inc +--source include/big_test.inc + +--source ../include/v80_v84_compatibility_defines.inc + +# in case of --repeat=N, we need to start from a fresh binary log to make +# this test deterministic +--echo *** Resetting replication at the very beginning of the test. +--disable_query_log +eval $stmt_reset_binary_logs_and_gtids; +--enable_query_log + +# identifying backend storage type ('file' or 's3') +--source ../include/identify_storage_backend.inc + +# creating data directory, configuration file, etc. +--let $binsrv_connect_timeout = 20 +--let $binsrv_read_timeout = 60 +--let $binsrv_idle_time = 10 +--let $binsrv_verify_checksum = TRUE +--let $binsrv_replication_mode = position +--let $binsrv_checkpoint_size = 1 +--source ../include/set_up_binsrv_environment.inc + +--let $read_from_file = $MYSQL_TMP_DIR/search_result.json + +--echo +--echo *** Creating a simple table. +CREATE TABLE t1(id INT UNSIGNED NOT NULL AUTO_INCREMENT, PRIMARY KEY(id)) ENGINE=InnoDB; + +--let $number_of_binlogs = 1000 +--echo +--echo *** Generating $number_of_binlogs binlogs +--let $index = 0 + +--disable_query_log +while($index < $number_of_binlogs) +{ + INSERT INTO t1 VALUES(); + FLUSH BINARY LOGS; + --inc $index +} +INSERT INTO t1 VALUES(); +--enable_query_log + +--echo +--echo *** Executing the Binlog Server utility and fetching all events. +--exec $BINSRV fetch $binsrv_config_file_path > /dev/null + +--let $current_timestamp = `SELECT DATE_FORMAT(CONVERT_TZ(NOW(), @@session.time_zone, '+00:00'),'%Y-%m-%dT%H:%i:%s')` + +--echo +--echo *** Executing the Binlog Server utility in the 'search_by_timestamp' +--exec $BINSRV search_by_timestamp $binsrv_config_file_path $current_timestamp > $read_from_file + +--source include/read_file_to_var.inc +# the result will be too big, so storing it in a session variable +--disable_query_log +eval SET @search_result = '$result'; +--enable_query_log +--assert(`SELECT JSON_EXTRACT(@search_result, '$.status') = 'success'`) +--assert(`SELECT JSON_LENGTH(JSON_EXTRACT(@search_result, '$.result')) = $number_of_binlogs + 1`) + +--echo +--echo *** Executing the Binlog Server utility one more time (fetching nothing) +--exec $BINSRV fetch $binsrv_config_file_path > /dev/null + +--echo +--echo *** Executing the Binlog Server utility in the 'search_by_timestamp' +--echo *** one more time (expecting the same results) +--exec $BINSRV search_by_timestamp $binsrv_config_file_path $current_timestamp > $read_from_file + +--source include/read_file_to_var.inc +# the result will be too big, so storing it in a session variable +--disable_query_log +eval SET @search_result = '$result'; +--enable_query_log +--assert(`SELECT JSON_EXTRACT('$result', '$.status') = 'success'`) +--assert(`SELECT JSON_LENGTH(JSON_EXTRACT('$result', '$.result')) = $number_of_binlogs + 1`) + +--echo +--echo *** Removing the search result file. +--remove_file $read_from_file + +--echo +--echo *** Dropping the table. +DROP TABLE t1; + +# cleaning up +--source ../include/tear_down_binsrv_environment.inc diff --git a/packaging/debian/changelog b/packaging/debian/changelog index b193069..8406430 100644 --- a/packaging/debian/changelog +++ b/packaging/debian/changelog @@ -1,3 +1,29 @@ +percona-binlog-server (0.2.3-1) unstable; urgency=low + + * PS-11033 Crash when S3 bucket accumulates large number of objects; recovery requires manual intervention. + + -- Yura Sorokin Fri, 24 Apr 2026 04:29:21 +0200 + +percona-binlog-server (0.2.2-1) unstable; urgency=low + + * PS-11054 Cannot replicate because the source purged required binary logs. + + -- Yura Sorokin Tue, 21 Apr 2026 19:09:19 +0200 + +percona-binlog-server (0.2.1-1) unstable; urgency=low + + * PS-10910 Bucket name is missing in search_* outputs. + * PS-11002 Changing storage prefix fails. + * PS-10911 Unexpected binlog position in artificial rotate event. + + -- Yura Sorokin Fri, 10 Apr 2026 16:13:14 +0200 + +percona-binlog-server (0.2.0-1) unstable; urgency=low + + * Packaging for 0.2.0-1 + + -- Yura Sorokin Mon, 16 Mar 2026 15:33:00 +0200 + percona-binlog-server (0.1.0-1) unstable; urgency=low * Packaging for 0.1.0-1 diff --git a/packaging/rpm/binlog-server.spec b/packaging/rpm/binlog-server.spec index f3f35d7..5fa5d16 100644 --- a/packaging/rpm/binlog-server.spec +++ b/packaging/rpm/binlog-server.spec @@ -134,8 +134,22 @@ install -D -m 0644 main_config.json %{buildroot}/%{_sysconfdir}/percona-binlog- %changelog -* Fri Jan 16 2026 Vadim Yalovets - 1.0.0-1 -- PKG-1208 Prepare packages for Percona Binlog Server +* Fri Apr 24 2026 Yura Sorokin - 0.2.3-1 +- PS-11033 Crash when S3 bucket accumulates large number of objects; recovery requires manual intervention. + +* Tue Apr 21 2026 Yura Sorokin - 0.2.2-1 +- PS-11054 Cannot replicate because the source purged required binary logs. + +* Fri Apr 10 2026 Yura Sorokin - 0.2.1-1 +- PS-10910 Bucket name is missing in search_* outputs. +- PS-11002 Changing storage prefix fails. +- PS-10911 Unexpected binlog position in artificial rotate event. + +* Mon Mar 16 2026 Yura Sorokin - 0.2.0-1 +- Percona Binlog Server with GTID replication support. + +* Fri Jan 16 2026 Vadim Yalovets - 0.1.0-2 +- PKG-1208 Prepare packages for Percona Binlog Server. * Mon Aug 26 2024 Surabhi Bhat - 0.1.0-1 - Initial package with separate builds for Debug and RelWithDebInfo versions. diff --git a/packaging/scripts/binlog-server_builder.sh b/packaging/scripts/binlog-server_builder.sh index e91758f..8aa707f 100644 --- a/packaging/scripts/binlog-server_builder.sh +++ b/packaging/scripts/binlog-server_builder.sh @@ -264,7 +264,7 @@ install_deps() { apt install -y libzstd-dev fi apt install -y zlib1g-dev libcurl4-openssl-dev libssl-dev - apt install -y libmysqlclient-dev + apt install -y libperconaserverclient24-dev fi return } diff --git a/src/app.cpp b/src/app.cpp index 6bb3ba2..60eccba 100644 --- a/src/app.cpp +++ b/src/app.cpp @@ -249,7 +249,9 @@ void log_storage_config_info(binsrv::basic_logger &logger, log_config_param<"backend">(logger, storage_config, "binlog storage backend type"); - // not printing "uri" here deliberately to avoid credentials leaking + logger.log(binsrv::log_severity::info, + "binlog storage backend URI (masked): " + + storage_config.get_masked_uri()); log_config_param<"fs_buffer_directory">( logger, storage_config, "binlog storage backend filesystem buffer directory"); @@ -420,14 +422,17 @@ void process_artificial_rotate_event( storage.get_current_binlog_name()) { // in addition, in position-based replication mode we also need to check // the position - const binsrv::events::generic_post_header< - binsrv::events::code_type::rotate> - current_rotate_post_header{current_event_v.get_post_header_raw()}; - - if (current_rotate_post_header.get_position_raw() != - storage.get_current_position()) { - util::exception_location().raise( - "unexpected binlog position in artificial rotate event"); + if (storage.get_replication_mode() == + binsrv::replication_mode_type::position) { + const binsrv::events::generic_post_header< + binsrv::events::code_type::rotate> + current_rotate_post_header{current_event_v.get_post_header_raw()}; + + if (current_rotate_post_header.get_position_raw() != + storage.get_current_position()) { + util::exception_location().raise( + "unexpected binlog position in artificial rotate event"); + } } binlog_opening_needed = false; @@ -773,7 +778,7 @@ bool open_connection_and_switch_to_replication( if (operation_mode == binsrv::operation_mode_type::fetch) { throw; } - logger.log(binsrv::log_severity::info, + logger.log(binsrv::log_severity::error, "unable to establish connection to mysql server"); return false; } @@ -790,7 +795,20 @@ bool open_connection_and_switch_to_replication( try { if (storage.is_in_gtid_replication_mode()) { - const auto >ids{storage.get_gtids()}; + if (storage.is_empty()) { + static constexpr std::string_view select_gtid_purged_query{ + "SELECT @@GLOBAL.gtid_purged"}; + storage.set_purged_gtids(binsrv::gtids::gtid_set{ + connection.execute_select_query_string_result( + select_gtid_purged_query)}); + logger.log( + binsrv::log_severity::info, + "extracted purged GTIDs from the mysql server for an empty " + "storage: " + + boost::lexical_cast(storage.get_purged_gtids())); + } + + const auto gtids{storage.get_gtids()}; const auto encoded_size{gtids.calculate_encoded_size()}; binsrv::gtids::gtid_set_storage encoded_gtids_buffer(encoded_size); @@ -814,7 +832,7 @@ bool open_connection_and_switch_to_replication( if (operation_mode == binsrv::operation_mode_type::fetch) { throw; } - logger.log(binsrv::log_severity::info, "unable to switch to replication"); + logger.log(binsrv::log_severity::error, "unable to switch to replication"); return false; } @@ -1073,7 +1091,9 @@ int main(int argc, char *argv[]) { std::cerr << "usage: " << executable_name << " (fetch|pull)) \n" << " " << executable_name - << " search_by_timestamp \n" + << " search_by_timestamp \n" + << " " << executable_name + << " search_by_gtid_set \n" << " " << executable_name << " version\n"; return EXIT_FAILURE; } diff --git a/src/app_version.hpp b/src/app_version.hpp index 4ce2568..12322aa 100644 --- a/src/app_version.hpp +++ b/src/app_version.hpp @@ -18,6 +18,6 @@ #include "util/semantic_version.hpp" -static constexpr util::semantic_version app_version{0U, 1U, 0U}; +static constexpr util::semantic_version app_version{0U, 2U, 3U}; #endif // APP_VERSION_HPP diff --git a/src/binsrv/basic_logger.cpp b/src/binsrv/basic_logger.cpp index e917db2..1c426d6 100644 --- a/src/binsrv/basic_logger.cpp +++ b/src/binsrv/basic_logger.cpp @@ -15,6 +15,7 @@ #include "binsrv/basic_logger.hpp" +#include #include #include @@ -31,17 +32,22 @@ basic_logger::basic_logger(log_severity min_level) noexcept void basic_logger::log(log_severity level, std::string_view message) { if (level >= min_level_) { - static constexpr auto timestamp_length{ + // the length of the longest log severity label + // ('trace' / 'debug' / 'info' / 'warning' / 'error' / 'fatal') + static constexpr std::size_t padded_label_length{7U}; + static constexpr std::size_t timestamp_length{ std::size("YYYY-MM-DDTHH:MM:SS.fffffffff") - 1U}; const auto timestamp = boost::posix_time::microsec_clock::universal_time(); - ; const auto level_label = to_string_view(level); + const std::string label_padding( + padded_label_length - std::size(level_label), ' '); std::string buf; - buf.reserve(1 + timestamp_length + 1 + 1 + 1 + std::size(level_label) + 1 + - 1 + std::size(message)); + buf.reserve(1U + timestamp_length + 1U + 1U + 1U + padded_label_length + + 1U + 1U + std::size(message)); buf += '['; buf += boost::posix_time::to_iso_extended_string(timestamp); buf += "] ["; + buf += label_padding; buf += level_label; buf += "] "; buf += message; diff --git a/src/binsrv/s3_storage_backend.cpp b/src/binsrv/s3_storage_backend.cpp index 77e1be7..5a3ebe8 100644 --- a/src/binsrv/s3_storage_backend.cpp +++ b/src/binsrv/s3_storage_backend.cpp @@ -47,6 +47,8 @@ #include +#include + #include #include @@ -57,6 +59,7 @@ #include #include #include +#include #include #include "binsrv/s3_error_helpers_private.hpp" @@ -187,6 +190,12 @@ class s3_storage_backend::aws_context : private aws_context_base { void get_object_internal(const qualified_object_path &source, const stream_factory_type &stream_factory, const stream_handler_type &stream_handler) const; + + using list_object_container = Aws::Vector; + static void + process_list_objects_internal(const list_object_container &list_objects, + const std::string &prefix, + storage_object_name_container &storage_objects); }; s3_storage_backend::aws_context::aws_context( @@ -370,49 +379,45 @@ s3_storage_backend::aws_context::list_objects( list_objects_request.SetPrefix(prefix_str); } - const auto list_objects_outcome{client_->ListObjectsV2(list_objects_request)}; - if (!list_objects_outcome.IsSuccess()) { - raise_s3_error_from_outcome("cannot list objects in the specified bucket", - list_objects_outcome.GetError()); - } - const auto &list_objects_result = list_objects_outcome.GetResult(); - // TODO: implement receiving the rest of the list - if (list_objects_result.GetIsTruncated()) { - util::exception_location().raise( - "too many objects in the specified bucket"); - } + std::string continuation_token; + bool has_more{true}; + while (has_more) { + if (!continuation_token.empty()) { + list_objects_request.SetContinuationToken(continuation_token); + } - auto model_key_count = list_objects_result.GetKeyCount(); - if (!std::in_range(model_key_count)) { - util::exception_location().raise( - "invalid key count in the list objects result"); - } - auto key_count{static_cast(model_key_count)}; + const auto list_objects_outcome{ + client_->ListObjectsV2(list_objects_request)}; + if (!list_objects_outcome.IsSuccess()) { + raise_s3_error_from_outcome("cannot list objects in the specified bucket", + list_objects_outcome.GetError()); + } + const auto &list_objects_result = list_objects_outcome.GetResult(); - const auto &model_objects{list_objects_result.GetContents()}; - if (key_count != std::size(model_objects)) { - util::exception_location().raise( - "key count does not match the number of objects in the list objects " - "result"); - } - result.reserve(key_count); + auto model_key_count = list_objects_result.GetKeyCount(); + if (!std::in_range(model_key_count)) { + util::exception_location().raise( + "invalid key count in the list objects result"); + } + auto key_count{static_cast(model_key_count)}; - for (const auto &model_object : model_objects) { - // if the prefix is set, the list of objects in the response will include - // the prefix itself (as a directory) with zero size - it needs to be - // skipped + const auto &model_objects{list_objects_result.GetContents()}; + if (key_count != std::size(model_objects)) { + util::exception_location().raise( + "key count does not match the number of objects in the list objects " + "result"); + } + result.reserve(std::size(result) + key_count); - // moreover, we need to remove the prefix itself from the object paths - std::string_view key{model_object.GetKey()}; - if (!prefix_str.empty()) { - if (!key.starts_with(prefix_str)) { + process_list_objects_internal(model_objects, prefix_str, result); + + has_more = list_objects_result.GetIsTruncated(); + if (has_more) { + continuation_token = list_objects_result.GetNextContinuationToken(); + if (continuation_token.empty()) { util::exception_location().raise( - "encountered an object with unexpected prefix"); + "truncated list objects result is missing continuation token"); } - key.remove_prefix(std::size(prefix_str)); - } - if (!key.empty()) { - result.emplace(key, model_object.GetSize()); } } @@ -452,6 +457,29 @@ void s3_storage_backend::aws_context::get_object_internal( } } +void s3_storage_backend::aws_context::process_list_objects_internal( + const list_object_container &list_objects, const std::string &prefix, + storage_object_name_container &storage_objects) { + for (const auto &list_object : list_objects) { + // if the prefix is set, the list of objects in the response will + // include the prefix itself (as a directory) with zero size - it + // needs to be skipped + + // moreover, we need to remove the prefix itself from the object paths + std::string_view key{list_object.GetKey()}; + if (!prefix.empty()) { + if (!key.starts_with(prefix)) { + util::exception_location().raise( + "encountered an object with unexpected prefix"); + } + key.remove_prefix(std::size(prefix)); + } + if (!key.empty()) { + storage_objects.emplace(key, list_object.GetSize()); + } + } +} + s3_storage_backend::s3_storage_backend(const storage_config &config) : bucket_{}, root_path_{}, current_name_{}, uuid_generator_{}, tmp_file_directory_{}, current_tmp_file_path_{}, tmp_fstream_{}, impl_{} { @@ -737,8 +765,11 @@ s3_storage_backend::do_get_object_uri(std::string_view name) const { if (impl_->has_endpoint()) { result.set_scheme(impl_->get_scheme_label()); result.set_encoded_authority(impl_->get_endpoint()); - std::filesystem::path result_path{result.path()}; - result_path /= get_object_path(name); + std::filesystem::path result_path{"/"}; + result_path /= get_bucket(); + // according to the standard if the rhs of the operator /= is an + // absolute path, it must overwrite parts of the lhs + result_path /= get_object_path(name).relative_path(); result.set_path(result_path.generic_string()); } else { result.set_scheme(original_uri_schema); diff --git a/src/binsrv/storage.cpp b/src/binsrv/storage.cpp index 3c55169..90fd655 100644 --- a/src/binsrv/storage.cpp +++ b/src/binsrv/storage.cpp @@ -83,6 +83,13 @@ storage::storage(const storage_config &config, load_metadata(); validate_metadata(replication_mode); + // if after metadata erasure 'storage_objects' is empty, then this mean + // that it has only metadata in it that passes validation and we can + // consider it as an initialized empty storage, so just return + if (storage_objects.empty()) { + return; + } + const auto binlog_index_it{storage_objects.find(default_binlog_index_name)}; if (binlog_index_it == std::cend(storage_objects)) { util::exception_location().raise( @@ -108,6 +115,8 @@ storage::storage(const storage_config &config, load_and_validate_binlog_metadata_set(storage_objects, storage_metadata_objects); + assert(!binlog_records_.front().added_gtids.has_value() || + purged_gtids_ == binlog_records_.front().added_gtids); } storage::~storage() { @@ -120,6 +129,18 @@ storage::~storage() { } } +void storage::set_purged_gtids(const gtids::gtid_set &purged_gtids) { + if (!is_in_gtid_replication_mode()) { + util::exception_location().raise( + "cannot set purged GTIDs in position-based replication mode"); + } + if (!is_empty()) { + util::exception_location().raise( + "cannot set purged GTIDs in a non-empty storage"); + } + purged_gtids_ = purged_gtids; +} + [[nodiscard]] std::string storage::get_backend_description() const { return backend_->get_description(); } @@ -533,6 +554,13 @@ void storage::load_and_validate_binlog_metadata_set( util::exception_location().raise( "found metadata for a non-existing binlog"); } + + // if we are in GTID replication mode, then we can consider GTIDs from the + // first binlog metadata as purged GTIDs for the whole storage + const auto &optional_added_gtids{binlog_records_.front().added_gtids}; + if (optional_added_gtids.has_value()) { + purged_gtids_ = *optional_added_gtids; + } } } // namespace binsrv diff --git a/src/binsrv/storage.hpp b/src/binsrv/storage.hpp index e68dce2..ecdbbcc 100644 --- a/src/binsrv/storage.hpp +++ b/src/binsrv/storage.hpp @@ -71,6 +71,11 @@ class [[nodiscard]] storage { // file to complete the rule of 5 ~storage(); + [[nodiscard]] const gtids::gtid_set &get_purged_gtids() const noexcept { + return purged_gtids_; + } + void set_purged_gtids(const gtids::gtid_set &purged_gtids); + [[nodiscard]] std::string get_backend_description() const; [[nodiscard]] replication_mode_type get_replication_mode() const noexcept { @@ -95,16 +100,19 @@ class [[nodiscard]] storage { } [[nodiscard]] gtids::gtid_set get_gtids() const { - gtids::gtid_set result{}; + if (!is_in_gtid_replication_mode()) { + return {}; + } + if (is_empty()) { - return result; + return get_purged_gtids(); } + gtids::gtid_set result{}; const auto &optional_previous_gtids{ get_current_binlog_record().previous_gtids}; - if (!optional_previous_gtids.has_value()) { - return result; + if (optional_previous_gtids.has_value()) { + result = *optional_previous_gtids; } - result = *optional_previous_gtids; const auto &optional_added_gtids{get_current_binlog_record().added_gtids}; if (optional_added_gtids.has_value()) { result.add(*optional_added_gtids); @@ -134,6 +142,7 @@ class [[nodiscard]] storage { replication_mode_type replication_mode_; events::composite_binlog_name binlog_name_sentinel_{}; + gtids::gtid_set purged_gtids_{}; binlog_record_container binlog_records_{}; std::uint64_t checkpoint_size_bytes_{0ULL}; diff --git a/src/binsrv/storage_config.cpp b/src/binsrv/storage_config.cpp new file mode 100644 index 0000000..31707f7 --- /dev/null +++ b/src/binsrv/storage_config.cpp @@ -0,0 +1,32 @@ +// Copyright (c) 2023-2024 Percona and/or its affiliates. +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License, version 2.0, +// as published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License, version 2.0, for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +#include "binsrv/storage_config.hpp" + +#include + +#include + +namespace binsrv { + +[[nodiscard]] std::string storage_config::get_masked_uri() const { + boost::urls::url masked_uri{get<"uri">()}; + if (masked_uri.has_userinfo()) { + masked_uri.set_userinfo("***:***"); + } + return masked_uri.c_str(); +} + +} // namespace binsrv diff --git a/src/binsrv/storage_config.hpp b/src/binsrv/storage_config.hpp index 9ebe0ec..53217b4 100644 --- a/src/binsrv/storage_config.hpp +++ b/src/binsrv/storage_config.hpp @@ -37,7 +37,9 @@ struct [[nodiscard]] storage_config util::nv<"fs_buffer_directory", util::optional_string>, util::nv<"checkpoint_size", optional_size_unit>, util::nv<"checkpoint_interval", optional_time_unit> - > {}; + > { + [[nodiscard]] std::string get_masked_uri() const; +}; // clang-format on } // namespace binsrv diff --git a/src/easymysql/connection.cpp b/src/easymysql/connection.cpp index 53d8956..fd8140a 100644 --- a/src/easymysql/connection.cpp +++ b/src/easymysql/connection.cpp @@ -429,10 +429,64 @@ void connection::execute_generic_query_noresult(std::string_view query) { auto *casted_impl = mysql_deimpl::get(mysql_impl_); if (mysql_real_query(casted_impl, std::data(query), std::size(query)) != 0) { - raise_core_error_from_connection("cannot execute query", *this); + raise_core_error_from_connection("cannot execute noresult query", *this); } } +[[nodiscard]] std::string +connection::execute_select_query_string_result(std::string_view query) { + assert(!is_empty()); + if (is_in_replication_mode()) { + util::exception_location().raise( + "cannot execute query in replication mode"); + } + + auto *casted_impl = mysql_deimpl::get(mysql_impl_); + if (mysql_real_query(casted_impl, std::data(query), std::size(query)) != 0) { + raise_core_error_from_connection("cannot execute string result query", + *this); + } + + const auto mysql_res_deleter = [](MYSQL_RES *result_raw) { + if (result_raw != nullptr) { + mysql_free_result(result_raw); + } + }; + using mysql_res_ptr = std::unique_ptr; + + const mysql_res_ptr result{mysql_store_result(casted_impl), + mysql_res_deleter}; + if (!result) { + raise_core_error_from_connection("cannot store query result", *this); + } + if (mysql_num_rows(result.get()) != 1U) { + raise_core_error_from_connection("query did not return exactly one row", + *this); + } + + static constexpr std::size_t expected_num_fields{1U}; + if (mysql_num_fields(result.get()) != expected_num_fields) { + raise_core_error_from_connection("query did not return exactly one column", + *this); + } + + MYSQL_ROW row_raw{mysql_fetch_row(result.get())}; + assert(row_raw != nullptr); + + const std::span row{ + row_raw, expected_num_fields}; + if (row.front() == nullptr) { + raise_core_error_from_connection("query returned NULL value", *this); + } + + const auto *const lengths_raw{mysql_fetch_lengths(result.get())}; + assert(lengths_raw != nullptr); + const std::span lengths{ + lengths_raw, expected_num_fields}; + + return std::string{row.front(), lengths.front()}; +} + bool connection::ping() { assert(!is_empty()); if (is_in_replication_mode()) { diff --git a/src/easymysql/connection.hpp b/src/easymysql/connection.hpp index 3bff42e..1d55dc1 100644 --- a/src/easymysql/connection.hpp +++ b/src/easymysql/connection.hpp @@ -58,6 +58,8 @@ class [[nodiscard]] connection { [[nodiscard]] std::string_view get_character_set_name() const noexcept; void execute_generic_query_noresult(std::string_view query); + [[nodiscard]] std::string + execute_select_query_string_result(std::string_view query); [[nodiscard]] bool ping(); [[nodiscard]] bool is_in_replication_mode() const noexcept { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index a0e928c..4e014e7 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -95,9 +95,11 @@ set_target_properties(event_test PROPERTIES CXX_EXTENSIONS NO ) -add_test(NAME byte_span_encoding_test COMMAND byte_span_encoding_test) -add_test(NAME uuid_test COMMAND uuid_test) -add_test(NAME tag_test COMMAND tag_test) -add_test(NAME gtid_test COMMAND gtid_test) -add_test(NAME gtid_set_test COMMAND gtid_set_test) -add_test(NAME event_test COMMAND event_test) + +set(test_run_options --no_color_output) +add_test(NAME byte_span_encoding_test COMMAND byte_span_encoding_test ${test_run_options}) +add_test(NAME uuid_test COMMAND uuid_test ${test_run_options}) +add_test(NAME tag_test COMMAND tag_test ${test_run_options}) +add_test(NAME gtid_test COMMAND gtid_test ${test_run_options}) +add_test(NAME gtid_set_test COMMAND gtid_set_test ${test_run_options}) +add_test(NAME event_test COMMAND event_test ${test_run_options}) diff --git a/tests/gtid_set_test.cpp b/tests/gtid_set_test.cpp index 592503c..16a1b70 100644 --- a/tests/gtid_set_test.cpp +++ b/tests/gtid_set_test.cpp @@ -721,3 +721,33 @@ BOOST_AUTO_TEST_CASE(GtidSetWhitespaces) { boost::lexical_cast(gtids_str)}; BOOST_CHECK_EQUAL(gtids, restored_gtids); } + +BOOST_AUTO_TEST_CASE(GtidSetSimulateSearchByGTIDSet) { + constexpr std::string_view first_random_uuid_sv{ + "ae6896b1-2e94-11f1-af82-76efd046f53d"}; + constexpr std::string_view second_random_uuid_sv{ + "bcd66d18-2e94-11f1-b929-76efd046f53d"}; + + const std::string first_binlog_file_added_gtids_str{ + std::string{first_random_uuid_sv} + ":1-4, " + + std::string{second_random_uuid_sv} + ":1-1103"}; + const binsrv::gtids::gtid_set first_binlog_file_added_gtids{ + first_binlog_file_added_gtids_str}; + BOOST_CHECK(first_binlog_file_added_gtids.str() == + first_binlog_file_added_gtids_str); + + const binsrv::gtids::gtid lookup_gtid{ + binsrv::gtids::uuid{second_random_uuid_sv}, 1103ULL}; + + BOOST_CHECK(first_binlog_file_added_gtids.contains(lookup_gtid)); + + const binsrv::gtids::gtid_set lookup_gtids{lookup_gtid}; + BOOST_CHECK( + binsrv::gtids::intersects(first_binlog_file_added_gtids, lookup_gtids)); + BOOST_CHECK( + binsrv::gtids::intersects(lookup_gtids, first_binlog_file_added_gtids)); + + binsrv::gtids::gtid_set remaining_gtids{lookup_gtids}; + remaining_gtids.subtract(first_binlog_file_added_gtids); + BOOST_CHECK(remaining_gtids.is_empty()); +}