From 00c1ed424565c2cbd3a3f16d4212087ce4361ce1 Mon Sep 17 00:00:00 2001 From: slach Date: Wed, 13 May 2026 23:24:49 +0500 Subject: [PATCH 1/9] feat: add clean_broken_retention CLI command Walks top-level of remote `path` and `object_disks_path` and batch-deletes (with retry, via the existing BatchDeleter pipeline) every entry that is not present in the live BackupList and not matched by any --keep=. Dry-run by default; --commit performs the deletes. Useful for cleaning up orphans left behind by failed retention runs (e.g. the GCS 503 scenario from #1356). --- ChangeLog.md | 5 ++ cmd/clickhouse-backup/main.go | 20 +++++ pkg/backup/delete.go | 142 ++++++++++++++++++++++++++++++++++ 3 files changed, 167 insertions(+) diff --git a/ChangeLog.md b/ChangeLog.md index 315aa70f..3e742e74 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,3 +1,8 @@ +# unreleased + +NEW FEATURES +- add `clean_broken_retention` CLI command — walks top-level of remote `path` and `object_disks_path` and batch-deletes (with retry) every entry that is not present in the live backup list and not matched by any `--keep=`. Dry-run by default; pass `--commit` to actually delete. Useful for cleaning up orphans left by failed retention runs + # v2.6.43 NEW FEATURES diff --git a/cmd/clickhouse-backup/main.go b/cmd/clickhouse-backup/main.go index 99589137..2e897c8c 100644 --- a/cmd/clickhouse-backup/main.go +++ b/cmd/clickhouse-backup/main.go @@ -709,6 +709,26 @@ func main() { }, Flags: cliapp.Flags, }, + { + Name: "clean_broken_retention", + Usage: "Remove orphan entries under remote `path` and `object_disks_path` that are not in the live backup list", + UsageText: "clickhouse-backup clean_broken_retention [--commit] [--keep=glob ...]", + Description: "Walks top-level of remote `path` and `object_disks_path`, batch-deletes (with retry) every entry that is not a live backup and does not match any --keep glob. Runs in dry-run mode unless --commit is set.", + Action: func(c *cli.Context) error { + b := backup.NewBackuper(config.GetConfigFromCli(c)) + return b.CleanBrokenRetention(status.NotFromAPI, c.StringSlice("keep"), c.Bool("commit")) + }, + Flags: append(cliapp.Flags, + cli.StringSliceFlag{ + Name: "keep", + Usage: "Glob (path.Match syntax) of backup names to preserve in addition to live backups; can be passed multiple times", + }, + cli.BoolFlag{ + Name: "commit", + Usage: "Actually delete orphans; without this flag the command only logs what would be deleted", + }, + ), + }, { Name: "watch", diff --git a/pkg/backup/delete.go b/pkg/backup/delete.go index 5009a744..54265e78 100644 --- a/pkg/backup/delete.go +++ b/pkg/backup/delete.go @@ -14,6 +14,7 @@ import ( "github.com/Altinity/clickhouse-backup/v2/pkg/clickhouse" "github.com/Altinity/clickhouse-backup/v2/pkg/custom" + "github.com/Altinity/clickhouse-backup/v2/pkg/metadata" "github.com/Altinity/clickhouse-backup/v2/pkg/status" "github.com/Altinity/clickhouse-backup/v2/pkg/storage" "github.com/Altinity/clickhouse-backup/v2/pkg/storage/object_disk" @@ -557,6 +558,147 @@ func (b *Backuper) CleanRemoteBroken(commandId int) error { return nil } +// CleanBrokenRetention walks remote `path` and `object_disks_path` top-level entries +// and removes everything that is NOT present in the live BackupList and NOT matched by keepGlobs. +// Uses BatchDeleter with retry. When commit=false, only logs orphans without deleting. +// keepGlobs follows path.Match syntax (e.g. "prod-*", "snapshot-2026-??-*"). 
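+//
+// Example CLI usage (flags as wired up in cmd/clickhouse-backup/main.go; dry-run is the default):
+//
+//	clickhouse-backup clean_broken_retention --keep='prod-*'
+//	clickhouse-backup clean_broken_retention --keep='prod-*' --commit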
+func (b *Backuper) CleanBrokenRetention(commandId int, keepGlobs []string, commit bool) error { + ctx, cancel, err := status.Current.GetContextWithCancel(commandId) + if err != nil { + return errors.WithMessage(err, "status.Current.GetContextWithCancel") + } + ctx, cancel = context.WithCancel(ctx) + defer cancel() + + if b.cfg.General.RemoteStorage == "none" { + return errors.New("aborted: RemoteStorage set to \"none\"") + } + if b.cfg.General.RemoteStorage == "custom" { + return errors.New("aborted: clean_broken_retention does not support custom remote storage") + } + for _, g := range keepGlobs { + if _, err := path.Match(g, ""); err != nil { + return errors.Wrapf(err, "invalid keep-glob %q", g) + } + } + if err := b.ch.Connect(); err != nil { + return errors.Wrap(err, "can't connect to clickhouse") + } + defer b.ch.Close() + + bd, err := storage.NewBackupDestination(ctx, b.cfg, b.ch, "") + if err != nil { + return errors.WithMessage(err, "storage.NewBackupDestination") + } + if err = bd.Connect(ctx); err != nil { + return errors.Wrap(err, "can't connect to remote storage") + } + defer func() { + if closeErr := bd.Close(ctx); closeErr != nil { + log.Warn().Msgf("can't close BackupDestination error: %v", closeErr) + } + }() + b.dst = bd + + backupList, err := bd.BackupList(ctx, false, "") + if err != nil { + return errors.WithMessage(err, "bd.BackupList") + } + keepNames := make(map[string]struct{}, len(backupList)) + for _, backup := range backupList { + keepNames[backup.BackupName] = struct{}{} + } + isKept := func(name string) bool { + if _, ok := keepNames[name]; ok { + return true + } + for _, g := range keepGlobs { + if ok, _ := path.Match(g, name); ok { + return true + } + } + return false + } + + mode := "dry-run" + if commit { + mode = "commit" + } + log.Info().Msgf("clean_broken_retention: mode=%s, %d live backups, %d keep-globs", mode, len(backupList), len(keepGlobs)) + + orphansInPath, err := b.findOrphanTopLevelNames(ctx, bd, "/", isKept) + if err != nil { + return errors.WithMessage(err, "scan path orphans") + } + for _, name := range orphansInPath { + if !commit { + log.Info().Str("orphan", name).Str("location", "path").Msg("clean_broken_retention: would delete") + continue + } + log.Info().Str("orphan", name).Str("location", "path").Msg("clean_broken_retention: deleting") + if err := bd.RemoveBackupRemote(ctx, storage.Backup{BackupMetadata: metadata.BackupMetadata{BackupName: name}}, b.cfg, b); err != nil { + return errors.Wrapf(err, "bd.RemoveBackupRemote orphan %s", name) + } + } + + objectDiskPath, err := b.getObjectDiskPath() + if err != nil { + return errors.WithMessage(err, "b.getObjectDiskPath") + } + var orphansInObj []string + if objectDiskPath != "" { + orphansInObj, err = b.findOrphanTopLevelNames(ctx, bd, objectDiskPath, isKept) + if err != nil { + return errors.WithMessage(err, "scan object_disks_path orphans") + } + for _, name := range orphansInObj { + if !commit { + log.Info().Str("orphan", name).Str("location", "object_disks_path").Msg("clean_broken_retention: would delete") + continue + } + log.Info().Str("orphan", name).Str("location", "object_disks_path").Msg("clean_broken_retention: deleting") + deletedKeys, deleteErr := b.cleanBackupObjectDisks(ctx, name) + if deleteErr != nil { + return errors.Wrapf(deleteErr, "cleanBackupObjectDisks orphan %s", name) + } + log.Info().Str("orphan", name).Uint("deleted", deletedKeys).Msg("clean_broken_retention: object disk orphan cleaned") + } + } + log.Info().Msgf("clean_broken_retention: done, mode=%s, path 
orphans=%d, object_disks_path orphans=%d", mode, len(orphansInPath), len(orphansInObj)) + return nil +} + +// findOrphanTopLevelNames lists top-level entries under rootPath (absolute when rootPath != "/") +// and returns names that are not kept by isKept. Top-level only: any names containing "/" are skipped. +func (b *Backuper) findOrphanTopLevelNames(ctx context.Context, bd *storage.BackupDestination, rootPath string, isKept func(string) bool) ([]string, error) { + seen := make(map[string]struct{}) + walkFn := func(_ context.Context, f storage.RemoteFile) error { + name := strings.TrimSuffix(f.Name(), "/") + if name == "" || strings.Contains(name, "/") { + return nil + } + if isKept(name) { + return nil + } + seen[name] = struct{}{} + return nil + } + var err error + if rootPath == "/" || rootPath == "" { + err = bd.Walk(ctx, "/", false, walkFn) + } else { + err = bd.WalkAbsolute(ctx, rootPath, false, walkFn) + } + if err != nil { + return nil, errors.Wrapf(err, "walk %q", rootPath) + } + out := make([]string, 0, len(seen)) + for n := range seen { + out = append(out, n) + } + return out, nil +} + func (b *Backuper) cleanPartialRequiredBackup(ctx context.Context, disks []clickhouse.Disk, currentBackupName string) error { if localBackups, _, err := b.GetLocalBackups(ctx, disks); err == nil { for _, localBackup := range localBackups { From 374938d77813db2b8f5b1ebf8a7236389833c8c6 Mon Sep 17 00:00:00 2001 From: slach Date: Wed, 13 May 2026 23:35:04 +0500 Subject: [PATCH 2/9] test(integration): cover clean_broken_retention against S3 (minio) Plant orphans directly under /minio/data/clickhouse for both `path` and `object_disks_path`, then verify: - dry-run lists them without deleting, - --commit removes them from both locations, - --keep= preserves matching entries, - the live backup is never touched. --- test/integration/cleanBrokenRetention_test.go | 105 ++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 test/integration/cleanBrokenRetention_test.go diff --git a/test/integration/cleanBrokenRetention_test.go b/test/integration/cleanBrokenRetention_test.go new file mode 100644 index 00000000..232a1abe --- /dev/null +++ b/test/integration/cleanBrokenRetention_test.go @@ -0,0 +1,105 @@ +//go:build integration + +package main + +import ( + "fmt" + "strings" + "testing" + "time" + + "github.com/rs/zerolog/log" +) + +// TestCleanBrokenRetention verifies that `clean_broken_retention`: +// - lists orphans (dry-run) without deleting, +// - on --commit removes orphans from both `path` and `object_disks_path`, +// - preserves the live backup and entries matched by --keep globs. +// +// Runs against S3 (minio) — the remote-storage-agnostic logic lives in pkg/backup +// and reuses the BatchDeleter pipeline already exercised by every backend. 
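+//
+// Orphans are planted by writing files directly into the minio container filesystem
+// under /minio/data, which relies on minio exposing bucket objects as plain files there.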
+func TestCleanBrokenRetention(t *testing.T) { + env, r := NewTestEnvironment(t) + env.connectWithWait(t, r, 0*time.Second, 1*time.Second, 1*time.Minute) + defer env.Cleanup(t, r) + r.NoError(env.DockerCP("configs/config-s3.yml", "clickhouse-backup:/etc/clickhouse-backup/config.yml")) + + const tableName = "default.clean_broken_retention_test" + env.queryWithNoError(r, fmt.Sprintf("CREATE TABLE IF NOT EXISTS %s(id UInt64) ENGINE=MergeTree() ORDER BY id", tableName)) + env.queryWithNoError(r, fmt.Sprintf("INSERT INTO %s SELECT number FROM numbers(50)", tableName)) + + keepBackup := fmt.Sprintf("keep_backup_%d", time.Now().UnixNano()) + orphanPath := fmt.Sprintf("orphan_path_%d", time.Now().UnixNano()) + orphanObj := fmt.Sprintf("orphan_obj_%d", time.Now().UnixNano()) + orphanKept := fmt.Sprintf("orphan_keep_%d", time.Now().UnixNano()) + + // macros from configs/clickhouse-config.xml: {cluster}=cluster, {shard}=0 + const pathPrefix = "/minio/data/clickhouse/backup/cluster/0" + const objPrefix = "/minio/data/clickhouse/object_disk/cluster/0" + + log.Debug().Msg("Create a live backup that must survive the cleanup") + env.DockerExecNoError(r, "clickhouse-backup", "clickhouse-backup", "create_remote", "--tables", tableName, keepBackup) + + log.Debug().Msg("Plant orphans directly on the minio filesystem") + plant := func(parent, name string) { + env.DockerExecNoError(r, "minio", "bash", "-c", fmt.Sprintf( + "mkdir -p %s/%s/sub && echo garbage > %s/%s/data.bin && echo garbage > %s/%s/sub/nested.bin", + parent, name, parent, name, parent, name, + )) + } + plant(pathPrefix, orphanPath) + plant(objPrefix, orphanObj) + plant(pathPrefix, orphanKept) + plant(objPrefix, orphanKept) + + assertExists := func(parent, name string) { + out, err := env.DockerExecOut("minio", "ls", parent+"/"+name) + r.NoError(err, "expected %s/%s to exist, output: %s", parent, name, out) + } + assertGone := func(parent, name string) { + out, err := env.DockerExecOut("minio", "bash", "-c", fmt.Sprintf("ls %s/%s 2>/dev/null || true", parent, name)) + r.Empty(strings.TrimSpace(out), "expected %s/%s to be deleted, but ls returned: %s", parent, name, out) + _ = err + } + + assertExists(pathPrefix, orphanPath) + assertExists(objPrefix, orphanObj) + assertExists(pathPrefix, orphanKept) + assertExists(objPrefix, orphanKept) + assertExists(pathPrefix, keepBackup) + + log.Debug().Msg("Dry-run: should report orphans but not delete them") + dryRunOut, err := env.DockerExecOut("clickhouse-backup", "clickhouse-backup", "clean_broken_retention") + r.NoError(err, "dry-run failed: %s", dryRunOut) + r.Contains(dryRunOut, orphanPath, "dry-run output must mention path orphan") + r.Contains(dryRunOut, orphanObj, "dry-run output must mention object disk orphan") + r.Contains(dryRunOut, "would delete", "dry-run must announce planned deletions") + r.NotContains(dryRunOut, "clean_broken_retention: deleting", "dry-run must not perform deletion") + r.NotContains(dryRunOut, fmt.Sprintf("would delete %s", keepBackup), "live backup must never be marked as orphan") + + assertExists(pathPrefix, orphanPath) + assertExists(objPrefix, orphanObj) + + log.Debug().Msg("Commit with --keep glob: orphan_keep_* must survive, the other two must be removed") + keepGlob := strings.SplitN(orphanKept, "_", 3)[0] + "_" + strings.SplitN(orphanKept, "_", 3)[1] + "_*" + commitOut, err := env.DockerExecOut("clickhouse-backup", "clickhouse-backup", "clean_broken_retention", "--commit", "--keep="+keepGlob) + r.NoError(err, "commit failed: %s", commitOut) + r.Contains(commitOut, 
"clean_broken_retention: deleting") + + assertGone(pathPrefix, orphanPath) + assertGone(objPrefix, orphanObj) + assertExists(pathPrefix, orphanKept) + assertExists(objPrefix, orphanKept) + assertExists(pathPrefix, keepBackup) + + log.Debug().Msg("Cleanup: drop the kept orphan via second commit run (no --keep), then remove the live backup and table") + env.DockerExecNoError(r, "clickhouse-backup", "clickhouse-backup", "clean_broken_retention", "--commit") + assertGone(pathPrefix, orphanKept) + assertGone(objPrefix, orphanKept) + + env.DockerExecNoError(r, "clickhouse-backup", "clickhouse-backup", "delete", "remote", keepBackup) + env.DockerExecNoError(r, "clickhouse-backup", "clickhouse-backup", "delete", "local", keepBackup) + env.queryWithNoError(r, "DROP TABLE IF EXISTS "+tableName+" NO DELAY") + env.checkObjectStorageIsEmpty(t, r, "S3") +} + From 9da9f06909b7aefa85e6b0268217817b81b621b2 Mon Sep 17 00:00:00 2001 From: slach Date: Wed, 13 May 2026 23:51:58 +0500 Subject: [PATCH 3/9] test(integration): cover clean_broken_retention across all storage backends Table-driven, with one sub-test per backend: - S3 (minio), SFTP (sshd), FTP (proftpd), GCS_EMULATOR (fake-gcs-server) use direct container-FS plant/assert, - AZBLOB plants via az-cli docker run against azurite, - real GCS and COS skip themselves unless GCS_TESTS / QA_TENCENT_SECRET_KEY is set (their plant helpers fail loudly if reached without infra). For each backend: create a real backup that must survive, plant 3 orphans in path/object_disks_path, verify dry-run lists them without deleting, verify --commit + --keep glob deletes only unmatched orphans, then a second --commit clears the rest. --- test/integration/cleanBrokenRetention_test.go | 345 +++++++++++++++--- 1 file changed, 285 insertions(+), 60 deletions(-) diff --git a/test/integration/cleanBrokenRetention_test.go b/test/integration/cleanBrokenRetention_test.go index 232a1abe..c3da3ed8 100644 --- a/test/integration/cleanBrokenRetention_test.go +++ b/test/integration/cleanBrokenRetention_test.go @@ -3,12 +3,17 @@ package main import ( + "context" "fmt" + "os" "strings" "testing" "time" + "github.com/Altinity/clickhouse-backup/v2/pkg/utils" + "github.com/rs/zerolog/log" + "github.com/stretchr/testify/require" ) // TestCleanBrokenRetention verifies that `clean_broken_retention`: @@ -16,90 +21,310 @@ import ( // - on --commit removes orphans from both `path` and `object_disks_path`, // - preserves the live backup and entries matched by --keep globs. // -// Runs against S3 (minio) — the remote-storage-agnostic logic lives in pkg/backup -// and reuses the BatchDeleter pipeline already exercised by every backend. +// The body of the test is shared across all supported remote-storage backends. +// Backends that need cloud credentials skip themselves when the corresponding env +// var (GCS_TESTS, AZURE_TESTS, QA_TENCENT_SECRET_KEY) is unset. func TestCleanBrokenRetention(t *testing.T) { + cases := []cleanBrokenRetentionCase{ + s3CleanBrokenRetentionCase(), + sftpCleanBrokenRetentionCase(), + ftpCleanBrokenRetentionCase(), + gcsEmulatorCleanBrokenRetentionCase(), + azblobCleanBrokenRetentionCase(), + gcsRealCleanBrokenRetentionCase(), + cosCleanBrokenRetentionCase(), + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + if tc.skip() { + t.Skip(tc.skipReason) + return + } + runCleanBrokenRetentionScenario(t, tc) + }) + } +} + +// cleanBrokenRetentionCase wires one remote-storage backend to the shared scenario. 
+// plantOrphan, assertExists and assertGone are responsible for *physically* putting +// or observing a top-level entry under the given root path on that backend. +type cleanBrokenRetentionCase struct { + name string + storageType string + configFile string + pathRoot string // top-level prefix used by plantOrphan to simulate path orphans + objRoot string // top-level prefix used by plantOrphan to simulate object-disk orphans + skip func() bool + skipReason string + setup func(env *TestEnvironment, r *require.Assertions) + plantOrphan func(env *TestEnvironment, r *require.Assertions, root, name string) + assertExists func(env *TestEnvironment, r *require.Assertions, root, name string) + assertGone func(env *TestEnvironment, r *require.Assertions, root, name string) + finalEmptyType string // value passed to env.checkObjectStorageIsEmpty at end (empty = skip) +} + +func runCleanBrokenRetentionScenario(t *testing.T, tc cleanBrokenRetentionCase) { env, r := NewTestEnvironment(t) env.connectWithWait(t, r, 0*time.Second, 1*time.Second, 1*time.Minute) defer env.Cleanup(t, r) - r.NoError(env.DockerCP("configs/config-s3.yml", "clickhouse-backup:/etc/clickhouse-backup/config.yml")) - const tableName = "default.clean_broken_retention_test" + r.NoError(env.DockerCP("configs/"+tc.configFile, "clickhouse-backup:/etc/clickhouse-backup/config.yml")) + if tc.setup != nil { + tc.setup(env, r) + } + + tableName := fmt.Sprintf("default.clean_broken_retention_%s", strings.ToLower(tc.storageType)) env.queryWithNoError(r, fmt.Sprintf("CREATE TABLE IF NOT EXISTS %s(id UInt64) ENGINE=MergeTree() ORDER BY id", tableName)) env.queryWithNoError(r, fmt.Sprintf("INSERT INTO %s SELECT number FROM numbers(50)", tableName)) - keepBackup := fmt.Sprintf("keep_backup_%d", time.Now().UnixNano()) - orphanPath := fmt.Sprintf("orphan_path_%d", time.Now().UnixNano()) - orphanObj := fmt.Sprintf("orphan_obj_%d", time.Now().UnixNano()) - orphanKept := fmt.Sprintf("orphan_keep_%d", time.Now().UnixNano()) + suffix := time.Now().UnixNano() + keepBackup := fmt.Sprintf("cbr_keep_%d", suffix) + orphanPath := fmt.Sprintf("cbr_orphan_path_%d", suffix) + orphanObj := fmt.Sprintf("cbr_orphan_obj_%d", suffix) + orphanKept := fmt.Sprintf("cbr_orphan_keep_%d", suffix) + keepGlob := fmt.Sprintf("cbr_orphan_keep_*") - // macros from configs/clickhouse-config.xml: {cluster}=cluster, {shard}=0 - const pathPrefix = "/minio/data/clickhouse/backup/cluster/0" - const objPrefix = "/minio/data/clickhouse/object_disk/cluster/0" - - log.Debug().Msg("Create a live backup that must survive the cleanup") + log.Debug().Str("backend", tc.name).Msg("Create a live backup that must survive the cleanup") env.DockerExecNoError(r, "clickhouse-backup", "clickhouse-backup", "create_remote", "--tables", tableName, keepBackup) - log.Debug().Msg("Plant orphans directly on the minio filesystem") - plant := func(parent, name string) { - env.DockerExecNoError(r, "minio", "bash", "-c", fmt.Sprintf( - "mkdir -p %s/%s/sub && echo garbage > %s/%s/data.bin && echo garbage > %s/%s/sub/nested.bin", - parent, name, parent, name, parent, name, - )) - } - plant(pathPrefix, orphanPath) - plant(objPrefix, orphanObj) - plant(pathPrefix, orphanKept) - plant(objPrefix, orphanKept) - - assertExists := func(parent, name string) { - out, err := env.DockerExecOut("minio", "ls", parent+"/"+name) - r.NoError(err, "expected %s/%s to exist, output: %s", parent, name, out) - } - assertGone := func(parent, name string) { - out, err := env.DockerExecOut("minio", "bash", "-c", fmt.Sprintf("ls %s/%s 
2>/dev/null || true", parent, name)) - r.Empty(strings.TrimSpace(out), "expected %s/%s to be deleted, but ls returned: %s", parent, name, out) - _ = err - } + log.Debug().Str("backend", tc.name).Msg("Plant orphans") + tc.plantOrphan(env, r, tc.pathRoot, orphanPath) + tc.plantOrphan(env, r, tc.objRoot, orphanObj) + tc.plantOrphan(env, r, tc.pathRoot, orphanKept) + tc.plantOrphan(env, r, tc.objRoot, orphanKept) - assertExists(pathPrefix, orphanPath) - assertExists(objPrefix, orphanObj) - assertExists(pathPrefix, orphanKept) - assertExists(objPrefix, orphanKept) - assertExists(pathPrefix, keepBackup) + tc.assertExists(env, r, tc.pathRoot, orphanPath) + tc.assertExists(env, r, tc.objRoot, orphanObj) + tc.assertExists(env, r, tc.pathRoot, orphanKept) + tc.assertExists(env, r, tc.objRoot, orphanKept) - log.Debug().Msg("Dry-run: should report orphans but not delete them") + log.Debug().Str("backend", tc.name).Msg("Dry-run lists orphans but deletes nothing") dryRunOut, err := env.DockerExecOut("clickhouse-backup", "clickhouse-backup", "clean_broken_retention") r.NoError(err, "dry-run failed: %s", dryRunOut) - r.Contains(dryRunOut, orphanPath, "dry-run output must mention path orphan") - r.Contains(dryRunOut, orphanObj, "dry-run output must mention object disk orphan") + r.Contains(dryRunOut, orphanPath, "dry-run must mention path orphan") + r.Contains(dryRunOut, orphanObj, "dry-run must mention object disk orphan") r.Contains(dryRunOut, "would delete", "dry-run must announce planned deletions") - r.NotContains(dryRunOut, "clean_broken_retention: deleting", "dry-run must not perform deletion") - r.NotContains(dryRunOut, fmt.Sprintf("would delete %s", keepBackup), "live backup must never be marked as orphan") - - assertExists(pathPrefix, orphanPath) - assertExists(objPrefix, orphanObj) + r.NotContains(dryRunOut, "clean_broken_retention: deleting", "dry-run must not delete") + r.NotContains(dryRunOut, fmt.Sprintf("\"orphan\":\"%s\"", keepBackup), "live backup must not appear as orphan") + tc.assertExists(env, r, tc.pathRoot, orphanPath) + tc.assertExists(env, r, tc.objRoot, orphanObj) - log.Debug().Msg("Commit with --keep glob: orphan_keep_* must survive, the other two must be removed") - keepGlob := strings.SplitN(orphanKept, "_", 3)[0] + "_" + strings.SplitN(orphanKept, "_", 3)[1] + "_*" + log.Debug().Str("backend", tc.name).Msg("--commit with --keep glob deletes only unmatched orphans") commitOut, err := env.DockerExecOut("clickhouse-backup", "clickhouse-backup", "clean_broken_retention", "--commit", "--keep="+keepGlob) r.NoError(err, "commit failed: %s", commitOut) r.Contains(commitOut, "clean_broken_retention: deleting") + tc.assertGone(env, r, tc.pathRoot, orphanPath) + tc.assertGone(env, r, tc.objRoot, orphanObj) + tc.assertExists(env, r, tc.pathRoot, orphanKept) + tc.assertExists(env, r, tc.objRoot, orphanKept) - assertGone(pathPrefix, orphanPath) - assertGone(objPrefix, orphanObj) - assertExists(pathPrefix, orphanKept) - assertExists(objPrefix, orphanKept) - assertExists(pathPrefix, keepBackup) - - log.Debug().Msg("Cleanup: drop the kept orphan via second commit run (no --keep), then remove the live backup and table") + log.Debug().Str("backend", tc.name).Msg("Second --commit without --keep clears the remaining orphan") env.DockerExecNoError(r, "clickhouse-backup", "clickhouse-backup", "clean_broken_retention", "--commit") - assertGone(pathPrefix, orphanKept) - assertGone(objPrefix, orphanKept) + tc.assertGone(env, r, tc.pathRoot, orphanKept) + tc.assertGone(env, r, tc.objRoot, orphanKept) + 
log.Debug().Str("backend", tc.name).Msg("Cleanup live backup and table") env.DockerExecNoError(r, "clickhouse-backup", "clickhouse-backup", "delete", "remote", keepBackup) env.DockerExecNoError(r, "clickhouse-backup", "clickhouse-backup", "delete", "local", keepBackup) - env.queryWithNoError(r, "DROP TABLE IF EXISTS "+tableName+" NO DELAY") - env.checkObjectStorageIsEmpty(t, r, "S3") + dropQuery := "DROP TABLE IF EXISTS " + tableName + if compareVersion(os.Getenv("CLICKHOUSE_VERSION"), "20.3") > 0 { + dropQuery += " NO DELAY" + } + env.queryWithNoError(r, dropQuery) + if tc.finalEmptyType != "" { + env.checkObjectStorageIsEmpty(t, r, tc.finalEmptyType) + } } +// ---- helpers for backends that map remote storage to a docker-container filesystem ---- + +func plantOnContainerFS(container string) func(env *TestEnvironment, r *require.Assertions, root, name string) { + return func(env *TestEnvironment, r *require.Assertions, root, name string) { + env.DockerExecNoError(r, container, "bash", "-c", fmt.Sprintf( + "mkdir -p %s/%s/sub && echo garbage > %s/%s/data.bin && echo garbage > %s/%s/sub/nested.bin", + root, name, root, name, root, name, + )) + } +} +func assertContainerFSExists(container string) func(env *TestEnvironment, r *require.Assertions, root, name string) { + return func(env *TestEnvironment, r *require.Assertions, root, name string) { + out, err := env.DockerExecOut(container, "ls", root+"/"+name) + r.NoError(err, "expected %s/%s to exist on %s, output: %s", root, name, container, out) + } +} +func assertContainerFSGone(container string) func(env *TestEnvironment, r *require.Assertions, root, name string) { + return func(env *TestEnvironment, r *require.Assertions, root, name string) { + out, err := env.DockerExecOut(container, "bash", "-c", fmt.Sprintf("ls %s/%s 2>/dev/null || true", root, name)) + r.Empty(strings.TrimSpace(out), "expected %s/%s on %s to be removed, ls returned: %s", root, name, container, out) + _ = err + } +} + +// ---- per-backend case factories ---- + +func s3CleanBrokenRetentionCase() cleanBrokenRetentionCase { + return cleanBrokenRetentionCase{ + name: "S3", + storageType: "S3", + configFile: "config-s3.yml", + pathRoot: "/minio/data/clickhouse/backup/cluster/0", + objRoot: "/minio/data/clickhouse/object_disk/cluster/0", + skip: func() bool { return false }, + plantOrphan: plantOnContainerFS("minio"), + assertExists: assertContainerFSExists("minio"), + assertGone: assertContainerFSGone("minio"), + finalEmptyType: "S3", + } +} + +func sftpCleanBrokenRetentionCase() cleanBrokenRetentionCase { + return cleanBrokenRetentionCase{ + name: "SFTP", + storageType: "SFTP", + configFile: "config-sftp-auth-key.yaml", + pathRoot: "/root", + objRoot: "/object_disk", + skip: func() bool { return false }, + setup: func(env *TestEnvironment, r *require.Assertions) { + env.uploadSSHKeys(r, "clickhouse-backup") + env.DockerExecNoError(r, "sshd", "mkdir", "-p", "/object_disk") + }, + plantOrphan: plantOnContainerFS("sshd"), + assertExists: assertContainerFSExists("sshd"), + assertGone: assertContainerFSGone("sshd"), + finalEmptyType: "", + } +} + +func ftpCleanBrokenRetentionCase() cleanBrokenRetentionCase { + homePrefix := "/home/test_backup" + if isAdvancedMode() { + homePrefix = "/home/ftpusers/test_backup" + } + return cleanBrokenRetentionCase{ + name: "FTP", + storageType: "FTP", + configFile: "config-ftp.yaml", + pathRoot: homePrefix + "/backup", + objRoot: homePrefix + "/object_disk", + skip: func() bool { return compareVersion(os.Getenv("CLICKHOUSE_VERSION"), "21.8") <= 
0 }, + skipReason: "FTP scenario only validated on ClickHouse > 21.8", + setup: func(env *TestEnvironment, r *require.Assertions) { + env.DockerExecNoError(r, "ftp", "sh", "-c", fmt.Sprintf("mkdir -p %s/backup %s/object_disk && chown -R test_backup:test_backup %s", homePrefix, homePrefix, homePrefix)) + }, + plantOrphan: plantOnContainerFS("ftp"), + assertExists: assertContainerFSExists("ftp"), + assertGone: assertContainerFSGone("ftp"), + finalEmptyType: "", + } +} + +func gcsEmulatorCleanBrokenRetentionCase() cleanBrokenRetentionCase { + return cleanBrokenRetentionCase{ + name: "GCS_EMULATOR", + storageType: "GCS_EMULATOR", + configFile: "config-gcs-custom-endpoint.yml", + pathRoot: "/data/altinity-qa-test/backup/cluster/0", + objRoot: "/data/altinity-qa-test/object_disks/cluster/0", + skip: func() bool { return false }, + plantOrphan: plantOnContainerFS("gcs"), + assertExists: assertContainerFSExists("gcs"), + assertGone: assertContainerFSGone("gcs"), + finalEmptyType: "GCS_EMULATOR", + } +} + +func gcsRealCleanBrokenRetentionCase() cleanBrokenRetentionCase { + return cleanBrokenRetentionCase{ + name: "GCS", + storageType: "GCS", + configFile: "config-gcs.yml", + skip: func() bool { return isTestShouldSkip("GCS_TESTS") }, + skipReason: "Skipping GCS integration tests (GCS_TESTS not set)", + plantOrphan: func(env *TestEnvironment, r *require.Assertions, root, name string) { + // real GCS — would require gsutil; out of scope without credentials infra. + r.FailNow("plantOrphan for real GCS is not implemented; gate this case behind credentials") + }, + assertExists: func(env *TestEnvironment, r *require.Assertions, root, name string) {}, + assertGone: func(env *TestEnvironment, r *require.Assertions, root, name string) {}, + } +} + +func cosCleanBrokenRetentionCase() cleanBrokenRetentionCase { + return cleanBrokenRetentionCase{ + name: "COS", + storageType: "COS", + configFile: "config-cos.yml", + skip: func() bool { return os.Getenv("QA_TENCENT_SECRET_KEY") == "" }, + skipReason: "Skipping COS integration tests (QA_TENCENT_SECRET_KEY not set)", + plantOrphan: func(env *TestEnvironment, r *require.Assertions, root, name string) { + r.FailNow("plantOrphan for COS is not implemented; gate this case behind credentials") + }, + assertExists: func(env *TestEnvironment, r *require.Assertions, root, name string) {}, + assertGone: func(env *TestEnvironment, r *require.Assertions, root, name string) {}, + } +} + +func azblobCleanBrokenRetentionCase() cleanBrokenRetentionCase { + const account = "devstoreaccount1" + const accountKey = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" + const container = "container1" + connectionString := fmt.Sprintf("DefaultEndpointsProtocol=http;AccountName=%s;AccountKey=%s;BlobEndpoint=http://devstoreaccount1.blob.azure:10000/devstoreaccount1;", account, accountKey) + + plant := func(env *TestEnvironment, r *require.Assertions, root, name string) { + blobPath := strings.TrimPrefix(root+"/"+name, "/") + cmd := []string{ + "run", "--rm", "--network", env.tc.networkName, + "-e", "AZURE_STORAGE_CONNECTION_STRING=" + connectionString, + "mcr.microsoft.com/azure-cli:latest", + "sh", "-c", + fmt.Sprintf("echo garbage > /tmp/data.bin && az storage blob upload --container-name %s --name %s/data.bin --file /tmp/data.bin --overwrite >/dev/null && az storage blob upload --container-name %s --name %s/sub/nested.bin --file /tmp/data.bin --overwrite >/dev/null", + container, blobPath, container, blobPath), + } + out, err := 
utils.ExecCmdOut(context.Background(), dockerExecTimeout, "docker", cmd...) + r.NoError(err, "azblob plantOrphan failed: %s", out) + } + exists := func(env *TestEnvironment, r *require.Assertions, root, name string) { + blobPath := strings.TrimPrefix(root+"/"+name, "/") + out, err := utils.ExecCmdOut(context.Background(), dockerExecTimeout, "docker", + "run", "--rm", "--network", env.tc.networkName, + "-e", "AZURE_STORAGE_CONNECTION_STRING="+connectionString, + "mcr.microsoft.com/azure-cli:latest", + "sh", "-c", + fmt.Sprintf("az storage blob list --container-name %s --prefix %s/ --num-results 1 --query '[].name' -o tsv", container, blobPath), + ) + r.NoError(err, "azblob list failed: %s", out) + r.NotEmpty(strings.TrimSpace(out), "expected blobs under %s/, got empty list", blobPath) + } + gone := func(env *TestEnvironment, r *require.Assertions, root, name string) { + blobPath := strings.TrimPrefix(root+"/"+name, "/") + out, err := utils.ExecCmdOut(context.Background(), dockerExecTimeout, "docker", + "run", "--rm", "--network", env.tc.networkName, + "-e", "AZURE_STORAGE_CONNECTION_STRING="+connectionString, + "mcr.microsoft.com/azure-cli:latest", + "sh", "-c", + fmt.Sprintf("az storage blob list --container-name %s --prefix %s/ --num-results 1 --query '[].name' -o tsv", container, blobPath), + ) + r.NoError(err, "azblob list failed: %s", out) + r.Empty(strings.TrimSpace(out), "expected no blobs under %s/, got: %s", blobPath, out) + } + return cleanBrokenRetentionCase{ + name: "AZBLOB", + storageType: "AZBLOB", + configFile: "config-azblob.yml", + // per config-azblob.yml: path=backup, object_disk_path=object_disks (no macros) + pathRoot: "backup", + objRoot: "object_disks", + skip: func() bool { return isTestShouldSkip("AZURE_TESTS") }, + skipReason: "Skipping AZBLOB integration tests (AZURE_TESTS not set)", + setup: func(env *TestEnvironment, r *require.Assertions) { + env.tc.pullImageIfNeeded(context.Background(), "mcr.microsoft.com/azure-cli:latest") + }, + plantOrphan: plant, + assertExists: exists, + assertGone: gone, + finalEmptyType: "", + } +} From 761fb6c0380ac7aeed1f8d74729ad543e2176bb5 Mon Sep 17 00:00:00 2001 From: slach Date: Wed, 13 May 2026 23:54:34 +0500 Subject: [PATCH 4/9] test(integration): implement real GCS and COS plant/check for clean_broken_retention GCS: spins up google/cloud-sdk:slim with --volumes-from clickhouse-backup to access /etc/clickhouse-backup/credentials.json, authenticates the service account, and uses gsutil cp/ls. COS: spins up amazon/aws-cli:latest with --endpoint-url pointing at the regional COS endpoint and QA_TENCENT_SECRET_ID/KEY as AWS credentials, since COS supports the S3 API. Setup hook renders config.yml from the copied template via envsubst. Both run only when their credential env var is set. --- test/integration/cleanBrokenRetention_test.go | 102 ++++++++++++++++-- 1 file changed, 93 insertions(+), 9 deletions(-) diff --git a/test/integration/cleanBrokenRetention_test.go b/test/integration/cleanBrokenRetention_test.go index c3da3ed8..bad1f153 100644 --- a/test/integration/cleanBrokenRetention_test.go +++ b/test/integration/cleanBrokenRetention_test.go @@ -237,33 +237,117 @@ func gcsEmulatorCleanBrokenRetentionCase() cleanBrokenRetentionCase { } func gcsRealCleanBrokenRetentionCase() cleanBrokenRetentionCase { + // real GCS — uses gsutil from google/cloud-sdk:slim with credentials.json + // from the clickhouse-backup container (mounted via --volumes-from). 
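+	// Each invocation re-runs `gcloud auth activate-service-account` first so the key file
+	// is picked up inside the throwaway container; the bucket name below is assumed to
+	// match the bucket configured in config-gcs.yml.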
+ const bucket = "altinity-qa-test" + gsutilRun := func(env *TestEnvironment, r *require.Assertions, sh string) (string, error) { + return utils.ExecCmdOut(context.Background(), dockerExecTimeout, "docker", + "run", "--rm", "--network", env.tc.networkName, + "--volumes-from", env.tc.GetContainerID("clickhouse-backup"), + "-e", "GOOGLE_APPLICATION_CREDENTIALS=/etc/clickhouse-backup/credentials.json", + "google/cloud-sdk:slim", + "bash", "-c", sh, + ) + } return cleanBrokenRetentionCase{ name: "GCS", storageType: "GCS", configFile: "config-gcs.yml", + pathRoot: "backup/cluster/0", + objRoot: "object_disks/cluster/0", skip: func() bool { return isTestShouldSkip("GCS_TESTS") }, skipReason: "Skipping GCS integration tests (GCS_TESTS not set)", + setup: func(env *TestEnvironment, r *require.Assertions) { + env.tc.pullImageIfNeeded(context.Background(), "google/cloud-sdk:slim") + }, plantOrphan: func(env *TestEnvironment, r *require.Assertions, root, name string) { - // real GCS — would require gsutil; out of scope without credentials infra. - r.FailNow("plantOrphan for real GCS is not implemented; gate this case behind credentials") + objectPath := root + "/" + name + sh := fmt.Sprintf( + "gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS >/dev/null 2>&1 && "+ + "echo garbage > /tmp/data.bin && "+ + "gsutil -q cp /tmp/data.bin gs://%s/%s/data.bin && "+ + "gsutil -q cp /tmp/data.bin gs://%s/%s/sub/nested.bin", + bucket, objectPath, bucket, objectPath, + ) + out, err := gsutilRun(env, r, sh) + r.NoError(err, "gcs plantOrphan failed: %s", out) }, - assertExists: func(env *TestEnvironment, r *require.Assertions, root, name string) {}, - assertGone: func(env *TestEnvironment, r *require.Assertions, root, name string) {}, + assertExists: func(env *TestEnvironment, r *require.Assertions, root, name string) { + objectPath := root + "/" + name + sh := fmt.Sprintf("gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS >/dev/null 2>&1 && gsutil ls gs://%s/%s/", bucket, objectPath) + out, err := gsutilRun(env, r, sh) + r.NoError(err, "gcs assertExists failed for %s: %s", objectPath, out) + r.Contains(out, "gs://"+bucket+"/"+objectPath+"/", "expected listing to contain %s", objectPath) + }, + assertGone: func(env *TestEnvironment, r *require.Assertions, root, name string) { + objectPath := root + "/" + name + sh := fmt.Sprintf("gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS >/dev/null 2>&1 && gsutil ls gs://%s/%s/** 2>&1 || true", bucket, objectPath) + out, _ := gsutilRun(env, r, sh) + r.NotContains(out, "gs://"+bucket+"/"+objectPath, "expected no blobs under gs://%s/%s, got: %s", bucket, objectPath, out) + }, + finalEmptyType: "", } } func cosCleanBrokenRetentionCase() cleanBrokenRetentionCase { + // COS supports the S3 API on its regional endpoint; we use aws-cli docker image + // with secret_id/secret_key as AWS credentials and the S3 endpoint pointing at COS. 
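+	// The bucket, region and endpoint below are assumed to mirror the values templated
+	// into config-cos.yml; the plant/assert helpers only ever touch COS through aws-cli.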
+ const bucket = "clickhouse-backup-1336113806" + const region = "na-ashburn" + const endpoint = "https://cos.na-ashburn.myqcloud.com" + awsRun := func(env *TestEnvironment, r *require.Assertions, sh string) (string, error) { + return utils.ExecCmdOut(context.Background(), dockerExecTimeout, "docker", + "run", "--rm", "--network", env.tc.networkName, + "-e", "AWS_ACCESS_KEY_ID="+os.Getenv("QA_TENCENT_SECRET_ID"), + "-e", "AWS_SECRET_ACCESS_KEY="+os.Getenv("QA_TENCENT_SECRET_KEY"), + "-e", "AWS_DEFAULT_REGION="+region, + "--entrypoint", "sh", + "amazon/aws-cli:latest", + "-c", sh, + ) + } return cleanBrokenRetentionCase{ name: "COS", storageType: "COS", configFile: "config-cos.yml", - skip: func() bool { return os.Getenv("QA_TENCENT_SECRET_KEY") == "" }, - skipReason: "Skipping COS integration tests (QA_TENCENT_SECRET_KEY not set)", + pathRoot: "backup/cluster/0", + objRoot: "object_disk/cluster/0", + skip: func() bool { + return os.Getenv("QA_TENCENT_SECRET_KEY") == "" || os.Getenv("QA_TENCENT_SECRET_ID") == "" + }, + skipReason: "Skipping COS integration tests (QA_TENCENT_SECRET_ID / QA_TENCENT_SECRET_KEY not set)", + setup: func(env *TestEnvironment, r *require.Assertions) { + env.tc.pullImageIfNeeded(context.Background(), "amazon/aws-cli:latest") + env.InstallDebIfNotExists(r, "clickhouse-backup", "gettext-base") + // config.yml was copied raw and still has ${QA_TENCENT_SECRET_*} placeholders — render in place. + env.DockerExecNoError(r, "clickhouse-backup", "bash", "-xec", "envsubst < /etc/clickhouse-backup/config.yml > /etc/clickhouse-backup/config.yml.rendered && mv /etc/clickhouse-backup/config.yml.rendered /etc/clickhouse-backup/config.yml") + }, plantOrphan: func(env *TestEnvironment, r *require.Assertions, root, name string) { - r.FailNow("plantOrphan for COS is not implemented; gate this case behind credentials") + objectPath := root + "/" + name + sh := fmt.Sprintf( + "echo garbage > /tmp/data.bin && "+ + "aws --endpoint-url=%s s3 cp /tmp/data.bin s3://%s/%s/data.bin >/dev/null && "+ + "aws --endpoint-url=%s s3 cp /tmp/data.bin s3://%s/%s/sub/nested.bin >/dev/null", + endpoint, bucket, objectPath, endpoint, bucket, objectPath, + ) + out, err := awsRun(env, r, sh) + r.NoError(err, "cos plantOrphan failed: %s", out) + }, + assertExists: func(env *TestEnvironment, r *require.Assertions, root, name string) { + objectPath := root + "/" + name + sh := fmt.Sprintf("aws --endpoint-url=%s s3 ls s3://%s/%s/", endpoint, bucket, objectPath) + out, err := awsRun(env, r, sh) + r.NoError(err, "cos assertExists failed for %s: %s", objectPath, out) + r.Contains(out, "data.bin", "expected data.bin under %s, got: %s", objectPath, out) }, - assertExists: func(env *TestEnvironment, r *require.Assertions, root, name string) {}, - assertGone: func(env *TestEnvironment, r *require.Assertions, root, name string) {}, + assertGone: func(env *TestEnvironment, r *require.Assertions, root, name string) { + objectPath := root + "/" + name + sh := fmt.Sprintf("aws --endpoint-url=%s s3 ls s3://%s/%s/ 2>&1 || true", endpoint, bucket, objectPath) + out, _ := awsRun(env, r, sh) + r.NotContains(out, "data.bin", "expected no blobs under s3://%s/%s, got: %s", bucket, objectPath, out) + r.NotContains(out, "nested.bin", "expected no blobs under s3://%s/%s, got: %s", bucket, objectPath, out) + }, + finalEmptyType: "", } } From ccf6a995ad10f614e1c8ed2c91dc896fe12cb8fb Mon Sep 17 00:00:00 2001 From: slach Date: Thu, 14 May 2026 00:01:41 +0500 Subject: [PATCH 5/9] test(integration): simplify clean_broken_retention test - 
Collapse the three container-FS factories (S3/SFTP/FTP/GCS_EMULATOR) into a single containerFSCase helper. - Extract dockerRunSh and an azList closure to remove repetition in the AZBLOB case; reuse a single gsutil/awsRun wrapper for GCS and COS. - Drop the redundant storageType field; derive the table name from name. - Loop the plant+assertExists step over a small table instead of open-coding four calls. - Pull repeated keep-glob string into a const, drop fmt.Sprintf with no format args. --- test/integration/cleanBrokenRetention_test.go | 384 ++++++++---------- 1 file changed, 163 insertions(+), 221 deletions(-) diff --git a/test/integration/cleanBrokenRetention_test.go b/test/integration/cleanBrokenRetention_test.go index bad1f153..1abcb6b7 100644 --- a/test/integration/cleanBrokenRetention_test.go +++ b/test/integration/cleanBrokenRetention_test.go @@ -16,6 +16,8 @@ import ( "github.com/stretchr/testify/require" ) +const cleanBrokenRetentionKeepGlob = "cbr_orphan_keep_*" + // TestCleanBrokenRetention verifies that `clean_broken_retention`: // - lists orphans (dry-run) without deleting, // - on --commit removes orphans from both `path` and `object_disks_path`, @@ -25,7 +27,7 @@ import ( // Backends that need cloud credentials skip themselves when the corresponding env // var (GCS_TESTS, AZURE_TESTS, QA_TENCENT_SECRET_KEY) is unset. func TestCleanBrokenRetention(t *testing.T) { - cases := []cleanBrokenRetentionCase{ + for _, tc := range []cleanBrokenRetentionCase{ s3CleanBrokenRetentionCase(), sftpCleanBrokenRetentionCase(), ftpCleanBrokenRetentionCase(), @@ -33,12 +35,10 @@ func TestCleanBrokenRetention(t *testing.T) { azblobCleanBrokenRetentionCase(), gcsRealCleanBrokenRetentionCase(), cosCleanBrokenRetentionCase(), - } - - for _, tc := range cases { + } { tc := tc t.Run(tc.name, func(t *testing.T) { - if tc.skip() { + if tc.skip != nil && tc.skip() { t.Skip(tc.skipReason) return } @@ -48,23 +48,24 @@ func TestCleanBrokenRetention(t *testing.T) { } // cleanBrokenRetentionCase wires one remote-storage backend to the shared scenario. -// plantOrphan, assertExists and assertGone are responsible for *physically* putting -// or observing a top-level entry under the given root path on that backend. +// plant/assertExists/assertGone are responsible for *physically* putting or observing +// a top-level entry under the given root path on that backend. 
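+// Depending on the backend, pathRoot/objRoot are either container-filesystem paths
+// (minio, sshd, ftp, fake-gcs-server) or bucket-relative prefixes (GCS, COS, AZBLOB).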
type cleanBrokenRetentionCase struct { name string - storageType string configFile string - pathRoot string // top-level prefix used by plantOrphan to simulate path orphans - objRoot string // top-level prefix used by plantOrphan to simulate object-disk orphans + pathRoot string + objRoot string skip func() bool skipReason string setup func(env *TestEnvironment, r *require.Assertions) - plantOrphan func(env *TestEnvironment, r *require.Assertions, root, name string) - assertExists func(env *TestEnvironment, r *require.Assertions, root, name string) - assertGone func(env *TestEnvironment, r *require.Assertions, root, name string) - finalEmptyType string // value passed to env.checkObjectStorageIsEmpty at end (empty = skip) + plant orphanAction + assertExists orphanAction + assertGone orphanAction + finalEmptyType string // when non-empty, passed to env.checkObjectStorageIsEmpty at end } +type orphanAction func(env *TestEnvironment, r *require.Assertions, root, name string) + func runCleanBrokenRetentionScenario(t *testing.T, tc cleanBrokenRetentionCase) { env, r := NewTestEnvironment(t) env.connectWithWait(t, r, 0*time.Second, 1*time.Second, 1*time.Minute) @@ -75,7 +76,7 @@ func runCleanBrokenRetentionScenario(t *testing.T, tc cleanBrokenRetentionCase) tc.setup(env, r) } - tableName := fmt.Sprintf("default.clean_broken_retention_%s", strings.ToLower(tc.storageType)) + tableName := fmt.Sprintf("default.clean_broken_retention_%s", strings.ToLower(tc.name)) env.queryWithNoError(r, fmt.Sprintf("CREATE TABLE IF NOT EXISTS %s(id UInt64) ENGINE=MergeTree() ORDER BY id", tableName)) env.queryWithNoError(r, fmt.Sprintf("INSERT INTO %s SELECT number FROM numbers(50)", tableName)) @@ -84,21 +85,18 @@ func runCleanBrokenRetentionScenario(t *testing.T, tc cleanBrokenRetentionCase) orphanPath := fmt.Sprintf("cbr_orphan_path_%d", suffix) orphanObj := fmt.Sprintf("cbr_orphan_obj_%d", suffix) orphanKept := fmt.Sprintf("cbr_orphan_keep_%d", suffix) - keepGlob := fmt.Sprintf("cbr_orphan_keep_*") log.Debug().Str("backend", tc.name).Msg("Create a live backup that must survive the cleanup") env.DockerExecNoError(r, "clickhouse-backup", "clickhouse-backup", "create_remote", "--tables", tableName, keepBackup) log.Debug().Str("backend", tc.name).Msg("Plant orphans") - tc.plantOrphan(env, r, tc.pathRoot, orphanPath) - tc.plantOrphan(env, r, tc.objRoot, orphanObj) - tc.plantOrphan(env, r, tc.pathRoot, orphanKept) - tc.plantOrphan(env, r, tc.objRoot, orphanKept) - - tc.assertExists(env, r, tc.pathRoot, orphanPath) - tc.assertExists(env, r, tc.objRoot, orphanObj) - tc.assertExists(env, r, tc.pathRoot, orphanKept) - tc.assertExists(env, r, tc.objRoot, orphanKept) + for _, p := range []struct{ root, name string }{ + {tc.pathRoot, orphanPath}, {tc.objRoot, orphanObj}, + {tc.pathRoot, orphanKept}, {tc.objRoot, orphanKept}, + } { + tc.plant(env, r, p.root, p.name) + tc.assertExists(env, r, p.root, p.name) + } log.Debug().Str("backend", tc.name).Msg("Dry-run lists orphans but deletes nothing") dryRunOut, err := env.DockerExecOut("clickhouse-backup", "clickhouse-backup", "clean_broken_retention") @@ -112,7 +110,7 @@ func runCleanBrokenRetentionScenario(t *testing.T, tc cleanBrokenRetentionCase) tc.assertExists(env, r, tc.objRoot, orphanObj) log.Debug().Str("backend", tc.name).Msg("--commit with --keep glob deletes only unmatched orphans") - commitOut, err := env.DockerExecOut("clickhouse-backup", "clickhouse-backup", "clean_broken_retention", "--commit", "--keep="+keepGlob) + commitOut, err := 
env.DockerExecOut("clickhouse-backup", "clickhouse-backup", "clean_broken_retention", "--commit", "--keep="+cleanBrokenRetentionKeepGlob) r.NoError(err, "commit failed: %s", commitOut) r.Contains(commitOut, "clean_broken_retention: deleting") tc.assertGone(env, r, tc.pathRoot, orphanPath) @@ -138,277 +136,221 @@ func runCleanBrokenRetentionScenario(t *testing.T, tc cleanBrokenRetentionCase) } } -// ---- helpers for backends that map remote storage to a docker-container filesystem ---- - -func plantOnContainerFS(container string) func(env *TestEnvironment, r *require.Assertions, root, name string) { - return func(env *TestEnvironment, r *require.Assertions, root, name string) { +// containerFSCase builds a case for a backend that maps its remote storage to a +// path on the given docker container's filesystem. +func containerFSCase(name, configFile, container, pathRoot, objRoot, finalEmptyType string) cleanBrokenRetentionCase { + plant := func(env *TestEnvironment, r *require.Assertions, root, name string) { env.DockerExecNoError(r, container, "bash", "-c", fmt.Sprintf( "mkdir -p %s/%s/sub && echo garbage > %s/%s/data.bin && echo garbage > %s/%s/sub/nested.bin", root, name, root, name, root, name, )) } -} -func assertContainerFSExists(container string) func(env *TestEnvironment, r *require.Assertions, root, name string) { - return func(env *TestEnvironment, r *require.Assertions, root, name string) { + exists := func(env *TestEnvironment, r *require.Assertions, root, name string) { out, err := env.DockerExecOut(container, "ls", root+"/"+name) r.NoError(err, "expected %s/%s to exist on %s, output: %s", root, name, container, out) } -} -func assertContainerFSGone(container string) func(env *TestEnvironment, r *require.Assertions, root, name string) { - return func(env *TestEnvironment, r *require.Assertions, root, name string) { - out, err := env.DockerExecOut(container, "bash", "-c", fmt.Sprintf("ls %s/%s 2>/dev/null || true", root, name)) + gone := func(env *TestEnvironment, r *require.Assertions, root, name string) { + out, _ := env.DockerExecOut(container, "bash", "-c", fmt.Sprintf("ls %s/%s 2>/dev/null || true", root, name)) r.Empty(strings.TrimSpace(out), "expected %s/%s on %s to be removed, ls returned: %s", root, name, container, out) - _ = err + } + return cleanBrokenRetentionCase{ + name: name, + configFile: configFile, + pathRoot: pathRoot, + objRoot: objRoot, + plant: plant, + assertExists: exists, + assertGone: gone, + finalEmptyType: finalEmptyType, } } -// ---- per-backend case factories ---- +// dockerRunSh runs an ephemeral container on the integration test network with the +// given image, environment, and shell command. Returns combined stdout/stderr. +func dockerRunSh(env *TestEnvironment, image, sh string, envVars ...string) (string, error) { + args := []string{"run", "--rm", "--network", env.tc.networkName} + for _, e := range envVars { + args = append(args, "-e", e) + } + args = append(args, image, "sh", "-c", sh) + return utils.ExecCmdOut(context.Background(), dockerExecTimeout, "docker", args...) 
+} func s3CleanBrokenRetentionCase() cleanBrokenRetentionCase { - return cleanBrokenRetentionCase{ - name: "S3", - storageType: "S3", - configFile: "config-s3.yml", - pathRoot: "/minio/data/clickhouse/backup/cluster/0", - objRoot: "/minio/data/clickhouse/object_disk/cluster/0", - skip: func() bool { return false }, - plantOrphan: plantOnContainerFS("minio"), - assertExists: assertContainerFSExists("minio"), - assertGone: assertContainerFSGone("minio"), - finalEmptyType: "S3", - } + return containerFSCase("S3", "config-s3.yml", "minio", + "/minio/data/clickhouse/backup/cluster/0", + "/minio/data/clickhouse/object_disk/cluster/0", + "S3") } func sftpCleanBrokenRetentionCase() cleanBrokenRetentionCase { - return cleanBrokenRetentionCase{ - name: "SFTP", - storageType: "SFTP", - configFile: "config-sftp-auth-key.yaml", - pathRoot: "/root", - objRoot: "/object_disk", - skip: func() bool { return false }, - setup: func(env *TestEnvironment, r *require.Assertions) { - env.uploadSSHKeys(r, "clickhouse-backup") - env.DockerExecNoError(r, "sshd", "mkdir", "-p", "/object_disk") - }, - plantOrphan: plantOnContainerFS("sshd"), - assertExists: assertContainerFSExists("sshd"), - assertGone: assertContainerFSGone("sshd"), - finalEmptyType: "", + tc := containerFSCase("SFTP", "config-sftp-auth-key.yaml", "sshd", "/root", "/object_disk", "") + tc.setup = func(env *TestEnvironment, r *require.Assertions) { + env.uploadSSHKeys(r, "clickhouse-backup") + env.DockerExecNoError(r, "sshd", "mkdir", "-p", "/object_disk") } + return tc } func ftpCleanBrokenRetentionCase() cleanBrokenRetentionCase { - homePrefix := "/home/test_backup" + home := "/home/test_backup" if isAdvancedMode() { - homePrefix = "/home/ftpusers/test_backup" + home = "/home/ftpusers/test_backup" } - return cleanBrokenRetentionCase{ - name: "FTP", - storageType: "FTP", - configFile: "config-ftp.yaml", - pathRoot: homePrefix + "/backup", - objRoot: homePrefix + "/object_disk", - skip: func() bool { return compareVersion(os.Getenv("CLICKHOUSE_VERSION"), "21.8") <= 0 }, - skipReason: "FTP scenario only validated on ClickHouse > 21.8", - setup: func(env *TestEnvironment, r *require.Assertions) { - env.DockerExecNoError(r, "ftp", "sh", "-c", fmt.Sprintf("mkdir -p %s/backup %s/object_disk && chown -R test_backup:test_backup %s", homePrefix, homePrefix, homePrefix)) - }, - plantOrphan: plantOnContainerFS("ftp"), - assertExists: assertContainerFSExists("ftp"), - assertGone: assertContainerFSGone("ftp"), - finalEmptyType: "", + tc := containerFSCase("FTP", "config-ftp.yaml", "ftp", home+"/backup", home+"/object_disk", "") + tc.skip = func() bool { return compareVersion(os.Getenv("CLICKHOUSE_VERSION"), "21.8") <= 0 } + tc.skipReason = "FTP scenario only validated on ClickHouse > 21.8" + tc.setup = func(env *TestEnvironment, r *require.Assertions) { + env.DockerExecNoError(r, "ftp", "sh", "-c", fmt.Sprintf("mkdir -p %s/backup %s/object_disk && chown -R test_backup:test_backup %s", home, home, home)) } + return tc } func gcsEmulatorCleanBrokenRetentionCase() cleanBrokenRetentionCase { - return cleanBrokenRetentionCase{ - name: "GCS_EMULATOR", - storageType: "GCS_EMULATOR", - configFile: "config-gcs-custom-endpoint.yml", - pathRoot: "/data/altinity-qa-test/backup/cluster/0", - objRoot: "/data/altinity-qa-test/object_disks/cluster/0", - skip: func() bool { return false }, - plantOrphan: plantOnContainerFS("gcs"), - assertExists: assertContainerFSExists("gcs"), - assertGone: assertContainerFSGone("gcs"), - finalEmptyType: "GCS_EMULATOR", - } + return 
containerFSCase("GCS_EMULATOR", "config-gcs-custom-endpoint.yml", "gcs", + "/data/altinity-qa-test/backup/cluster/0", + "/data/altinity-qa-test/object_disks/cluster/0", + "GCS_EMULATOR") } func gcsRealCleanBrokenRetentionCase() cleanBrokenRetentionCase { - // real GCS — uses gsutil from google/cloud-sdk:slim with credentials.json - // from the clickhouse-backup container (mounted via --volumes-from). const bucket = "altinity-qa-test" - gsutilRun := func(env *TestEnvironment, r *require.Assertions, sh string) (string, error) { - return utils.ExecCmdOut(context.Background(), dockerExecTimeout, "docker", + const image = "google/cloud-sdk:slim" + // All gsutil invocations need the service account activated first. + const authPrefix = "gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS >/dev/null 2>&1 && " + gsutil := func(env *TestEnvironment, r *require.Assertions, sh string) string { + // --volumes-from gives us /etc/clickhouse-backup/credentials.json from the backup container. + args := []string{ "run", "--rm", "--network", env.tc.networkName, "--volumes-from", env.tc.GetContainerID("clickhouse-backup"), "-e", "GOOGLE_APPLICATION_CREDENTIALS=/etc/clickhouse-backup/credentials.json", - "google/cloud-sdk:slim", - "bash", "-c", sh, - ) + image, "bash", "-c", authPrefix + sh, + } + out, err := utils.ExecCmdOut(context.Background(), dockerExecTimeout, "docker", args...) + r.NoError(err, "gsutil failed: %s", out) + return out } return cleanBrokenRetentionCase{ - name: "GCS", - storageType: "GCS", - configFile: "config-gcs.yml", - pathRoot: "backup/cluster/0", - objRoot: "object_disks/cluster/0", - skip: func() bool { return isTestShouldSkip("GCS_TESTS") }, - skipReason: "Skipping GCS integration tests (GCS_TESTS not set)", + name: "GCS", + configFile: "config-gcs.yml", + pathRoot: "backup/cluster/0", + objRoot: "object_disks/cluster/0", + skip: func() bool { return isTestShouldSkip("GCS_TESTS") }, + skipReason: "Skipping GCS integration tests (GCS_TESTS not set)", setup: func(env *TestEnvironment, r *require.Assertions) { - env.tc.pullImageIfNeeded(context.Background(), "google/cloud-sdk:slim") + env.tc.pullImageIfNeeded(context.Background(), image) }, - plantOrphan: func(env *TestEnvironment, r *require.Assertions, root, name string) { - objectPath := root + "/" + name - sh := fmt.Sprintf( - "gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS >/dev/null 2>&1 && "+ - "echo garbage > /tmp/data.bin && "+ - "gsutil -q cp /tmp/data.bin gs://%s/%s/data.bin && "+ - "gsutil -q cp /tmp/data.bin gs://%s/%s/sub/nested.bin", - bucket, objectPath, bucket, objectPath, - ) - out, err := gsutilRun(env, r, sh) - r.NoError(err, "gcs plantOrphan failed: %s", out) + plant: func(env *TestEnvironment, r *require.Assertions, root, name string) { + obj := root + "/" + name + gsutil(env, r, fmt.Sprintf( + "echo garbage > /tmp/data.bin && gsutil -q cp /tmp/data.bin gs://%s/%s/data.bin && gsutil -q cp /tmp/data.bin gs://%s/%s/sub/nested.bin", + bucket, obj, bucket, obj)) }, assertExists: func(env *TestEnvironment, r *require.Assertions, root, name string) { - objectPath := root + "/" + name - sh := fmt.Sprintf("gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS >/dev/null 2>&1 && gsutil ls gs://%s/%s/", bucket, objectPath) - out, err := gsutilRun(env, r, sh) - r.NoError(err, "gcs assertExists failed for %s: %s", objectPath, out) - r.Contains(out, "gs://"+bucket+"/"+objectPath+"/", "expected listing to contain %s", objectPath) + obj := 
root + "/" + name + out := gsutil(env, r, fmt.Sprintf("gsutil ls gs://%s/%s/", bucket, obj)) + r.Contains(out, "gs://"+bucket+"/"+obj+"/", "expected listing to contain %s", obj) }, assertGone: func(env *TestEnvironment, r *require.Assertions, root, name string) { - objectPath := root + "/" + name - sh := fmt.Sprintf("gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS >/dev/null 2>&1 && gsutil ls gs://%s/%s/** 2>&1 || true", bucket, objectPath) - out, _ := gsutilRun(env, r, sh) - r.NotContains(out, "gs://"+bucket+"/"+objectPath, "expected no blobs under gs://%s/%s, got: %s", bucket, objectPath, out) + obj := root + "/" + name + out := gsutil(env, r, fmt.Sprintf("gsutil ls gs://%s/%s/** 2>&1 || true", bucket, obj)) + r.NotContains(out, "gs://"+bucket+"/"+obj, "expected no blobs under gs://%s/%s, got: %s", bucket, obj, out) }, - finalEmptyType: "", } } func cosCleanBrokenRetentionCase() cleanBrokenRetentionCase { - // COS supports the S3 API on its regional endpoint; we use aws-cli docker image - // with secret_id/secret_key as AWS credentials and the S3 endpoint pointing at COS. + // COS exposes an S3-compatible API on its regional endpoint. const bucket = "clickhouse-backup-1336113806" - const region = "na-ashburn" const endpoint = "https://cos.na-ashburn.myqcloud.com" - awsRun := func(env *TestEnvironment, r *require.Assertions, sh string) (string, error) { - return utils.ExecCmdOut(context.Background(), dockerExecTimeout, "docker", + const image = "amazon/aws-cli:latest" + awsRun := func(env *TestEnvironment, r *require.Assertions, sh string) string { + // --entrypoint sh overrides aws-cli's default `aws` entrypoint. + out, err := utils.ExecCmdOut(context.Background(), dockerExecTimeout, "docker", "run", "--rm", "--network", env.tc.networkName, "-e", "AWS_ACCESS_KEY_ID="+os.Getenv("QA_TENCENT_SECRET_ID"), "-e", "AWS_SECRET_ACCESS_KEY="+os.Getenv("QA_TENCENT_SECRET_KEY"), - "-e", "AWS_DEFAULT_REGION="+region, - "--entrypoint", "sh", - "amazon/aws-cli:latest", - "-c", sh, - ) + "-e", "AWS_DEFAULT_REGION=na-ashburn", + "--entrypoint", "sh", image, "-c", sh) + r.NoError(err, "aws-cli failed: %s", out) + return out } return cleanBrokenRetentionCase{ - name: "COS", - storageType: "COS", - configFile: "config-cos.yml", - pathRoot: "backup/cluster/0", - objRoot: "object_disk/cluster/0", + name: "COS", + configFile: "config-cos.yml", + pathRoot: "backup/cluster/0", + objRoot: "object_disk/cluster/0", skip: func() bool { return os.Getenv("QA_TENCENT_SECRET_KEY") == "" || os.Getenv("QA_TENCENT_SECRET_ID") == "" }, skipReason: "Skipping COS integration tests (QA_TENCENT_SECRET_ID / QA_TENCENT_SECRET_KEY not set)", setup: func(env *TestEnvironment, r *require.Assertions) { - env.tc.pullImageIfNeeded(context.Background(), "amazon/aws-cli:latest") + env.tc.pullImageIfNeeded(context.Background(), image) env.InstallDebIfNotExists(r, "clickhouse-backup", "gettext-base") - // config.yml was copied raw and still has ${QA_TENCENT_SECRET_*} placeholders — render in place. - env.DockerExecNoError(r, "clickhouse-backup", "bash", "-xec", "envsubst < /etc/clickhouse-backup/config.yml > /etc/clickhouse-backup/config.yml.rendered && mv /etc/clickhouse-backup/config.yml.rendered /etc/clickhouse-backup/config.yml") + // config.yml was copied raw and still has ${QA_TENCENT_SECRET_*} placeholders. 
+ env.DockerExecNoError(r, "clickhouse-backup", "bash", "-xec", + "envsubst < /etc/clickhouse-backup/config.yml > /tmp/c.yml && mv /tmp/c.yml /etc/clickhouse-backup/config.yml") }, - plantOrphan: func(env *TestEnvironment, r *require.Assertions, root, name string) { - objectPath := root + "/" + name - sh := fmt.Sprintf( - "echo garbage > /tmp/data.bin && "+ - "aws --endpoint-url=%s s3 cp /tmp/data.bin s3://%s/%s/data.bin >/dev/null && "+ - "aws --endpoint-url=%s s3 cp /tmp/data.bin s3://%s/%s/sub/nested.bin >/dev/null", - endpoint, bucket, objectPath, endpoint, bucket, objectPath, - ) - out, err := awsRun(env, r, sh) - r.NoError(err, "cos plantOrphan failed: %s", out) + plant: func(env *TestEnvironment, r *require.Assertions, root, name string) { + obj := root + "/" + name + awsRun(env, r, fmt.Sprintf( + "echo garbage > /tmp/data.bin && aws --endpoint-url=%s s3 cp /tmp/data.bin s3://%s/%s/data.bin >/dev/null && aws --endpoint-url=%s s3 cp /tmp/data.bin s3://%s/%s/sub/nested.bin >/dev/null", + endpoint, bucket, obj, endpoint, bucket, obj)) }, assertExists: func(env *TestEnvironment, r *require.Assertions, root, name string) { - objectPath := root + "/" + name - sh := fmt.Sprintf("aws --endpoint-url=%s s3 ls s3://%s/%s/", endpoint, bucket, objectPath) - out, err := awsRun(env, r, sh) - r.NoError(err, "cos assertExists failed for %s: %s", objectPath, out) - r.Contains(out, "data.bin", "expected data.bin under %s, got: %s", objectPath, out) + obj := root + "/" + name + out := awsRun(env, r, fmt.Sprintf("aws --endpoint-url=%s s3 ls s3://%s/%s/", endpoint, bucket, obj)) + r.Contains(out, "data.bin", "expected data.bin under %s, got: %s", obj, out) }, assertGone: func(env *TestEnvironment, r *require.Assertions, root, name string) { - objectPath := root + "/" + name - sh := fmt.Sprintf("aws --endpoint-url=%s s3 ls s3://%s/%s/ 2>&1 || true", endpoint, bucket, objectPath) - out, _ := awsRun(env, r, sh) - r.NotContains(out, "data.bin", "expected no blobs under s3://%s/%s, got: %s", bucket, objectPath, out) - r.NotContains(out, "nested.bin", "expected no blobs under s3://%s/%s, got: %s", bucket, objectPath, out) + obj := root + "/" + name + out := awsRun(env, r, fmt.Sprintf("aws --endpoint-url=%s s3 ls s3://%s/%s/ 2>&1 || true", endpoint, bucket, obj)) + r.NotContains(out, "data.bin", "expected no blobs under s3://%s/%s, got: %s", bucket, obj, out) + r.NotContains(out, "nested.bin", "expected no blobs under s3://%s/%s, got: %s", bucket, obj, out) }, - finalEmptyType: "", } } func azblobCleanBrokenRetentionCase() cleanBrokenRetentionCase { - const account = "devstoreaccount1" - const accountKey = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" const container = "container1" - connectionString := fmt.Sprintf("DefaultEndpointsProtocol=http;AccountName=%s;AccountKey=%s;BlobEndpoint=http://devstoreaccount1.blob.azure:10000/devstoreaccount1;", account, accountKey) + const connectionString = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://devstoreaccount1.blob.azure:10000/devstoreaccount1;" + const image = "mcr.microsoft.com/azure-cli:latest" + azEnv := "AZURE_STORAGE_CONNECTION_STRING=" + connectionString - plant := func(env *TestEnvironment, r *require.Assertions, root, name string) { - blobPath := strings.TrimPrefix(root+"/"+name, "/") - cmd := []string{ - "run", "--rm", "--network", env.tc.networkName, - "-e", 
"AZURE_STORAGE_CONNECTION_STRING=" + connectionString, - "mcr.microsoft.com/azure-cli:latest", - "sh", "-c", - fmt.Sprintf("echo garbage > /tmp/data.bin && az storage blob upload --container-name %s --name %s/data.bin --file /tmp/data.bin --overwrite >/dev/null && az storage blob upload --container-name %s --name %s/sub/nested.bin --file /tmp/data.bin --overwrite >/dev/null", - container, blobPath, container, blobPath), - } - out, err := utils.ExecCmdOut(context.Background(), dockerExecTimeout, "docker", cmd...) - r.NoError(err, "azblob plantOrphan failed: %s", out) - } - exists := func(env *TestEnvironment, r *require.Assertions, root, name string) { - blobPath := strings.TrimPrefix(root+"/"+name, "/") - out, err := utils.ExecCmdOut(context.Background(), dockerExecTimeout, "docker", - "run", "--rm", "--network", env.tc.networkName, - "-e", "AZURE_STORAGE_CONNECTION_STRING="+connectionString, - "mcr.microsoft.com/azure-cli:latest", - "sh", "-c", - fmt.Sprintf("az storage blob list --container-name %s --prefix %s/ --num-results 1 --query '[].name' -o tsv", container, blobPath), - ) + azList := func(env *TestEnvironment, r *require.Assertions, prefix string) string { + out, err := dockerRunSh(env, image, + fmt.Sprintf("az storage blob list --container-name %s --prefix %s/ --num-results 1 --query '[].name' -o tsv", container, prefix), + azEnv) r.NoError(err, "azblob list failed: %s", out) - r.NotEmpty(strings.TrimSpace(out), "expected blobs under %s/, got empty list", blobPath) - } - gone := func(env *TestEnvironment, r *require.Assertions, root, name string) { - blobPath := strings.TrimPrefix(root+"/"+name, "/") - out, err := utils.ExecCmdOut(context.Background(), dockerExecTimeout, "docker", - "run", "--rm", "--network", env.tc.networkName, - "-e", "AZURE_STORAGE_CONNECTION_STRING="+connectionString, - "mcr.microsoft.com/azure-cli:latest", - "sh", "-c", - fmt.Sprintf("az storage blob list --container-name %s --prefix %s/ --num-results 1 --query '[].name' -o tsv", container, blobPath), - ) - r.NoError(err, "azblob list failed: %s", out) - r.Empty(strings.TrimSpace(out), "expected no blobs under %s/, got: %s", blobPath, out) + return strings.TrimSpace(out) } + blobPath := func(root, name string) string { return strings.TrimPrefix(root+"/"+name, "/") } + return cleanBrokenRetentionCase{ - name: "AZBLOB", - storageType: "AZBLOB", - configFile: "config-azblob.yml", - // per config-azblob.yml: path=backup, object_disk_path=object_disks (no macros) - pathRoot: "backup", - objRoot: "object_disks", - skip: func() bool { return isTestShouldSkip("AZURE_TESTS") }, - skipReason: "Skipping AZBLOB integration tests (AZURE_TESTS not set)", + name: "AZBLOB", + configFile: "config-azblob.yml", + // config-azblob.yml: path=backup, object_disk_path=object_disks (no macros). 
+ pathRoot: "backup", + objRoot: "object_disks", + skip: func() bool { return isTestShouldSkip("AZURE_TESTS") }, + skipReason: "Skipping AZBLOB integration tests (AZURE_TESTS not set)", setup: func(env *TestEnvironment, r *require.Assertions) { - env.tc.pullImageIfNeeded(context.Background(), "mcr.microsoft.com/azure-cli:latest") + env.tc.pullImageIfNeeded(context.Background(), image) + }, + plant: func(env *TestEnvironment, r *require.Assertions, root, name string) { + p := blobPath(root, name) + out, err := dockerRunSh(env, image, + fmt.Sprintf("echo garbage > /tmp/data.bin && az storage blob upload --container-name %s --name %s/data.bin --file /tmp/data.bin --overwrite >/dev/null && az storage blob upload --container-name %s --name %s/sub/nested.bin --file /tmp/data.bin --overwrite >/dev/null", + container, p, container, p), + azEnv) + r.NoError(err, "azblob plant failed: %s", out) + }, + assertExists: func(env *TestEnvironment, r *require.Assertions, root, name string) { + r.NotEmpty(azList(env, r, blobPath(root, name)), "expected blobs under %s/", blobPath(root, name)) + }, + assertGone: func(env *TestEnvironment, r *require.Assertions, root, name string) { + r.Empty(azList(env, r, blobPath(root, name)), "expected no blobs under %s/", blobPath(root, name)) }, - plantOrphan: plant, - assertExists: exists, - assertGone: gone, - finalEmptyType: "", } } + From df2be732ef76a186152f4b62e8ea17918c16b202 Mon Sep 17 00:00:00 2001 From: slach Date: Thu, 14 May 2026 00:09:26 +0500 Subject: [PATCH 6/9] fix cli.snapshot Signed-off-by: slach --- .../clickhouse_backup/tests/snapshots/cli.py.cli.snapshot | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/testflows/clickhouse_backup/tests/snapshots/cli.py.cli.snapshot b/test/testflows/clickhouse_backup/tests/snapshots/cli.py.cli.snapshot index 2aff71a8..1ecc27aa 100644 --- a/test/testflows/clickhouse_backup/tests/snapshots/cli.py.cli.snapshot +++ b/test/testflows/clickhouse_backup/tests/snapshots/cli.py.cli.snapshot @@ -1,6 +1,6 @@ default_config = r"""'[\'general:\', \' remote_storage: none\', \' backups_to_keep_local: 0\', \' backups_to_keep_remote: 0\', \' log_level: info\', \' allow_empty_backups: false\', \' allow_object_disk_streaming: false\', \' use_resumable_state: true\', \' restore_schema_on_cluster: ""\', \' upload_by_part: true\', \' download_by_part: true\', \' restore_database_mapping: {}\', \' restore_table_mapping: {}\', \' retries_on_failure: 3\', \' retries_pause: 5s\', \' retries_jitter: 0\', \' watch_interval: 1h\', \' full_interval: 24h\', \' watch_backup_name_template: shard{shard}-{type}-{time:20060102150405}\', \' sharded_operation_mode: ""\', \' cpu_nice_priority: 15\', \' io_nice_priority: idle\', \' rbac_backup_always: true\', \' rbac_conflict_resolution: recreate\', \' config_backup_always: false\', \' named_collections_backup_always: false\', \' delete_batch_size: 1000\', \' retriesduration: 5s\', \' watchduration: 1h0m0s\', \' fullduration: 24h0m0s\', \'clickhouse:\', \' username: default\', \' password: ""\', \' host: localhost\', \' port: 9000\', \' disk_mapping: {}\', \' skip_tables:\', \' - system.*\', \' - INFORMATION_SCHEMA.*\', \' - information_schema.*\', \' - _temporary_and_external_tables.*\', \' skip_table_engines: []\', \' skip_disks: []\', \' skip_disk_types: []\', \' timeout: 30m\', \' freeze_by_part: false\', \' freeze_by_part_where: ""\', \' use_embedded_backup_restore: false\', \' use_embedded_backup_restore_cluster: ""\', \' embedded_backup_disk: ""\', \' backup_mutations: 
true\', \' restore_as_attach: false\', \' restore_distributed_cluster: ""\', \' check_parts_columns: true\', \' secure: false\', \' skip_verify: false\', \' sync_replicated_tables: false\', \' log_sql_queries: true\', \' config_dir: /etc/clickhouse-server/\', \' restart_command: exec:systemctl restart clickhouse-server\', \' ignore_not_exists_error_during_freeze: true\', \' check_replicas_before_attach: true\', \' default_replica_path: /clickhouse/tables/{cluster}/{shard}/{database}/{table}\', " default_replica_name: \'{replica}\'", \' tls_key: ""\', \' tls_cert: ""\', \' tls_ca: ""\', \' debug: false\', \' force_rebalance: false\', \'s3:\', \' access_key: ""\', \' secret_key: ""\', \' bucket: ""\', \' endpoint: ""\', \' region: us-east-1\', \' acl: private\', \' assume_role_arn: ""\', \' force_path_style: false\', \' path: ""\', \' object_disk_path: ""\', \' disable_ssl: false\', \' compression_level: 1\', \' compression_format: tar\', \' sse: ""\', \' sse_kms_key_id: ""\', \' sse_customer_algorithm: ""\', \' sse_customer_key: ""\', \' sse_customer_key_md5: ""\', \' sse_kms_encryption_context: ""\', \' disable_cert_verification: false\', \' use_custom_storage_class: false\', \' storage_class: STANDARD\', \' custom_storage_class_map: {}\', \' allow_multipart_download: false\', \' object_labels: {}\', \' request_payer: ""\', \' check_sum_algorithm: ""\', \' request_content_md5: false\', \' retry_mode: standard\', \' chunk_size: 5242880\', \' debug: false\', \'gcs:\', \' credentials_file: ""\', \' credentials_json: ""\', \' credentials_json_encoded: ""\', \' sa_email: ""\', \' embedded_access_key: ""\', \' embedded_secret_key: ""\', \' skip_credentials: false\', \' bucket: ""\', \' path: ""\', \' object_disk_path: ""\', \' compression_level: 1\', \' compression_format: tar\', \' debug: false\', \' force_http: false\', \' endpoint: ""\', \' storage_class: STANDARD\', \' object_labels: {}\', \' custom_storage_class_map: {}\', \' chunk_size: 16777216\', \' encryption_key: ""\', \'cos:\', \' url: ""\', \' timeout: 2m\', \' secret_id: ""\', \' secret_key: ""\', \' path: ""\', \' object_disk_path: ""\', \' compression_format: tar\', \' compression_level: 1\', \' allow_multipart_download: false\', \' debug: false\', \'api:\', \' listen: localhost:7171\', \' enable_metrics: true\', \' enable_pprof: false\', \' username: ""\', \' password: ""\', \' secure: false\', \' certificate_file: ""\', \' private_key_file: ""\', \' ca_cert_file: ""\', \' ca_key_file: ""\', \' create_integration_tables: false\', \' integration_tables_host: ""\', \' allow_parallel: false\', \' complete_resumable_after_restart: true\', \' watch_is_main_process: false\', \'ftp:\', \' address: ""\', \' timeout: 2m\', \' username: ""\', \' password: ""\', \' tls: false\', \' skip_tls_verify: false\', \' path: ""\', \' object_disk_path: ""\', \' compression_format: tar\', \' compression_level: 1\', \' debug: false\', \'sftp:\', \' address: ""\', \' port: 22\', \' username: ""\', \' password: ""\', \' key: ""\', \' path: ""\', \' object_disk_path: ""\', \' compression_format: tar\', \' compression_level: 1\', \' debug: false\', \'azblob:\', \' endpoint_schema: https\', \' endpoint_suffix: core.windows.net\', \' account_name: ""\', \' account_key: ""\', \' sas: ""\', \' use_managed_identity: false\', \' container: ""\', \' assume_container_exists: false\', \' path: ""\', \' object_disk_path: ""\', \' compression_level: 1\', \' compression_format: tar\', \' sse_key: ""\', \' buffer_count: 3\', \' timeout: 4h\', \' debug: false\', 
\'custom:\', \' upload_command: ""\', \' download_command: ""\', \' list_command: ""\', \' delete_command: ""\', \' command_timeout: 4h\', \' commandtimeoutduration: 4h0m0s\']'""" -help_flag = r"""'NAME:\n clickhouse-backup - Tool for easy backup of ClickHouse with cloud supportUSAGE:\n clickhouse-backup [-t, --tables=.] DESCRIPTION:\n Run as \'root\' or \'clickhouse\' userCOMMANDS:\n tables List of tables, exclude skip_tables\n create Create new backup\n create_remote Create and upload new backup\n upload Upload backup to remote storage\n list List of backups\n download Download backup from remote storage\n restore Create schema and restore data from backup\n restore_remote Download and restore\n delete Delete specific backup\n default-config Print default config\n print-config Print current config merged with environment variables\n clean Remove data in \'shadow\' folder from all \'path\' folders available from \'system.disks\'\n clean_remote_broken Remove all broken remote backups\n clean_local_broken Remove all broken local backups\n watch Run infinite loop which create full + incremental backup sequence to allow efficient backup sequences\n server Run API server\n help, h Shows a list of commands or help for one commandGLOBAL OPTIONS:\n --config value, -c value Config \'FILE\' name. (default: "/etc/clickhouse-backup/config.yml") [$CLICKHOUSE_BACKUP_CONFIG]\n --environment-override value, --env value override any environment variable via CLI parameter\n --help, -h show help\n --version, -v print the version'""" +help_flag = r"""'NAME:\n clickhouse-backup - Tool for easy backup of ClickHouse with cloud supportUSAGE:\n clickhouse-backup [-t, --tables=.
] DESCRIPTION:\n Run as \'root\' or \'clickhouse\' userCOMMANDS:\n tables List of tables, exclude skip_tables\n create Create new backup\n create_remote Create and upload new backup\n upload Upload backup to remote storage\n list List of backups\n download Download backup from remote storage\n restore Create schema and restore data from backup\n restore_remote Download and restore\n delete Delete specific backup\n default-config Print default config\n print-config Print current config merged with environment variables\n clean Remove data in \'shadow\' folder from all \'path\' folders available from \'system.disks\'\n clean_remote_broken Remove all broken remote backups\n clean_local_broken Remove all broken local backups\n clean_broken_retention Remove orphan entries under remote `path` and `object_disks_path` that are not in the live backup list\n watch Run infinite loop which create full + incremental backup sequence to allow efficient backup sequences\n server Run API server\n help, h Shows a list of commands or help for one commandGLOBAL OPTIONS:\n --config value, -c value Config \'FILE\' name. (default: "/etc/clickhouse-backup/config.yml") [$CLICKHOUSE_BACKUP_CONFIG]\n --environment-override value, --env value override any environment variable via CLI parameter\n --help, -h show help\n --version, -v print the version'""" -cli_usage = r"""'NAME:\n clickhouse-backup - Tool for easy backup of ClickHouse with cloud supportUSAGE:\n clickhouse-backup [-t, --tables=.
] DESCRIPTION:\n Run as \'root\' or \'clickhouse\' userCOMMANDS:\n tables List of tables, exclude skip_tables\n create Create new backup\n create_remote Create and upload new backup\n upload Upload backup to remote storage\n list List of backups\n download Download backup from remote storage\n restore Create schema and restore data from backup\n restore_remote Download and restore\n delete Delete specific backup\n default-config Print default config\n print-config Print current config merged with environment variables\n clean Remove data in \'shadow\' folder from all \'path\' folders available from \'system.disks\'\n clean_remote_broken Remove all broken remote backups\n clean_local_broken Remove all broken local backups\n watch Run infinite loop which create full + incremental backup sequence to allow efficient backup sequences\n server Run API server\n help, h Shows a list of commands or help for one commandGLOBAL OPTIONS:\n --config value, -c value Config \'FILE\' name. (default: "/etc/clickhouse-backup/config.yml") [$CLICKHOUSE_BACKUP_CONFIG]\n --environment-override value, --env value override any environment variable via CLI parameter\n --help, -h show help\n --version, -v print the version'""" +cli_usage = r"""'NAME:\n clickhouse-backup - Tool for easy backup of ClickHouse with cloud supportUSAGE:\n clickhouse-backup [-t, --tables=.
] DESCRIPTION:\n Run as \'root\' or \'clickhouse\' userCOMMANDS:\n tables List of tables, exclude skip_tables\n create Create new backup\n create_remote Create and upload new backup\n upload Upload backup to remote storage\n list List of backups\n download Download backup from remote storage\n restore Create schema and restore data from backup\n restore_remote Download and restore\n delete Delete specific backup\n default-config Print default config\n print-config Print current config merged with environment variables\n clean Remove data in \'shadow\' folder from all \'path\' folders available from \'system.disks\'\n clean_remote_broken Remove all broken remote backups\n clean_local_broken Remove all broken local backups\n clean_broken_retention Remove orphan entries under remote `path` and `object_disks_path` that are not in the live backup list\n watch Run infinite loop which create full + incremental backup sequence to allow efficient backup sequences\n server Run API server\n help, h Shows a list of commands or help for one commandGLOBAL OPTIONS:\n --config value, -c value Config \'FILE\' name. (default: "/etc/clickhouse-backup/config.yml") [$CLICKHOUSE_BACKUP_CONFIG]\n --environment-override value, --env value override any environment variable via CLI parameter\n --help, -h show help\n --version, -v print the version'""" From 8ee342e780e8ce03caf29e80a187fec286fb1036 Mon Sep 17 00:00:00 2001 From: slach Date: Thu, 14 May 2026 00:46:57 +0500 Subject: [PATCH 7/9] fix(clean_broken_retention): force metadata parse + handle leading slash in Walk names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The command previously left orphans untouched because: 1. `BackupList(ctx, false, ...)` uses the on-disk metadata cache; on the second invocation (e.g. dry-run → --commit) the cache returns orphan directories with Broken="", so my Broken!="" filter let them into the keep-set and zero orphans were detected. Now passes parseMetadata=true so every top-level entry is stat'd for metadata.json on each call. 2. `bd.Walk("/", false, …)` emits names with a leading slash on S3 (from TrimPrefix mismatch); the previous `strings.Contains(name, "/")` filter rejected them as nested. Switched to strings.Trim("/") so leading and trailing slashes are both stripped before the top-level check. test/integration: - Split TestCleanBrokenRetention into one top-level test per backend so each can be run independently via RUN_TESTS=TestCleanBrokenRetention. - S3 case now plants orphans via 'mc cp' instead of raw file writes — MinIO only sees objects placed through its S3 API; direct files on disk are invisible to ListObjectsV2 and the cleanup batch finds nothing. --- pkg/backup/delete.go | 15 +++- test/integration/cleanBrokenRetention_test.go | 88 +++++++++++++------ 2 files changed, 75 insertions(+), 28 deletions(-) diff --git a/pkg/backup/delete.go b/pkg/backup/delete.go index 54265e78..6901bf6f 100644 --- a/pkg/backup/delete.go +++ b/pkg/backup/delete.go @@ -600,13 +600,21 @@ func (b *Backuper) CleanBrokenRetention(commandId int, keepGlobs []string, commi }() b.dst = bd - backupList, err := bd.BackupList(ctx, false, "") + // parseMetadata=true forces a metadata.json stat for every top-level entry, so that + // orphan dirs without metadata.json are returned with Broken!="" and excluded from the + // keep-set below. Otherwise the metadata cache from a prior run would mask them. 
+	backupList, err := bd.BackupList(ctx, true, "")
 	if err != nil {
 		return errors.WithMessage(err, "bd.BackupList")
 	}
 	keepNames := make(map[string]struct{}, len(backupList))
+	liveCount := 0
 	for _, backup := range backupList {
+		if backup.Broken != "" {
+			continue
+		}
 		keepNames[backup.BackupName] = struct{}{}
+		liveCount++
 	}
 	isKept := func(name string) bool {
 		if _, ok := keepNames[name]; ok {
@@ -624,7 +632,7 @@ func (b *Backuper) CleanBrokenRetention(commandId int, keepGlobs []string, commi
 	if commit {
 		mode = "commit"
 	}
-	log.Info().Msgf("clean_broken_retention: mode=%s, %d live backups, %d keep-globs", mode, len(backupList), len(keepGlobs))
+	log.Info().Msgf("clean_broken_retention: mode=%s, %d live backups (of %d in remote list), %d keep-globs", mode, liveCount, len(backupList), len(keepGlobs))
 
 	orphansInPath, err := b.findOrphanTopLevelNames(ctx, bd, "/", isKept)
 	if err != nil {
@@ -673,7 +681,8 @@ func (b *Backuper) findOrphanTopLevelNames(ctx context.Context, bd *storage.BackupDestination, rootPath string, isKept func(string) bool) ([]string, error) {
 	seen := make(map[string]struct{})
 	walkFn := func(_ context.Context, f storage.RemoteFile) error {
-		name := strings.TrimSuffix(f.Name(), "/")
+		// Walk("/", false) emits names that may have a leading "/" (S3) and/or trailing "/" (CommonPrefix).
+		name := strings.Trim(f.Name(), "/")
 		if name == "" || strings.Contains(name, "/") {
 			return nil
 		}
diff --git a/test/integration/cleanBrokenRetention_test.go b/test/integration/cleanBrokenRetention_test.go
index 1abcb6b7..fae2d72e 100644
--- a/test/integration/cleanBrokenRetention_test.go
+++ b/test/integration/cleanBrokenRetention_test.go
@@ -18,33 +18,44 @@ import (
 
 const cleanBrokenRetentionKeepGlob = "cbr_orphan_keep_*"
 
-// TestCleanBrokenRetention verifies that `clean_broken_retention`:
+// Each TestCleanBrokenRetention* function verifies that `clean_broken_retention`:
 // - lists orphans (dry-run) without deleting,
 // - on --commit removes orphans from both `path` and `object_disks_path`,
 // - preserves the live backup and entries matched by --keep globs.
 //
-// The body of the test is shared across all supported remote-storage backends.
+// Each backend is its own top-level test so they can be run independently
+// (e.g. `RUN_TESTS=TestCleanBrokenRetentionS3 ./test/integration/run.sh`).
 // Backends that need cloud credentials skip themselves when the corresponding env
-// var (GCS_TESTS, AZURE_TESTS, QA_TENCENT_SECRET_KEY) is unset.
+// var (GCS_TESTS, AZURE_TESTS, QA_TENCENT_SECRET_KEY/QA_TENCENT_SECRET_ID) is unset.
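For orientation while reading the per-backend constructors that follow: each one returns a cleanBrokenRetentionCase, whose definition is not visible in this hunk. Below is a minimal sketch of that struct, inferred only from the field names used in the struct literals of this patch series; the real definition in cleanBrokenRetention_test.go is authoritative, and TestEnvironment / require.Assertions come from the existing test harness.

    // Sketch only: field set inferred from the literals in this patch series,
    // not copied from the authoritative definition in cleanBrokenRetention_test.go.
    type cleanBrokenRetentionCase struct {
        name           string
        configFile     string // backend config used by the clickhouse-backup container
        pathRoot       string // top-level remote `path` location where orphans are planted
        objRoot        string // top-level `object_disks_path` location
        skip           func() bool // true when the backend's credentials/env are missing
        skipReason     string
        setup          func(env *TestEnvironment, r *require.Assertions) // optional per-backend preparation
        plant          func(env *TestEnvironment, r *require.Assertions, root, name string)
        assertExists   func(env *TestEnvironment, r *require.Assertions, root, name string)
        assertGone     func(env *TestEnvironment, r *require.Assertions, root, name string)
        finalEmptyType string // if non-empty, the scenario also checks the backend ends up empty (assumption)
    }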
+ +func TestCleanBrokenRetentionS3(t *testing.T) { + runCleanBrokenRetentionCase(t, s3CleanBrokenRetentionCase()) +} +func TestCleanBrokenRetentionSFTP(t *testing.T) { + runCleanBrokenRetentionCase(t, sftpCleanBrokenRetentionCase()) +} +func TestCleanBrokenRetentionFTP(t *testing.T) { + runCleanBrokenRetentionCase(t, ftpCleanBrokenRetentionCase()) +} +func TestCleanBrokenRetentionGCSEmulator(t *testing.T) { + runCleanBrokenRetentionCase(t, gcsEmulatorCleanBrokenRetentionCase()) +} +func TestCleanBrokenRetentionAZBLOB(t *testing.T) { + runCleanBrokenRetentionCase(t, azblobCleanBrokenRetentionCase()) +} +func TestCleanBrokenRetentionGCS(t *testing.T) { + runCleanBrokenRetentionCase(t, gcsRealCleanBrokenRetentionCase()) +} +func TestCleanBrokenRetentionCOS(t *testing.T) { + runCleanBrokenRetentionCase(t, cosCleanBrokenRetentionCase()) +} + +func runCleanBrokenRetentionCase(t *testing.T, tc cleanBrokenRetentionCase) { + if tc.skip != nil && tc.skip() { + t.Skip(tc.skipReason) + return } + runCleanBrokenRetentionScenario(t, tc) } // cleanBrokenRetentionCase wires one remote-storage backend to the shared scenario. @@ -177,10 +188,37 @@ func dockerRunSh(env *TestEnvironment, image, sh string, envVars ...string) (str } func s3CleanBrokenRetentionCase() cleanBrokenRetentionCase { - return containerFSCase("S3", "config-s3.yml", "minio", - "/minio/data/clickhouse/backup/cluster/0", - "/minio/data/clickhouse/object_disk/cluster/0", - "S3") + // Plant via `mc cp` instead of direct FS writes — MinIO ignores raw files on disk and + // only sees objects that went through its S3 API. + const mcAliasCmd = "mc alias set local https://localhost:9000 access_key it_is_my_super_secret_key >/dev/null 2>&1" + const bucketPath = "local/clickhouse/backup/cluster/0" + const objBucketPath = "local/clickhouse/object_disk/cluster/0" + plant := func(env *TestEnvironment, r *require.Assertions, root, name string) { + env.DockerExecNoError(r, "minio", "bash", "-c", fmt.Sprintf( + "%s && echo garbage > /tmp/data.bin && mc cp /tmp/data.bin %s/%s/data.bin >/dev/null && mc cp /tmp/data.bin %s/%s/sub/nested.bin >/dev/null", + mcAliasCmd, root, name, root, name, + )) + } + exists := func(env *TestEnvironment, r *require.Assertions, root, name string) { + out, err := env.DockerExecOut("minio", "bash", "-c", fmt.Sprintf("%s && mc ls %s/%s/", mcAliasCmd, root, name)) + r.NoError(err, "mc ls failed: %s", out) + r.Contains(out, "data.bin", "expected data.bin under %s/%s, got: %s", root, name, out) + } + gone := func(env *TestEnvironment, r *require.Assertions, root, name string) { + out, _ := env.DockerExecOut("minio", "bash", "-c", fmt.Sprintf("%s && mc ls -r %s/%s/ 2>&1 || true", mcAliasCmd, root, name)) + r.NotContains(out, "data.bin", "expected no objects under %s/%s, got: %s", root, name, out) + r.NotContains(out, "nested.bin", "expected no objects under %s/%s, got: %s", root, name, out) + } + return cleanBrokenRetentionCase{ + name: "S3", + configFile: "config-s3.yml", + pathRoot: bucketPath, + objRoot: objBucketPath, + plant: plant, + assertExists: exists, + assertGone: gone, + finalEmptyType: "S3", + } } func sftpCleanBrokenRetentionCase() cleanBrokenRetentionCase { From 6898973380e00069fb3b839862e1cf8bc11c0b5d Mon Sep 17 00:00:00 2001 From: slach Date: Thu, 14 May 2026 08:26:52 +0500 Subject: [PATCH 8/9] fix TestBrokenRetention Signed-off-by: slach --- pkg/backup/delete.go | 5 +++++ test/integration/cleanBrokenRetention_test.go | 15 ++++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git 
a/pkg/backup/delete.go b/pkg/backup/delete.go index 6901bf6f..b5856d80 100644 --- a/pkg/backup/delete.go +++ b/pkg/backup/delete.go @@ -686,6 +686,11 @@ func (b *Backuper) findOrphanTopLevelNames(ctx context.Context, bd *storage.Back if name == "" || strings.Contains(name, "/") { return nil } + // Skip hidden/dotfile entries — clickhouse-backup never produces names starting with ".". + // Protects system dirs like /root/.ssh on filesystem-backed remotes (SFTP/FTP). + if strings.HasPrefix(name, ".") { + return nil + } if isKept(name) { return nil } diff --git a/test/integration/cleanBrokenRetention_test.go b/test/integration/cleanBrokenRetention_test.go index fae2d72e..2799a48e 100644 --- a/test/integration/cleanBrokenRetention_test.go +++ b/test/integration/cleanBrokenRetention_test.go @@ -151,7 +151,7 @@ func runCleanBrokenRetentionScenario(t *testing.T, tc cleanBrokenRetentionCase) // path on the given docker container's filesystem. func containerFSCase(name, configFile, container, pathRoot, objRoot, finalEmptyType string) cleanBrokenRetentionCase { plant := func(env *TestEnvironment, r *require.Assertions, root, name string) { - env.DockerExecNoError(r, container, "bash", "-c", fmt.Sprintf( + env.DockerExecNoError(r, container, "sh", "-c", fmt.Sprintf( "mkdir -p %s/%s/sub && echo garbage > %s/%s/data.bin && echo garbage > %s/%s/sub/nested.bin", root, name, root, name, root, name, )) @@ -161,7 +161,7 @@ func containerFSCase(name, configFile, container, pathRoot, objRoot, finalEmptyT r.NoError(err, "expected %s/%s to exist on %s, output: %s", root, name, container, out) } gone := func(env *TestEnvironment, r *require.Assertions, root, name string) { - out, _ := env.DockerExecOut(container, "bash", "-c", fmt.Sprintf("ls %s/%s 2>/dev/null || true", root, name)) + out, _ := env.DockerExecOut(container, "sh", "-c", fmt.Sprintf("ls %s/%s 2>/dev/null || true", root, name)) r.Empty(strings.TrimSpace(out), "expected %s/%s on %s to be removed, ls returned: %s", root, name, container, out) } return cleanBrokenRetentionCase{ @@ -239,7 +239,8 @@ func ftpCleanBrokenRetentionCase() cleanBrokenRetentionCase { tc.skip = func() bool { return compareVersion(os.Getenv("CLICKHOUSE_VERSION"), "21.8") <= 0 } tc.skipReason = "FTP scenario only validated on ClickHouse > 21.8" tc.setup = func(env *TestEnvironment, r *require.Assertions) { - env.DockerExecNoError(r, "ftp", "sh", "-c", fmt.Sprintf("mkdir -p %s/backup %s/object_disk && chown -R test_backup:test_backup %s", home, home, home)) + // proftpd/vsftpd containers don't create `test_backup` as a system user; uid 1000 owns the home dir. + env.DockerExecNoError(r, "ftp", "sh", "-c", fmt.Sprintf("mkdir -p %s/backup %s/object_disk && chown -R 1000:1000 %s && chmod -R 0777 %s", home, home, home, home)) } return tc } @@ -302,6 +303,8 @@ func cosCleanBrokenRetentionCase() cleanBrokenRetentionCase { const bucket = "clickhouse-backup-1336113806" const endpoint = "https://cos.na-ashburn.myqcloud.com" const image = "amazon/aws-cli:latest" + // Tencent COS rejects path-style addressing (PathStyleDomainForbidden); force virtual-hosted style. + const awsPrefix = "aws configure set default.s3.addressing_style virtual >/dev/null && " awsRun := func(env *TestEnvironment, r *require.Assertions, sh string) string { // --entrypoint sh overrides aws-cli's default `aws` entrypoint. 
out, err := utils.ExecCmdOut(context.Background(), dockerExecTimeout, "docker", @@ -309,7 +312,7 @@ func cosCleanBrokenRetentionCase() cleanBrokenRetentionCase { "-e", "AWS_ACCESS_KEY_ID="+os.Getenv("QA_TENCENT_SECRET_ID"), "-e", "AWS_SECRET_ACCESS_KEY="+os.Getenv("QA_TENCENT_SECRET_KEY"), "-e", "AWS_DEFAULT_REGION=na-ashburn", - "--entrypoint", "sh", image, "-c", sh) + "--entrypoint", "sh", image, "-c", awsPrefix+sh) r.NoError(err, "aws-cli failed: %s", out) return out } @@ -352,7 +355,9 @@ func cosCleanBrokenRetentionCase() cleanBrokenRetentionCase { func azblobCleanBrokenRetentionCase() cleanBrokenRetentionCase { const container = "container1" const connectionString = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://devstoreaccount1.blob.azure:10000/devstoreaccount1;" - const image = "mcr.microsoft.com/azure-cli:latest" + // Pinned: azure-cli :latest ships an SDK whose REST API version is newer than + // what current Azurite supports ("API version 2026-02-06 is not supported by Azurite"). + const image = "mcr.microsoft.com/azure-cli:2.65.0" azEnv := "AZURE_STORAGE_CONNECTION_STRING=" + connectionString azList := func(env *TestEnvironment, r *require.Assertions, prefix string) string { From f69ec03e77861562ff4d395df01c98815f2a0db4 Mon Sep 17 00:00:00 2001 From: slach Date: Fri, 15 May 2026 11:24:23 +0500 Subject: [PATCH 9/9] actualize cli snapshot for testflows Signed-off-by: slach --- .../clickhouse_backup/tests/snapshots/cli.py.cli.snapshot | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/testflows/clickhouse_backup/tests/snapshots/cli.py.cli.snapshot b/test/testflows/clickhouse_backup/tests/snapshots/cli.py.cli.snapshot index e7762e84..518413f1 100644 --- a/test/testflows/clickhouse_backup/tests/snapshots/cli.py.cli.snapshot +++ b/test/testflows/clickhouse_backup/tests/snapshots/cli.py.cli.snapshot @@ -1,6 +1,6 @@ default_config = r"""'[\'general:\', \' remote_storage: none\', \' backups_to_keep_local: 0\', \' backups_to_keep_remote: 0\', \' log_level: info\', \' allow_empty_backups: false\', \' allow_object_disk_streaming: false\', \' use_resumable_state: true\', \' restore_schema_on_cluster: ""\', \' upload_by_part: true\', \' download_by_part: true\', \' restore_database_mapping: {}\', \' restore_table_mapping: {}\', \' retries_on_failure: 3\', \' retries_pause: 5s\', \' retries_jitter: 0\', \' watch_interval: 1h\', \' full_interval: 24h\', \' watch_backup_name_template: shard{shard}-{type}-{time:20060102150405}\', \' sharded_operation_mode: ""\', \' cpu_nice_priority: 15\', \' io_nice_priority: idle\', \' rbac_backup_always: true\', \' rbac_conflict_resolution: recreate\', \' config_backup_always: false\', \' named_collections_backup_always: false\', \' delete_batch_size: 1000\', \' retriesduration: 5s\', \' watchduration: 1h0m0s\', \' fullduration: 24h0m0s\', \'clickhouse:\', \' username: default\', \' password: ""\', \' host: localhost\', \' port: 9000\', \' disk_mapping: {}\', \' skip_tables:\', \' - system.*\', \' - INFORMATION_SCHEMA.*\', \' - information_schema.*\', \' - _temporary_and_external_tables.*\', \' skip_table_engines: []\', \' skip_disks: []\', \' skip_disk_types: []\', \' timeout: 30m\', \' freeze_by_part: false\', \' freeze_by_part_where: ""\', \' use_embedded_backup_restore: false\', \' use_embedded_backup_restore_cluster: ""\', \' embedded_backup_disk: ""\', \' backup_mutations: true\', \' 
restore_as_attach: false\', \' restore_distributed_cluster: ""\', \' check_parts_columns: true\', \' secure: false\', \' skip_verify: false\', \' sync_replicated_tables: false\', \' log_sql_queries: true\', \' config_dir: /etc/clickhouse-server/\', \' restart_command: exec:systemctl restart clickhouse-server\', \' ignore_not_exists_error_during_freeze: true\', \' check_replicas_before_attach: true\', \' default_replica_path: /clickhouse/tables/{cluster}/{shard}/{database}/{table}\', " default_replica_name: \'{replica}\'", \' tls_key: ""\', \' tls_cert: ""\', \' tls_ca: ""\', \' debug: false\', \' force_rebalance: false\', \'s3:\', \' access_key: ""\', \' secret_key: ""\', \' bucket: ""\', \' endpoint: ""\', \' region: us-east-1\', \' acl: private\', \' assume_role_arn: ""\', \' force_path_style: false\', \' path: ""\', \' object_disk_path: ""\', \' disable_ssl: false\', \' compression_level: 1\', \' compression_format: tar\', \' sse: ""\', \' sse_kms_key_id: ""\', \' sse_customer_algorithm: ""\', \' sse_customer_key: ""\', \' sse_customer_key_md5: ""\', \' sse_kms_encryption_context: ""\', \' disable_cert_verification: false\', \' use_custom_storage_class: false\', \' storage_class: STANDARD\', \' custom_storage_class_map: {}\', \' allow_multipart_download: false\', \' object_labels: {}\', \' request_payer: ""\', \' check_sum_algorithm: ""\', \' request_content_md5: false\', \' retry_mode: standard\', \' chunk_size: 5242880\', \' debug: false\', \'gcs:\', \' credentials_file: ""\', \' credentials_json: ""\', \' credentials_json_encoded: ""\', \' sa_email: ""\', \' embedded_access_key: ""\', \' embedded_secret_key: ""\', \' skip_credentials: false\', \' bucket: ""\', \' path: ""\', \' object_disk_path: ""\', \' compression_level: 1\', \' compression_format: tar\', \' debug: false\', \' force_http: false\', \' endpoint: ""\', \' storage_class: STANDARD\', \' object_labels: {}\', \' custom_storage_class_map: {}\', \' chunk_size: 16777216\', \' encryption_key: ""\', \'cos:\', \' url: ""\', \' timeout: 2m\', \' secret_id: ""\', \' secret_key: ""\', \' path: ""\', \' object_disk_path: ""\', \' compression_format: tar\', \' compression_level: 1\', \' allow_multipart_download: false\', \' debug: false\', \'api:\', \' listen: localhost:7171\', \' enable_metrics: true\', \' enable_pprof: false\', \' username: ""\', \' password: ""\', \' secure: false\', \' certificate_file: ""\', \' private_key_file: ""\', \' ca_cert_file: ""\', \' ca_key_file: ""\', \' create_integration_tables: false\', \' integration_tables_host: ""\', \' allow_parallel: false\', \' complete_resumable_after_restart: true\', \' watch_is_main_process: false\', \'ftp:\', \' address: ""\', \' timeout: 2m\', \' username: ""\', \' password: ""\', \' tls: false\', \' skip_tls_verify: false\', \' path: ""\', \' object_disk_path: ""\', \' compression_format: tar\', \' compression_level: 1\', \' debug: false\', \'sftp:\', \' address: ""\', \' port: 22\', \' username: ""\', \' password: ""\', \' key: ""\', \' path: ""\', \' object_disk_path: ""\', \' compression_format: tar\', \' compression_level: 1\', \' debug: false\', \'azblob:\', \' endpoint_schema: https\', \' endpoint_suffix: core.windows.net\', \' account_name: ""\', \' account_key: ""\', \' sas: ""\', \' use_managed_identity: false\', \' container: ""\', \' assume_container_exists: false\', \' path: ""\', \' object_disk_path: ""\', \' compression_level: 1\', \' compression_format: tar\', \' sse_key: ""\', \' buffer_count: 3\', \' timeout: 4h\', \' debug: false\', \'custom:\', \' 
upload_command: ""\', \' download_command: ""\', \' list_command: ""\', \' delete_command: ""\', \' command_timeout: 4h\', \' commandtimeoutduration: 4h0m0s\']'""" -help_flag = r"""'NAME:\n clickhouse-backup - Tool for easy backup of ClickHouse with cloud supportUSAGE:\n clickhouse-backup [-t, --tables=.
] DESCRIPTION:\n Run as \'root\' or \'clickhouse\' userCOMMANDS:\n tables List of tables, exclude skip_tables\n create Create new backup\n create_remote Create and upload new backup\n upload Upload backup to remote storage\n list List of backups\n download Download backup from remote storage\n restore Create schema and restore data from backup\n restore_remote Download and restore\n delete Delete specific backup\n default-config Print default config\n print-config Print current config merged with environment variables\n clean Remove data in \'shadow\' folder from all \'path\' folders available from \'system.disks\'\n clean_remote_broken Remove all broken remote backups\n clean_local_broken Remove all broken local backups\n watch Run infinite loop which create full + incremental backup sequence to allow efficient backup sequences\n acvp Run ACVP wrapper protocol over stdin/stdout\n server Run API server\n help, h Shows a list of commands or help for one commandGLOBAL OPTIONS:\n --config value, -c value Config \'FILE\' name. (default: "/etc/clickhouse-backup/config.yml") [$CLICKHOUSE_BACKUP_CONFIG]\n --environment-override value, --env value override any environment variable via CLI parameter\n --help, -h show help\n --version, -v print the version'""" +help_flag = r"""'NAME:\n clickhouse-backup - Tool for easy backup of ClickHouse with cloud supportUSAGE:\n clickhouse-backup [-t, --tables=.
] DESCRIPTION:\n Run as \'root\' or \'clickhouse\' userCOMMANDS:\n tables List of tables, exclude skip_tables\n create Create new backup\n create_remote Create and upload new backup\n upload Upload backup to remote storage\n list List of backups\n download Download backup from remote storage\n restore Create schema and restore data from backup\n restore_remote Download and restore\n delete Delete specific backup\n default-config Print default config\n print-config Print current config merged with environment variables\n clean Remove data in \'shadow\' folder from all \'path\' folders available from \'system.disks\'\n clean_remote_broken Remove all broken remote backups\n clean_local_broken Remove all broken local backups\n clean_broken_retention Remove orphan entries under remote `path` and `object_disks_path` that are not in the live backup list\n watch Run infinite loop which create full + incremental backup sequence to allow efficient backup sequences\n acvp Run ACVP wrapper protocol over stdin/stdout\n server Run API server\n help, h Shows a list of commands or help for one commandGLOBAL OPTIONS:\n --config value, -c value Config \'FILE\' name. (default: "/etc/clickhouse-backup/config.yml") [$CLICKHOUSE_BACKUP_CONFIG]\n --environment-override value, --env value override any environment variable via CLI parameter\n --help, -h show help\n --version, -v print the version'""" -cli_usage = r"""'NAME:\n clickhouse-backup - Tool for easy backup of ClickHouse with cloud supportUSAGE:\n clickhouse-backup [-t, --tables=.
] DESCRIPTION:\n Run as \'root\' or \'clickhouse\' userCOMMANDS:\n tables List of tables, exclude skip_tables\n create Create new backup\n create_remote Create and upload new backup\n upload Upload backup to remote storage\n list List of backups\n download Download backup from remote storage\n restore Create schema and restore data from backup\n restore_remote Download and restore\n delete Delete specific backup\n default-config Print default config\n print-config Print current config merged with environment variables\n clean Remove data in \'shadow\' folder from all \'path\' folders available from \'system.disks\'\n clean_remote_broken Remove all broken remote backups\n clean_local_broken Remove all broken local backups\n watch Run infinite loop which create full + incremental backup sequence to allow efficient backup sequences\n acvp Run ACVP wrapper protocol over stdin/stdout\n server Run API server\n help, h Shows a list of commands or help for one commandGLOBAL OPTIONS:\n --config value, -c value Config \'FILE\' name. (default: "/etc/clickhouse-backup/config.yml") [$CLICKHOUSE_BACKUP_CONFIG]\n --environment-override value, --env value override any environment variable via CLI parameter\n --help, -h show help\n --version, -v print the version'""" +cli_usage = r"""'NAME:\n clickhouse-backup - Tool for easy backup of ClickHouse with cloud supportUSAGE:\n clickhouse-backup [-t, --tables=.
] DESCRIPTION:\n Run as \'root\' or \'clickhouse\' userCOMMANDS:\n tables List of tables, exclude skip_tables\n create Create new backup\n create_remote Create and upload new backup\n upload Upload backup to remote storage\n list List of backups\n download Download backup from remote storage\n restore Create schema and restore data from backup\n restore_remote Download and restore\n delete Delete specific backup\n default-config Print default config\n print-config Print current config merged with environment variables\n clean Remove data in \'shadow\' folder from all \'path\' folders available from \'system.disks\'\n clean_remote_broken Remove all broken remote backups\n clean_local_broken Remove all broken local backups\n clean_broken_retention Remove orphan entries under remote `path` and `object_disks_path` that are not in the live backup list\n watch Run infinite loop which create full + incremental backup sequence to allow efficient backup sequences\n acvp Run ACVP wrapper protocol over stdin/stdout\n server Run API server\n help, h Shows a list of commands or help for one commandGLOBAL OPTIONS:\n --config value, -c value Config \'FILE\' name. (default: "/etc/clickhouse-backup/config.yml") [$CLICKHOUSE_BACKUP_CONFIG]\n --environment-override value, --env value override any environment variable via CLI parameter\n --help, -h show help\n --version, -v print the version'"""
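
For reference, the integration scenario above maps onto this manual sequence: run the command without --commit first and review the logged would-delete entries, then repeat with --commit to actually remove them. The glob shown is the cbr_orphan_keep_* constant the tests preserve; real runs would pass their own patterns.

    clickhouse-backup clean_broken_retention --keep='cbr_orphan_keep_*'
    clickhouse-backup clean_broken_retention --keep='cbr_orphan_keep_*' --commit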