diff --git a/ChangeLog.md b/ChangeLog.md index 315aa70f..3e742e74 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,3 +1,8 @@ +# unreleased + +NEW FEATURES +- add `clean_broken_retention` CLI command: walks the top level of remote `path` and `object_disks_path` and batch-deletes (with retry) every entry that is not present in the live backup list and is not matched by any `--keep` glob. Dry-run by default; pass `--commit` to actually delete. Useful for cleaning up orphans left by failed retention runs. + # v2.6.43 NEW FEATURES diff --git a/cmd/clickhouse-backup/main.go b/cmd/clickhouse-backup/main.go index 1302abd1..fadfbaf1 100644 --- a/cmd/clickhouse-backup/main.go +++ b/cmd/clickhouse-backup/main.go @@ -717,6 +717,26 @@ func main() { }, Flags: cliapp.Flags, }, + { + Name: "clean_broken_retention", + Usage: "Remove orphan entries under remote `path` and `object_disks_path` that are not in the live backup list", + UsageText: "clickhouse-backup clean_broken_retention [--commit] [--keep=glob ...]", + Description: "Walks the top level of remote `path` and `object_disks_path` and batch-deletes (with retry) every entry that is not a live backup and does not match any --keep glob. Runs in dry-run mode unless --commit is set.", + Action: func(c *cli.Context) error { + b := backup.NewBackuper(config.GetConfigFromCli(c)) + return b.CleanBrokenRetention(status.NotFromAPI, c.StringSlice("keep"), c.Bool("commit")) + }, + Flags: append(cliapp.Flags, + cli.StringSliceFlag{ + Name: "keep", + Usage: "Glob (path.Match syntax) of backup names to preserve in addition to live backups; can be passed multiple times", + }, + cli.BoolFlag{ + Name: "commit", + Usage: "Actually delete orphans; without this flag the command only logs what would be deleted", + }, + ), + }, { Name: "watch", diff --git a/pkg/backup/delete.go b/pkg/backup/delete.go index 5009a744..b5856d80 100644 --- a/pkg/backup/delete.go +++ b/pkg/backup/delete.go @@ -14,6 +14,7 @@ import ( "github.com/Altinity/clickhouse-backup/v2/pkg/clickhouse" "github.com/Altinity/clickhouse-backup/v2/pkg/custom" + "github.com/Altinity/clickhouse-backup/v2/pkg/metadata" "github.com/Altinity/clickhouse-backup/v2/pkg/status" "github.com/Altinity/clickhouse-backup/v2/pkg/storage" "github.com/Altinity/clickhouse-backup/v2/pkg/storage/object_disk" @@ -557,6 +558,161 @@ func (b *Backuper) CleanRemoteBroken(commandId int) error { return nil } +// CleanBrokenRetention walks remote `path` and `object_disks_path` top-level entries +// and removes everything that is NOT present in the live BackupList and NOT matched by keepGlobs. +// Uses BatchDeleter with retry. When commit=false, only logs orphans without deleting. +// keepGlobs follows path.Match syntax (e.g. "prod-*", "snapshot-2026-??-*").
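+// For example, keepGlobs=[]string{"prod-*"} preserves a top-level entry named "prod-2026-01-01"
+// even when it is not present in the live backup list.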
+func (b *Backuper) CleanBrokenRetention(commandId int, keepGlobs []string, commit bool) error { + ctx, cancel, err := status.Current.GetContextWithCancel(commandId) + if err != nil { + return errors.WithMessage(err, "status.Current.GetContextWithCancel") + } + ctx, cancel = context.WithCancel(ctx) + defer cancel() + + if b.cfg.General.RemoteStorage == "none" { + return errors.New("aborted: RemoteStorage set to \"none\"") + } + if b.cfg.General.RemoteStorage == "custom" { + return errors.New("aborted: clean_broken_retention does not support custom remote storage") + } + for _, g := range keepGlobs { + if _, err := path.Match(g, ""); err != nil { + return errors.Wrapf(err, "invalid keep-glob %q", g) + } + } + if err := b.ch.Connect(); err != nil { + return errors.Wrap(err, "can't connect to clickhouse") + } + defer b.ch.Close() + + bd, err := storage.NewBackupDestination(ctx, b.cfg, b.ch, "") + if err != nil { + return errors.WithMessage(err, "storage.NewBackupDestination") + } + if err = bd.Connect(ctx); err != nil { + return errors.Wrap(err, "can't connect to remote storage") + } + defer func() { + if closeErr := bd.Close(ctx); closeErr != nil { + log.Warn().Msgf("can't close BackupDestination error: %v", closeErr) + } + }() + b.dst = bd + + // parseMetadata=true forces a metadata.json stat for every top-level entry, so that + // orphan dirs without metadata.json are returned with Broken!="" and excluded from the + // keep-set below. Otherwise the metadata cache from a prior run would mask them. + backupList, err := bd.BackupList(ctx, true, "") + if err != nil { + return errors.WithMessage(err, "bd.BackupList") + } + keepNames := make(map[string]struct{}, len(backupList)) + liveCount := 0 + for _, backup := range backupList { + if backup.Broken != "" { + continue + } + keepNames[backup.BackupName] = struct{}{} + liveCount++ + } + isKept := func(name string) bool { + if _, ok := keepNames[name]; ok { + return true + } + for _, g := range keepGlobs { + if ok, _ := path.Match(g, name); ok { + return true + } + } + return false + } + + mode := "dry-run" + if commit { + mode = "commit" + } + log.Info().Msgf("clean_broken_retention: mode=%s, %d live backups (of %d in remote list), %d keep-globs", mode, liveCount, len(backupList), len(keepGlobs)) + + orphansInPath, err := b.findOrphanTopLevelNames(ctx, bd, "/", isKept) + if err != nil { + return errors.WithMessage(err, "scan path orphans") + } + for _, name := range orphansInPath { + if !commit { + log.Info().Str("orphan", name).Str("location", "path").Msg("clean_broken_retention: would delete") + continue + } + log.Info().Str("orphan", name).Str("location", "path").Msg("clean_broken_retention: deleting") + if err := bd.RemoveBackupRemote(ctx, storage.Backup{BackupMetadata: metadata.BackupMetadata{BackupName: name}}, b.cfg, b); err != nil { + return errors.Wrapf(err, "bd.RemoveBackupRemote orphan %s", name) + } + } + + objectDiskPath, err := b.getObjectDiskPath() + if err != nil { + return errors.WithMessage(err, "b.getObjectDiskPath") + } + var orphansInObj []string + if objectDiskPath != "" { + orphansInObj, err = b.findOrphanTopLevelNames(ctx, bd, objectDiskPath, isKept) + if err != nil { + return errors.WithMessage(err, "scan object_disks_path orphans") + } + for _, name := range orphansInObj { + if !commit { + log.Info().Str("orphan", name).Str("location", "object_disks_path").Msg("clean_broken_retention: would delete") + continue + } + log.Info().Str("orphan", name).Str("location", "object_disks_path").Msg("clean_broken_retention: 
deleting") + deletedKeys, deleteErr := b.cleanBackupObjectDisks(ctx, name) + if deleteErr != nil { + return errors.Wrapf(deleteErr, "cleanBackupObjectDisks orphan %s", name) + } + log.Info().Str("orphan", name).Uint("deleted", deletedKeys).Msg("clean_broken_retention: object disk orphan cleaned") + } + } + log.Info().Msgf("clean_broken_retention: done, mode=%s, path orphans=%d, object_disks_path orphans=%d", mode, len(orphansInPath), len(orphansInObj)) + return nil +} + +// findOrphanTopLevelNames lists top-level entries under rootPath (absolute when rootPath != "/") +// and returns names that are not kept by isKept. Top-level only: any names containing "/" are skipped. +func (b *Backuper) findOrphanTopLevelNames(ctx context.Context, bd *storage.BackupDestination, rootPath string, isKept func(string) bool) ([]string, error) { + seen := make(map[string]struct{}) + walkFn := func(_ context.Context, f storage.RemoteFile) error { + // Walk("/", false) emits names that may have a leading "/" (S3) and/or trailing "/" (CommonPrefix). + name := strings.Trim(f.Name(), "/") + if name == "" || strings.Contains(name, "/") { + return nil + } + // Skip hidden/dotfile entries — clickhouse-backup never produces names starting with ".". + // Protects system dirs like /root/.ssh on filesystem-backed remotes (SFTP/FTP). + if strings.HasPrefix(name, ".") { + return nil + } + if isKept(name) { + return nil + } + seen[name] = struct{}{} + return nil + } + var err error + if rootPath == "/" || rootPath == "" { + err = bd.Walk(ctx, "/", false, walkFn) + } else { + err = bd.WalkAbsolute(ctx, rootPath, false, walkFn) + } + if err != nil { + return nil, errors.Wrapf(err, "walk %q", rootPath) + } + out := make([]string, 0, len(seen)) + for n := range seen { + out = append(out, n) + } + return out, nil +} + func (b *Backuper) cleanPartialRequiredBackup(ctx context.Context, disks []clickhouse.Disk, currentBackupName string) error { if localBackups, _, err := b.GetLocalBackups(ctx, disks); err == nil { for _, localBackup := range localBackups { diff --git a/pkg/filesystemhelper/filesystemhelper.go b/pkg/filesystemhelper/filesystemhelper.go index 069dd26f..53431230 100644 --- a/pkg/filesystemhelper/filesystemhelper.go +++ b/pkg/filesystemhelper/filesystemhelper.go @@ -481,4 +481,3 @@ func IsDuplicatedParts(part1, part2 string) error { } return nil } - diff --git a/test/integration/cleanBrokenRetention_test.go b/test/integration/cleanBrokenRetention_test.go new file mode 100644 index 00000000..2799a48e --- /dev/null +++ b/test/integration/cleanBrokenRetention_test.go @@ -0,0 +1,399 @@ +//go:build integration + +package main + +import ( + "context" + "fmt" + "os" + "strings" + "testing" + "time" + + "github.com/Altinity/clickhouse-backup/v2/pkg/utils" + + "github.com/rs/zerolog/log" + "github.com/stretchr/testify/require" +) + +const cleanBrokenRetentionKeepGlob = "cbr_orphan_keep_*" + +// Each TestCleanBrokenRetention* function verifies that `clean_broken_retention`: +// - lists orphans (dry-run) without deleting, +// - on --commit removes orphans from both `path` and `object_disks_path`, +// - preserves the live backup and entries matched by --keep globs. +// +// Each backend is its own top-level test so they can be run independently +// (e.g. `RUN_TESTS=TestCleanBrokenRetentionS3 ./test/integration/run.sh`). +// Backends that need cloud credentials skip themselves when the corresponding env +// var (GCS_TESTS, AZURE_TESTS, QA_TENCENT_SECRET_KEY/QA_TENCENT_SECRET_ID) is unset. 
+ +func TestCleanBrokenRetentionS3(t *testing.T) { + runCleanBrokenRetentionCase(t, s3CleanBrokenRetentionCase()) +} +func TestCleanBrokenRetentionSFTP(t *testing.T) { + runCleanBrokenRetentionCase(t, sftpCleanBrokenRetentionCase()) +} +func TestCleanBrokenRetentionFTP(t *testing.T) { + runCleanBrokenRetentionCase(t, ftpCleanBrokenRetentionCase()) +} +func TestCleanBrokenRetentionGCSEmulator(t *testing.T) { + runCleanBrokenRetentionCase(t, gcsEmulatorCleanBrokenRetentionCase()) +} +func TestCleanBrokenRetentionAZBLOB(t *testing.T) { + runCleanBrokenRetentionCase(t, azblobCleanBrokenRetentionCase()) +} +func TestCleanBrokenRetentionGCS(t *testing.T) { + runCleanBrokenRetentionCase(t, gcsRealCleanBrokenRetentionCase()) +} +func TestCleanBrokenRetentionCOS(t *testing.T) { + runCleanBrokenRetentionCase(t, cosCleanBrokenRetentionCase()) +} + +func runCleanBrokenRetentionCase(t *testing.T, tc cleanBrokenRetentionCase) { + if tc.skip != nil && tc.skip() { + t.Skip(tc.skipReason) + return + } + runCleanBrokenRetentionScenario(t, tc) +} + +// cleanBrokenRetentionCase wires one remote-storage backend to the shared scenario. +// plant/assertExists/assertGone are responsible for *physically* putting or observing +// a top-level entry under the given root path on that backend. +type cleanBrokenRetentionCase struct { + name string + configFile string + pathRoot string + objRoot string + skip func() bool + skipReason string + setup func(env *TestEnvironment, r *require.Assertions) + plant orphanAction + assertExists orphanAction + assertGone orphanAction + finalEmptyType string // when non-empty, passed to env.checkObjectStorageIsEmpty at end +} + +type orphanAction func(env *TestEnvironment, r *require.Assertions, root, name string) + +func runCleanBrokenRetentionScenario(t *testing.T, tc cleanBrokenRetentionCase) { + env, r := NewTestEnvironment(t) + env.connectWithWait(t, r, 0*time.Second, 1*time.Second, 1*time.Minute) + defer env.Cleanup(t, r) + + r.NoError(env.DockerCP("configs/"+tc.configFile, "clickhouse-backup:/etc/clickhouse-backup/config.yml")) + if tc.setup != nil { + tc.setup(env, r) + } + + tableName := fmt.Sprintf("default.clean_broken_retention_%s", strings.ToLower(tc.name)) + env.queryWithNoError(r, fmt.Sprintf("CREATE TABLE IF NOT EXISTS %s(id UInt64) ENGINE=MergeTree() ORDER BY id", tableName)) + env.queryWithNoError(r, fmt.Sprintf("INSERT INTO %s SELECT number FROM numbers(50)", tableName)) + + suffix := time.Now().UnixNano() + keepBackup := fmt.Sprintf("cbr_keep_%d", suffix) + orphanPath := fmt.Sprintf("cbr_orphan_path_%d", suffix) + orphanObj := fmt.Sprintf("cbr_orphan_obj_%d", suffix) + orphanKept := fmt.Sprintf("cbr_orphan_keep_%d", suffix) + + log.Debug().Str("backend", tc.name).Msg("Create a live backup that must survive the cleanup") + env.DockerExecNoError(r, "clickhouse-backup", "clickhouse-backup", "create_remote", "--tables", tableName, keepBackup) + + log.Debug().Str("backend", tc.name).Msg("Plant orphans") + for _, p := range []struct{ root, name string }{ + {tc.pathRoot, orphanPath}, {tc.objRoot, orphanObj}, + {tc.pathRoot, orphanKept}, {tc.objRoot, orphanKept}, + } { + tc.plant(env, r, p.root, p.name) + tc.assertExists(env, r, p.root, p.name) + } + + log.Debug().Str("backend", tc.name).Msg("Dry-run lists orphans but deletes nothing") + dryRunOut, err := env.DockerExecOut("clickhouse-backup", "clickhouse-backup", "clean_broken_retention") + r.NoError(err, "dry-run failed: %s", dryRunOut) + r.Contains(dryRunOut, orphanPath, "dry-run must mention path orphan") + 
r.Contains(dryRunOut, orphanObj, "dry-run must mention object disk orphan") + r.Contains(dryRunOut, "would delete", "dry-run must announce planned deletions") + r.NotContains(dryRunOut, "clean_broken_retention: deleting", "dry-run must not delete") + r.NotContains(dryRunOut, fmt.Sprintf("\"orphan\":\"%s\"", keepBackup), "live backup must not appear as orphan") + tc.assertExists(env, r, tc.pathRoot, orphanPath) + tc.assertExists(env, r, tc.objRoot, orphanObj) + + log.Debug().Str("backend", tc.name).Msg("--commit with --keep glob deletes only unmatched orphans") + commitOut, err := env.DockerExecOut("clickhouse-backup", "clickhouse-backup", "clean_broken_retention", "--commit", "--keep="+cleanBrokenRetentionKeepGlob) + r.NoError(err, "commit failed: %s", commitOut) + r.Contains(commitOut, "clean_broken_retention: deleting") + tc.assertGone(env, r, tc.pathRoot, orphanPath) + tc.assertGone(env, r, tc.objRoot, orphanObj) + tc.assertExists(env, r, tc.pathRoot, orphanKept) + tc.assertExists(env, r, tc.objRoot, orphanKept) + + log.Debug().Str("backend", tc.name).Msg("Second --commit without --keep clears the remaining orphan") + env.DockerExecNoError(r, "clickhouse-backup", "clickhouse-backup", "clean_broken_retention", "--commit") + tc.assertGone(env, r, tc.pathRoot, orphanKept) + tc.assertGone(env, r, tc.objRoot, orphanKept) + + log.Debug().Str("backend", tc.name).Msg("Cleanup live backup and table") + env.DockerExecNoError(r, "clickhouse-backup", "clickhouse-backup", "delete", "remote", keepBackup) + env.DockerExecNoError(r, "clickhouse-backup", "clickhouse-backup", "delete", "local", keepBackup) + dropQuery := "DROP TABLE IF EXISTS " + tableName + if compareVersion(os.Getenv("CLICKHOUSE_VERSION"), "20.3") > 0 { + dropQuery += " NO DELAY" + } + env.queryWithNoError(r, dropQuery) + if tc.finalEmptyType != "" { + env.checkObjectStorageIsEmpty(t, r, tc.finalEmptyType) + } +} + +// containerFSCase builds a case for a backend that maps its remote storage to a +// path on the given docker container's filesystem. +func containerFSCase(name, configFile, container, pathRoot, objRoot, finalEmptyType string) cleanBrokenRetentionCase { + plant := func(env *TestEnvironment, r *require.Assertions, root, name string) { + env.DockerExecNoError(r, container, "sh", "-c", fmt.Sprintf( + "mkdir -p %s/%s/sub && echo garbage > %s/%s/data.bin && echo garbage > %s/%s/sub/nested.bin", + root, name, root, name, root, name, + )) + } + exists := func(env *TestEnvironment, r *require.Assertions, root, name string) { + out, err := env.DockerExecOut(container, "ls", root+"/"+name) + r.NoError(err, "expected %s/%s to exist on %s, output: %s", root, name, container, out) + } + gone := func(env *TestEnvironment, r *require.Assertions, root, name string) { + out, _ := env.DockerExecOut(container, "sh", "-c", fmt.Sprintf("ls %s/%s 2>/dev/null || true", root, name)) + r.Empty(strings.TrimSpace(out), "expected %s/%s on %s to be removed, ls returned: %s", root, name, container, out) + } + return cleanBrokenRetentionCase{ + name: name, + configFile: configFile, + pathRoot: pathRoot, + objRoot: objRoot, + plant: plant, + assertExists: exists, + assertGone: gone, + finalEmptyType: finalEmptyType, + } +} + +// dockerRunSh runs an ephemeral container on the integration test network with the +// given image, environment, and shell command. Returns combined stdout/stderr. 
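+// It is used by the AZBLOB case below to run `az` from an azure-cli image against Azurite.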
+func dockerRunSh(env *TestEnvironment, image, sh string, envVars ...string) (string, error) { + args := []string{"run", "--rm", "--network", env.tc.networkName} + for _, e := range envVars { + args = append(args, "-e", e) + } + args = append(args, image, "sh", "-c", sh) + return utils.ExecCmdOut(context.Background(), dockerExecTimeout, "docker", args...) +} + +func s3CleanBrokenRetentionCase() cleanBrokenRetentionCase { + // Plant via `mc cp` instead of direct FS writes — MinIO ignores raw files on disk and + // only sees objects that went through its S3 API. + const mcAliasCmd = "mc alias set local https://localhost:9000 access_key it_is_my_super_secret_key >/dev/null 2>&1" + const bucketPath = "local/clickhouse/backup/cluster/0" + const objBucketPath = "local/clickhouse/object_disk/cluster/0" + plant := func(env *TestEnvironment, r *require.Assertions, root, name string) { + env.DockerExecNoError(r, "minio", "bash", "-c", fmt.Sprintf( + "%s && echo garbage > /tmp/data.bin && mc cp /tmp/data.bin %s/%s/data.bin >/dev/null && mc cp /tmp/data.bin %s/%s/sub/nested.bin >/dev/null", + mcAliasCmd, root, name, root, name, + )) + } + exists := func(env *TestEnvironment, r *require.Assertions, root, name string) { + out, err := env.DockerExecOut("minio", "bash", "-c", fmt.Sprintf("%s && mc ls %s/%s/", mcAliasCmd, root, name)) + r.NoError(err, "mc ls failed: %s", out) + r.Contains(out, "data.bin", "expected data.bin under %s/%s, got: %s", root, name, out) + } + gone := func(env *TestEnvironment, r *require.Assertions, root, name string) { + out, _ := env.DockerExecOut("minio", "bash", "-c", fmt.Sprintf("%s && mc ls -r %s/%s/ 2>&1 || true", mcAliasCmd, root, name)) + r.NotContains(out, "data.bin", "expected no objects under %s/%s, got: %s", root, name, out) + r.NotContains(out, "nested.bin", "expected no objects under %s/%s, got: %s", root, name, out) + } + return cleanBrokenRetentionCase{ + name: "S3", + configFile: "config-s3.yml", + pathRoot: bucketPath, + objRoot: objBucketPath, + plant: plant, + assertExists: exists, + assertGone: gone, + finalEmptyType: "S3", + } +} + +func sftpCleanBrokenRetentionCase() cleanBrokenRetentionCase { + tc := containerFSCase("SFTP", "config-sftp-auth-key.yaml", "sshd", "/root", "/object_disk", "") + tc.setup = func(env *TestEnvironment, r *require.Assertions) { + env.uploadSSHKeys(r, "clickhouse-backup") + env.DockerExecNoError(r, "sshd", "mkdir", "-p", "/object_disk") + } + return tc +} + +func ftpCleanBrokenRetentionCase() cleanBrokenRetentionCase { + home := "/home/test_backup" + if isAdvancedMode() { + home = "/home/ftpusers/test_backup" + } + tc := containerFSCase("FTP", "config-ftp.yaml", "ftp", home+"/backup", home+"/object_disk", "") + tc.skip = func() bool { return compareVersion(os.Getenv("CLICKHOUSE_VERSION"), "21.8") <= 0 } + tc.skipReason = "FTP scenario only validated on ClickHouse > 21.8" + tc.setup = func(env *TestEnvironment, r *require.Assertions) { + // proftpd/vsftpd containers don't create `test_backup` as a system user; uid 1000 owns the home dir. 
+ env.DockerExecNoError(r, "ftp", "sh", "-c", fmt.Sprintf("mkdir -p %s/backup %s/object_disk && chown -R 1000:1000 %s && chmod -R 0777 %s", home, home, home, home)) + } + return tc +} + +func gcsEmulatorCleanBrokenRetentionCase() cleanBrokenRetentionCase { + return containerFSCase("GCS_EMULATOR", "config-gcs-custom-endpoint.yml", "gcs", + "/data/altinity-qa-test/backup/cluster/0", + "/data/altinity-qa-test/object_disks/cluster/0", + "GCS_EMULATOR") +} + +func gcsRealCleanBrokenRetentionCase() cleanBrokenRetentionCase { + const bucket = "altinity-qa-test" + const image = "google/cloud-sdk:slim" + // All gsutil invocations need the service account activated first. + const authPrefix = "gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS >/dev/null 2>&1 && " + gsutil := func(env *TestEnvironment, r *require.Assertions, sh string) string { + // --volumes-from gives us /etc/clickhouse-backup/credentials.json from the backup container. + args := []string{ + "run", "--rm", "--network", env.tc.networkName, + "--volumes-from", env.tc.GetContainerID("clickhouse-backup"), + "-e", "GOOGLE_APPLICATION_CREDENTIALS=/etc/clickhouse-backup/credentials.json", + image, "bash", "-c", authPrefix + sh, + } + out, err := utils.ExecCmdOut(context.Background(), dockerExecTimeout, "docker", args...) + r.NoError(err, "gsutil failed: %s", out) + return out + } + return cleanBrokenRetentionCase{ + name: "GCS", + configFile: "config-gcs.yml", + pathRoot: "backup/cluster/0", + objRoot: "object_disks/cluster/0", + skip: func() bool { return isTestShouldSkip("GCS_TESTS") }, + skipReason: "Skipping GCS integration tests (GCS_TESTS not set)", + setup: func(env *TestEnvironment, r *require.Assertions) { + env.tc.pullImageIfNeeded(context.Background(), image) + }, + plant: func(env *TestEnvironment, r *require.Assertions, root, name string) { + obj := root + "/" + name + gsutil(env, r, fmt.Sprintf( + "echo garbage > /tmp/data.bin && gsutil -q cp /tmp/data.bin gs://%s/%s/data.bin && gsutil -q cp /tmp/data.bin gs://%s/%s/sub/nested.bin", + bucket, obj, bucket, obj)) + }, + assertExists: func(env *TestEnvironment, r *require.Assertions, root, name string) { + obj := root + "/" + name + out := gsutil(env, r, fmt.Sprintf("gsutil ls gs://%s/%s/", bucket, obj)) + r.Contains(out, "gs://"+bucket+"/"+obj+"/", "expected listing to contain %s", obj) + }, + assertGone: func(env *TestEnvironment, r *require.Assertions, root, name string) { + obj := root + "/" + name + out := gsutil(env, r, fmt.Sprintf("gsutil ls gs://%s/%s/** 2>&1 || true", bucket, obj)) + r.NotContains(out, "gs://"+bucket+"/"+obj, "expected no blobs under gs://%s/%s, got: %s", bucket, obj, out) + }, + } +} + +func cosCleanBrokenRetentionCase() cleanBrokenRetentionCase { + // COS exposes an S3-compatible API on its regional endpoint. + const bucket = "clickhouse-backup-1336113806" + const endpoint = "https://cos.na-ashburn.myqcloud.com" + const image = "amazon/aws-cli:latest" + // Tencent COS rejects path-style addressing (PathStyleDomainForbidden); force virtual-hosted style. + const awsPrefix = "aws configure set default.s3.addressing_style virtual >/dev/null && " + awsRun := func(env *TestEnvironment, r *require.Assertions, sh string) string { + // --entrypoint sh overrides aws-cli's default `aws` entrypoint. 
+ out, err := utils.ExecCmdOut(context.Background(), dockerExecTimeout, "docker", + "run", "--rm", "--network", env.tc.networkName, + "-e", "AWS_ACCESS_KEY_ID="+os.Getenv("QA_TENCENT_SECRET_ID"), + "-e", "AWS_SECRET_ACCESS_KEY="+os.Getenv("QA_TENCENT_SECRET_KEY"), + "-e", "AWS_DEFAULT_REGION=na-ashburn", + "--entrypoint", "sh", image, "-c", awsPrefix+sh) + r.NoError(err, "aws-cli failed: %s", out) + return out + } + return cleanBrokenRetentionCase{ + name: "COS", + configFile: "config-cos.yml", + pathRoot: "backup/cluster/0", + objRoot: "object_disk/cluster/0", + skip: func() bool { + return os.Getenv("QA_TENCENT_SECRET_KEY") == "" || os.Getenv("QA_TENCENT_SECRET_ID") == "" + }, + skipReason: "Skipping COS integration tests (QA_TENCENT_SECRET_ID / QA_TENCENT_SECRET_KEY not set)", + setup: func(env *TestEnvironment, r *require.Assertions) { + env.tc.pullImageIfNeeded(context.Background(), image) + env.InstallDebIfNotExists(r, "clickhouse-backup", "gettext-base") + // config.yml was copied raw and still has ${QA_TENCENT_SECRET_*} placeholders. + env.DockerExecNoError(r, "clickhouse-backup", "bash", "-xec", + "envsubst < /etc/clickhouse-backup/config.yml > /tmp/c.yml && mv /tmp/c.yml /etc/clickhouse-backup/config.yml") + }, + plant: func(env *TestEnvironment, r *require.Assertions, root, name string) { + obj := root + "/" + name + awsRun(env, r, fmt.Sprintf( + "echo garbage > /tmp/data.bin && aws --endpoint-url=%s s3 cp /tmp/data.bin s3://%s/%s/data.bin >/dev/null && aws --endpoint-url=%s s3 cp /tmp/data.bin s3://%s/%s/sub/nested.bin >/dev/null", + endpoint, bucket, obj, endpoint, bucket, obj)) + }, + assertExists: func(env *TestEnvironment, r *require.Assertions, root, name string) { + obj := root + "/" + name + out := awsRun(env, r, fmt.Sprintf("aws --endpoint-url=%s s3 ls s3://%s/%s/", endpoint, bucket, obj)) + r.Contains(out, "data.bin", "expected data.bin under %s, got: %s", obj, out) + }, + assertGone: func(env *TestEnvironment, r *require.Assertions, root, name string) { + obj := root + "/" + name + out := awsRun(env, r, fmt.Sprintf("aws --endpoint-url=%s s3 ls s3://%s/%s/ 2>&1 || true", endpoint, bucket, obj)) + r.NotContains(out, "data.bin", "expected no blobs under s3://%s/%s, got: %s", bucket, obj, out) + r.NotContains(out, "nested.bin", "expected no blobs under s3://%s/%s, got: %s", bucket, obj, out) + }, + } +} + +func azblobCleanBrokenRetentionCase() cleanBrokenRetentionCase { + const container = "container1" + const connectionString = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://devstoreaccount1.blob.azure:10000/devstoreaccount1;" + // Pinned: azure-cli :latest ships an SDK whose REST API version is newer than + // what current Azurite supports ("API version 2026-02-06 is not supported by Azurite"). 
+ const image = "mcr.microsoft.com/azure-cli:2.65.0" + azEnv := "AZURE_STORAGE_CONNECTION_STRING=" + connectionString + + azList := func(env *TestEnvironment, r *require.Assertions, prefix string) string { + out, err := dockerRunSh(env, image, + fmt.Sprintf("az storage blob list --container-name %s --prefix %s/ --num-results 1 --query '[].name' -o tsv", container, prefix), + azEnv) + r.NoError(err, "azblob list failed: %s", out) + return strings.TrimSpace(out) + } + blobPath := func(root, name string) string { return strings.TrimPrefix(root+"/"+name, "/") } + + return cleanBrokenRetentionCase{ + name: "AZBLOB", + configFile: "config-azblob.yml", + // config-azblob.yml: path=backup, object_disk_path=object_disks (no macros). + pathRoot: "backup", + objRoot: "object_disks", + skip: func() bool { return isTestShouldSkip("AZURE_TESTS") }, + skipReason: "Skipping AZBLOB integration tests (AZURE_TESTS not set)", + setup: func(env *TestEnvironment, r *require.Assertions) { + env.tc.pullImageIfNeeded(context.Background(), image) + }, + plant: func(env *TestEnvironment, r *require.Assertions, root, name string) { + p := blobPath(root, name) + out, err := dockerRunSh(env, image, + fmt.Sprintf("echo garbage > /tmp/data.bin && az storage blob upload --container-name %s --name %s/data.bin --file /tmp/data.bin --overwrite >/dev/null && az storage blob upload --container-name %s --name %s/sub/nested.bin --file /tmp/data.bin --overwrite >/dev/null", + container, p, container, p), + azEnv) + r.NoError(err, "azblob plant failed: %s", out) + }, + assertExists: func(env *TestEnvironment, r *require.Assertions, root, name string) { + r.NotEmpty(azList(env, r, blobPath(root, name)), "expected blobs under %s/", blobPath(root, name)) + }, + assertGone: func(env *TestEnvironment, r *require.Assertions, root, name string) { + r.Empty(azList(env, r, blobPath(root, name)), "expected no blobs under %s/", blobPath(root, name)) + }, + } +} + diff --git a/test/testflows/clickhouse_backup/tests/snapshots/cli.py.cli.snapshot b/test/testflows/clickhouse_backup/tests/snapshots/cli.py.cli.snapshot index e7762e84..518413f1 100644 --- a/test/testflows/clickhouse_backup/tests/snapshots/cli.py.cli.snapshot +++ b/test/testflows/clickhouse_backup/tests/snapshots/cli.py.cli.snapshot @@ -1,6 +1,6 @@ default_config = r"""'[\'general:\', \' remote_storage: none\', \' backups_to_keep_local: 0\', \' backups_to_keep_remote: 0\', \' log_level: info\', \' allow_empty_backups: false\', \' allow_object_disk_streaming: false\', \' use_resumable_state: true\', \' restore_schema_on_cluster: ""\', \' upload_by_part: true\', \' download_by_part: true\', \' restore_database_mapping: {}\', \' restore_table_mapping: {}\', \' retries_on_failure: 3\', \' retries_pause: 5s\', \' retries_jitter: 0\', \' watch_interval: 1h\', \' full_interval: 24h\', \' watch_backup_name_template: shard{shard}-{type}-{time:20060102150405}\', \' sharded_operation_mode: ""\', \' cpu_nice_priority: 15\', \' io_nice_priority: idle\', \' rbac_backup_always: true\', \' rbac_conflict_resolution: recreate\', \' config_backup_always: false\', \' named_collections_backup_always: false\', \' delete_batch_size: 1000\', \' retriesduration: 5s\', \' watchduration: 1h0m0s\', \' fullduration: 24h0m0s\', \'clickhouse:\', \' username: default\', \' password: ""\', \' host: localhost\', \' port: 9000\', \' disk_mapping: {}\', \' skip_tables:\', \' - system.*\', \' - INFORMATION_SCHEMA.*\', \' - information_schema.*\', \' - _temporary_and_external_tables.*\', \' skip_table_engines: []\', \' 
skip_disks: []\', \' skip_disk_types: []\', \' timeout: 30m\', \' freeze_by_part: false\', \' freeze_by_part_where: ""\', \' use_embedded_backup_restore: false\', \' use_embedded_backup_restore_cluster: ""\', \' embedded_backup_disk: ""\', \' backup_mutations: true\', \' restore_as_attach: false\', \' restore_distributed_cluster: ""\', \' check_parts_columns: true\', \' secure: false\', \' skip_verify: false\', \' sync_replicated_tables: false\', \' log_sql_queries: true\', \' config_dir: /etc/clickhouse-server/\', \' restart_command: exec:systemctl restart clickhouse-server\', \' ignore_not_exists_error_during_freeze: true\', \' check_replicas_before_attach: true\', \' default_replica_path: /clickhouse/tables/{cluster}/{shard}/{database}/{table}\', " default_replica_name: \'{replica}\'", \' tls_key: ""\', \' tls_cert: ""\', \' tls_ca: ""\', \' debug: false\', \' force_rebalance: false\', \'s3:\', \' access_key: ""\', \' secret_key: ""\', \' bucket: ""\', \' endpoint: ""\', \' region: us-east-1\', \' acl: private\', \' assume_role_arn: ""\', \' force_path_style: false\', \' path: ""\', \' object_disk_path: ""\', \' disable_ssl: false\', \' compression_level: 1\', \' compression_format: tar\', \' sse: ""\', \' sse_kms_key_id: ""\', \' sse_customer_algorithm: ""\', \' sse_customer_key: ""\', \' sse_customer_key_md5: ""\', \' sse_kms_encryption_context: ""\', \' disable_cert_verification: false\', \' use_custom_storage_class: false\', \' storage_class: STANDARD\', \' custom_storage_class_map: {}\', \' allow_multipart_download: false\', \' object_labels: {}\', \' request_payer: ""\', \' check_sum_algorithm: ""\', \' request_content_md5: false\', \' retry_mode: standard\', \' chunk_size: 5242880\', \' debug: false\', \'gcs:\', \' credentials_file: ""\', \' credentials_json: ""\', \' credentials_json_encoded: ""\', \' sa_email: ""\', \' embedded_access_key: ""\', \' embedded_secret_key: ""\', \' skip_credentials: false\', \' bucket: ""\', \' path: ""\', \' object_disk_path: ""\', \' compression_level: 1\', \' compression_format: tar\', \' debug: false\', \' force_http: false\', \' endpoint: ""\', \' storage_class: STANDARD\', \' object_labels: {}\', \' custom_storage_class_map: {}\', \' chunk_size: 16777216\', \' encryption_key: ""\', \'cos:\', \' url: ""\', \' timeout: 2m\', \' secret_id: ""\', \' secret_key: ""\', \' path: ""\', \' object_disk_path: ""\', \' compression_format: tar\', \' compression_level: 1\', \' allow_multipart_download: false\', \' debug: false\', \'api:\', \' listen: localhost:7171\', \' enable_metrics: true\', \' enable_pprof: false\', \' username: ""\', \' password: ""\', \' secure: false\', \' certificate_file: ""\', \' private_key_file: ""\', \' ca_cert_file: ""\', \' ca_key_file: ""\', \' create_integration_tables: false\', \' integration_tables_host: ""\', \' allow_parallel: false\', \' complete_resumable_after_restart: true\', \' watch_is_main_process: false\', \'ftp:\', \' address: ""\', \' timeout: 2m\', \' username: ""\', \' password: ""\', \' tls: false\', \' skip_tls_verify: false\', \' path: ""\', \' object_disk_path: ""\', \' compression_format: tar\', \' compression_level: 1\', \' debug: false\', \'sftp:\', \' address: ""\', \' port: 22\', \' username: ""\', \' password: ""\', \' key: ""\', \' path: ""\', \' object_disk_path: ""\', \' compression_format: tar\', \' compression_level: 1\', \' debug: false\', \'azblob:\', \' endpoint_schema: https\', \' endpoint_suffix: core.windows.net\', \' account_name: ""\', \' account_key: ""\', \' sas: ""\', \' 
use_managed_identity: false\', \' container: ""\', \' assume_container_exists: false\', \' path: ""\', \' object_disk_path: ""\', \' compression_level: 1\', \' compression_format: tar\', \' sse_key: ""\', \' buffer_count: 3\', \' timeout: 4h\', \' debug: false\', \'custom:\', \' upload_command: ""\', \' download_command: ""\', \' list_command: ""\', \' delete_command: ""\', \' command_timeout: 4h\', \' commandtimeoutduration: 4h0m0s\']'""" -help_flag = r"""'NAME:\n clickhouse-backup - Tool for easy backup of ClickHouse with cloud supportUSAGE:\n clickhouse-backup [-t, --tables=.] DESCRIPTION:\n Run as \'root\' or \'clickhouse\' userCOMMANDS:\n tables List of tables, exclude skip_tables\n create Create new backup\n create_remote Create and upload new backup\n upload Upload backup to remote storage\n list List of backups\n download Download backup from remote storage\n restore Create schema and restore data from backup\n restore_remote Download and restore\n delete Delete specific backup\n default-config Print default config\n print-config Print current config merged with environment variables\n clean Remove data in \'shadow\' folder from all \'path\' folders available from \'system.disks\'\n clean_remote_broken Remove all broken remote backups\n clean_local_broken Remove all broken local backups\n watch Run infinite loop which create full + incremental backup sequence to allow efficient backup sequences\n acvp Run ACVP wrapper protocol over stdin/stdout\n server Run API server\n help, h Shows a list of commands or help for one commandGLOBAL OPTIONS:\n --config value, -c value Config \'FILE\' name. (default: "/etc/clickhouse-backup/config.yml") [$CLICKHOUSE_BACKUP_CONFIG]\n --environment-override value, --env value override any environment variable via CLI parameter\n --help, -h show help\n --version, -v print the version'""" +help_flag = r"""'NAME:\n clickhouse-backup - Tool for easy backup of ClickHouse with cloud supportUSAGE:\n clickhouse-backup [-t, --tables=.
] DESCRIPTION:\n Run as \'root\' or \'clickhouse\' userCOMMANDS:\n tables List of tables, exclude skip_tables\n create Create new backup\n create_remote Create and upload new backup\n upload Upload backup to remote storage\n list List of backups\n download Download backup from remote storage\n restore Create schema and restore data from backup\n restore_remote Download and restore\n delete Delete specific backup\n default-config Print default config\n print-config Print current config merged with environment variables\n clean Remove data in \'shadow\' folder from all \'path\' folders available from \'system.disks\'\n clean_remote_broken Remove all broken remote backups\n clean_local_broken Remove all broken local backups\n clean_broken_retention Remove orphan entries under remote `path` and `object_disks_path` that are not in the live backup list\n watch Run infinite loop which create full + incremental backup sequence to allow efficient backup sequences\n acvp Run ACVP wrapper protocol over stdin/stdout\n server Run API server\n help, h Shows a list of commands or help for one commandGLOBAL OPTIONS:\n --config value, -c value Config \'FILE\' name. (default: "/etc/clickhouse-backup/config.yml") [$CLICKHOUSE_BACKUP_CONFIG]\n --environment-override value, --env value override any environment variable via CLI parameter\n --help, -h show help\n --version, -v print the version'""" -cli_usage = r"""'NAME:\n clickhouse-backup - Tool for easy backup of ClickHouse with cloud supportUSAGE:\n clickhouse-backup [-t, --tables=.
] DESCRIPTION:\n Run as \'root\' or \'clickhouse\' userCOMMANDS:\n tables List of tables, exclude skip_tables\n create Create new backup\n create_remote Create and upload new backup\n upload Upload backup to remote storage\n list List of backups\n download Download backup from remote storage\n restore Create schema and restore data from backup\n restore_remote Download and restore\n delete Delete specific backup\n default-config Print default config\n print-config Print current config merged with environment variables\n clean Remove data in \'shadow\' folder from all \'path\' folders available from \'system.disks\'\n clean_remote_broken Remove all broken remote backups\n clean_local_broken Remove all broken local backups\n watch Run infinite loop which create full + incremental backup sequence to allow efficient backup sequences\n acvp Run ACVP wrapper protocol over stdin/stdout\n server Run API server\n help, h Shows a list of commands or help for one commandGLOBAL OPTIONS:\n --config value, -c value Config \'FILE\' name. (default: "/etc/clickhouse-backup/config.yml") [$CLICKHOUSE_BACKUP_CONFIG]\n --environment-override value, --env value override any environment variable via CLI parameter\n --help, -h show help\n --version, -v print the version'""" +cli_usage = r"""'NAME:\n clickhouse-backup - Tool for easy backup of ClickHouse with cloud supportUSAGE:\n clickhouse-backup [-t, --tables=.
] DESCRIPTION:\n Run as \'root\' or \'clickhouse\' userCOMMANDS:\n tables List of tables, exclude skip_tables\n create Create new backup\n create_remote Create and upload new backup\n upload Upload backup to remote storage\n list List of backups\n download Download backup from remote storage\n restore Create schema and restore data from backup\n restore_remote Download and restore\n delete Delete specific backup\n default-config Print default config\n print-config Print current config merged with environment variables\n clean Remove data in \'shadow\' folder from all \'path\' folders available from \'system.disks\'\n clean_remote_broken Remove all broken remote backups\n clean_local_broken Remove all broken local backups\n clean_broken_retention Remove orphan entries under remote `path` and `object_disks_path` that are not in the live backup list\n watch Run infinite loop which create full + incremental backup sequence to allow efficient backup sequences\n acvp Run ACVP wrapper protocol over stdin/stdout\n server Run API server\n help, h Shows a list of commands or help for one commandGLOBAL OPTIONS:\n --config value, -c value Config \'FILE\' name. (default: "/etc/clickhouse-backup/config.yml") [$CLICKHOUSE_BACKUP_CONFIG]\n --environment-override value, --env value override any environment variable via CLI parameter\n --help, -h show help\n --version, -v print the version'"""
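For reference, a minimal standalone Go sketch of the keep-glob semantics that `clean_broken_retention` applies: globs use path.Match syntax, are validated up front by matching them against the empty string, and a top-level remote entry whose name matches any glob is preserved even when it is not a live backup. The entry names below are hypothetical.

package main

import (
	"fmt"
	"path"
)

func main() {
	keepGlobs := []string{"prod-*", "snapshot-2026-??-*"}

	// Up-front validation, mirroring the command: a malformed pattern
	// (for example an unterminated character class such as "[a-") is
	// rejected with path.ErrBadPattern before any remote walk happens.
	for _, g := range keepGlobs {
		if _, err := path.Match(g, ""); err != nil {
			fmt.Printf("invalid keep-glob %q: %v\n", g, err)
			return
		}
	}

	// Hypothetical top-level entry names, as the walker would see them
	// after trimming leading/trailing slashes.
	for _, name := range []string{"prod-2026-01-01", "snapshot-2026-02-03", "tmp_upload_1234"} {
		kept := false
		for _, g := range keepGlobs {
			if ok, _ := path.Match(g, name); ok {
				kept = true
				break
			}
		}
		// Entries with kept=false (and not in the live backup list) are the
		// ones the command would delete when --commit is passed.
		fmt.Printf("%-20s kept=%v\n", name, kept)
	}
}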