5 changes: 5 additions & 0 deletions ChangeLog.md
@@ -1,3 +1,8 @@
# v2.6.44

BUG FIXES
- fix [1356](https://github.com/Altinity/clickhouse-backup/issues/1356), retry batch deletion in `cleanBackupObjectDisks` so transient errors (e.g. GCS 503) during retention no longer leave orphaned objects in `object_disks_path`

# v2.6.43

NEW FEATURES
11 changes: 9 additions & 2 deletions pkg/backup/delete.go
@@ -10,6 +10,7 @@ import (
"strings"
"time"

"github.com/Altinity/clickhouse-backup/v2/pkg/common"
"github.com/Altinity/clickhouse-backup/v2/pkg/pidlock"

"github.com/Altinity/clickhouse-backup/v2/pkg/clickhouse"
@@ -19,6 +20,7 @@ import (
"github.com/Altinity/clickhouse-backup/v2/pkg/storage/object_disk"
"github.com/Altinity/clickhouse-backup/v2/pkg/utils"

"github.com/eapache/go-resiliency/retrier"
"github.com/pkg/errors"
"github.com/rs/zerolog/log"
)
@@ -405,6 +407,7 @@ func (b *Backuper) cleanBackupObjectDisks(ctx context.Context, backupName string
batchSize := b.cfg.General.DeleteBatchSize

log.Info().Msgf("cleanBackupObjectDisks: starting batch deletion for object disk backup %s using %s (batch_size=%d)", backupName, b.dst.Kind(), batchSize)
retry := retrier.New(retrier.ExponentialBackoff(b.cfg.General.RetriesOnFailure, common.AddRandomJitter(b.cfg.General.RetriesDuration, b.cfg.General.RetriesJitter)), b)

// Process deletion in batches to avoid loading all keys in memory
var keysToDelete []string
@@ -420,7 +423,9 @@ func (b *Backuper) cleanBackupObjectDisks(ctx context.Context, backupName string

// When we've collected enough keys, delete them as a batch
if len(keysToDelete) >= batchSize {
deleteErr := batchDeleter.DeleteKeysFromObjectDiskBackupBatch(ctx, keysToDelete)
deleteErr := retry.RunCtx(ctx, func(ctx context.Context) error {
return batchDeleter.DeleteKeysFromObjectDiskBackupBatch(ctx, keysToDelete)
})
if deleteErr != nil {
return deleteErr
}
@@ -436,7 +441,9 @@ func (b *Backuper) cleanBackupObjectDisks(ctx context.Context, backupName string

// Delete remaining keys
if len(keysToDelete) > 0 {
deleteErr := batchDeleter.DeleteKeysFromObjectDiskBackupBatch(ctx, keysToDelete)
deleteErr := retry.RunCtx(ctx, func(ctx context.Context) error {
return batchDeleter.DeleteKeysFromObjectDiskBackupBatch(ctx, keysToDelete)
})
if deleteErr != nil {
return totalDeleted, deleteErr
}
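
The retry wiring above follows the standard go-resiliency pattern: build a `Retrier` with an exponential backoff schedule, then wrap each batch deletion in `RunCtx` so a transient storage error (such as a GCS 503) is retried instead of aborting retention. Below is a minimal, self-contained sketch of that pattern; `deleteBatch` is a hypothetical stand-in for `batchDeleter.DeleteKeysFromObjectDiskBackupBatch`, and the attempt count and base delay are illustrative values, not the project's configured ones.

```go
package main

import (
	"context"
	"fmt"
	"time"

	"github.com/eapache/go-resiliency/retrier"
)

// deleteBatch is a placeholder for a storage call that can fail transiently.
func deleteBatch(ctx context.Context, keys []string) error {
	return nil
}

func main() {
	ctx := context.Background()
	// 5 attempts with exponential backoff starting at 100ms; a nil classifier
	// retries on any non-nil error.
	retry := retrier.New(retrier.ExponentialBackoff(5, 100*time.Millisecond), nil)

	keys := []string{"disk/backup/part_1.bin", "disk/backup/part_2.bin"}
	err := retry.RunCtx(ctx, func(ctx context.Context) error {
		return deleteBatch(ctx, keys)
	})
	fmt.Println("batch delete result:", err)
}
```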
59 changes: 33 additions & 26 deletions pkg/storage/s3.go
@@ -358,19 +358,19 @@ func (s *S3) deleteKey(ctx context.Context, key string) error {
params.RequestPayer = s3types.RequestPayer(s.Config.RequestPayer)
}
if s.versioning {
objVersions, err := s.getObjectAllVersions(ctx, key)
identifiers, err := s.getObjectAllVersions(ctx, key)
if err != nil {
return errors.Wrapf(err, "deleteKey, obtaining object version bucket: %s key: %s", s.Config.Bucket, key)
}
for _, objVersion := range objVersions {
params.VersionId = &objVersion
for _, id := range identifiers {
params.VersionId = id.VersionId
if _, err := s.client.DeleteObject(ctx, params); err != nil {
return errors.Wrapf(err, "deleteKey, deleting object bucket: %s key: %s version: %v", s.Config.Bucket, key, params.VersionId)
}
}
if len(objVersions) > 0 {
return nil
}
// Either we drained all versions+markers, or there was nothing to delete.
// In both cases avoid a key-only DeleteObject which would create a fresh delete-marker.
return nil
}
if _, err := s.client.DeleteObject(ctx, params); err != nil {
return errors.Wrapf(err, "deleteKey, deleting object bucket: %s key: %s version: %v", s.Config.Bucket, key, params.VersionId)
@@ -434,7 +434,7 @@ func (s *S3) deleteKeys(ctx context.Context, keys []string) error {
for _, key := range keys {
key := key
g.Go(func() error {
versions, err := s.getObjectAllVersions(ctx, key)
identifiers, err := s.getObjectAllVersions(ctx, key)
if err != nil {
// If we can't list versions, try deleting without version ID
log.Warn().Msgf("S3 deleteKeys: can't get versions for %s: %v, will try without version", key, err)
@@ -445,21 +445,13 @@ func (s *S3) deleteKeys(ctx context.Context, keys []string) error {
mu.Unlock()
return nil
}
mu.Lock()
if len(versions) == 0 {
// No versions found, delete without version ID
objectsToDelete = append(objectsToDelete, s3types.ObjectIdentifier{
Key: aws.String(key),
})
} else {
// Add each version as a separate object to delete
for _, version := range versions {
objectsToDelete = append(objectsToDelete, s3types.ObjectIdentifier{
Key: aws.String(key),
VersionId: aws.String(version),
})
}
if len(identifiers) == 0 {
// Already absent (e.g. retry of an earlier successful delete).
// Skip — issuing a key-only delete here would create a new delete-marker.
return nil
}
mu.Lock()
objectsToDelete = append(objectsToDelete, identifiers...)
mu.Unlock()
return nil
})
@@ -593,28 +585,43 @@ func (s *S3) isVersioningEnabled(ctx context.Context) bool {
return output.Status == s3types.BucketVersioningStatusEnabled
}

func (s *S3) getObjectAllVersions(ctx context.Context, key string) ([]string, error) {
// getObjectAllVersions returns ObjectIdentifier entries for every object version
// AND every delete-marker that belongs exactly to the given key.
// Deleting delete-markers by VersionId removes them, preventing accumulation
// of empty delete-markers across retries and prior partial deletions.
func (s *S3) getObjectAllVersions(ctx context.Context, key string) ([]s3types.ObjectIdentifier, error) {
listParams := &s3.ListObjectVersionsInput{
Bucket: aws.String(s.Config.Bucket),
Prefix: aws.String(key),
}
if s.Config.RequestPayer != "" {
listParams.RequestPayer = s3types.RequestPayer(s.Config.RequestPayer)
}
var versions []string
var identifiers []s3types.ObjectIdentifier
pager := s3.NewListObjectVersionsPaginator(s.client, listParams)
for pager.HasMorePages() {
page, err := pager.NextPage(ctx)
if err != nil {
return nil, errors.Wrapf(err, "listing object versions bucket: %s key: %s", s.Config.Bucket, key)
}
for _, version := range page.Versions {
if *version.Key == key {
versions = append(versions, *version.VersionId)
if version.Key != nil && *version.Key == key {
identifiers = append(identifiers, s3types.ObjectIdentifier{
Key: aws.String(key),
VersionId: version.VersionId,
})
}
}
for _, marker := range page.DeleteMarkers {
if marker.Key != nil && *marker.Key == key {
identifiers = append(identifiers, s3types.ObjectIdentifier{
Key: aws.String(key),
VersionId: marker.VersionId,
})
}
}
}
return versions, nil
return identifiers, nil
}

func (s *S3) StatFile(ctx context.Context, key string) (RemoteFile, error) {
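With `getObjectAllVersions` now returning `[]s3types.ObjectIdentifier` covering both versions and delete-markers, a caller can drain a key in one batch request. The sketch below, assuming aws-sdk-go-v2, shows a hypothetical helper `deleteAllVersions` doing exactly that; skipping the call when the slice is empty mirrors the early return above and avoids minting a fresh delete-marker on retries.

```go
package example

import (
	"context"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/s3"
	s3types "github.com/aws/aws-sdk-go-v2/service/s3/types"
)

// deleteAllVersions removes every version and delete-marker identified for a key
// in a single DeleteObjects request.
func deleteAllVersions(ctx context.Context, client *s3.Client, bucket string, identifiers []s3types.ObjectIdentifier) error {
	if len(identifiers) == 0 {
		// Key is already absent; a key-only DeleteObject here would only create a new delete-marker.
		return nil
	}
	_, err := client.DeleteObjects(ctx, &s3.DeleteObjectsInput{
		Bucket: aws.String(bucket),
		Delete: &s3types.Delete{Objects: identifiers},
	})
	return err
}
```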
66 changes: 63 additions & 3 deletions test/integration/fips_test.go
@@ -18,7 +18,7 @@ import (

func TestFIPS(t *testing.T) {
if compareVersion(os.Getenv("CLICKHOUSE_VERSION"), "19.17") <= 0 {
t.Skip("go 1.25 with boringcrypto stop works for 19.17, works only for 20.1+")
t.Skip("go 1.26 with boringcrypto stop works for 19.17, works only for 20.1+")
}
if os.Getenv("QA_AWS_ACCESS_KEY") == "" {
t.Skip("QA_AWS_ACCESS_KEY is empty, TestFIPS will skip")
@@ -27,12 +27,38 @@ func TestFIPS(t *testing.T) {
env.connectWithWait(t, r, 1*time.Second, 1*time.Second, 1*time.Minute)
fipsBackupName := fmt.Sprintf("fips_backup_%d", rand.Int())
env.DockerExecNoError(r, "clickhouse", "rm", "-fv", "/etc/apt/sources.list.d/clickhouse.list")
env.InstallDebIfNotExists(r, "clickhouse", "ca-certificates", "curl", "gettext-base", "bsdmainutils", "dnsutils", "git")
env.InstallDebIfNotExists(r, "clickhouse", "ca-certificates", "curl", "gettext-base", "binutils", "bsdmainutils", "dnsutils", "git")
env.DockerExecNoError(r, "clickhouse", "update-ca-certificates")
r.NoError(env.DockerCP("configs/config-s3-fips.yml", "clickhouse:/etc/clickhouse-backup/config.yml.fips-template"))
env.DockerExecNoError(r, "clickhouse", "git", "clone", "--depth", "1", "--branch", "v3.2rc3", "https://github.com/drwetter/testssl.sh.git", "/opt/testssl")
env.DockerExecNoError(r, "clickhouse", "chmod", "+x", "/opt/testssl/testssl.sh")

// P0: Verify binary version contains -fips suffix
fipsVersion, err := env.DockerExecOut("clickhouse", "bash", "-ce", "clickhouse-backup-fips --version 2>&1")
r.NoError(err, "unexpected clickhouse-backup-fips --version error: %v", err)
r.Contains(fipsVersion, "FIPS 140-3:\t true", "FIPS binary version should contain 'FIPS 140-3: true' suffix, got: %s", fipsVersion)

// P0: Integrity self-check — binary starts without panic in FIPS mode
fipsSelfCheck, err := env.DockerExecOut("clickhouse", "bash", "-ce", "GODEBUG=fips140=on clickhouse-backup-fips --version 2>&1")
r.NoError(err, "unexpected FIPS self-check error: %v", err)
r.NotContains(fipsSelfCheck, "panic", "FIPS binary should not panic during integrity self-check")

// P0: Verify crypto/fips140.Enabled() reports true via version output
r.Contains(fipsSelfCheck, "FIPS 140-3:\t true", "FIPS 140-3 should be enabled when GODEBUG=fips140=on, got: %s", fipsSelfCheck)

// P1: Verify GODEBUG=fips140=only (strict mode) — non-FIPS crypto returns errors
fipsOnlyCheck, err := env.DockerExecOut("clickhouse", "bash", "-ce", "GODEBUG=fips140=only clickhouse-backup-fips --version 2>&1")
r.NoError(err, "unexpected FIPS only mode error: %v", err)
r.NotContains(fipsOnlyCheck, "panic", "FIPS binary should not panic in fips140=only mode")
r.Contains(fipsOnlyCheck, "FIPS 140-3:\t true", "FIPS 140-3 should be enabled in fips140=only mode, got: %s", fipsOnlyCheck)

// P2: Verify binary contains fips140 symbols
fipsSymbols, err := env.DockerExecOut("clickhouse", "bash", "-ce", "strings /usr/bin/clickhouse-backup-fips | grep -c 'crypto/internal/fips140'")
r.NoError(err, "unexpected strings/grep error: %v", err)
fipsSymbolCount, convErr := strconv.Atoi(strings.TrimSpace(fipsSymbols))
r.NoError(convErr, "unexpected Atoi error for fipsSymbols=%q: %v", fipsSymbols, convErr)
r.Greater(fipsSymbolCount, 0, "binary should contain crypto/internal/fips140 symbols")

generateCerts := func(certType, keyLength, curveType string) {
env.DockerExecNoError(r, "clickhouse", "bash", "-xce", "openssl rand -out /root/.rnd 2048")
switch certType {
@@ -62,7 +88,26 @@ func TestFIPS(t *testing.T) {

log.Debug().Msg("Run `clickhouse-backup-fips server` in background")
env.DockerExecBackgroundNoError(r, "clickhouse", "bash", "-ce", "AWS_USE_FIPS_ENDPOINT=true clickhouse-backup-fips -c /etc/clickhouse-backup/config-s3-fips.yml server &>>/tmp/clickhouse-backup-server-fips.log")
time.Sleep(1 * time.Second)

// Wait until the API server is fully ready: TLS listener on :7172 is bound AND
// `system.backup_actions` integration table has been registered. The server is
// HTTPS with required client cert auth, so we only probe TCP reachability (a
// successful TCP connect proves ListenAndServeTLS has started). Without this
// probe, the client `INSERT INTO system.backup_actions` below races server
// startup and fails with `Code 60: UNKNOWN_TABLE` — table creation runs before
// the listener is bound.
fipsReadyDeadline := time.Now().Add(30 * time.Second)
for {
tcpOut, _ := env.DockerExecOut("clickhouse", "bash", "-ce", "if timeout 2 bash -c '</dev/tcp/localhost/7172' 2>/dev/null; then echo open; else echo closed; fi")
tblOut, _ := env.DockerExecOut("clickhouse", "bash", "-ce", "clickhouse client -q 'EXISTS TABLE system.backup_actions' 2>/dev/null || true")
if strings.TrimSpace(tcpOut) == "open" && strings.TrimSpace(tblOut) == "1" {
break
}
if time.Now().After(fipsReadyDeadline) {
r.FailNow("clickhouse-backup-fips server did not become ready in 30s", "tcp=%q table_exists=%q", tcpOut, tblOut)
}
time.Sleep(200 * time.Millisecond)
}

runClickHouseClientInsertSystemBackupActions(r, env, []string{fmt.Sprintf("create_remote --tables="+t.Name()+".fips_table %s", fipsBackupName)}, true)
runClickHouseClientInsertSystemBackupActions(r, env, []string{fmt.Sprintf("delete local %s", fipsBackupName)}, false)
@@ -93,6 +138,12 @@ func TestFIPS(t *testing.T) {
r.NoError(err, "%s\nunexpected grep testssl.csv error: %v", out, err)
r.Equal(strconv.Itoa(len(cipherList)), strings.Trim(out, " \t\r\n"))

// P1: Negative test — non-FIPS ciphers (RC4, DES, 3DES, CHACHA, NULL) should NOT be offered
nonFipsOut, nonFipsErr := env.DockerExecOut("clickhouse", "bash", "-ce", "grep -v 'not offered' /tmp/testssl.csv | grep -c -i -E '(RC4|TRIPLEDES|DES-CBC|CHACHA|NULL)' || true")
r.NoError(nonFipsErr, "%s\nunexpected grep non-FIPS ciphers error: %v", nonFipsOut, nonFipsErr)
nonFipsCount, _ := strconv.Atoi(strings.TrimSpace(nonFipsOut))
r.Equal(0, nonFipsCount, "non-FIPS ciphers (RC4/DES/3DES/CHACHA/NULL) should not be offered by FIPS server, found %d matches", nonFipsCount)

inProgressActions := make([]struct {
Command string `ch:"command"`
Status string `ch:"status"`
@@ -104,6 +155,15 @@ func TestFIPS(t *testing.T) {
r.Equal(0, len(inProgressActions), "inProgressActions=%+v", inProgressActions)
env.DockerExecNoError(r, "clickhouse", "pkill", "-n", "-f", "clickhouse-backup-fips")
}
// P1: Test create_remote + restore_remote in strict GODEBUG=fips140=only mode
// @todo think about FIPS clickhouse-server, which not supported by GODEBUG=fips140=only
// fipsOnlyBackupName := fmt.Sprintf("fips_only_backup_%d", rand.Int())
//env.DockerExecNoError(r, "clickhouse", "bash", "-ce", "GODEBUG=fips140=only clickhouse-backup-fips -c /etc/clickhouse-backup/config-s3-fips.yml create_remote --tables="+t.Name()+".fips_table "+fipsOnlyBackupName)
//env.DockerExecNoError(r, "clickhouse", "bash", "-ce", "GODEBUG=fips140=only clickhouse-backup-fips -c /etc/clickhouse-backup/config-s3-fips.yml delete local "+fipsOnlyBackupName)
//env.DockerExecNoError(r, "clickhouse", "bash", "-ce", "GODEBUG=fips140=only clickhouse-backup-fips -c /etc/clickhouse-backup/config-s3-fips.yml restore_remote --tables="+t.Name()+".fips_table "+fipsOnlyBackupName)
//env.DockerExecNoError(r, "clickhouse", "bash", "-ce", "GODEBUG=fips140=only clickhouse-backup-fips -c /etc/clickhouse-backup/config-s3-fips.yml delete local "+fipsOnlyBackupName)
//env.DockerExecNoError(r, "clickhouse", "bash", "-ce", "GODEBUG=fips140=only clickhouse-backup-fips -c /etc/clickhouse-backup/config-s3-fips.yml delete remote "+fipsOnlyBackupName)

// https://www.perplexity.ai/search/0920f1e8-59ec-4e14-b779-ba7b2e037196
testTLSCerts("rsa", "4096", "", "ECDHE-RSA-AES128-GCM-SHA256", "ECDHE-RSA-AES256-GCM-SHA384", "AES_128_GCM_SHA256", "AES_256_GCM_SHA384")
testTLSCerts("ecdsa", "", "prime256v1", "ECDHE-ECDSA-AES128-GCM-SHA256", "ECDHE-ECDSA-AES256-GCM-SHA384")
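
The FIPS assertions added above key off the `FIPS 140-3:\t true` line in the `--version` output. A minimal sketch of where such a line can come from, assuming Go 1.24+ where the `crypto/fips140` package is available:

```go
package main

import (
	"crypto/fips140"
	"fmt"
)

func main() {
	// With GODEBUG=fips140=on (or =only), Enabled() reports true, which is
	// what the test greps for in the version output.
	fmt.Printf("FIPS 140-3:\t %v\n", fips140.Enabled())
}
```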