Skip to content
3 changes: 3 additions & 0 deletions api/v2/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,7 @@ func (c *ReplicaConfig) toInternalReplicaConfigWithOriginConfig(
FileCleanupCronSpec: c.Sink.CloudStorageConfig.FileCleanupCronSpec,
FlushConcurrency: c.Sink.CloudStorageConfig.FlushConcurrency,
OutputRawChangeEvent: c.Sink.CloudStorageConfig.OutputRawChangeEvent,
UseTableIDAsPath: c.Sink.CloudStorageConfig.UseTableIDAsPath,
}
}
var debeziumConfig *config.DebeziumConfig
Expand Down Expand Up @@ -861,6 +862,7 @@ func ToAPIReplicaConfig(c *config.ReplicaConfig) *ReplicaConfig {
FileCleanupCronSpec: cloned.Sink.CloudStorageConfig.FileCleanupCronSpec,
FlushConcurrency: cloned.Sink.CloudStorageConfig.FlushConcurrency,
OutputRawChangeEvent: cloned.Sink.CloudStorageConfig.OutputRawChangeEvent,
UseTableIDAsPath: cloned.Sink.CloudStorageConfig.UseTableIDAsPath,
}
}
var debeziumConfig *DebeziumConfig
Expand Down Expand Up @@ -1495,6 +1497,7 @@ type CloudStorageConfig struct {
FileCleanupCronSpec *string `json:"file_cleanup_cron_spec,omitempty"`
FlushConcurrency *int `json:"flush_concurrency,omitempty"`
OutputRawChangeEvent *bool `json:"output_raw_change_event,omitempty"`
UseTableIDAsPath *bool `json:"use_table_id_as_path,omitempty"`
}

// ChangefeedStatus holds common information of a changefeed in cdc
Expand Down
7 changes: 7 additions & 0 deletions api/v2/model_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ func TestReplicaConfigConversion(t *testing.T) {
EnableSyncPoint: util.AddressOf(true),
EnableTableMonitor: util.AddressOf(true),
BDRMode: util.AddressOf(true),
Sink: &SinkConfig{
CloudStorageConfig: &CloudStorageConfig{
UseTableIDAsPath: util.AddressOf(true),
},
},
Mounter: &MounterConfig{
WorkerNum: util.AddressOf(16),
},
Expand Down Expand Up @@ -61,6 +66,7 @@ func TestReplicaConfigConversion(t *testing.T) {
require.True(t, util.GetOrZero(internalCfg.EnableSyncPoint))
require.True(t, util.GetOrZero(internalCfg.EnableTableMonitor))
require.True(t, util.GetOrZero(internalCfg.BDRMode))
require.True(t, util.GetOrZero(internalCfg.Sink.CloudStorageConfig.UseTableIDAsPath))
require.Equal(t, internalCfg.Mounter.WorkerNum, *apiCfg.Mounter.WorkerNum)
require.True(t, util.GetOrZero(internalCfg.Scheduler.EnableTableAcrossNodes))
require.Equal(t, 1000, util.GetOrZero(internalCfg.Scheduler.RegionThreshold))
Expand All @@ -85,6 +91,7 @@ func TestReplicaConfigConversion(t *testing.T) {
require.True(t, *apiCfgBack.CaseSensitive)
require.True(t, *apiCfgBack.ForceReplicate)
require.True(t, *apiCfgBack.IgnoreIneligibleTable)
require.True(t, *apiCfgBack.Sink.CloudStorageConfig.UseTableIDAsPath)
require.Equal(t, 16, *apiCfgBack.Mounter.WorkerNum)
require.True(t, *apiCfgBack.Scheduler.EnableTableAcrossNodes)
require.Equal(t, "correctness", *apiCfgBack.Integrity.IntegrityCheckLevel)
Expand Down
21 changes: 18 additions & 3 deletions downstreamadapter/sink/cloudstorage/sink.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,12 @@ func (s *sink) writeDDLEvent(event *commonEvent.DDLEvent) error {
// For exchange partition, we need to write the schema of the source table.
// write the previous table first
if event.GetDDLType() == model.ActionExchangeTablePartition {
if len(event.MultipleTableInfos) < 2 || event.MultipleTableInfos[1] == nil {
return errors.ErrInternalCheckFailed.GenWithStackByArgs(
"invalid exchange partition ddl event, source table info is missing")
}
sourceTableInfo := event.MultipleTableInfos[1]

var def cloudstorage.TableDefinition
def.FromTableInfo(event.ExtraSchemaName, event.ExtraTableName, event.TableInfo, event.FinishedTs, s.cfg.OutputColumnID)
def.Query = event.Query
Expand All @@ -210,8 +216,10 @@ func (s *sink) writeDDLEvent(event *commonEvent.DDLEvent) error {
return err
}
var sourceTableDef cloudstorage.TableDefinition
sourceTableDef.FromTableInfo(event.SchemaName, event.TableName, event.MultipleTableInfos[1], event.FinishedTs, s.cfg.OutputColumnID)
if err := s.writeFile(event, sourceTableDef); err != nil {
sourceTableDef.FromTableInfo(event.SchemaName, event.TableName, sourceTableInfo, event.FinishedTs, s.cfg.OutputColumnID)
sourceEvent := *event
sourceEvent.TableInfo = sourceTableInfo
if err := s.writeFile(&sourceEvent, sourceTableDef); err != nil {
return err
}
} else {
Expand All @@ -236,12 +244,19 @@ func (s *sink) writeDDLEvent(event *commonEvent.DDLEvent) error {
}

func (s *sink) writeFile(v *commonEvent.DDLEvent, def cloudstorage.TableDefinition) error {
// skip write database-level event for 'use-table-id-as-path' mode
if s.cfg.UseTableIDAsPath && def.Table == "" {
log.Debug("skip database schema for table id path",
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will lead to the database level DDL not being replicated to the downstream

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is a breaking change

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason for this design is:

  • Database events are not processed by TiCI;
  • Retaining database events would create a database directory and schema files, increasing the workload of subsequent GC S3 directory creation.

Note: The 'use-table-id-as-path' configuration option ONLY applies to TiCI.

zap.String("schema", def.Schema),
zap.String("query", def.Query))
return nil
}
encodedDef, err := def.MarshalWithQuery()
if err != nil {
return errors.Trace(err)
}

path, err := def.GenerateSchemaFilePath()
path, err := def.GenerateSchemaFilePath(s.cfg.UseTableIDAsPath, v.GetTableID())
if err != nil {
return errors.Trace(err)
}
Expand Down
155 changes: 155 additions & 0 deletions downstreamadapter/sink/cloudstorage/sink_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,161 @@ func verifyWriteDDLEventFlushDMLBeforeBlock(t *testing.T) {
require.NoError(t, err)
}

func TestWriteDDLEventWithTableIDAsPath(t *testing.T) {
	// With use-table-id-as-path enabled, a table-level DDL must write its
	// schema file under the "<tableID>/meta/" directory rather than the
	// "<schema>/<table>/" layout.
	dir := t.TempDir()
	rawURI := fmt.Sprintf("file:///%s?protocol=csv&use-table-id-as-path=true", dir)
	parsedURI, err := url.Parse(rawURI)
	require.NoError(t, err)

	cfg := config.GetDefaultReplicaConfig()
	require.NoError(t, cfg.ValidateAndAdjust(parsedURI))

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	appcontext.SetService(appcontext.DefaultPDClock, pdutil.NewClock4Test())

	s, err := newSinkForTest(ctx, cfg, parsedURI, nil)
	require.NoError(t, err)

	go s.Run(ctx)

	info := common.WrapTableInfo("test", &timodel.TableInfo{
		ID:   20,
		Name: ast.NewCIStr("table1"),
		Columns: []*timodel.ColumnInfo{
			{
				Name:      ast.NewCIStr("col1"),
				FieldType: *types.NewFieldType(mysql.TypeLong),
			},
			{
				Name:      ast.NewCIStr("col2"),
				FieldType: *types.NewFieldType(mysql.TypeVarchar),
			},
		},
	})
	event := &commonEvent.DDLEvent{
		Query:      "alter table test.table1 add col2 varchar(64)",
		Type:       byte(timodel.ActionAddColumn),
		SchemaName: "test",
		TableName:  "table1",
		FinishedTs: 100,
		TableInfo:  info,
	}

	require.NoError(t, s.WriteBlockEvent(event))

	// The schema file must live under the table-ID based path.
	metaDir := path.Join(dir, "20/meta/")
	schemaBytes, err := os.ReadFile(path.Join(metaDir, "schema_100_4192708364.json"))
	require.NoError(t, err)
	require.Contains(t, string(schemaBytes), `"Table": "table1"`)
}

func TestSkipDatabaseSchemaWithTableIDAsPath(t *testing.T) {
	// With use-table-id-as-path enabled, database-level DDL events (no table
	// name) must not create any schema directory or file on the storage.
	dir := t.TempDir()
	rawURI := fmt.Sprintf("file:///%s?protocol=csv&use-table-id-as-path=true", dir)
	parsedURI, err := url.Parse(rawURI)
	require.NoError(t, err)

	cfg := config.GetDefaultReplicaConfig()
	require.NoError(t, cfg.ValidateAndAdjust(parsedURI))

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	appcontext.SetService(appcontext.DefaultPDClock, pdutil.NewClock4Test())

	s, err := newSinkForTest(ctx, cfg, parsedURI, nil)
	require.NoError(t, err)

	go s.Run(ctx)

	event := &commonEvent.DDLEvent{
		Query:      "create database test_db",
		Type:       byte(timodel.ActionCreateSchema),
		SchemaName: "test_db",
		TableName:  "",
		FinishedTs: 100,
		TableInfo:  nil,
	}

	require.NoError(t, s.WriteBlockEvent(event))

	// No "test_db" directory should have been created on disk.
	_, statErr := os.Stat(path.Join(dir, "test_db"))
	require.Error(t, statErr)
	require.True(t, os.IsNotExist(statErr))
}

func TestWriteDDLEventWithInvalidExchangePartitionEvent(t *testing.T) {
	// An exchange-partition DDL must carry the source table info as the second
	// entry of MultipleTableInfos; otherwise the sink rejects the event.
	cases := []struct {
		name               string
		multipleTableInfos []*common.TableInfo
	}{
		{
			name:               "nil source table info",
			multipleTableInfos: []*common.TableInfo{nil},
		},
		{
			name:               "short table infos",
			multipleTableInfos: nil,
		},
	}

	dir := t.TempDir()
	rawURI := fmt.Sprintf("file:///%s?protocol=csv&use-table-id-as-path=true", dir)
	parsedURI, err := url.Parse(rawURI)
	require.NoError(t, err)

	cfg := config.GetDefaultReplicaConfig()
	require.NoError(t, cfg.ValidateAndAdjust(parsedURI))

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	appcontext.SetService(appcontext.DefaultPDClock, pdutil.NewClock4Test())

	s, err := newSinkForTest(ctx, cfg, parsedURI, nil)
	require.NoError(t, err)

	info := common.WrapTableInfo("test", &timodel.TableInfo{
		ID:   20,
		Name: ast.NewCIStr("table1"),
		Columns: []*timodel.ColumnInfo{
			{
				Name:      ast.NewCIStr("col1"),
				FieldType: *types.NewFieldType(mysql.TypeLong),
			},
		},
	})

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			event := &commonEvent.DDLEvent{
				Query:           "alter table test.table1 exchange partition p0 with table test.table2",
				Type:            byte(timodel.ActionExchangeTablePartition),
				SchemaName:      "test",
				TableName:       "table1",
				ExtraSchemaName: "test",
				ExtraTableName:  "table2",
				FinishedTs:      100,
				TableInfo:       info,
			}
			// First entry is the target table; the (possibly missing/nil)
			// source entries come from the test case.
			infos := make([]*common.TableInfo, 0, len(c.multipleTableInfos)+1)
			infos = append(infos, info)
			infos = append(infos, c.multipleTableInfos...)
			event.MultipleTableInfos = infos

			writeErr := s.WriteBlockEvent(event)
			require.ErrorContains(t, writeErr, "invalid exchange partition ddl event, source table info is missing")
		})
	}
}

func TestWriteCheckpointEvent(t *testing.T) {
parentDir := t.TempDir()
uri := fmt.Sprintf("file:///%s?protocol=csv", parentDir)
Expand Down
10 changes: 9 additions & 1 deletion downstreamadapter/sink/cloudstorage/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,15 @@ func (d *writer) flushMessages(ctx context.Context) error {
zap.Error(err))
return err
}
indexFilePath := d.filePathGenerator.GenerateIndexFilePath(table, date)
indexFilePath, err := d.filePathGenerator.GenerateIndexFilePath(table, date)
if err != nil {
log.Error("failed to generate index file path",
zap.Int("shardID", d.shardID),
zap.String("keyspace", d.changeFeedID.Keyspace()),
zap.Stringer("changefeed", d.changeFeedID.ID()),
zap.Error(err))
return errors.Trace(err)
}

if err := d.writeDataFile(ctx, dataFilePath, indexFilePath, singleTask); err != nil {
log.Error("failed to write data file to external storage",
Expand Down
73 changes: 71 additions & 2 deletions pkg/config/sink.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ const (

// TxnAtomicityKey specifies the key of the transaction-atomicity in the SinkURI.
TxnAtomicityKey = "transaction-atomicity"
// UseTableIDAsPathKey specifies the key of the use-table-id-as-path in the SinkURI.
UseTableIDAsPathKey = "use-table-id-as-path"
// defaultTxnAtomicity is the default atomicity level.
defaultTxnAtomicity = noneTxnAtomicity
// unknownTxnAtomicity is an invalid atomicity level and will be treated as
Expand Down Expand Up @@ -698,6 +700,8 @@ type CloudStorageConfig struct {
FileExpirationDays *int `toml:"file-expiration-days" json:"file-expiration-days,omitempty"`
FileCleanupCronSpec *string `toml:"file-cleanup-cron-spec" json:"file-cleanup-cron-spec,omitempty"`
FlushConcurrency *int `toml:"flush-concurrency" json:"flush-concurrency,omitempty"`
// UseTableIDAsPath is only available when the downstream is Storage (TiCI only).
UseTableIDAsPath *bool `toml:"use-table-id-as-path" json:"use-table-id-as-path,omitempty"`

// OutputRawChangeEvent controls whether to split the update pk/uk events.
OutputRawChangeEvent *bool `toml:"output-raw-change-event" json:"output-raw-change-event,omitempty"`
Expand All @@ -711,6 +715,27 @@ func (c *CloudStorageConfig) GetOutputRawChangeEvent() bool {
return *c.OutputRawChangeEvent
}

// CheckUseTableIDAsPathCompatibility checks the compatibility between sink config and sink URI.
// It returns ErrIncompatibleSinkConfig only when both the config and the URI
// explicitly set use-table-id-as-path and the two values disagree; if either
// side leaves the option unset there is nothing to compare and nil is returned.
func CheckUseTableIDAsPathCompatibility(
	sinkConfig *SinkConfig,
	useTableIDAsPathFromURI *bool,
) error {
	// Either side unset: no conflict possible.
	if useTableIDAsPathFromURI == nil {
		return nil
	}
	if sinkConfig == nil || sinkConfig.CloudStorageConfig == nil {
		return nil
	}
	fromConfig := sinkConfig.CloudStorageConfig.UseTableIDAsPath
	if fromConfig == nil || *fromConfig == *useTableIDAsPathFromURI {
		return nil
	}
	// Both sides are set and disagree; report the mismatch.
	return cerror.ErrIncompatibleSinkConfig.GenWithStackByArgs(
		fmt.Sprintf("%s=%t", UseTableIDAsPathKey, *useTableIDAsPathFromURI),
		fmt.Sprintf("%s=%t", UseTableIDAsPathKey, *fromConfig),
	)
}

func (s *SinkConfig) validateAndAdjust(sinkURI *url.URL) error {
if err := s.validateAndAdjustSinkURI(sinkURI); err != nil {
return err
Expand Down Expand Up @@ -977,15 +1002,59 @@ func (s *SinkConfig) CheckCompatibilityWithSinkURI(
return cerror.WrapError(cerror.ErrSinkURIInvalid, err)
}

var useTableIDAsPathFromURI *bool
if IsStorageScheme(sinkURI.Scheme) {
useTableIDAsPathValue := sinkURI.Query().Get(UseTableIDAsPathKey)
if useTableIDAsPathValue != "" {
enabled, parseErr := strconv.ParseBool(useTableIDAsPathValue)
if parseErr != nil {
return cerror.WrapError(cerror.ErrSinkURIInvalid, parseErr)
}
useTableIDAsPathFromURI = util.AddressOf(enabled)
}
}

getUseTableIDAsPath := func(cfg *SinkConfig) *bool {
if cfg == nil || cfg.CloudStorageConfig == nil {
return nil
}
return cfg.CloudStorageConfig.UseTableIDAsPath
}

useTableIDAsPathChanged := func() bool {
newVal := getUseTableIDAsPath(s)
oldVal := getUseTableIDAsPath(oldSinkConfig)
if newVal == nil && oldVal == nil {
return false
}
if newVal == nil || oldVal == nil {
return true
}
return *newVal != *oldVal
}

cfgParamsChanged := s.Protocol != oldSinkConfig.Protocol ||
s.TxnAtomicity != oldSinkConfig.TxnAtomicity
s.TxnAtomicity != oldSinkConfig.TxnAtomicity ||
useTableIDAsPathChanged()

isURIParamsChanged := func(oldCfg SinkConfig) bool {
err := oldCfg.applyParameterBySinkURI(sinkURI)
return cerror.ErrIncompatibleSinkConfig.Equal(err)
if cerror.ErrIncompatibleSinkConfig.Equal(err) {
return true
}
if useTableIDAsPathFromURI == nil {
return false
}
return CheckUseTableIDAsPathCompatibility(&oldCfg, useTableIDAsPathFromURI) != nil
}
uriParamsChanged := isURIParamsChanged(*oldSinkConfig)

if !uriParamsChanged && IsStorageScheme(sinkURI.Scheme) {
if err := CheckUseTableIDAsPathCompatibility(s, useTableIDAsPathFromURI); err != nil {
return err
}
}

if !uriParamsChanged && !cfgParamsChanged {
return nil
}
Expand Down
Loading
Loading