From 4985cf32ae93f62861b1a10a6b00e9a72c134be8 Mon Sep 17 00:00:00 2001 From: Max Zanko Date: Wed, 4 Feb 2026 17:17:03 -0800 Subject: [PATCH] Replace hardcoded use of MD5 in sync-diff-inspector with configurable option to support FIPS-compliant environments. (#6) Context: TiDB FIPS build mode introduced in 7.6.0: https://github.com/pingcap/tidb/pull/47949. TiDB binaries built with FIPS 140-3 compliance mode disable MD5 hashing in OpenSSL library used by TiKV. Problem: sync-diff-inspector relies on hardcoded MD5() for chunk checksumming. For performance reasons, TiDB may push expression evaluation down to TiKV coprocessor (tidb_query_expr), which uses OpenSSL for cryptographic functions. In FIPS mode, TiKV's OpenSSL inner_evp_generic_fetch() tries to load MD5 algorithm and fails with error code 50856204 (EVP_R_UNSUPPORTED). As a result, sync-diff-inspector fails because TiDB rejects all MD5-based checksum queries due to OpenSSL security policy restrictions. Changes: - Added a new `checksum-algorithm` configuration flag: - Supported options: "md5" and "sha256" hash functions for checksumming - Default: md5 for backwards compatibility. --- sync_diff_inspector/config/config.go | 56 +++++++++++++++++------ sync_diff_inspector/config/config_test.go | 27 +++++++++-- sync_diff_inspector/source/mysql_shard.go | 8 +++- sync_diff_inspector/source/tidb.go | 6 ++- sync_diff_inspector/utils/utils.go | 32 +++++++++---- sync_diff_inspector/utils/utils_test.go | 25 +++++++++- 6 files changed, 121 insertions(+), 33 deletions(-) diff --git a/sync_diff_inspector/config/config.go b/sync_diff_inspector/config/config.go index 6c5c0dd48..ea49bc99a 100644 --- a/sync_diff_inspector/config/config.go +++ b/sync_diff_inspector/config/config.go @@ -54,6 +54,16 @@ const ( UnifiedTimeZone string = "+0:00" ) +// ChecksumAlgorithm specifies the hash function to use for chunk checksumming. 
+type ChecksumAlgorithm string + +const ( + // MD5 uses MD5 hash function (default for backwards compatibility) + MD5 ChecksumAlgorithm = "md5" + // SHA256 uses SHA256 hash function (for FIPS-compliant environments) + SHA256 ChecksumAlgorithm = "sha256" +) + // TableConfig is the config of table. type TableConfig struct { // table's filter to tell us which table should adapt to this config. @@ -124,6 +134,8 @@ type DataSource struct { Conn *sql.DB SessionConfig SessionConfig `toml:"session" json:"session"` + + ChecksumAlgorithm ChecksumAlgorithm `toml:"checksum-algorithm" json:"checksum-algorithm"` } // IsAutoSnapshot returns true if the tidb_snapshot is expected to automatically @@ -393,6 +405,10 @@ type Config struct { DMAddr string `toml:"dm-addr" json:"dm-addr"` // DMTask string `toml:"dm-task" json:"dm-task"` DMTask string `toml:"dm-task" json:"dm-task"` + // ChecksumAlgorithm specifies the hash function to use for chunk checksumming. + // Options: MD5 or SHA256. Default: MD5 (for backwards compatibility) + // Set to SHA256 for FIPS-compliant environments. 
+ ChecksumAlgorithm ChecksumAlgorithm `toml:"checksum-algorithm" json:"checksum-algorithm"` DataSources map[string]*DataSource `toml:"data-sources" json:"data-sources"` @@ -428,8 +444,10 @@ func NewConfig() *Config { fs.BoolVar(&cfg.CheckStructOnly, "check-struct-only", false, "ignore check table's data") fs.BoolVar(&cfg.SkipNonExistingTable, "skip-non-existing-table", false, "skip validation for tables that don't exist upstream or downstream") fs.BoolVar(&cfg.CheckDataOnly, "check-data-only", false, "ignore check table's struct") + fs.StringVar((*string)(&cfg.ChecksumAlgorithm), "checksum-algorithm", string(MD5), "checksum function: md5, sha256") _ = fs.MarkHidden("check-data-only") + _ = fs.MarkHidden("checksum-algorithm") fs.SortFlags = false return cfg @@ -531,12 +549,13 @@ func (c *Config) adjustConfigByDMSubTasks() (err error) { } dataSources := make(map[string]*DataSource) dataSources["target"] = &DataSource{ - Host: subTaskCfgs[0].To.Host, - Port: subTaskCfgs[0].To.Port, - User: subTaskCfgs[0].To.User, - Password: utils.SecretString(subTaskCfgs[0].To.Password), - SqlMode: sqlMode, - Security: parseTLSFromDMConfig(subTaskCfgs[0].To.Security), + Host: subTaskCfgs[0].To.Host, + Port: subTaskCfgs[0].To.Port, + User: subTaskCfgs[0].To.User, + Password: utils.SecretString(subTaskCfgs[0].To.Password), + SqlMode: sqlMode, + Security: parseTLSFromDMConfig(subTaskCfgs[0].To.Security), + ChecksumAlgorithm: c.ChecksumAlgorithm, } for _, subTaskCfg := range subTaskCfgs { tableRouter, err := router.NewTableRouter(subTaskCfg.CaseSensitive, []*router.TableRule{}) @@ -552,15 +571,15 @@ func (c *Config) adjustConfigByDMSubTasks() (err error) { routeTargetSet[dbutil.TableName(rule.TargetSchema, rule.TargetTable)] = struct{}{} } dataSources[subTaskCfg.SourceID] = &DataSource{ - Host: subTaskCfg.From.Host, - Port: subTaskCfg.From.Port, - User: subTaskCfg.From.User, - Password: utils.SecretString(subTaskCfg.From.Password), - SqlMode: sqlMode, - Security: 
parseTLSFromDMConfig(subTaskCfg.From.Security), - Router: tableRouter, - - RouteTargetSet: routeTargetSet, + Host: subTaskCfg.From.Host, + Port: subTaskCfg.From.Port, + User: subTaskCfg.From.User, + Password: utils.SecretString(subTaskCfg.From.Password), + SqlMode: sqlMode, + Security: parseTLSFromDMConfig(subTaskCfg.From.Security), + Router: tableRouter, + RouteTargetSet: routeTargetSet, + ChecksumAlgorithm: c.ChecksumAlgorithm, } } c.DataSources = dataSources @@ -575,6 +594,12 @@ func (c *Config) adjustConfigByDMSubTasks() (err error) { } func (c *Config) Init() (err error) { + checksumAlgo := ChecksumAlgorithm(strings.ToLower(string(c.ChecksumAlgorithm))) + if checksumAlgo != MD5 && checksumAlgo != SHA256 { + return errors.Errorf("checksum-algorithm must be 'md5' or 'sha256', got: %s", c.ChecksumAlgorithm) + } + c.ChecksumAlgorithm = checksumAlgo + if len(c.DMAddr) > 0 { err := c.adjustConfigByDMSubTasks() if err != nil { @@ -587,6 +612,7 @@ func (c *Config) Init() (err error) { return nil } for _, d := range c.DataSources { + d.ChecksumAlgorithm = c.ChecksumAlgorithm routeRuleList := make([]*router.TableRule, 0, len(c.Routes)) d.RouteTargetSet = make(map[string]struct{}) // if we had rules diff --git a/sync_diff_inspector/config/config_test.go b/sync_diff_inspector/config/config_test.go index 331c90fe2..a4b584b84 100644 --- a/sync_diff_inspector/config/config_test.go +++ b/sync_diff_inspector/config/config_test.go @@ -50,10 +50,10 @@ func TestParseConfig(t *testing.T) { // we might not use the same config to run this test. e.g. 
MYSQL_PORT can be 4000 require.JSONEq(t, cfg.String(), - "{\"check-thread-count\":4,\"split-thread-count\":5,\"export-fix-sql\":true,\"check-struct-only\":false,\"dm-addr\":\"\",\"dm-task\":\"\",\"data-sources\":{\"mysql1\":{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"sql-hint-use-index\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule2\"],\"Router\":{\"Selector\":{}},\"Conn\":null,\"session\":null},\"mysql2\":{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"sql-hint-use-index\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule2\"],\"Router\":{\"Selector\":{}},\"Conn\":null,\"session\":null},\"mysql3\":{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"sql-hint-use-index\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule3\"],\"Router\":{\"Selector\":{}},\"Conn\":null,\"session\":null},\"tidb0\":{\"host\":\"127.0.0.1\",\"port\":4000,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"sql-hint-use-index\":\"\",\"security\":null,\"route-rules\":null,\"Router\":{\"Selector\":{}},\"Conn\":null,\"session\":{\"max_execution_time\":86400,\"tidb_opt_prefer_range_scan\":\"ON\"}}},\"routes\":{\"rule1\":{\"schema-pattern\":\"test_*\",\"table-pattern\":\"t_*\",\"target-schema\":\"test\",\"target-table\":\"t\"},\"rule2\":{\"schema-pattern\":\"test2_*\",\"table-pattern\":\"t2_*\",\"target-schema\":\"test2\",\"target-table\":\"t2\"},\"rule3\":{\"schema-pattern\":\"test2_*\",\"table-pattern\":\"t2_*\",\"target-schema\":\"test\",\"target-table\":\"t\"}},\"table-configs\":{\"config1\":{\"target-tables\":[\"schema*.table*\",\"test2.t2\"],\"Schema\":\"\",\"Table\":\"\",\"ConfigIndex\":0,\"HasMatched\":false,\"IgnoreColumns\":[\"\",\"\"],\"Fields\":[\"\"],\"Range\":\"age \\u003e 10 AND age \\u003c 
20\",\"TargetTableInfo\":null,\"Collation\":\"\",\"chunk-size\":0}},\"task\":{\"source-instances\":[\"mysql1\",\"mysql2\",\"mysql3\"],\"source-routes\":null,\"target-instance\":\"tidb0\",\"target-check-tables\":[\"schema*.table*\",\"!c.*\",\"test2.t2\"],\"target-configs\":[\"config1\"],\"output-dir\":\"/tmp/output/config\",\"SourceInstances\":[{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"sql-hint-use-index\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule2\"],\"Router\":{\"Selector\":{}},\"Conn\":null,\"session\":null},{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"sql-hint-use-index\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule2\"],\"Router\":{\"Selector\":{}},\"Conn\":null,\"session\":null},{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"sql-hint-use-index\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule3\"],\"Router\":{\"Selector\":{}},\"Conn\":null,\"session\":null}],\"TargetInstance\":{\"host\":\"127.0.0.1\",\"port\":4000,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"sql-hint-use-index\":\"\",\"security\":null,\"route-rules\":null,\"Router\":{\"Selector\":{}},\"Conn\":null,\"session\":{\"max_execution_time\":86400,\"tidb_opt_prefer_range_scan\":\"ON\"}},\"TargetTableConfigs\":[{\"target-tables\":[\"schema*.table*\",\"test2.t2\"],\"Schema\":\"\",\"Table\":\"\",\"ConfigIndex\":0,\"HasMatched\":false,\"IgnoreColumns\":[\"\",\"\"],\"Fields\":[\"\"],\"Range\":\"age \\u003e 10 AND age \\u003c 20\",\"TargetTableInfo\":null,\"Collation\":\"\",\"chunk-size\":0}],\"TargetCheckTables\":[{},{},{}],\"FixDir\":\"/tmp/output/config/fix-on-tidb0\",\"CheckpointDir\":\"/tmp/output/config/checkpoint\",\"HashFile\":\"\"},\"ConfigFile\":\"config_sharding.toml\",\"PrintVersion\":false}") + 
"{\"check-thread-count\":4,\"split-thread-count\":5,\"export-fix-sql\":true,\"check-struct-only\":false,\"dm-addr\":\"\",\"dm-task\":\"\",\"checksum-algorithm\":\"md5\",\"data-sources\":{\"mysql1\":{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"sql-hint-use-index\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule2\"],\"Router\":{\"Selector\":{}},\"Conn\":null,\"session\":null,\"checksum-algorithm\":\"md5\"},\"mysql2\":{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"sql-hint-use-index\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule2\"],\"Router\":{\"Selector\":{}},\"Conn\":null,\"session\":null,\"checksum-algorithm\":\"md5\"},\"mysql3\":{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"sql-hint-use-index\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule3\"],\"Router\":{\"Selector\":{}},\"Conn\":null,\"session\":null,\"checksum-algorithm\":\"md5\"},\"tidb0\":{\"host\":\"127.0.0.1\",\"port\":4000,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"sql-hint-use-index\":\"\",\"security\":null,\"route-rules\":null,\"Router\":{\"Selector\":{}},\"Conn\":null,\"session\":{\"max_execution_time\":86400,\"tidb_opt_prefer_range_scan\":\"ON\"},\"checksum-algorithm\":\"md5\"}},\"routes\":{\"rule1\":{\"schema-pattern\":\"test_*\",\"table-pattern\":\"t_*\",\"target-schema\":\"test\",\"target-table\":\"t\"},\"rule2\":{\"schema-pattern\":\"test2_*\",\"table-pattern\":\"t2_*\",\"target-schema\":\"test2\",\"target-table\":\"t2\"},\"rule3\":{\"schema-pattern\":\"test2_*\",\"table-pattern\":\"t2_*\",\"target-schema\":\"test\",\"target-table\":\"t\"}},\"table-configs\":{\"config1\":{\"target-tables\":[\"schema*.table*\",\"test2.t2\"],\"Schema\":\"\",\"Table\":\"\",\"ConfigIndex\":0,\"HasMatched\":false,\"IgnoreColumns\":[\"\",\"\
"],\"Fields\":[\"\"],\"Range\":\"age \\u003e 10 AND age \\u003c 20\",\"TargetTableInfo\":null,\"Collation\":\"\",\"chunk-size\":0}},\"task\":{\"source-instances\":[\"mysql1\",\"mysql2\",\"mysql3\"],\"source-routes\":null,\"target-instance\":\"tidb0\",\"target-check-tables\":[\"schema*.table*\",\"!c.*\",\"test2.t2\"],\"target-configs\":[\"config1\"],\"output-dir\":\"/tmp/output/config\",\"SourceInstances\":[{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"sql-hint-use-index\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule2\"],\"Router\":{\"Selector\":{}},\"Conn\":null,\"session\":null,\"checksum-algorithm\":\"md5\"},{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"sql-hint-use-index\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule2\"],\"Router\":{\"Selector\":{}},\"Conn\":null,\"session\":null,\"checksum-algorithm\":\"md5\"},{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"sql-hint-use-index\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule3\"],\"Router\":{\"Selector\":{}},\"Conn\":null,\"session\":null,\"checksum-algorithm\":\"md5\"}],\"TargetInstance\":{\"host\":\"127.0.0.1\",\"port\":4000,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"sql-hint-use-index\":\"\",\"security\":null,\"route-rules\":null,\"Router\":{\"Selector\":{}},\"Conn\":null,\"session\":{\"max_execution_time\":86400,\"tidb_opt_prefer_range_scan\":\"ON\"},\"checksum-algorithm\":\"md5\"},\"TargetTableConfigs\":[{\"target-tables\":[\"schema*.table*\",\"test2.t2\"],\"Schema\":\"\",\"Table\":\"\",\"ConfigIndex\":0,\"HasMatched\":false,\"IgnoreColumns\":[\"\",\"\"],\"Fields\":[\"\"],\"Range\":\"age \\u003e 10 AND age \\u003c 
20\",\"TargetTableInfo\":null,\"Collation\":\"\",\"chunk-size\":0}],\"TargetCheckTables\":[{},{},{}],\"FixDir\":\"/tmp/output/config/fix-on-tidb0\",\"CheckpointDir\":\"/tmp/output/config/checkpoint\",\"HashFile\":\"\"},\"ConfigFile\":\"config_sharding.toml\",\"PrintVersion\":false}") hash, err := cfg.Task.ComputeConfigHash() require.NoError(t, err) - require.Equal(t, hash, "5a978bf48039d41b81403d635332493f031bb890a6d4e4d7df77f75e0ccc29f3") + require.Equal(t, "09c59e9563f2f03ec970420b9df37bb06b8b7d31228e65ef9bc8f997ce9a9e20", hash) require.True(t, cfg.TableConfigs["config1"].Valid()) @@ -77,12 +77,31 @@ func TestError(t *testing.T) { cfg.CheckThreadCount = 1 require.True(t, cfg.CheckConfig()) - // Init + // Checksum algorithm - invalid + cfg.ChecksumAlgorithm = "invalid" + err := cfg.Init() + require.Contains(t, err.Error(), "checksum-algorithm must be 'md5' or 'sha256'") + + // Valid checksum algorithm - sha256 + cfg.ChecksumAlgorithm = "sha256" + cfg.DataSources = nil + err = cfg.Init() + require.NotContains(t, err.Error(), "checksum-algorithm") + require.Equal(t, SHA256, cfg.ChecksumAlgorithm) + + // Valid checksum algorithm - MD5 + cfg.ChecksumAlgorithm = "MD5" + err = cfg.Init() + require.NotContains(t, err.Error(), "checksum-algorithm") + require.Equal(t, MD5, cfg.ChecksumAlgorithm) // normalized to lowercase + + cfg.ChecksumAlgorithm = MD5 cfg.DataSources = make(map[string]*DataSource) + // Init - invalid route cfg.DataSources["123"] = &DataSource{ RouteRules: []string{"111"}, } - err := cfg.Init() + err = cfg.Init() require.Contains(t, err.Error(), "not found source routes for rule 111, please correct the config") } diff --git a/sync_diff_inspector/source/mysql_shard.go b/sync_diff_inspector/source/mysql_shard.go index 4ae410d83..328ff8aaf 100644 --- a/sync_diff_inspector/source/mysql_shard.go +++ b/sync_diff_inspector/source/mysql_shard.go @@ -62,6 +62,7 @@ type MySQLSources struct { tableDiffs []*common.TableDiff sourceTablesMap 
map[string][]*common.TableShardSource + checksumAlgorithm config.ChecksumAlgorithm } func getMatchedSourcesForTable(sourceTablesMap map[string][]*common.TableShardSource, table *common.TableDiff) []*common.TableShardSource { @@ -103,7 +104,7 @@ func (s *MySQLSources) GetCountAndMd5(ctx context.Context, tableRange *splitter. for _, ms := range matchSources { go func(ms *common.TableShardSource) { - count, checksum, err := utils.GetCountAndMd5Checksum(ctx, ms.DBConn, ms.OriginSchema, ms.OriginTable, table.Info, chunk.Where, "", chunk.Args) + count, checksum, err := utils.GetCountAndChecksum(ctx, ms.DBConn, ms.OriginSchema, ms.OriginTable, table.Info, chunk.Where, "", chunk.Args, string(s.checksumAlgorithm)) infoCh <- &ChecksumInfo{ Checksum: checksum, Count: count, @@ -383,9 +384,14 @@ func NewMySQLSources(ctx context.Context, tableDiffs []*common.TableDiff, ds []* return nil, errors.Annotatef(err, "please make sure the filter is correct.") } + checksumAlgorithm := config.MD5 + if len(ds) > 0 { + checksumAlgorithm = ds[0].ChecksumAlgorithm + } mss := &MySQLSources{ tableDiffs: tableDiffs, sourceTablesMap: sourceTablesMap, + checksumAlgorithm: checksumAlgorithm, } return mss, nil } diff --git a/sync_diff_inspector/source/tidb.go b/sync_diff_inspector/source/tidb.go index 6a091862e..2f1c68a5e 100644 --- a/sync_diff_inspector/source/tidb.go +++ b/sync_diff_inspector/source/tidb.go @@ -87,6 +87,7 @@ type TiDBSource struct { sourceTableMap map[string]*common.TableSource snapshot string sqlHint string + checksumAlgorithm config.ChecksumAlgorithm // bucketSpliterPool is the shared pool to produce chunks using bucket bucketSpliterPool *utils.WorkerPool dbConn *sql.DB @@ -149,9 +150,9 @@ func (s *TiDBSource) GetCountAndMd5(ctx context.Context, tableRange *splitter.Ra } } - count, checksum, err := utils.GetCountAndMd5Checksum( + count, checksum, err := utils.GetCountAndChecksum( ctx, s.dbConn, matchSource.OriginSchema, matchSource.OriginTable, table.Info, - chunk.Where, 
indexHint, chunk.Args) + chunk.Where, indexHint, chunk.Args, string(s.checksumAlgorithm)) cost := time.Since(beginTime) return &ChecksumInfo{ @@ -301,6 +302,7 @@ func NewTiDBSource(ctx context.Context, tableDiffs []*common.TableDiff, ds *conf bucketSpliterPool: bucketSpliterPool, version: utils.TryToGetVersion(ctx, ds.Conn), sqlHint: ds.SQLHintUseIndex, + checksumAlgorithm: ds.ChecksumAlgorithm, } return ts, nil } diff --git a/sync_diff_inspector/utils/utils.go b/sync_diff_inspector/utils/utils.go index 5587f9b81..668c01a2c 100644 --- a/sync_diff_inspector/utils/utils.go +++ b/sync_diff_inspector/utils/utils.go @@ -779,10 +779,10 @@ func GetTableSize(ctx context.Context, db *sql.DB, schemaName, tableName string) return dataSize.Int64, nil } -// GetCountAndMd5Checksum returns checksum code and count of some data by given condition -func GetCountAndMd5Checksum(ctx context.Context, db *sql.DB, schemaName, tableName string, tbInfo *model.TableInfo, limitRange string, indexHint string, args []any) (int64, uint64, error) { +// GetCountAndChecksum returns checksum code and count of some data by given condition +func GetCountAndChecksum(ctx context.Context, db *sql.DB, schemaName, tableName string, tbInfo *model.TableInfo, limitRange string, indexHint string, args []interface{}, checksumAlgorithm string) (int64, uint64, error) { /* - calculate MD5 checksum and count example: + calculate checksum and count example (MD5): mysql> SELECT COUNT(*) as CNT, BIT_XOR(CAST(CONV(SUBSTRING(MD5(CONCAT_WS(',', `id`, `name`, CONCAT(ISNULL(`id`), ISNULL(`name`)))), 1, 16), 16, 10) AS UNSIGNED) ^ CAST(CONV(SUBSTRING(MD5(CONCAT_WS(',', `id`, `name`, CONCAT(ISNULL(`id`), ISNULL(`name`)))), 17, 16), 16, 10) AS UNSIGNED)) as CHECKSUM FROM `a`.`t`; +--------+---------------------- | CNT | CHECKSUM | @@ -790,6 +790,9 @@ func GetCountAndMd5Checksum(ctx context.Context, db *sql.DB, schemaName, tableNa | 100000 | 3462532621352132810 | +--------+---------------------- 1 row in set (0.46 sec) + + 
calculate checksum and count example (SHA256): + mysql> SELECT COUNT(*) as CNT, BIT_XOR(CAST(CONV(SUBSTRING(SHA2(CONCAT_WS(',', `id`, `name`, CONCAT(ISNULL(`id`), ISNULL(`name`))), 256), 1, 16), 16, 10) AS UNSIGNED) ^ CAST(CONV(SUBSTRING(SHA2(CONCAT_WS(',', `id`, `name`, CONCAT(ISNULL(`id`), ISNULL(`name`))), 256), 17, 16), 16, 10) AS UNSIGNED)) as CHECKSUM FROM `a`.`t`; */ columnNames := make([]string, 0, len(tbInfo.Columns)) columnIsNull := make([]string, 0, len(tbInfo.Columns)) @@ -810,16 +813,27 @@ func GetCountAndMd5Checksum(ctx context.Context, db *sql.DB, schemaName, tableNa columnIsNull = append(columnIsNull, fmt.Sprintf("ISNULL(%s)", name)) } - query := fmt.Sprintf("SELECT %s COUNT(*) as CNT, BIT_XOR(CAST(CONV(SUBSTRING(MD5(CONCAT_WS(',', %s, CONCAT(%s))), 1, 16), 16, 10) AS UNSIGNED) ^ CAST(CONV(SUBSTRING(MD5(CONCAT_WS(',', %s, CONCAT(%s))), 17, 16), 16, 10) AS UNSIGNED)) as CHECKSUM FROM %s WHERE %s;", - indexHint, - strings.Join(columnNames, ", "), - strings.Join(columnIsNull, ", "), + var checksumFuncTemplate string + if checksumAlgorithm == "sha256" { + checksumFuncTemplate = "SHA2(%s, 256)" + } else { + checksumFuncTemplate = "MD5(%s)" + } + + concatExpr := fmt.Sprintf("CONCAT_WS(',', %s, CONCAT(%s))", strings.Join(columnNames, ", "), - strings.Join(columnIsNull, ", "), + strings.Join(columnIsNull, ", ")) + + checksumExpr := fmt.Sprintf(checksumFuncTemplate, concatExpr) + + query := fmt.Sprintf("SELECT %s COUNT(*) as CNT, BIT_XOR(CAST(CONV(SUBSTRING(%s, 1, 16), 16, 10) AS UNSIGNED) ^ CAST(CONV(SUBSTRING(%s, 17, 16), 16, 10) AS UNSIGNED)) as CHECKSUM FROM %s WHERE %s;", + indexHint, + checksumExpr, + checksumExpr, dbutil.TableName(schemaName, tableName), limitRange, ) - log.Debug("count and checksum", zap.String("sql", query), zap.Reflect("args", args)) + log.Debug("count and checksum", zap.String("sql", query), zap.Reflect("args", args), zap.String("checksum-algorithm", checksumAlgorithm)) var count sql.NullInt64 var checksum uint64 diff --git 
a/sync_diff_inspector/utils/utils_test.go b/sync_diff_inspector/utils/utils_test.go index 96ac58c73..183f0965c 100644 --- a/sync_diff_inspector/utils/utils_test.go +++ b/sync_diff_inspector/utils/utils_test.go @@ -257,7 +257,7 @@ func TestBasicTableUtilOperation(t *testing.T) { require.Equal(t, tableInfo.Indices[0].Columns[1].Offset, 1) } -func TestGetCountAndMd5Checksum(t *testing.T) { +func TestGetCountAndChecksumMD5(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) defer cancel() @@ -271,12 +271,33 @@ func TestGetCountAndMd5Checksum(t *testing.T) { mock.ExpectQuery("SELECT COUNT.*FROM `test_schema`\\.`test_table` WHERE \\[23 45\\].*").WithArgs("123", "234").WillReturnRows(sqlmock.NewRows([]string{"CNT", "CHECKSUM"}).AddRow(123, 456)) - count, checksum, err := GetCountAndMd5Checksum(ctx, conn, "test_schema", "test_table", tableInfo, "[23 45]", "", []interface{}{"123", "234"}) + count, checksum, err := GetCountAndChecksum(ctx, conn, "test_schema", "test_table", tableInfo, "[23 45]", "", []interface{}{"123", "234"}, "md5") require.NoError(t, err) require.Equal(t, count, int64(123)) require.Equal(t, checksum, uint64(0x1c8)) } +func TestGetCountAndChecksumSHA256(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) + defer cancel() + + conn, mock, err := sqlmock.New() + require.NoError(t, err) + defer conn.Close() + + createTableSQL := "create table `test`.`test`(`a` int, `c` float, `b` varchar(10), `d` datetime, primary key(`a`, `b`), key(`c`, `d`))" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + // Verify that SHA2 is used in the query + mock.ExpectQuery("SELECT COUNT.*SHA2.*FROM `test_schema`\\.`test_table` WHERE \\[23 45\\].*").WithArgs("123", "234").WillReturnRows(sqlmock.NewRows([]string{"CNT", "CHECKSUM"}).AddRow(456, 789)) + + count, checksum, err := GetCountAndChecksum(ctx, conn, "test_schema", "test_table", tableInfo, "[23 
45]", "", []interface{}{"123", "234"}, "sha256") + require.NoError(t, err) + require.Equal(t, count, int64(456)) + require.Equal(t, checksum, uint64(789)) +} + func TestGetApproximateMid(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) defer cancel()