From d6ab6c87cb4170bb4d563cc4a8389977649e8a81 Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Mon, 22 Jun 2026 13:39:16 +0000 Subject: [PATCH 01/25] wip --- go/base/context.go | 199 +++++++++++++++++++++++- go/cmd/gh-ost/main.go | 15 +- go/logic/applier.go | 332 +++++++++++++++++++++++++++++++++------- go/logic/hooks.go | 5 + go/logic/inspect.go | 68 +++++++-- go/logic/migrator.go | 343 +++++++++++++++++++++++++++++++++--------- 6 files changed, 817 insertions(+), 145 deletions(-) diff --git a/go/base/context.go b/go/base/context.go index 747d23d01..51698ca4c 100644 --- a/go/base/context.go +++ b/go/base/context.go @@ -75,6 +75,106 @@ func NewThrottleCheckResult(throttle bool, reason string, reasonHint ThrottleRea } } +// MoveTable holds the per-table runtime state for a single table within a +// move-tables run. In move-tables mode the surrounding plumbing (one binlog +// stream, one applier connection, one throttler, one hooks executor) stays +// singular, but every migrated table carries its own schema, unique key, +// iteration progress, and counters keyed by table name. +// +// The range/iteration fields are guarded by the per-table rangeMutex. The +// applier-wide "current applied source coordinates" mutex stays single — there +// is one applied stream feeding all tables. +type MoveTable struct { + // Identity. + SourceDatabaseName string + SourceTableName string + TargetDatabaseName string + TargetTableName string + + // CreateTableStatement is the captured `SHOW CREATE TABLE` from the source, + // used to (re)create the table on the target. + CreateTableStatement string + + // Schema, captured from the source (or from the target, on resume). In + // move-tables mode source and target schemas match, so the shared columns are + // identical to the original columns. 
+ OriginalTableColumns *sql.ColumnList + OriginalTableVirtualColumns *sql.ColumnList + OriginalTableUniqueKeys [](*sql.UniqueKey) + UniqueKey *sql.UniqueKey + SharedColumns *sql.ColumnList + MappedSharedColumns *sql.ColumnList + + // RowsEstimate is the estimated row count for this table. + RowsEstimate int64 + + // Iteration / range state. Guarded by rangeMutex (except Iteration, which is + // accessed atomically so status readers don't need the lock). + MigrationRangeMinValues *sql.ColumnValues + MigrationRangeMaxValues *sql.ColumnValues + MigrationIterationRangeMinValues *sql.ColumnValues + MigrationIterationRangeMaxValues *sql.ColumnValues + Iteration int64 + + // LastIterationRange* record the last successfully-copied chunk range, used + // for checkpointing. Guarded by rangeMutex. + LastIterationRangeMinValues *sql.ColumnValues + LastIterationRangeMaxValues *sql.ColumnValues + + // RowsCopied is the number of rows copied for this table (accessed atomically). + RowsCopied int64 + + // rowCopyComplete is set (1) once this table's row copy finishes. The + // on-row-copy-complete hook and the cutover only proceed once every table is + // complete. Accessed atomically. + rowCopyComplete int64 + + // rangeMutex guards this table's range/iteration fields. + rangeMutex sync.Mutex +} + +// GetIteration returns the table's current iteration counter. +func (mt *MoveTable) GetIteration() int64 { + return atomic.LoadInt64(&mt.Iteration) +} + +// IncrementIteration advances the table's iteration counter by one. +func (mt *MoveTable) IncrementIteration() { + atomic.AddInt64(&mt.Iteration, 1) +} + +// SetNextIterationRangeMinValues advances the iteration window: the next chunk's +// min becomes the previous chunk's max (or the table min for the first chunk). 
+func (mt *MoveTable) SetNextIterationRangeMinValues() { + mt.rangeMutex.Lock() + defer mt.rangeMutex.Unlock() + mt.MigrationIterationRangeMinValues = mt.MigrationIterationRangeMaxValues + if mt.MigrationIterationRangeMinValues == nil { + mt.MigrationIterationRangeMinValues = mt.MigrationRangeMinValues + } +} + +// IsRowCopyComplete reports whether this table has finished its row copy. +func (mt *MoveTable) IsRowCopyComplete() bool { + return atomic.LoadInt64(&mt.rowCopyComplete) > 0 +} + +// SetRowCopyComplete marks this table's row copy as finished. +func (mt *MoveTable) SetRowCopyComplete() { + atomic.StoreInt64(&mt.rowCopyComplete, 1) +} + +// RecordLastIterationRange stores the last successfully-copied chunk range for +// checkpointing. +func (mt *MoveTable) RecordLastIterationRange() { + mt.rangeMutex.Lock() + defer mt.rangeMutex.Unlock() + if mt.MigrationIterationRangeMinValues != nil && mt.MigrationIterationRangeMaxValues != nil { + mt.LastIterationRangeMinValues = mt.MigrationIterationRangeMinValues.Clone() + mt.LastIterationRangeMaxValues = mt.MigrationIterationRangeMaxValues.Clone() + } +} + // MigrationContext has the general, global state of migration. It is used by // all components throughout the migration process. type MigrationContext struct { @@ -277,12 +377,15 @@ type MigrationContext struct { // move tables: MoveTables struct { - TableNames []string // List of table names to be moved. - TargetHost string // Target hostname for the move. This must be a primary/writable host. - TargetPort int // Target MySQL port for the move. - TargetUser string // Target username for the move. If not specified, it will default to the source user. - TargetPass string // Target password for the move. If not specified, it will default to the source password. - TargetDatabase string // Target database name for the move. If not specified, it will default to the source database name. 
+ TableNames []string // Ordered list of table names to be moved (order from --move-tables). Iteration is deterministic over this slice, never over the Tables map. + // Tables holds the per-table runtime state, keyed by source table name. + // Populated by InitMoveTableContainers() once per-table schema is known. + Tables map[string]*MoveTable + TargetHost string // Target hostname for the move. This must be a primary/writable host. + TargetPort int // Target MySQL port for the move. + TargetUser string // Target username for the move. If not specified, it will default to the source user. + TargetPass string // Target password for the move. If not specified, it will default to the source password. + TargetDatabase string // Target database name for the move. If not specified, it will default to the source database name. // AllowOnSourcePrimary opts in to running the move-tables read path (schema // inspection, the full row copy, binlog streaming) directly against the @@ -459,6 +562,23 @@ func (mctx *MigrationContext) GetOldTableName() string { return getSafeTableName(tableName, suffix) } +// MoveTableDelName returns the `__del` rollback-handle table name for a +// specific migrated table in move-tables mode. It mirrors GetOldTableName but +// for an explicit table name, so a multi-table cutover can rename every source +// table in one atomic RENAME. Revert is disallowed in move-tables mode, so the +// suffix is always "del". +func (mctx *MigrationContext) MoveTableDelName(tableName string) string { + suffix := "del" + if mctx.TimestampOldTable { + t := mctx.StartTime + timestamp := fmt.Sprintf("%d%02d%02d%02d%02d%02d", + t.Year(), t.Month(), t.Day(), + t.Hour(), t.Minute(), t.Second()) + return getSafeTableName(tableName, fmt.Sprintf("%s_%s", timestamp, suffix)) + } + return getSafeTableName(tableName, suffix) +} + // GetChangelogTableName generates the name of changelog table, based on original table name // or a given table name. 
func (mctx *MigrationContext) GetChangelogTableName() string {
@@ -1130,6 +1250,73 @@ func (mctx *MigrationContext) IsMoveTablesMode() bool {
 	return len(mctx.MoveTables.TableNames) > 0
 }
 
+// InitMoveTableContainers creates a per-table runtime container for every name
+// in the ordered MoveTables.TableNames list that does not have one yet. It is
+// idempotent: tables already present in the map keep their existing container
+// (and its state), and entries are never removed. Source and target table names
+// match in move-tables mode; only the database may differ.
+func (mctx *MigrationContext) InitMoveTableContainers() {
+	if mctx.MoveTables.Tables == nil {
+		mctx.MoveTables.Tables = make(map[string]*MoveTable, len(mctx.MoveTables.TableNames))
+	}
+	for _, tableName := range mctx.MoveTables.TableNames {
+		if _, ok := mctx.MoveTables.Tables[tableName]; ok {
+			continue
+		}
+		mctx.MoveTables.Tables[tableName] = &MoveTable{
+			SourceDatabaseName: mctx.DatabaseName,
+			SourceTableName:    tableName,
+			TargetDatabaseName: mctx.GetTargetDatabaseName(),
+			TargetTableName:    tableName,
+		}
+	}
+}
+
+// GetMoveTable returns the per-table container for the given source table name,
+// or nil if it has not been initialized.
+func (mctx *MigrationContext) GetMoveTable(tableName string) *MoveTable {
+	if mctx.MoveTables.Tables == nil {
+		return nil
+	}
+	return mctx.MoveTables.Tables[tableName]
+}
+
+// OrderedMoveTables returns the per-table containers in --move-tables order,
+// silently skipping names whose container has not been initialized. Always
+// iterate via this slice, never via the Tables map's random iteration order.
+func (mctx *MigrationContext) OrderedMoveTables() []*MoveTable {
+	tables := make([]*MoveTable, 0, len(mctx.MoveTables.TableNames))
+	for _, tableName := range mctx.MoveTables.TableNames {
+		if mt := mctx.GetMoveTable(tableName); mt != nil {
+			tables = append(tables, mt)
+		}
+	}
+	return tables
+}
+
+// MoveTablePrimaryName returns the first table in --move-tables order.
Several
+// run-wide artifacts (checkpoint table name, changelog/old-table naming) are
+// derived from a single "primary" table to keep one set of housekeeping objects
+// per run; the primary is simply the first listed table.
+func (mctx *MigrationContext) MoveTablePrimaryName() string {
+	if len(mctx.MoveTables.TableNames) == 0 {
+		return ""
+	}
+	return mctx.MoveTables.TableNames[0]
+}
+
+// AllMoveTablesRowCopyComplete reports whether every table listed in
+// --move-tables has finished its row copy; a listed table with no initialized
+// container counts as incomplete. The hook and cutover gate on this being true.
+func (mctx *MigrationContext) AllMoveTablesRowCopyComplete() bool {
+	for _, tableName := range mctx.MoveTables.TableNames {
+		if mt := mctx.GetMoveTable(tableName); mt == nil || !mt.IsRowCopyComplete() {
+			return false
+		}
+	}
+	return true
+}
+
 // SendWithContext attempts to send a value to a channel, but returns early
 // if the context is cancelled. This prevents goroutine deadlocks when the
 // channel receiver has exited due to an error.
diff --git a/go/cmd/gh-ost/main.go b/go/cmd/gh-ost/main.go
index 0cf4f6121..2e054f520 100644
--- a/go/cmd/gh-ost/main.go
+++ b/go/cmd/gh-ost/main.go
@@ -387,10 +387,17 @@ func main() {
 			migrationContext.MoveTables.TableNames[i] = strings.TrimSpace(migrationContext.MoveTables.TableNames[i])
 		}
 		migrationContext.MoveTables.TableNames = slices.DeleteFunc(migrationContext.MoveTables.TableNames, func(s string) bool { return s == "" })
-		if len(migrationContext.MoveTables.TableNames) > 1 {
-			// Future version will support moving multiple tables at the same time.
-			// For now, we only support moving a single table at a time.
-			log.Fatal("--move-tables currently supports only a single table")
+		if len(migrationContext.MoveTables.TableNames) == 0 {
+			log.Fatal("--move-tables requires at least one table")
+		}
+		// Reject duplicate table names: a table listed twice would register two
+		// listeners and two row-copy loops for the same data.
+ seenMoveTables := make(map[string]bool, len(migrationContext.MoveTables.TableNames)) + for _, tableName := range migrationContext.MoveTables.TableNames { + if seenMoveTables[tableName] { + log.Fatalf("--move-tables lists table %q more than once", tableName) + } + seenMoveTables[tableName] = true } if migrationContext.MoveTables.TargetDatabase == "" { migrationContext.MoveTables.TargetDatabase = migrationContext.DatabaseName diff --git a/go/logic/applier.go b/go/logic/applier.go index cccf01631..ec1413e39 100644 --- a/go/logic/applier.go +++ b/go/logic/applier.go @@ -97,6 +97,26 @@ type Applier struct { moveTablesCopySelectFirstQueryBuilder *sql.MoveTableCopySelectQueryBuilder moveTablesCopySelectNextQueryBuilder *sql.MoveTableCopySelectQueryBuilder moveTablesCopyInsertQueryBuilder *sql.MoveTableCopyInsertQueryBuilder + + // moveTablesBuilders holds the per-table query builders, keyed by source + // table name. In move-tables mode there is one entry per migrated table; DML + // is routed to the right set at apply time using the TableName already on each + // binlog DML event. Empty in standard (single-table) mode. + moveTablesBuilders map[string]*moveTableBuilders +} + +// moveTableBuilders holds the query builders and schema needed to copy and apply +// DML for a single migrated table in move-tables mode. One instance exists per +// table; the applier selects the right instance by source table name. 
+type moveTableBuilders struct { + uniqueKey *sql.UniqueKey + originalTableColumns *sql.ColumnList + dmlDeleteQueryBuilder *sql.DMLDeleteQueryBuilder + dmlInsertQueryBuilder *sql.DMLInsertQueryBuilder + dmlUpdateQueryBuilder *sql.DMLUpdateQueryBuilder + copySelectFirstQueryBuilder *sql.MoveTableCopySelectQueryBuilder + copySelectNextQueryBuilder *sql.MoveTableCopySelectQueryBuilder + copyInsertQueryBuilder *sql.MoveTableCopyInsertQueryBuilder } func NewApplier(migrationContext *base.MigrationContext) *Applier { @@ -144,8 +164,15 @@ func (apl *Applier) checkpointRangeColumnNames() (minColumnNames []string, maxCo // hence the optional table name prefix. Metacharacters in table/index names are escaped to avoid // regex syntax errors. func (apl *Applier) compileMigrationKeyWarningRegex() (*regexp.Regexp, error) { - escapedTable := regexp.QuoteMeta(apl.migrationContext.GetTargetTableName()) - escapedKey := regexp.QuoteMeta(apl.migrationContext.UniqueKey.NameInGhostTable) + return compileKeyWarningRegex(apl.migrationContext.GetTargetTableName(), apl.migrationContext.UniqueKey.NameInGhostTable) +} + +// compileKeyWarningRegex compiles the duplicate-key warning regex for a specific +// target table + unique key name. In move-tables mode each table has its own +// unique key, so the duplicate-key filter must be compiled per table. 
+func compileKeyWarningRegex(targetTableName, uniqueKeyName string) (*regexp.Regexp, error) { + escapedTable := regexp.QuoteMeta(targetTableName) + escapedKey := regexp.QuoteMeta(uniqueKeyName) migrationUniqueKeyPattern := fmt.Sprintf(`for key '(%s\.)?%s'`, escapedTable, escapedKey) migrationKeyRegex, err := regexp.Compile(migrationUniqueKeyPattern) if err != nil { @@ -394,32 +421,83 @@ func (apl *Applier) prepareQueries() (err error) { } } if apl.migrationContext.IsMoveTablesMode() { - if apl.moveTablesCopySelectFirstQueryBuilder, err = sql.NewMoveTableCopySelectQueryBuilder( - apl.migrationContext.DatabaseName, - apl.originalTableName(), - apl.migrationContext.OriginalTableColumns, - apl.migrationContext.UniqueKey.Name, - &apl.migrationContext.UniqueKey.Columns, - true, // <-- include start range values for first select query - ); err != nil { - return err - } - if apl.moveTablesCopySelectNextQueryBuilder, err = sql.NewMoveTableCopySelectQueryBuilder( - apl.migrationContext.DatabaseName, - apl.originalTableName(), - apl.migrationContext.OriginalTableColumns, - apl.migrationContext.UniqueKey.Name, - &apl.migrationContext.UniqueKey.Columns, - false, - ); err != nil { - return err + // Build one set of query builders per migrated table. DML is routed to the + // right set at apply time by source table name (§2.1). The top-level DML + // builders above remain bound to the primary table for backward-compat with + // the single-table paths (checkpoint, status), but binlog DML application + // goes through moveTablesBuilders. 
+		apl.moveTablesBuilders = make(map[string]*moveTableBuilders, len(apl.migrationContext.MoveTables.TableNames))
+		for _, tableName := range apl.migrationContext.MoveTables.TableNames {
+			// Walk the requested names rather than OrderedMoveTables(), which skips
+			// tables lacking a container: an uninspected table must fail right here.
+			mt := apl.migrationContext.GetMoveTable(tableName)
+			if mt == nil || mt.UniqueKey == nil {
+				return fmt.Errorf("move-table %s.%s has no unique key; cannot prepare queries", apl.migrationContext.DatabaseName, tableName)
+			}
+			b := &moveTableBuilders{uniqueKey: mt.UniqueKey, originalTableColumns: mt.OriginalTableColumns}
+			if b.dmlDeleteQueryBuilder, err = sql.NewDMLDeleteQueryBuilder(
+				mt.TargetDatabaseName,
+				mt.TargetTableName,
+				mt.OriginalTableColumns,
+				&mt.UniqueKey.Columns,
+			); err != nil {
+				return err
+			}
+			if b.dmlInsertQueryBuilder, err = sql.NewDMLInsertQueryBuilder(
+				mt.TargetDatabaseName,
+				mt.TargetTableName,
+				mt.OriginalTableColumns,
+				mt.SharedColumns,
+				mt.MappedSharedColumns,
+			); err != nil {
+				return err
+			}
+			if b.dmlUpdateQueryBuilder, err = sql.NewDMLUpdateQueryBuilder(
+				mt.TargetDatabaseName,
+				mt.TargetTableName,
+				mt.OriginalTableColumns,
+				mt.SharedColumns,
+				mt.MappedSharedColumns,
+				&mt.UniqueKey.Columns,
+			); err != nil {
+				return err
+			}
+			if b.copySelectFirstQueryBuilder, err = sql.NewMoveTableCopySelectQueryBuilder(
+				mt.SourceDatabaseName,
+				mt.SourceTableName,
+				mt.OriginalTableColumns,
+				mt.UniqueKey.Name,
+				&mt.UniqueKey.Columns,
+				true, // <-- include start range values for first select query
+			); err != nil {
+				return err
+			}
+			if b.copySelectNextQueryBuilder, err = sql.NewMoveTableCopySelectQueryBuilder(
+				mt.SourceDatabaseName,
+				mt.SourceTableName,
+				mt.OriginalTableColumns,
+				mt.UniqueKey.Name,
+				&mt.UniqueKey.Columns,
+				false,
+			); err != nil {
+				return err
+			}
+			if b.copyInsertQueryBuilder, err = sql.NewMoveTableCopyInsertQueryBuilder(
+				mt.TargetDatabaseName,
+				mt.TargetTableName,
+				mt.OriginalTableColumns,
+			); err != nil {
+				return err
+			}
+			apl.moveTablesBuilders[mt.SourceTableName] = b
 		}
-		if apl.moveTablesCopyInsertQueryBuilder, err = sql.NewMoveTableCopyInsertQueryBuilder(
-
targetDatabaseName,
-			targetTableName,
-			apl.migrationContext.OriginalTableColumns,
-		); err != nil {
-			return err
+		// Keep the legacy single-table builders pointing at the primary table for
+		// any single-table code path that still reads them directly.
+		primary := apl.moveTablesBuilders[apl.migrationContext.MoveTablePrimaryName()]
+		if primary != nil {
+			apl.moveTablesCopySelectFirstQueryBuilder = primary.copySelectFirstQueryBuilder
+			apl.moveTablesCopySelectNextQueryBuilder = primary.copySelectNextQueryBuilder
+			apl.moveTablesCopyInsertQueryBuilder = primary.copyInsertQueryBuilder
 		}
 	}
 	return nil
@@ -683,7 +761,16 @@ func (apl *Applier) CreateTargetTable(createStatement string) error {
 	if !apl.migrationContext.IsMoveTablesMode() {
 		return errors.New("CreateTargetTable is only available in MoveTables mode")
 	}
-	targetTableName := apl.originalTableName()
+	return apl.CreateTargetTableForName(apl.originalTableName(), createStatement)
+}
+
+// CreateTargetTableForName creates the named target table on the target host
+// from the given CREATE statement. In multi-table move-tables mode it is called
+// once per migrated table.
+func (apl *Applier) CreateTargetTableForName(targetTableName, createStatement string) error {
+	if !apl.migrationContext.IsMoveTablesMode() {
+		return errors.New("CreateTargetTableForName is only available in MoveTables mode")
+	}
 	targetDatabase := apl.migrationContext.GetTargetDatabaseName()
 
 	// Explicit pre-check: abort before any data is copied if the target table
@@ -1353,8 +1440,120 @@ func (apl *Applier) CalculateNextIterationRangeEndValues(db *gosql.DB) (hasFurth
 	return hasFurtherRange, nil
 }
 
-// ApplyIterationInsertQuery issues a chunk-INSERT query on the ghost table. It is where
-// data actually gets copied from original table.
+// ReadMoveTableMigrationRangeValues reads the min/max unique-key values for a
+// single migrated table into its per-table container. It is the move-tables
+// analogue of ReadMigrationRangeValues; each table has its own range. When the
+// source table is empty no row is returned and the bounds are left untouched
+// (nil on a fresh container).
+func (apl *Applier) ReadMoveTableMigrationRangeValues(db *gosql.DB, mt *base.MoveTable) error {
+	if db == nil {
+		db = apl.db
+	}
+	tx, err := db.Begin()
+	if err != nil {
+		return err
+	}
+	defer tx.Rollback()
+
+	// readBound runs one MIN/MAX query inside tx and scans its row into *dest.
+	// The deferred Close releases the result set on every return path.
+	readBound := func(query string, dest **sql.ColumnValues) error {
+		rows, err := tx.Query(query)
+		if err != nil {
+			return err
+		}
+		defer rows.Close()
+		for rows.Next() {
+			*dest = sql.NewColumnValues(mt.UniqueKey.Len())
+			if err := rows.Scan((*dest).ValuesPointers...); err != nil {
+				return err
+			}
+		}
+		return rows.Err()
+	}
+
+	minQuery, err := sql.BuildUniqueKeyMinValuesPreparedQuery(mt.SourceDatabaseName, mt.SourceTableName, mt.UniqueKey)
+	if err != nil {
+		return err
+	}
+	if err = readBound(minQuery, &mt.MigrationRangeMinValues); err != nil {
+		return err
+	}
+	maxQuery, err := sql.BuildUniqueKeyMaxValuesPreparedQuery(mt.SourceDatabaseName, mt.SourceTableName, mt.UniqueKey)
+	if err != nil {
+		return err
+	}
+	if err = readBound(maxQuery, &mt.MigrationRangeMaxValues); err != nil {
+		return err
+	}
+
+	apl.migrationContext.Log.Infof("Move-table %s.%s migration range: [%s]..[%s]",
+		mt.SourceDatabaseName, mt.SourceTableName, mt.MigrationRangeMinValues, mt.MigrationRangeMaxValues)
+	return tx.Commit()
+}
+
+// CalculateMoveTableNextIterationRangeEndValues computes the next chunk's
+// range-end for a single migrated table, storing it in the table's container.
+// It returns false when the table has no further range to iterate (row copy
+// complete for that table), which includes a table whose range bounds were
+// never populated because it is empty. It is the move-tables analogue of
+// CalculateNextIterationRangeEndValues.
+func (apl *Applier) CalculateMoveTableNextIterationRangeEndValues(db *gosql.DB, mt *base.MoveTable) (hasFurtherRange bool, err error) {
+	if db == nil {
+		db = apl.db
+	}
+	// An empty source table leaves the range bounds nil: there is nothing to
+	// chunk, and the AbstractValues() calls below must not see a nil bound.
+	if mt.MigrationRangeMinValues == nil || mt.MigrationRangeMaxValues == nil {
+		return false, nil
+	}
+	for i := 0; i < 2; i++ {
+		buildFunc := sql.BuildUniqueKeyRangeEndPreparedQueryViaOffset
+		if i == 1 {
+			buildFunc = sql.BuildUniqueKeyRangeEndPreparedQueryViaTemptable
+		}
+		query, explodedArgs, err := buildFunc(
+			mt.SourceDatabaseName,
+			mt.SourceTableName,
+			&mt.UniqueKey.Columns,
+			mt.MigrationIterationRangeMinValues.AbstractValues(),
+			mt.MigrationRangeMaxValues.AbstractValues(),
+			atomic.LoadInt64(&apl.migrationContext.ChunkSize),
+			mt.GetIteration() == 0,
+			fmt.Sprintf("iteration:%d", mt.GetIteration()),
+		)
+		if err != nil {
+			return hasFurtherRange, err
+		}
+
+		rows, err := db.Query(query, explodedArgs...)
+		if err != nil {
+			return hasFurtherRange, err
+		}
+		iterationRangeMaxValues := sql.NewColumnValues(mt.UniqueKey.Len())
+		for rows.Next() {
+			if err = rows.Scan(iterationRangeMaxValues.ValuesPointers...); err != nil {
+				rows.Close()
+				return hasFurtherRange, err
+			}
+			hasFurtherRange = true
+		}
+		if err = rows.Err(); err != nil {
+			rows.Close()
+			return hasFurtherRange, err
+		}
+		rows.Close()
+		if hasFurtherRange {
+			mt.MigrationIterationRangeMaxValues = iterationRangeMaxValues
+			return hasFurtherRange, nil
+		}
+	}
+	apl.migrationContext.Log.Debugf("Move-table %s.%s iteration complete: no further range", mt.SourceDatabaseName, mt.SourceTableName)
+	return hasFurtherRange, nil
+}
+
+// ApplyIterationInsertQuery issues a chunk-INSERT query on the ghost table. It is where
+// data actually gets copied from original table.
 func (apl *Applier) ApplyIterationInsertQuery() (chunkSize int64, rowsAffected int64, duration time.Duration, err error) {
 	startTime := time.Now()
 	chunkSize = atomic.LoadInt64(&apl.migrationContext.ChunkSize)
@@ -1450,28 +1649,31 @@ func (apl *Applier) ApplyIterationInsertQuery() (chunkSize int64, rowsAffected i
 
 // ApplyIterationMoveTableCopyQueries issues a SELECT query on the original table and an INSERT query on the target table,
 // copying a chunk of rows.
It is used when `--move-tables` is specified, instead of ApplyIterationInsertQuery. -func (apl *Applier) ApplyIterationMoveTableCopyQueries(sourceDB *gosql.DB) (chunkSize int64, rowsAffected int64, duration time.Duration, err error) { +func (apl *Applier) ApplyIterationMoveTableCopyQueries(sourceDB *gosql.DB, mt *base.MoveTable) (chunkSize int64, rowsAffected int64, duration time.Duration, err error) { startTime := time.Now() chunkSize = atomic.LoadInt64(&apl.migrationContext.ChunkSize) if sourceDB == nil { return chunkSize, rowsAffected, duration, errors.New("source DB is required for move-tables copy") } + if mt == nil { + return chunkSize, rowsAffected, duration, errors.New("move-table container is required for move-tables copy") + } + builders := apl.moveTablesBuilders[mt.SourceTableName] + if builders == nil { + return chunkSize, rowsAffected, duration, fmt.Errorf("no query builders registered for move-table %s.%s", mt.SourceDatabaseName, mt.SourceTableName) + } // First, select data from the source database: rows, err := func() ([]*sql.ColumnValues, error) { var qb *sql.MoveTableCopySelectQueryBuilder - apl.migrationContext.Log.Debugf("Building SELECT query for move-tables; first: %v; rest: %v", - apl.moveTablesCopySelectFirstQueryBuilder, - apl.moveTablesCopySelectNextQueryBuilder) - - if apl.migrationContext.GetIteration() == 0 { - qb = apl.moveTablesCopySelectFirstQueryBuilder + if mt.GetIteration() == 0 { + qb = builders.copySelectFirstQueryBuilder } else { - qb = apl.moveTablesCopySelectNextQueryBuilder + qb = builders.copySelectNextQueryBuilder } query, explodedArgs, err := qb.BuildQuery( - apl.migrationContext.MigrationIterationRangeMinValues.AbstractValues(), - apl.migrationContext.MigrationIterationRangeMaxValues.AbstractValues(), + mt.MigrationIterationRangeMinValues.AbstractValues(), + mt.MigrationIterationRangeMaxValues.AbstractValues(), ) if err != nil { return nil, err @@ -1483,7 +1685,7 @@ func (apl *Applier) 
ApplyIterationMoveTableCopyQueries(sourceDB *gosql.DB) (chun defer sqlRows.Close() chunkRows := make([]*sql.ColumnValues, 0, chunkSize) for sqlRows.Next() { - row := sql.NewColumnValues(apl.migrationContext.SharedColumns.Len()) + row := sql.NewColumnValues(mt.SharedColumns.Len()) err := sqlRows.Scan(row.ValuesPointers...) if err != nil { return nil, err @@ -1507,7 +1709,7 @@ func (apl *Applier) ApplyIterationMoveTableCopyQueries(sourceDB *gosql.DB) (chun // Then, insert data into the destination database: sqlResult, err := func() (gosql.Result, error) { - query, explodedArgs, err := apl.moveTablesCopyInsertQueryBuilder.BuildQuery(rows) + query, explodedArgs, err := builders.copyInsertQueryBuilder.BuildQuery(rows) if err != nil { return nil, err } @@ -1538,7 +1740,7 @@ func (apl *Applier) ApplyIterationMoveTableCopyQueries(sourceDB *gosql.DB) (chun if err = rows.Err(); err != nil { return nil, err } - migrationKeyRegex, err := apl.compileMigrationKeyWarningRegex() + migrationKeyRegex, err := compileKeyWarningRegex(mt.TargetTableName, mt.UniqueKey.NameInGhostTable) if err != nil { return nil, err } @@ -1569,10 +1771,11 @@ func (apl *Applier) ApplyIterationMoveTableCopyQueries(sourceDB *gosql.DB) (chun rowsAffected, _ = sqlResult.RowsAffected() duration = time.Since(startTime) apl.migrationContext.Log.Debugf( - "Issued SELECT+INSERT on range: [%s]..[%s]; iteration: %d; chunk-size: %d", - apl.migrationContext.MigrationIterationRangeMinValues, - apl.migrationContext.MigrationIterationRangeMaxValues, - apl.migrationContext.GetIteration(), + "Issued SELECT+INSERT on %s.%s range: [%s]..[%s]; iteration: %d; chunk-size: %d", + mt.SourceDatabaseName, mt.SourceTableName, + mt.MigrationIterationRangeMinValues, + mt.MigrationIterationRangeMaxValues, + mt.GetIteration(), chunkSize, ) @@ -2049,9 +2252,9 @@ func (apl *Applier) ShowStatusVariable(variableName string) (result int64, err e // updateModifiesUniqueKeyColumns checks whether a UPDATE DML event actually // modifies values 
of the migration's unique key (the iterated key). This will call // for special handling. -func (apl *Applier) updateModifiesUniqueKeyColumns(dmlEvent *binlog.BinlogDMLEvent) (modifiedColumn string, isModified bool) { - for _, column := range apl.migrationContext.UniqueKey.Columns.Columns() { - tableOrdinal := apl.migrationContext.OriginalTableColumns.Ordinals[column.Name] +func (apl *Applier) updateModifiesUniqueKeyColumns(dmlEvent *binlog.BinlogDMLEvent, uniqueKey *sql.UniqueKey, originalTableColumns *sql.ColumnList) (modifiedColumn string, isModified bool) { + for _, column := range uniqueKey.Columns.Columns() { + tableOrdinal := originalTableColumns.Ordinals[column.Name] whereColumnValue := dmlEvent.WhereColumnValues.AbstractValues()[tableOrdinal] newColumnValue := dmlEvent.NewColumnValues.AbstractValues()[tableOrdinal] @@ -2065,20 +2268,41 @@ func (apl *Applier) updateModifiesUniqueKeyColumns(dmlEvent *binlog.BinlogDMLEve // buildDMLEventQuery creates a query to operate on the ghost table, based on an intercepted binlog // event entry on the original table. func (apl *Applier) buildDMLEventQuery(dmlEvent *binlog.BinlogDMLEvent) []*dmlBuildResult { + // Resolve the query builders + schema for the table this event targets. In + // move-tables mode the set is selected by source table name (one binlog + // stream feeds every table; routing happens here, §2.1). In standard mode + // there is a single set on the applier. 
+ deleteBuilder := apl.dmlDeleteQueryBuilder + insertBuilder := apl.dmlInsertQueryBuilder + updateBuilder := apl.dmlUpdateQueryBuilder + uniqueKey := apl.migrationContext.UniqueKey + originalTableColumns := apl.migrationContext.OriginalTableColumns + if apl.migrationContext.IsMoveTablesMode() { + b := apl.moveTablesBuilders[dmlEvent.TableName] + if b == nil { + return []*dmlBuildResult{newDmlBuildResultError(fmt.Errorf("no query builder registered for move-table %s.%s", dmlEvent.DatabaseName, dmlEvent.TableName))} + } + deleteBuilder = b.dmlDeleteQueryBuilder + insertBuilder = b.dmlInsertQueryBuilder + updateBuilder = b.dmlUpdateQueryBuilder + uniqueKey = b.uniqueKey + originalTableColumns = b.originalTableColumns + } + switch dmlEvent.DML { case binlog.DeleteDML: { - query, uniqueKeyArgs, err := apl.dmlDeleteQueryBuilder.BuildQuery(dmlEvent.WhereColumnValues.AbstractValues()) + query, uniqueKeyArgs, err := deleteBuilder.BuildQuery(dmlEvent.WhereColumnValues.AbstractValues()) return []*dmlBuildResult{newDmlBuildResult(query, uniqueKeyArgs, -1, err)} } case binlog.InsertDML: { - query, sharedArgs, err := apl.dmlInsertQueryBuilder.BuildQuery(dmlEvent.NewColumnValues.AbstractValues()) + query, sharedArgs, err := insertBuilder.BuildQuery(dmlEvent.NewColumnValues.AbstractValues()) return []*dmlBuildResult{newDmlBuildResult(query, sharedArgs, 1, err)} } case binlog.UpdateDML: { - if _, isModified := apl.updateModifiesUniqueKeyColumns(dmlEvent); isModified { + if _, isModified := apl.updateModifiesUniqueKeyColumns(dmlEvent, uniqueKey, originalTableColumns); isModified { results := make([]*dmlBuildResult, 0, 2) dmlEvent.DML = binlog.DeleteDML results = append(results, apl.buildDMLEventQuery(dmlEvent)...) @@ -2086,7 +2310,7 @@ func (apl *Applier) buildDMLEventQuery(dmlEvent *binlog.BinlogDMLEvent) []*dmlBu results = append(results, apl.buildDMLEventQuery(dmlEvent)...) 
return results } - query, updateArgs, err := apl.dmlUpdateQueryBuilder.BuildQuery(dmlEvent.NewColumnValues.AbstractValues(), dmlEvent.WhereColumnValues.AbstractValues()) + query, updateArgs, err := updateBuilder.BuildQuery(dmlEvent.NewColumnValues.AbstractValues(), dmlEvent.WhereColumnValues.AbstractValues()) args := sqlutils.Args() args = append(args, updateArgs...) return []*dmlBuildResult{newDmlBuildResult(query, args, 0, err)} diff --git a/go/logic/hooks.go b/go/logic/hooks.go index ceec4b6b9..383700ca1 100644 --- a/go/logic/hooks.go +++ b/go/logic/hooks.go @@ -244,6 +244,11 @@ func (he *HooksExecutor) applyEnvironmentVariables(extraVariables ...string) []s env = append(env, fmt.Sprintf("GH_OST_DRY_RUN=%t", he.migrationContext.Noop)) env = append(env, fmt.Sprintf("GH_OST_REVERT=%t", he.migrationContext.Revert)) env = append(env, fmt.Sprintf("GH_OST_MOVE_TABLES=%t", he.migrationContext.IsMoveTablesMode())) + if he.migrationContext.IsMoveTablesMode() { + // Comma-joined list of all migrated tables (§2.4). GH_OST_TABLE_NAME stays + // the primary table for backward compatibility. 
+ env = append(env, fmt.Sprintf("GH_OST_TABLES=%s", strings.Join(he.migrationContext.MoveTables.TableNames, ","))) + } env = append(env, fmt.Sprintf("GH_OST_TARGET_DATABASE_NAME=%s", he.migrationContext.GetTargetDatabaseName())) env = append(env, fmt.Sprintf("GH_OST_TARGET_TABLE_NAME=%s", he.migrationContext.GetTargetTableName())) diff --git a/go/logic/inspect.go b/go/logic/inspect.go index 59eb22282..52d380348 100644 --- a/go/logic/inspect.go +++ b/go/logic/inspect.go @@ -150,7 +150,7 @@ func (isp *Inspector) inspectOriginalAndGhostTables() (err error) { return err } sharedUniqueKeys := isp.getSharedUniqueKeys(isp.migrationContext.OriginalTableUniqueKeys, isp.migrationContext.GhostTableUniqueKeys) - isp.migrationContext.UniqueKey = isp.selectUniqueKey(sharedUniqueKeys) + isp.migrationContext.UniqueKey = isp.selectUniqueKey(isp.originalTableName(), sharedUniqueKeys) if isp.migrationContext.UniqueKey == nil { return fmt.Errorf("no shared unique key can be found after ALTER! Bailing out") } @@ -201,9 +201,9 @@ func (isp *Inspector) inspectOriginalAndGhostTables() (err error) { return nil } -func (isp *Inspector) selectUniqueKey(candidateKeys []*sql.UniqueKey) *sql.UniqueKey { +func (isp *Inspector) selectUniqueKey(tableName string, candidateKeys []*sql.UniqueKey) *sql.UniqueKey { for i, candidateKey := range candidateKeys { - isp.applyColumnTypes(isp.migrationContext.DatabaseName, isp.originalTableName(), &candidateKey.Columns) + isp.applyColumnTypes(isp.migrationContext.DatabaseName, tableName, &candidateKey.Columns) uniqueKeyIsValid := true for _, column := range candidateKey.Columns.Columns() { switch column.Type { @@ -509,6 +509,10 @@ func (isp *Inspector) validateTable() error { // validateTableForeignKeys makes sure no foreign keys exist on the migrated table func (isp *Inspector) validateTableForeignKeys(allowChildForeignKeys bool) error { + return isp.validateTableForeignKeysFor(isp.originalTableName(), allowChildForeignKeys) +} + +func (isp *Inspector) 
validateTableForeignKeysFor(tableName string, allowChildForeignKeys bool) error { if isp.migrationContext.SkipForeignKeyChecks { isp.migrationContext.Log.Warning("--skip-foreign-key-checks provided: will not check for foreign keys") return nil @@ -534,26 +538,26 @@ func (isp *Inspector) validateTableForeignKeys(allowChildForeignKeys bool) error return nil }, isp.migrationContext.DatabaseName, - isp.originalTableName(), + tableName, isp.migrationContext.DatabaseName, - isp.originalTableName(), + tableName, isp.migrationContext.DatabaseName, - isp.originalTableName(), + tableName, isp.migrationContext.DatabaseName, - isp.originalTableName(), + tableName, ) if err != nil { return err } if numParentForeignKeys > 0 { - return isp.migrationContext.Log.Errorf("found %d parent-side foreign keys on %s.%s. Parent-side foreign keys are not supported. Bailing out", numParentForeignKeys, sql.EscapeName(isp.migrationContext.DatabaseName), sql.EscapeName(isp.originalTableName())) + return isp.migrationContext.Log.Errorf("found %d parent-side foreign keys on %s.%s. Parent-side foreign keys are not supported. Bailing out", numParentForeignKeys, sql.EscapeName(isp.migrationContext.DatabaseName), sql.EscapeName(tableName)) } if numChildForeignKeys > 0 { if allowChildForeignKeys { isp.migrationContext.Log.Debugf("Foreign keys found and will be dropped, as per given --discard-foreign-keys flag") return nil } - return isp.migrationContext.Log.Errorf("found %d child-side foreign keys on %s.%s. Child-side foreign keys are not supported. Bailing out", numChildForeignKeys, sql.EscapeName(isp.migrationContext.DatabaseName), sql.EscapeName(isp.originalTableName())) + return isp.migrationContext.Log.Errorf("found %d child-side foreign keys on %s.%s. Child-side foreign keys are not supported. 
Bailing out", numChildForeignKeys, sql.EscapeName(isp.migrationContext.DatabaseName), sql.EscapeName(tableName)) } isp.migrationContext.Log.Debugf("Validated no foreign keys exist on table") return nil @@ -668,6 +672,52 @@ func (isp *Inspector) estimateTableRowsViaExplain() error { return nil } +// estimateTableRows estimates the number of rows in the given source table via +// EXPLAIN, returning the estimate rather than mutating shared context state. It +// is used to estimate each migrated table independently in move-tables mode. +func (isp *Inspector) estimateTableRows(tableName string) (int64, error) { + query := fmt.Sprintf(`explain select /* gh-ost */ * from %s.%s where 1=1`, + sql.EscapeName(isp.migrationContext.DatabaseName), sql.EscapeName(tableName)) + var rowsEstimate int64 + outputFound := false + err := sqlutils.QueryRowsMap(isp.db, query, func(rowMap sqlutils.RowMap) error { + rowsEstimate = rowMap.GetInt64("rows") + outputFound = true + return nil + }) + if err != nil { + return 0, err + } + if !outputFound { + return 0, isp.migrationContext.Log.Errorf("cannot run EXPLAIN on %s.%s!", + sql.EscapeName(isp.migrationContext.DatabaseName), sql.EscapeName(tableName)) + } + return rowsEstimate, nil +} + +// InspectMoveTable inspects a single source table for move-tables mode and +// returns its columns, virtual columns, chosen unique key, and row estimate. +// Unlike InspectOriginalTable it does not mutate shared migration context +// fields, so each migrated table can be inspected independently into its own +// per-table container. 
+func (isp *Inspector) InspectMoveTable(tableName string) (columns *sql.ColumnList, virtualColumns *sql.ColumnList, uniqueKeys [](*sql.UniqueKey), uniqueKey *sql.UniqueKey, rowsEstimate int64, err error) { + columns, virtualColumns, uniqueKeys, err = isp.InspectTableColumnsAndUniqueKeys(tableName) + if err != nil { + return nil, nil, nil, nil, 0, err + } + uniqueKey = isp.selectUniqueKey(tableName, uniqueKeys) + if uniqueKey == nil { + return nil, nil, nil, nil, 0, fmt.Errorf("no valid PRIMARY nor UNIQUE key found for table %s.%s; Bailing out", + sql.EscapeName(isp.migrationContext.DatabaseName), sql.EscapeName(tableName)) + } + rowsEstimate, err = isp.estimateTableRows(tableName) + if err != nil { + return nil, nil, nil, nil, 0, fmt.Errorf("failed to estimate rows for table %s.%s: %w", + sql.EscapeName(isp.migrationContext.DatabaseName), sql.EscapeName(tableName), err) + } + return columns, virtualColumns, uniqueKeys, uniqueKey, rowsEstimate, nil +} + // CountTableRows counts exact number of rows on the original table func (isp *Inspector) CountTableRows(ctx context.Context) error { atomic.StoreInt64(&isp.migrationContext.CountingRowsFlag, 1) diff --git a/go/logic/migrator.go b/go/logic/migrator.go index 20bc372c1..55a723868 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -804,12 +804,45 @@ func (mgtr *Migrator) Revert() error { // prepareMoveTablesCopyState initializes state for row copy in move-tables mode. // for move-tables functionality, the source and target tables are identical so we just need to grab any valid UNIQUE key constraint. 
-func (mgtr *Migrator) prepareMoveTablesCopyState() { - mgtr.migrationContext.UniqueKey = mgtr.inspector.selectUniqueKey(mgtr.migrationContext.OriginalTableUniqueKeys) - +func (mgtr *Migrator) prepareMoveTablesCopyState() error { + mctx := mgtr.migrationContext + mctx.InitMoveTableContainers() + + // Keep the primary table's top-level state populated (the inspector already + // inspected it via InspectOriginalTable) for the single-table code paths that + // still read it directly: checkpoint table creation, status, and naming. + mctx.UniqueKey = mgtr.inspector.selectUniqueKey(mctx.MoveTablePrimaryName(), mctx.OriginalTableUniqueKeys) // In move-tables mode source and target schemas match, so shared columns are identical. - mgtr.migrationContext.SharedColumns = mgtr.migrationContext.OriginalTableColumns - mgtr.migrationContext.MappedSharedColumns = mgtr.migrationContext.OriginalTableColumns + mctx.SharedColumns = mctx.OriginalTableColumns + mctx.MappedSharedColumns = mctx.OriginalTableColumns + + // Inspect every migrated table independently into its own container (§2.1). + var totalRowsEstimate int64 + for _, mt := range mctx.OrderedMoveTables() { + columns, virtualColumns, uniqueKeys, uniqueKey, rowsEstimate, err := mgtr.inspector.InspectMoveTable(mt.SourceTableName) + if err != nil { + return fmt.Errorf("failed to inspect move-table %s.%s: %w", mt.SourceDatabaseName, mt.SourceTableName, err) + } + // Validate each entry like a standard single-table run (§2.1): reject + // unsupported foreign keys. + if err := mgtr.inspector.validateTableForeignKeysFor(mt.SourceTableName, mctx.DiscardForeignKeys); err != nil { + return fmt.Errorf("failed to validate foreign keys on move-table %s.%s: %w", mt.SourceDatabaseName, mt.SourceTableName, err) + } + mt.OriginalTableColumns = columns + mt.OriginalTableVirtualColumns = virtualColumns + mt.OriginalTableUniqueKeys = uniqueKeys + mt.UniqueKey = uniqueKey + // Source and target schemas match in move-tables mode. 
+ mt.SharedColumns = columns + mt.MappedSharedColumns = columns + atomic.StoreInt64(&mt.RowsEstimate, rowsEstimate) + totalRowsEstimate += rowsEstimate + mctx.Log.Infof("Move-table %s.%s ready: unique key %s, ~%d rows", + mt.SourceDatabaseName, mt.SourceTableName, mt.UniqueKey.Name, rowsEstimate) + } + // Aggregate row estimate across all tables for status/ETA. + atomic.StoreInt64(&mctx.RowsEstimate, totalRowsEstimate) + return nil } func (mgtr *Migrator) hydrateMoveTablesStateFromTarget() error { @@ -817,17 +850,38 @@ func (mgtr *Migrator) hydrateMoveTablesStateFromTarget() error { probeContext.DatabaseName = mgtr.migrationContext.GetTargetDatabaseName() targetInspector := &Inspector{db: mgtr.applier.moveTablesTargetDB, migrationContext: probeContext} - columns, virtualColumns, uniqueKeys, err := targetInspector.InspectTableColumnsAndUniqueKeys(mgtr.migrationContext.GetTargetTableName()) - if err != nil { - return err - } + mgtr.migrationContext.InitMoveTableContainers() - mgtr.migrationContext.OriginalTableColumns = columns - mgtr.migrationContext.OriginalTableVirtualColumns = virtualColumns - mgtr.migrationContext.OriginalTableUniqueKeys = uniqueKeys - mgtr.migrationContext.UniqueKey = targetInspector.selectUniqueKey(uniqueKeys) - mgtr.migrationContext.SharedColumns = columns - mgtr.migrationContext.MappedSharedColumns = columns + var totalRowsEstimate int64 + for _, mt := range mgtr.migrationContext.OrderedMoveTables() { + columns, virtualColumns, uniqueKeys, err := targetInspector.InspectTableColumnsAndUniqueKeys(mt.TargetTableName) + if err != nil { + return err + } + uniqueKey := targetInspector.selectUniqueKey(mt.TargetTableName, uniqueKeys) + if uniqueKey == nil { + return fmt.Errorf("no valid unique key on target table %s.%s", mt.TargetDatabaseName, mt.TargetTableName) + } + mt.OriginalTableColumns = columns + mt.OriginalTableVirtualColumns = virtualColumns + mt.OriginalTableUniqueKeys = uniqueKeys + mt.UniqueKey = uniqueKey + mt.SharedColumns = columns + 
mt.MappedSharedColumns = columns + totalRowsEstimate += atomic.LoadInt64(&mt.RowsEstimate) + } + + // Primary top-level state for single-table code paths (checkpoint/status). + primary := mgtr.migrationContext.GetMoveTable(mgtr.migrationContext.MoveTablePrimaryName()) + if primary != nil { + mgtr.migrationContext.OriginalTableColumns = primary.OriginalTableColumns + mgtr.migrationContext.OriginalTableVirtualColumns = primary.OriginalTableVirtualColumns + mgtr.migrationContext.OriginalTableUniqueKeys = primary.OriginalTableUniqueKeys + mgtr.migrationContext.UniqueKey = primary.UniqueKey + mgtr.migrationContext.SharedColumns = primary.SharedColumns + mgtr.migrationContext.MappedSharedColumns = primary.MappedSharedColumns + } + atomic.StoreInt64(&mgtr.migrationContext.RowsEstimate, totalRowsEstimate) return nil } @@ -1113,7 +1167,9 @@ func (mgtr *Migrator) MoveTables() (err error) { if err := mgtr.checkAbort(); err != nil { return err } - mgtr.prepareMoveTablesCopyState() + if err := mgtr.prepareMoveTablesCopyState(); err != nil { + return err + } if err := mgtr.initiateApplier(); err != nil { return err } @@ -1174,7 +1230,7 @@ func (mgtr *Migrator) MoveTables() (err error) { if err := mgtr.addDMLEventsListener(); err != nil { return err } - if err := mgtr.applier.ReadMigrationRangeValues(mgtr.inspector.db); err != nil { + if err := mgtr.readMoveTablesMigrationRanges(); err != nil { return err } @@ -1190,7 +1246,7 @@ func (mgtr *Migrator) MoveTables() (err error) { _ = base.SendWithContext(mgtr.migrationContext.GetContext(), mgtr.migrationContext.PanicAbort, err) } }() - go mgtr.iterateChunks() + go mgtr.iterateChunksMoveTables() mgtr.migrationContext.MarkRowCopyStartTime() go mgtr.initiateStatus() if mgtr.migrationContext.Checkpoint { @@ -1304,12 +1360,23 @@ func (mgtr *Migrator) moveTablesCutOver() (err error) { } sourceDB := mgtr.migrationContext.DatabaseName - sourceTable := mgtr.migrationContext.OriginalTableName - delTable := 
mgtr.migrationContext.GetOldTableName() - renameAndCaptureQuery := fmt.Sprintf("rename /* gh-ost */ table %s.%s to %s.%s;\nselect @@global.gtid_executed", - sql.EscapeName(sourceDB), sql.EscapeName(sourceTable), - sql.EscapeName(sourceDB), sql.EscapeName(delTable)) - mgtr.migrationContext.Log.Infof("T1+T2: renaming source table and capturing drain GTID: %s", renameAndCaptureQuery) + // Build a single atomic multi-table RENAME covering every migrated table + // (§2.4). MySQL executes `RENAME TABLE a TO b, c TO d` atomically at the + // storage-engine and binlog level: either all renames are visible or none, + // and they appear in the binlog as one event group with one GTID. That single + // drain GTID therefore covers the whole move set, and combined with the shared + // binlog stream the existing Stage 1 drain mechanism works unchanged. + renameClauses := make([]string, 0, len(mgtr.migrationContext.MoveTables.TableNames)) + for _, mt := range mgtr.migrationContext.OrderedMoveTables() { + delTable := mgtr.migrationContext.MoveTableDelName(mt.SourceTableName) + renameClauses = append(renameClauses, fmt.Sprintf("%s.%s to %s.%s", + sql.EscapeName(sourceDB), sql.EscapeName(mt.SourceTableName), + sql.EscapeName(sourceDB), sql.EscapeName(delTable))) + } + renameAndCaptureQuery := fmt.Sprintf("rename /* gh-ost */ table %s;\nselect @@global.gtid_executed", + strings.Join(renameClauses, ", ")) + mgtr.migrationContext.Log.Infof("T1+T2: renaming %d source table(s) and capturing drain GTID: %s", + len(renameClauses), renameAndCaptureQuery) // @@GLOBAL scope is explicit so the intent is unambiguous in the SQL itself. 
// Design: https://github.com/github/gh-ost-tablemove-poc/blob/9dc6df75c4c88ff473906a497836c7518f5614ec/design/coop_cutover.md#32-correctness-verification-for-p4 @@ -1807,28 +1874,31 @@ func (mgtr *Migrator) validateMoveTablesSourceReadHost() error { return fmt.Errorf("move-tables source --host %+v is the cluster primary; reading the full table copy from the primary is the load move-tables is meant to avoid. Point --host at a replica so reads come off the primary, or pass --allow-on-source-primary to proceed against the primary anyway", spc.Key) } -// dropSourceOldTable drops the source `__del` rollback handle on the source +// dropSourceOldTable drops the source `__del` rollback handles on the source // primary. The inspector/streamer source connections may be a read replica, so -// the drop cannot go through them; it must use the writable source-primary handle. +// the drop cannot go through them; it must use the writable source-primary +// handle. In multi-table mode every migrated table's `__del` is dropped. 
func (mgtr *Migrator) dropSourceOldTable() error { if mgtr.sourcePrimaryDB == nil { return errors.New("source primary connection not initialized; cannot drop source __del table") } databaseName := mgtr.migrationContext.DatabaseName - tableName := mgtr.migrationContext.GetOldTableName() - query := fmt.Sprintf(`drop /* gh-ost */ table if exists %s.%s`, - sql.EscapeName(databaseName), - sql.EscapeName(tableName), - ) - mgtr.migrationContext.Log.Infof("Dropping source table %s.%s on primary %+v", - sql.EscapeName(databaseName), - sql.EscapeName(tableName), - mgtr.migrationContext.MoveTables.SourcePrimaryConnectionConfig.Key, - ) - if _, err := mgtr.sourcePrimaryDB.Exec(query); err != nil { - return err + for _, mt := range mgtr.migrationContext.OrderedMoveTables() { + tableName := mgtr.migrationContext.MoveTableDelName(mt.SourceTableName) + query := fmt.Sprintf(`drop /* gh-ost */ table if exists %s.%s`, + sql.EscapeName(databaseName), + sql.EscapeName(tableName), + ) + mgtr.migrationContext.Log.Infof("Dropping source table %s.%s on primary %+v", + sql.EscapeName(databaseName), + sql.EscapeName(tableName), + mgtr.migrationContext.MoveTables.SourcePrimaryConnectionConfig.Key, + ) + if _, err := mgtr.sourcePrimaryDB.Exec(query); err != nil { + return err + } + mgtr.migrationContext.Log.Infof("Source table dropped") } - mgtr.migrationContext.Log.Infof("Source table dropped") return nil } @@ -2228,25 +2298,39 @@ func (mgtr *Migrator) initiateStreaming() error { return nil } -// addDMLEventsListener begins listening for binlog events on the original table, -// and creates & enqueues a write task per such event. +// addDMLEventsListener begins listening for binlog events on the migrated +// table(s), and creates & enqueues a write task per such event. 
In move-tables +// mode it registers one listener per migrated table on the single shared events +// streamer (§2.2); every listener uses the same callback (enqueue onto the +// shared apply queue) and the applier routes each event to the right table by +// name. There is still exactly one binlog connection. func (mgtr *Migrator) addDMLEventsListener() error { - originalTableName := mgtr.migrationContext.OriginalTableName + enqueue := func(dmlEntry *binlog.BinlogEntry) error { + // Use helper to prevent deadlock if buffer fills and executeWriteFuncs exits. + // This is critical because this callback blocks the event streamer. + return base.SendWithContext(mgtr.migrationContext.GetContext(), mgtr.applyEventsQueue, newApplyEventStructByDML(dmlEntry)) + } + if mgtr.migrationContext.IsMoveTablesMode() { - originalTableName = mgtr.migrationContext.MoveTables.TableNames[0] + for _, mt := range mgtr.migrationContext.OrderedMoveTables() { + if err := mgtr.eventsStreamer.AddListener( + false, + mt.SourceDatabaseName, + mt.SourceTableName, + enqueue, + ); err != nil { + return err + } + } + return nil } - err := mgtr.eventsStreamer.AddListener( + return mgtr.eventsStreamer.AddListener( false, mgtr.migrationContext.DatabaseName, - originalTableName, - func(dmlEntry *binlog.BinlogEntry) error { - // Use helper to prevent deadlock if buffer fills and executeWriteFuncs exits - // This is critical because this callback blocks the event streamer - return base.SendWithContext(mgtr.migrationContext.GetContext(), mgtr.applyEventsQueue, newApplyEventStructByDML(dmlEntry)) - }, + mgtr.migrationContext.OriginalTableName, + enqueue, ) - return err } // initiateThrottler kicks in the throttling collection and the throttling checks. 
@@ -2275,18 +2359,22 @@ func (mgtr *Migrator) initiateApplier() error { if mgtr.migrationContext.IsMoveTablesMode() { if !mgtr.migrationContext.Resume { - createTableStatement, err := mgtr.inspector.showCreateTable(mgtr.migrationContext.MoveTables.TableNames[0]) - if err != nil { - return fmt.Errorf("failed to fetch create table statement: %w", err) - } - if err := mgtr.applier.CreateTargetTable(createTableStatement); err != nil { - mgtr.migrationContext.Log.Errorf("unable to create target table, see further error details. Perhaps a previous migration failed without dropping the table? Bailing out") - return err + // Create every target table from its source CREATE statement (§2.1). + for _, mt := range mgtr.migrationContext.OrderedMoveTables() { + createTableStatement, err := mgtr.inspector.showCreateTable(mt.SourceTableName) + if err != nil { + return fmt.Errorf("failed to fetch create table statement for %s.%s: %w", mt.SourceDatabaseName, mt.SourceTableName, err) + } + mt.CreateTableStatement = createTableStatement + if err := mgtr.applier.CreateTargetTableForName(mt.TargetTableName, createTableStatement); err != nil { + mgtr.migrationContext.Log.Errorf("unable to create target table %s.%s, see further error details. Perhaps a previous migration failed without dropping the table? 
Bailing out", mt.TargetDatabaseName, mt.TargetTableName) + return err + } } } else { - mgtr.migrationContext.Log.Infof("Resuming move-tables; reusing existing target table %s.%s", + mgtr.migrationContext.Log.Infof("Resuming move-tables; reusing existing target tables %v in %s", + mgtr.migrationContext.MoveTables.TableNames, sql.EscapeName(mgtr.migrationContext.GetTargetDatabaseName()), - sql.EscapeName(mgtr.migrationContext.GetTargetTableName()), ) } } else { @@ -2381,13 +2469,7 @@ func (mgtr *Migrator) iterateChunks() error { } // When hasFurtherRange is false, original table might be write locked and CalculateNextIterationRangeEndValues would hangs forever - var hasFurtherRange bool - var err error - if mgtr.migrationContext.IsMoveTablesMode() { - hasFurtherRange, err = mgtr.applier.CalculateNextIterationRangeEndValues(mgtr.inspector.db) - } else { - hasFurtherRange, err = mgtr.applier.CalculateNextIterationRangeEndValues(nil) - } + hasFurtherRange, err := mgtr.applier.CalculateNextIterationRangeEndValues(nil) if err != nil { return err // wrapping call will retry } @@ -2407,11 +2489,7 @@ func (mgtr *Migrator) iterateChunks() error { return nil } var rowsAffected int64 - if mgtr.migrationContext.IsMoveTablesMode() { - _, rowsAffected, _, err = mgtr.applier.ApplyIterationMoveTableCopyQueries(mgtr.inspector.db) - } else { - _, rowsAffected, _, err = mgtr.applier.ApplyIterationInsertQuery() - } + _, rowsAffected, _, err = mgtr.applier.ApplyIterationInsertQuery() if err != nil { return err // wrapping call will retry } @@ -2457,6 +2535,127 @@ func (mgtr *Migrator) iterateChunks() error { } } +// readMoveTablesMigrationRanges reads the per-table min/max unique-key range for +// every migrated table (§2.3). Each table has its own range stored in its +// container. 
+func (mgtr *Migrator) readMoveTablesMigrationRanges() error { + for _, mt := range mgtr.migrationContext.OrderedMoveTables() { + if err := mgtr.applier.ReadMoveTableMigrationRangeValues(mgtr.inspector.db, mt); err != nil { + return fmt.Errorf("failed to read migration range for %s.%s: %w", mt.SourceDatabaseName, mt.SourceTableName, err) + } + } + return nil +} + +// moveTableCopyChunkFunc returns a copy task that copies a single chunk of the +// given table from source to target. The task is enqueued onto the shared +// copyRowsQueue and executed (single-threaded) by executeWriteFuncs, just like +// the standard-mode copy task — but it advances only this table's per-table +// iteration state, so multiple tables make progress concurrently (§2.3). +func (mgtr *Migrator) moveTableCopyChunkFunc(mt *base.MoveTable) func() error { + return func() error { + if mt.IsRowCopyComplete() || atomic.LoadInt64(&mgtr.rowCopyCompleteFlag) == 1 { + return nil + } + mt.SetNextIterationRangeMinValues() + applyCopyRowsFunc := func() error { + if mt.IsRowCopyComplete() || atomic.LoadInt64(&mgtr.rowCopyCompleteFlag) == 1 { + return nil + } + hasFurtherRange, err := mgtr.applier.CalculateMoveTableNextIterationRangeEndValues(mgtr.inspector.db, mt) + if err != nil { + return err // wrapping call will retry + } + if !hasFurtherRange { + mt.SetRowCopyComplete() + mgtr.migrationContext.Log.Infof("Row copy complete for %s.%s", mt.SourceDatabaseName, mt.SourceTableName) + return nil + } + if atomic.LoadInt64(&mgtr.rowCopyCompleteFlag) == 1 { + return nil + } + _, rowsAffected, _, err := mgtr.applier.ApplyIterationMoveTableCopyQueries(mgtr.inspector.db, mt) + if err != nil { + return err // wrapping call will retry + } + mgtr.migrationContext.Log.Debugf("ApplyIterationMoveTableCopyQueries on %s.%s affected %d rows", mt.SourceDatabaseName, mt.SourceTableName, rowsAffected) + + if mgtr.migrationContext.PanicOnWarnings { + if len(mgtr.migrationContext.MigrationLastInsertSQLWarnings) > 0 { + for _, 
warning := range mgtr.migrationContext.MigrationLastInsertSQLWarnings { + mgtr.migrationContext.Log.Infof("move-table copy on %s.%s has SQL warnings! %s", mt.SourceDatabaseName, mt.SourceTableName, warning) + } + joinedWarnings := strings.Join(mgtr.migrationContext.MigrationLastInsertSQLWarnings, "; ") + return fmt.Errorf("move-table copy on %s.%s failed because of SQL warnings: [%s]", mt.SourceDatabaseName, mt.SourceTableName, joinedWarnings) + } + } + + atomic.AddInt64(&mt.RowsCopied, rowsAffected) + atomic.AddInt64(&mgtr.migrationContext.TotalRowsCopied, rowsAffected) + mt.IncrementIteration() + return nil + } + if err := mgtr.retryBatchCopyWithHooks(applyCopyRowsFunc); err != nil { + return err + } + mt.RecordLastIterationRange() + return nil + } +} + +// iterateChunksMoveTables drives the interleaved multi-table row copy (§2.3). It +// round-robins over the migrated tables in --move-tables order, enqueuing one +// chunk per not-yet-complete table per pass onto the shared copyRowsQueue, until +// every table reports row copy complete. There is still one apply pipeline; the +// per-table iteration state lives in each table's container. +func (mgtr *Migrator) iterateChunksMoveTables() error { + ctx := mgtr.migrationContext.GetContext() + terminateRowIteration := func(err error) error { + _ = base.SendWithContext(ctx, mgtr.rowCopyComplete, err) + return mgtr.migrationContext.Log.Errore(err) + } + if mgtr.migrationContext.Noop { + mgtr.migrationContext.Log.Debugf("Noop operation; not really copying data") + return terminateRowIteration(nil) + } + + tables := mgtr.migrationContext.OrderedMoveTables() + // A table with no rows is immediately complete. + for _, mt := range tables { + if mt.MigrationRangeMinValues == nil { + mgtr.migrationContext.Log.Debugf("No rows found in %s.%s. 
Row copy implicitly empty", mt.SourceDatabaseName, mt.SourceTableName) + mt.SetRowCopyComplete() + } + } + + for { + if err := mgtr.checkAbort(); err != nil { + return terminateRowIteration(err) + } + if atomic.LoadInt64(&mgtr.rowCopyCompleteFlag) == 1 { + return nil + } + // Row copy is done for the whole move set only once every table is + // complete. Signal completion exactly once (§2.3). + if mgtr.migrationContext.AllMoveTablesRowCopyComplete() { + return terminateRowIteration(nil) + } + // Enqueue one chunk per not-yet-complete table, in deterministic order. + for _, mt := range tables { + if mt.IsRowCopyComplete() { + continue + } + copyRowsFunc := mgtr.moveTableCopyChunkFunc(mt) + if err := base.SendWithContext(ctx, mgtr.copyRowsQueue, copyRowsFunc); err != nil { + if abortErr := mgtr.checkAbort(); abortErr != nil { + return terminateRowIteration(abortErr) + } + return terminateRowIteration(err) + } + } + } +} + func (mgtr *Migrator) onApplyEventStruct(eventStruct *applyEventStruct) error { atomic.AddInt64(&mgtr.applyEventsInFlight, 1) defer atomic.AddInt64(&mgtr.applyEventsInFlight, -1) From 7778d745be0b881e8644c6483810d6e4820f24cc Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Mon, 22 Jun 2026 19:15:29 +0000 Subject: [PATCH 02/25] stash --- go/logic/applier_test.go | 36 +- go/logic/hooks.go | 1 + go/logic/migrator.go | 385 ++++++++++-------- localtests/move-tables/multi/create.sql | 68 ++++ localtests/move-tables/multi/tables.txt | 2 + script/move-tables/README.md | 43 +- script/move-tables/insert-source-primary-loop | 36 +- script/move-tables/reset | 31 +- script/move-tables/setup | 2 +- 9 files changed, 392 insertions(+), 212 deletions(-) create mode 100644 localtests/move-tables/multi/create.sql create mode 100644 localtests/move-tables/multi/tables.txt diff --git a/go/logic/applier_test.go b/go/logic/applier_test.go index 7e6a48790..ebc4db08c 100644 --- a/go/logic/applier_test.go +++ b/go/logic/applier_test.go @@ -84,7 +84,7 @@ func 
TestApplierUpdateModifiesUniqueKeyColumns(t *testing.T) { DML: binlog.UpdateDML, NewColumnValues: columnValues, WhereColumnValues: columnValues, - }) + }, migrationContext.UniqueKey, migrationContext.OriginalTableColumns) require.Equal(t, "", modifiedColumn) require.False(t, isModified) }) @@ -95,7 +95,7 @@ func TestApplierUpdateModifiesUniqueKeyColumns(t *testing.T) { DML: binlog.UpdateDML, NewColumnValues: sql.ToColumnValues([]interface{}{123456, 24}), WhereColumnValues: columnValues, - }) + }, migrationContext.UniqueKey, migrationContext.OriginalTableColumns) require.Equal(t, "item_id", modifiedColumn) require.True(t, isModified) }) @@ -2112,6 +2112,15 @@ func (suite *ApplierTestSuite) TestApplyIterationMoveTableCopyQueries() { migrationContext.MoveTables.TableNames = []string{testMysqlTableName} migrationContext.MoveTables.TargetDatabase = testMysqlDatabaseOther + // Populate the per-table container the move-tables copy path operates on. + migrationContext.InitMoveTableContainers() + mt := migrationContext.GetMoveTable(testMysqlTableName) + suite.Require().NotNil(mt) + mt.OriginalTableColumns = migrationContext.OriginalTableColumns + mt.SharedColumns = migrationContext.SharedColumns + mt.MappedSharedColumns = migrationContext.MappedSharedColumns + mt.UniqueKey = migrationContext.UniqueKey + applier := NewApplier(migrationContext) applier.prepareQueries() defer applier.Teardown() @@ -2122,15 +2131,15 @@ func (suite *ApplierTestSuite) TestApplyIterationMoveTableCopyQueries() { err = applier.CreateChangelogTable() suite.Require().NoError(err) - err = applier.ReadMigrationRangeValues(nil) + err = applier.ReadMoveTableMigrationRangeValues(nil, mt) suite.Require().NoError(err) - migrationContext.SetNextIterationRangeMinValues() - hasFurtherRange, err := applier.CalculateNextIterationRangeEndValues(nil) + mt.SetNextIterationRangeMinValues() + hasFurtherRange, err := applier.CalculateMoveTableNextIterationRangeEndValues(applier.db, mt) suite.Require().NoError(err) 
suite.Require().True(hasFurtherRange) - chunkSize, rowsAffected, duration, err := applier.ApplyIterationMoveTableCopyQueries(applier.db) + chunkSize, rowsAffected, duration, err := applier.ApplyIterationMoveTableCopyQueries(applier.db, mt) suite.Require().NoError(err) suite.Require().Equal(int64(3), rowsAffected) suite.Require().Equal(int64(1000), chunkSize) @@ -2195,6 +2204,15 @@ func (suite *ApplierTestSuite) TestApplyIterationMoveTableCopyQueriesNoRows() { migrationContext.MoveTables.TableNames = []string{testMysqlTableName} migrationContext.MoveTables.TargetDatabase = testMysqlDatabaseOther + // Populate the per-table container the move-tables copy path operates on. + migrationContext.InitMoveTableContainers() + mt := migrationContext.GetMoveTable(testMysqlTableName) + suite.Require().NotNil(mt) + mt.OriginalTableColumns = migrationContext.OriginalTableColumns + mt.SharedColumns = migrationContext.SharedColumns + mt.MappedSharedColumns = migrationContext.MappedSharedColumns + mt.UniqueKey = migrationContext.UniqueKey + applier := NewApplier(migrationContext) applier.prepareQueries() defer applier.Teardown() @@ -2204,10 +2222,10 @@ func (suite *ApplierTestSuite) TestApplyIterationMoveTableCopyQueriesNoRows() { // Point the iteration range at a key range that contains no rows so the // SELECT returns an empty result set and the INSERT is skipped. 
- migrationContext.MigrationIterationRangeMinValues = sql.ToColumnValues([]interface{}{100}) - migrationContext.MigrationIterationRangeMaxValues = sql.ToColumnValues([]interface{}{200}) + mt.MigrationIterationRangeMinValues = sql.ToColumnValues([]interface{}{100}) + mt.MigrationIterationRangeMaxValues = sql.ToColumnValues([]interface{}{200}) - chunkSize, rowsAffected, duration, err := applier.ApplyIterationMoveTableCopyQueries(applier.db) + chunkSize, rowsAffected, duration, err := applier.ApplyIterationMoveTableCopyQueries(applier.db, mt) suite.Require().NoError(err) suite.Require().Equal(int64(0), rowsAffected) suite.Require().Equal(int64(1000), chunkSize) diff --git a/go/logic/hooks.go b/go/logic/hooks.go index 383700ca1..67220eac4 100644 --- a/go/logic/hooks.go +++ b/go/logic/hooks.go @@ -11,6 +11,7 @@ import ( "os" "os/exec" "path/filepath" + "strings" "sync/atomic" "github.com/github/gh-ost/go/base" diff --git a/go/logic/migrator.go b/go/logic/migrator.go index 713badcd8..9bba159a1 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -802,46 +802,57 @@ func (mgtr *Migrator) Revert() error { return nil } -// prepareMoveTablesCopyState initializes state for row copy in move-tables mode. -// for move-tables functionality, the source and target tables are identical so we just need to grab any valid UNIQUE key constraint. +// prepareMoveTablesCopyState initializes per-table runtime state for row copy in +// move-tables mode (§2.1). Each migrated table is inspected independently into +// its own container (schema, unique key, row estimate, CREATE statement). The +// top-level migration-context fields stay bound to the primary table so the +// single-table code paths (checkpoint schema, status hint, naming) keep working; +// a single-entry --move-tables therefore behaves exactly as before. 
func (mgtr *Migrator) prepareMoveTablesCopyState() error { - mctx := mgtr.migrationContext - mctx.InitMoveTableContainers() - - // Keep the primary table's top-level state populated (the inspector already - // inspected it via InspectOriginalTable) for the single-table code paths that - // still read it directly: checkpoint table creation, status, and naming. - mctx.UniqueKey = mgtr.inspector.selectUniqueKey(mctx.MoveTablePrimaryName(), mctx.OriginalTableUniqueKeys) - // In move-tables mode source and target schemas match, so shared columns are identical. - mctx.SharedColumns = mctx.OriginalTableColumns - mctx.MappedSharedColumns = mctx.OriginalTableColumns - - // Inspect every migrated table independently into its own container (§2.1). + mgtr.migrationContext.InitMoveTableContainers() + var totalRowsEstimate int64 - for _, mt := range mctx.OrderedMoveTables() { + for _, mt := range mgtr.migrationContext.OrderedMoveTables() { columns, virtualColumns, uniqueKeys, uniqueKey, rowsEstimate, err := mgtr.inspector.InspectMoveTable(mt.SourceTableName) if err != nil { - return fmt.Errorf("failed to inspect move-table %s.%s: %w", mt.SourceDatabaseName, mt.SourceTableName, err) + return fmt.Errorf("failed to inspect move-table %s.%s: %w", + sql.EscapeName(mt.SourceDatabaseName), sql.EscapeName(mt.SourceTableName), err) + } + // Validate each entry like a standard single-table run. + if err := mgtr.inspector.validateTableForeignKeysFor(mt.SourceTableName, mgtr.migrationContext.DiscardForeignKeys); err != nil { + return fmt.Errorf("failed to validate foreign keys on move-table %s.%s: %w", + sql.EscapeName(mt.SourceDatabaseName), sql.EscapeName(mt.SourceTableName), err) } - // Validate each entry like a standard single-table run (§2.1): reject - // unsupported foreign keys. 
- if err := mgtr.inspector.validateTableForeignKeysFor(mt.SourceTableName, mctx.DiscardForeignKeys); err != nil { - return fmt.Errorf("failed to validate foreign keys on move-table %s.%s: %w", mt.SourceDatabaseName, mt.SourceTableName, err) + createStatement, err := mgtr.inspector.showCreateTable(mt.SourceTableName) + if err != nil { + return fmt.Errorf("failed to fetch create table statement for %s.%s: %w", + sql.EscapeName(mt.SourceDatabaseName), sql.EscapeName(mt.SourceTableName), err) } + mt.OriginalTableColumns = columns mt.OriginalTableVirtualColumns = virtualColumns mt.OriginalTableUniqueKeys = uniqueKeys mt.UniqueKey = uniqueKey - // Source and target schemas match in move-tables mode. + // In move-tables mode source and target schemas match, so shared columns are identical. mt.SharedColumns = columns mt.MappedSharedColumns = columns - atomic.StoreInt64(&mt.RowsEstimate, rowsEstimate) + mt.RowsEstimate = rowsEstimate + mt.CreateTableStatement = createStatement totalRowsEstimate += rowsEstimate - mctx.Log.Infof("Move-table %s.%s ready: unique key %s, ~%d rows", - mt.SourceDatabaseName, mt.SourceTableName, mt.UniqueKey.Name, rowsEstimate) } - // Aggregate row estimate across all tables for status/ETA. - atomic.StoreInt64(&mctx.RowsEstimate, totalRowsEstimate) + + // Keep top-level fields bound to the primary table for backward-compat with + // single-table code paths (checkpoint schema, status hint, naming). 
+ if primary := mgtr.migrationContext.GetMoveTable(mgtr.migrationContext.MoveTablePrimaryName()); primary != nil { + mgtr.migrationContext.OriginalTableColumns = primary.OriginalTableColumns + mgtr.migrationContext.OriginalTableVirtualColumns = primary.OriginalTableVirtualColumns + mgtr.migrationContext.OriginalTableUniqueKeys = primary.OriginalTableUniqueKeys + mgtr.migrationContext.UniqueKey = primary.UniqueKey + mgtr.migrationContext.SharedColumns = primary.SharedColumns + mgtr.migrationContext.MappedSharedColumns = primary.MappedSharedColumns + } + // Aggregate the row estimate across all tables for overall progress reporting. + atomic.StoreInt64(&mgtr.migrationContext.RowsEstimate, totalRowsEstimate) return nil } @@ -851,8 +862,6 @@ func (mgtr *Migrator) hydrateMoveTablesStateFromTarget() error { targetInspector := &Inspector{db: mgtr.applier.moveTablesTargetDB, migrationContext: probeContext} mgtr.migrationContext.InitMoveTableContainers() - - var totalRowsEstimate int64 for _, mt := range mgtr.migrationContext.OrderedMoveTables() { columns, virtualColumns, uniqueKeys, err := targetInspector.InspectTableColumnsAndUniqueKeys(mt.TargetTableName) if err != nil { @@ -860,7 +869,8 @@ func (mgtr *Migrator) hydrateMoveTablesStateFromTarget() error { } uniqueKey := targetInspector.selectUniqueKey(mt.TargetTableName, uniqueKeys) if uniqueKey == nil { - return fmt.Errorf("no valid unique key on target table %s.%s", mt.TargetDatabaseName, mt.TargetTableName) + return fmt.Errorf("no valid unique key found on target table %s.%s while resuming", + sql.EscapeName(mt.TargetDatabaseName), sql.EscapeName(mt.TargetTableName)) } mt.OriginalTableColumns = columns mt.OriginalTableVirtualColumns = virtualColumns @@ -868,12 +878,9 @@ func (mgtr *Migrator) hydrateMoveTablesStateFromTarget() error { mt.UniqueKey = uniqueKey mt.SharedColumns = columns mt.MappedSharedColumns = columns - totalRowsEstimate += atomic.LoadInt64(&mt.RowsEstimate) } - // Primary top-level state for 
single-table code paths (checkpoint/status). - primary := mgtr.migrationContext.GetMoveTable(mgtr.migrationContext.MoveTablePrimaryName()) - if primary != nil { + if primary := mgtr.migrationContext.GetMoveTable(mgtr.migrationContext.MoveTablePrimaryName()); primary != nil { mgtr.migrationContext.OriginalTableColumns = primary.OriginalTableColumns mgtr.migrationContext.OriginalTableVirtualColumns = primary.OriginalTableVirtualColumns mgtr.migrationContext.OriginalTableUniqueKeys = primary.OriginalTableUniqueKeys @@ -881,7 +888,6 @@ func (mgtr *Migrator) hydrateMoveTablesStateFromTarget() error { mgtr.migrationContext.SharedColumns = primary.SharedColumns mgtr.migrationContext.MappedSharedColumns = primary.MappedSharedColumns } - atomic.StoreInt64(&mgtr.migrationContext.RowsEstimate, totalRowsEstimate) return nil } @@ -1057,7 +1063,6 @@ func (mgtr *Migrator) resumeMoveTablesCutOverFromCheckpoint(chk *Checkpoint) err } atomic.StoreInt64(&mgtr.migrationContext.CutOverCompleteFlag, 1) mgtr.migrationContext.Log.Debugf("T4: CutOverCompleteFlag set") - mgtr.migrationContext.MoveTables.DrainGTID = chk.MoveTablesCutOverDrainGTID if err := mgtr.hooksExecutor.OnSuccess(false); err != nil { return fmt.Errorf("on-success hook failed: %w", err) } @@ -1247,8 +1252,13 @@ func (mgtr *Migrator) MoveTables() (err error) { if err := mgtr.addDMLEventsListener(); err != nil { return err } - if err := mgtr.readMoveTablesMigrationRanges(); err != nil { - return err + // Read each migrated table's full row-copy range into its per-table container + // (§2.3). Ranges are read from the source via the inspector connection. 
+ for _, mt := range mgtr.migrationContext.OrderedMoveTables() { + if err := mgtr.applier.ReadMoveTableMigrationRangeValues(mgtr.inspector.db, mt); err != nil { + return fmt.Errorf("failed to read migration range for %s.%s: %w", + sql.EscapeName(mt.SourceDatabaseName), sql.EscapeName(mt.SourceTableName), err) + } } mgtr.initiateThrottler() @@ -1377,17 +1387,15 @@ func (mgtr *Migrator) moveTablesCutOver() (err error) { } sourceDB := mgtr.migrationContext.DatabaseName - // Build a single atomic multi-table RENAME covering every migrated table - // (§2.4). MySQL executes `RENAME TABLE a TO b, c TO d` atomically at the - // storage-engine and binlog level: either all renames are visible or none, - // and they appear in the binlog as one event group with one GTID. That single - // drain GTID therefore covers the whole move set, and combined with the shared - // binlog stream the existing Stage 1 drain mechanism works unchanged. + // Build a single atomic multi-table RENAME covering every table in + // --move-tables order (§2.4): `RENAME TABLE db.t1 TO db._t1_del, db.t2 TO + // db._t2_del, ...`. MySQL executes this as one event group with one GTID, so + // the existing single-drain-GTID mechanism covers the whole move set. 
renameClauses := make([]string, 0, len(mgtr.migrationContext.MoveTables.TableNames)) - for _, mt := range mgtr.migrationContext.OrderedMoveTables() { - delTable := mgtr.migrationContext.MoveTableDelName(mt.SourceTableName) + for _, tableName := range mgtr.migrationContext.MoveTables.TableNames { + delTable := mgtr.migrationContext.MoveTableDelName(tableName) renameClauses = append(renameClauses, fmt.Sprintf("%s.%s to %s.%s", - sql.EscapeName(sourceDB), sql.EscapeName(mt.SourceTableName), + sql.EscapeName(sourceDB), sql.EscapeName(tableName), sql.EscapeName(sourceDB), sql.EscapeName(delTable))) } renameAndCaptureQuery := fmt.Sprintf("rename /* gh-ost */ table %s;\nselect @@global.gtid_executed", @@ -1891,31 +1899,32 @@ func (mgtr *Migrator) validateMoveTablesSourceReadHost() error { return fmt.Errorf("move-tables source --host %+v is the cluster primary; reading the full table copy from the primary is the load move-tables is meant to avoid. Point --host at a replica so reads come off the primary, or pass --allow-on-source-primary to proceed against the primary anyway", spc.Key) } -// dropSourceOldTable drops the source `__del` rollback handles on the source -// primary. The inspector/streamer source connections may be a read replica, so -// the drop cannot go through them; it must use the writable source-primary -// handle. In multi-table mode every migrated table's `__del` is dropped. +// dropSourceOldTable drops the source `_
_del` rollback handle(s) on the +// source primary. The inspector/streamer source connections may be a read +// replica, so the drop cannot go through them; it must use the writable +// source-primary handle. In multi-table mode every renamed source table's `_del` +// handle is dropped. func (mgtr *Migrator) dropSourceOldTable() error { if mgtr.sourcePrimaryDB == nil { return errors.New("source primary connection not initialized; cannot drop source __del table") } databaseName := mgtr.migrationContext.DatabaseName - for _, mt := range mgtr.migrationContext.OrderedMoveTables() { - tableName := mgtr.migrationContext.MoveTableDelName(mt.SourceTableName) + for _, tableName := range mgtr.migrationContext.MoveTables.TableNames { + delTable := mgtr.migrationContext.MoveTableDelName(tableName) query := fmt.Sprintf(`drop /* gh-ost */ table if exists %s.%s`, sql.EscapeName(databaseName), - sql.EscapeName(tableName), + sql.EscapeName(delTable), ) mgtr.migrationContext.Log.Infof("Dropping source table %s.%s on primary %+v", sql.EscapeName(databaseName), - sql.EscapeName(tableName), + sql.EscapeName(delTable), mgtr.migrationContext.MoveTables.SourcePrimaryConnectionConfig.Key, ) if _, err := mgtr.sourcePrimaryDB.Exec(query); err != nil { return err } - mgtr.migrationContext.Log.Infof("Source table dropped") } + mgtr.migrationContext.Log.Infof("Source table(s) dropped") return nil } @@ -2258,6 +2267,26 @@ func (mgtr *Migrator) printStatus(rule PrintStatusRule, writers ...io.Writer) { w := io.MultiWriter(writers...) fmt.Fprintln(w, status) + // In move-tables mode, surface per-table row-copy progress so all migrated + // tables are visibly advancing concurrently (§2.3). 
+ if mgtr.migrationContext.IsMoveTablesMode() { + for _, mt := range mgtr.migrationContext.OrderedMoveTables() { + copied := atomic.LoadInt64(&mt.RowsCopied) + estimate := atomic.LoadInt64(&mt.RowsEstimate) + pct := 100.0 + if estimate > 0 { + pct = 100.0 * float64(copied) / float64(estimate) + } + tableState := "copying" + if mt.IsRowCopyComplete() { + tableState = "complete" + } + fmt.Fprintf(w, " - %s.%s: Copy %d/%d %.1f%%; iteration %d; %s\n", + sql.EscapeName(mt.SourceDatabaseName), sql.EscapeName(mt.SourceTableName), + copied, estimate, pct, mt.GetIteration(), tableState) + } + } + // This "hack" is required here because the underlying logging library // github.com/outbrain/golib/log provides two functions Info and Infof; but the arguments of // both these functions are eventually redirected to the same function, which internally calls @@ -2317,10 +2346,10 @@ func (mgtr *Migrator) initiateStreaming() error { // addDMLEventsListener begins listening for binlog events on the migrated // table(s), and creates & enqueues a write task per such event. In move-tables -// mode it registers one listener per migrated table on the single shared events -// streamer (§2.2); every listener uses the same callback (enqueue onto the -// shared apply queue) and the applier routes each event to the right table by -// name. There is still exactly one binlog connection. +// mode it registers one listener per migrated table on the shared events +// streamer (§2.2); all listeners feed the same apply queue, parameterized only +// by table name. The streamer already dispatches per (database, table), and the +// applier routes DML to the right per-table query builders by table name. func (mgtr *Migrator) addDMLEventsListener() error { enqueue := func(dmlEntry *binlog.BinlogEntry) error { // Use helper to prevent deadlock if buffer fills and executeWriteFuncs exits. 
@@ -2329,11 +2358,11 @@ func (mgtr *Migrator) addDMLEventsListener() error { } if mgtr.migrationContext.IsMoveTablesMode() { - for _, mt := range mgtr.migrationContext.OrderedMoveTables() { + for _, tableName := range mgtr.migrationContext.MoveTables.TableNames { if err := mgtr.eventsStreamer.AddListener( false, - mt.SourceDatabaseName, - mt.SourceTableName, + mgtr.migrationContext.DatabaseName, + tableName, enqueue, ); err != nil { return err @@ -2376,15 +2405,20 @@ func (mgtr *Migrator) initiateApplier() error { if mgtr.migrationContext.IsMoveTablesMode() { if !mgtr.migrationContext.Resume { - // Create every target table from its source CREATE statement (§2.1). + // Create every migrated table on the target from its captured CREATE + // statement (§2.1). Containers were populated by prepareMoveTablesCopyState. for _, mt := range mgtr.migrationContext.OrderedMoveTables() { - createTableStatement, err := mgtr.inspector.showCreateTable(mt.SourceTableName) - if err != nil { - return fmt.Errorf("failed to fetch create table statement for %s.%s: %w", mt.SourceDatabaseName, mt.SourceTableName, err) + createTableStatement := mt.CreateTableStatement + if createTableStatement == "" { + var err error + if createTableStatement, err = mgtr.inspector.showCreateTable(mt.SourceTableName); err != nil { + return fmt.Errorf("failed to fetch create table statement for %s.%s: %w", + sql.EscapeName(mt.SourceDatabaseName), sql.EscapeName(mt.SourceTableName), err) + } } - mt.CreateTableStatement = createTableStatement if err := mgtr.applier.CreateTargetTableForName(mt.TargetTableName, createTableStatement); err != nil { - mgtr.migrationContext.Log.Errorf("unable to create target table %s.%s, see further error details. Perhaps a previous migration failed without dropping the table? Bailing out", mt.TargetDatabaseName, mt.TargetTableName) + mgtr.migrationContext.Log.Errorf("unable to create target table %s.%s, see further error details. 
Perhaps a previous migration failed without dropping the table? Bailing out", + sql.EscapeName(mt.TargetDatabaseName), sql.EscapeName(mt.TargetTableName)) return err } } @@ -2505,8 +2539,7 @@ func (mgtr *Migrator) iterateChunks() error { // _ghost_ table, which no longer exists. So, bothering error messages and all, but no damage. return nil } - var rowsAffected int64 - _, rowsAffected, _, err = mgtr.applier.ApplyIterationInsertQuery() + _, rowsAffected, _, err := mgtr.applier.ApplyIterationInsertQuery() if err != nil { return err // wrapping call will retry } @@ -2552,84 +2585,23 @@ func (mgtr *Migrator) iterateChunks() error { } } -// readMoveTablesMigrationRanges reads the per-table min/max unique-key range for -// every migrated table (§2.3). Each table has its own range stored in its -// container. -func (mgtr *Migrator) readMoveTablesMigrationRanges() error { - for _, mt := range mgtr.migrationContext.OrderedMoveTables() { - if err := mgtr.applier.ReadMoveTableMigrationRangeValues(mgtr.inspector.db, mt); err != nil { - return fmt.Errorf("failed to read migration range for %s.%s: %w", mt.SourceDatabaseName, mt.SourceTableName, err) - } - } - return nil -} - -// moveTableCopyChunkFunc returns a copy task that copies a single chunk of the -// given table from source to target. The task is enqueued onto the shared -// copyRowsQueue and executed (single-threaded) by executeWriteFuncs, just like -// the standard-mode copy task — but it advances only this table's per-table -// iteration state, so multiple tables make progress concurrently (§2.3). 
-func (mgtr *Migrator) moveTableCopyChunkFunc(mt *base.MoveTable) func() error { - return func() error { - if mt.IsRowCopyComplete() || atomic.LoadInt64(&mgtr.rowCopyCompleteFlag) == 1 { - return nil - } - mt.SetNextIterationRangeMinValues() - applyCopyRowsFunc := func() error { - if mt.IsRowCopyComplete() || atomic.LoadInt64(&mgtr.rowCopyCompleteFlag) == 1 { - return nil - } - hasFurtherRange, err := mgtr.applier.CalculateMoveTableNextIterationRangeEndValues(mgtr.inspector.db, mt) - if err != nil { - return err // wrapping call will retry - } - if !hasFurtherRange { - mt.SetRowCopyComplete() - mgtr.migrationContext.Log.Infof("Row copy complete for %s.%s", mt.SourceDatabaseName, mt.SourceTableName) - return nil - } - if atomic.LoadInt64(&mgtr.rowCopyCompleteFlag) == 1 { - return nil - } - _, rowsAffected, _, err := mgtr.applier.ApplyIterationMoveTableCopyQueries(mgtr.inspector.db, mt) - if err != nil { - return err // wrapping call will retry - } - mgtr.migrationContext.Log.Debugf("ApplyIterationMoveTableCopyQueries on %s.%s affected %d rows", mt.SourceDatabaseName, mt.SourceTableName, rowsAffected) - - if mgtr.migrationContext.PanicOnWarnings { - if len(mgtr.migrationContext.MigrationLastInsertSQLWarnings) > 0 { - for _, warning := range mgtr.migrationContext.MigrationLastInsertSQLWarnings { - mgtr.migrationContext.Log.Infof("move-table copy on %s.%s has SQL warnings! 
%s", mt.SourceDatabaseName, mt.SourceTableName, warning) - } - joinedWarnings := strings.Join(mgtr.migrationContext.MigrationLastInsertSQLWarnings, "; ") - return fmt.Errorf("move-table copy on %s.%s failed because of SQL warnings: [%s]", mt.SourceDatabaseName, mt.SourceTableName, joinedWarnings) - } - } - - atomic.AddInt64(&mt.RowsCopied, rowsAffected) - atomic.AddInt64(&mgtr.migrationContext.TotalRowsCopied, rowsAffected) - mt.IncrementIteration() - return nil - } - if err := mgtr.retryBatchCopyWithHooks(applyCopyRowsFunc); err != nil { - return err - } - mt.RecordLastIterationRange() - return nil - } -} - -// iterateChunksMoveTables drives the interleaved multi-table row copy (§2.3). It -// round-robins over the migrated tables in --move-tables order, enqueuing one -// chunk per not-yet-complete table per pass onto the shared copyRowsQueue, until -// every table reports row copy complete. There is still one apply pipeline; the -// per-table iteration state lives in each table's container. +// iterateChunksMoveTables drives the interleaved, multi-table row copy (§2.3). +// It round-robins over the migrated tables in --move-tables order, enqueuing one +// chunk-copy task per not-yet-complete table per cycle so all tables make +// progress concurrently through the single shared apply pipeline. Each task +// operates on its table's own per-table container; the single executeWriteFuncs +// consumer runs the tasks one at a time, so per-table range/iteration state is +// never accessed concurrently. Row copy is complete only once EVERY table +// reports complete, at which point the shared rowCopyComplete signal fires once +// (so the on-row-copy-complete hook and cutover fire exactly once, after the +// slowest table). 
func (mgtr *Migrator) iterateChunksMoveTables() error { - ctx := mgtr.migrationContext.GetContext() terminateRowIteration := func(err error) error { - _ = base.SendWithContext(ctx, mgtr.rowCopyComplete, err) - return mgtr.migrationContext.Log.Errore(err) + _ = base.SendWithContext(mgtr.migrationContext.GetContext(), mgtr.rowCopyComplete, err) + if err != nil { + return mgtr.migrationContext.Log.Errore(err) + } + return nil } if mgtr.migrationContext.Noop { mgtr.migrationContext.Log.Debugf("Noop operation; not really copying data") @@ -2640,35 +2612,94 @@ func (mgtr *Migrator) iterateChunksMoveTables() error { // A table with no rows is immediately complete. for _, mt := range tables { if mt.MigrationRangeMinValues == nil { - mgtr.migrationContext.Log.Debugf("No rows found in %s.%s. Row copy implicitly empty", mt.SourceDatabaseName, mt.SourceTableName) + mgtr.migrationContext.Log.Debugf("No rows found in %s.%s; row copy is implicitly empty", + sql.EscapeName(mt.SourceDatabaseName), sql.EscapeName(mt.SourceTableName)) mt.SetRowCopyComplete() } } + primaryName := mgtr.migrationContext.MoveTablePrimaryName() + + // enqueueChunk builds and enqueues a single chunk-copy task bound to mt. 
+ enqueueChunk := func(mt *base.MoveTable) error { + copyRowsFunc := func() error { + if mt.IsRowCopyComplete() || atomic.LoadInt64(&mgtr.rowCopyCompleteFlag) == 1 { + return nil + } + mt.SetNextIterationRangeMinValues() + applyCopyRowsFunc := func() error { + if mt.IsRowCopyComplete() || atomic.LoadInt64(&mgtr.rowCopyCompleteFlag) == 1 { + return nil + } + hasFurtherRange, err := mgtr.applier.CalculateMoveTableNextIterationRangeEndValues(mgtr.inspector.db, mt) + if err != nil { + return err // wrapping call will retry + } + if !hasFurtherRange { + mt.SetRowCopyComplete() + return nil + } + if atomic.LoadInt64(&mgtr.rowCopyCompleteFlag) == 1 { + return nil + } + _, rowsAffected, _, err := mgtr.applier.ApplyIterationMoveTableCopyQueries(mgtr.inspector.db, mt) + if err != nil { + return err // wrapping call will retry + } + if mgtr.migrationContext.PanicOnWarnings && len(mgtr.migrationContext.MigrationLastInsertSQLWarnings) > 0 { + for _, warning := range mgtr.migrationContext.MigrationLastInsertSQLWarnings { + mgtr.migrationContext.Log.Infof("move-table copy on %s.%s has SQL warnings! %s", + sql.EscapeName(mt.SourceDatabaseName), sql.EscapeName(mt.SourceTableName), warning) + } + joined := strings.Join(mgtr.migrationContext.MigrationLastInsertSQLWarnings, "; ") + return fmt.Errorf("move-table copy on %s.%s failed because of SQL warnings: [%s]", + sql.EscapeName(mt.SourceDatabaseName), sql.EscapeName(mt.SourceTableName), joined) + } + atomic.AddInt64(&mgtr.migrationContext.TotalRowsCopied, rowsAffected) + atomic.AddInt64(&mt.RowsCopied, rowsAffected) + mt.IncrementIteration() + return nil + } + if err := mgtr.retryBatchCopyWithHooks(applyCopyRowsFunc); err != nil { + return err + } + // Record this table's last successfully-copied range for checkpointing. + mt.RecordLastIterationRange() + // Keep the applier-level last range in sync for the primary table so the + // existing single-table checkpoint path keeps working unchanged. 
+ if mt.SourceTableName == primaryName { + mgtr.applier.LastIterationRangeMutex.Lock() + if mt.LastIterationRangeMinValues != nil && mt.LastIterationRangeMaxValues != nil { + mgtr.applier.LastIterationRangeMinValues = mt.LastIterationRangeMinValues.Clone() + mgtr.applier.LastIterationRangeMaxValues = mt.LastIterationRangeMaxValues.Clone() + } + mgtr.applier.LastIterationRangeMutex.Unlock() + } + return nil + } + return base.SendWithContext(mgtr.migrationContext.GetContext(), mgtr.copyRowsQueue, copyRowsFunc) + } + for { if err := mgtr.checkAbort(); err != nil { return terminateRowIteration(err) } - if atomic.LoadInt64(&mgtr.rowCopyCompleteFlag) == 1 { - return nil - } - // Row copy is done for the whole move set only once every table is - // complete. Signal completion exactly once (§2.3). - if mgtr.migrationContext.AllMoveTablesRowCopyComplete() { + if atomic.LoadInt64(&mgtr.rowCopyCompleteFlag) == 1 || mgtr.migrationContext.AllMoveTablesRowCopyComplete() { return terminateRowIteration(nil) } - // Enqueue one chunk per not-yet-complete table, in deterministic order. 
for _, mt := range tables { if mt.IsRowCopyComplete() { continue } - copyRowsFunc := mgtr.moveTableCopyChunkFunc(mt) - if err := base.SendWithContext(ctx, mgtr.copyRowsQueue, copyRowsFunc); err != nil { + if err := enqueueChunk(mt); err != nil { if abortErr := mgtr.checkAbort(); abortErr != nil { return terminateRowIteration(abortErr) } return terminateRowIteration(err) } + if atomic.LoadInt64(&mgtr.rowCopyCompleteFlag) == 1 { + return nil + } } } } @@ -2676,7 +2707,6 @@ func (mgtr *Migrator) iterateChunksMoveTables() error { func (mgtr *Migrator) onApplyEventStruct(eventStruct *applyEventStruct) error { atomic.AddInt64(&mgtr.applyEventsInFlight, 1) defer atomic.AddInt64(&mgtr.applyEventsInFlight, -1) - handleNonDMLEventStruct := func(eventStruct *applyEventStruct) error { if eventStruct.writeFunc != nil { if err := mgtr.retryOperation(*eventStruct.writeFunc); err != nil { @@ -2985,7 +3015,6 @@ func (mgtr *Migrator) finalCleanup() error { // survives as the rollback handle (see logMoveTablesRollbackHint). func (mgtr *Migrator) moveTablesFinalCleanup() error { sourceDatabaseName := mgtr.migrationContext.DatabaseName - delTableName := mgtr.migrationContext.GetOldTableName() targetDatabaseName := mgtr.migrationContext.GetTargetDatabaseName() checkpointTableName := mgtr.migrationContext.GetCheckpointTableName() @@ -3012,10 +3041,14 @@ func (mgtr *Migrator) moveTablesFinalCleanup() error { } // --ok-to-drop-table not set: log the artifacts left behind and the exact - // commands to drop them. + // commands to drop them. In multi-table mode every migrated table leaves its + // own `_
_del` rollback handle on the source. mgtr.migrationContext.Log.Infof("Am not dropping move-tables artifacts without `--ok-to-drop-table`. The following are left behind:") - mgtr.migrationContext.Log.Infof("- source rollback handle %s.%s. To drop it, issue:", sql.EscapeName(sourceDatabaseName), sql.EscapeName(delTableName)) - mgtr.migrationContext.Log.Infof("-- drop table %s.%s", sql.EscapeName(sourceDatabaseName), sql.EscapeName(delTableName)) + for _, tableName := range mgtr.migrationContext.MoveTables.TableNames { + delTableName := mgtr.migrationContext.MoveTableDelName(tableName) + mgtr.migrationContext.Log.Infof("- source rollback handle %s.%s. To drop it, issue:", sql.EscapeName(sourceDatabaseName), sql.EscapeName(delTableName)) + mgtr.migrationContext.Log.Infof("-- drop table %s.%s", sql.EscapeName(sourceDatabaseName), sql.EscapeName(delTableName)) + } if mgtr.migrationContext.Checkpoint { mgtr.migrationContext.Log.Infof("- target checkpoint table %s.%s. To drop it, issue:", sql.EscapeName(targetDatabaseName), sql.EscapeName(checkpointTableName)) mgtr.migrationContext.Log.Infof("-- drop table %s.%s", sql.EscapeName(targetDatabaseName), sql.EscapeName(checkpointTableName)) @@ -3024,20 +3057,26 @@ func (mgtr *Migrator) moveTablesFinalCleanup() error { } // logMoveTablesRollbackHint prints a clear rollback hint after a failed -// move-tables run in which the source RENAME already happened. The source -// `__del` table is intentionally left in place as the rollback handle -// and the operator rolls the source back by renaming -// `__del` to the original table name. We do NOT drop `__del` on a failure path. +// move-tables run in which the source RENAME already happened. Each migrated +// table's source `_
_del` table is intentionally left in place as the +// rollback handle and the operator rolls the source back by renaming every +// `_
_del` back to its original table name. We do NOT drop `__del` on a +// failure path. func (mgtr *Migrator) logMoveTablesRollbackHint() { sourceDatabaseName := mgtr.migrationContext.DatabaseName - originalTableName := mgtr.migrationContext.OriginalTableName - delTableName := mgtr.migrationContext.GetOldTableName() - mgtr.migrationContext.Log.Infof("move-tables run failed after the source rename; leaving %s.%s in place as the rollback handle.", - sql.EscapeName(sourceDatabaseName), sql.EscapeName(delTableName)) - mgtr.migrationContext.Log.Infof("To roll back the source table, issue:") - mgtr.migrationContext.Log.Infof("-- rename table %s.%s to %s.%s", - sql.EscapeName(sourceDatabaseName), sql.EscapeName(delTableName), - sql.EscapeName(sourceDatabaseName), sql.EscapeName(originalTableName)) + mgtr.migrationContext.Log.Infof("move-tables run failed after the source rename; leaving the following rollback handle(s) in place:") + rollbackClauses := make([]string, 0, len(mgtr.migrationContext.MoveTables.TableNames)) + for _, tableName := range mgtr.migrationContext.MoveTables.TableNames { + delTableName := mgtr.migrationContext.MoveTableDelName(tableName) + mgtr.migrationContext.Log.Infof("- %s.%s (rollback handle for %s.%s)", + sql.EscapeName(sourceDatabaseName), sql.EscapeName(delTableName), + sql.EscapeName(sourceDatabaseName), sql.EscapeName(tableName)) + rollbackClauses = append(rollbackClauses, fmt.Sprintf("%s.%s to %s.%s", + sql.EscapeName(sourceDatabaseName), sql.EscapeName(delTableName), + sql.EscapeName(sourceDatabaseName), sql.EscapeName(tableName))) + } + mgtr.migrationContext.Log.Infof("To roll back the source table(s), issue:") + mgtr.migrationContext.Log.Infof("-- rename table %s", strings.Join(rollbackClauses, ", ")) } func (mgtr *Migrator) teardown() { diff --git a/localtests/move-tables/multi/create.sql b/localtests/move-tables/multi/create.sql new file mode 100644 index 000000000..3d4a5d70b --- /dev/null +++ b/localtests/move-tables/multi/create.sql @@ 
-0,0 +1,68 @@ +-- Two tables with different schemas, primary-key types, and row counts. This +-- exercises the multi-table move-tables path (§2.1-2.4): per-table runtime +-- state, per-table query builders, interleaved row copy where the tables finish +-- at different times, and a single atomic multi-table RENAME at cutover. + +drop table if exists gh_ost_test; +create table gh_ost_test ( + id bigint(20) NOT NULL AUTO_INCREMENT, + column1 int(11) NOT NULL, + column2 smallint(5) unsigned NOT NULL, + column3 mediumint(8) unsigned NOT NULL, + column4 tinyint(3) unsigned NOT NULL, + column5 int(11) NOT NULL, + column6 int(11) NOT NULL, + PRIMARY KEY (id), + KEY c12_ix (column1, column2) +) auto_increment=1; + +insert into gh_ost_test values + (NULL, 1001, 100, 500000, 10, 1700000001, 1700000002), + (NULL, 1002, 200, 600000, 20, 1700000003, 1700000004), + (NULL, 1003, 300, 700000, 30, 1700000005, 1700000006), + (NULL, 1004, 400, 800000, 40, 1700000007, 1700000008), + (NULL, 1005, 500, 900000, 50, 1700000009, 1700000010), + (NULL, 1006, 600, 1000000, 60, 1700000011, 1700000012), + (NULL, 1007, 700, 1100000, 70, 1700000013, 1700000014), + (NULL, 1008, 800, 1200000, 80, 1700000015, 1700000016), + (NULL, 1009, 900, 1300000, 90, 1700000017, 1700000018), + (NULL, 1010, 1000, 1400000, 100, 1700000019, 1700000020), + (NULL, 1011, 1100, 1500000, 110, 1700000021, 1700000022), + (NULL, 1012, 1200, 1600000, 120, 1700000023, 1700000024), + (NULL, 1013, 1300, 1700000, 130, 1700000025, 1700000026), + (NULL, 1014, 1400, 1800000, 140, 1700000027, 1700000028), + (NULL, 1015, 1500, 1900000, 150, 1700000029, 1700000030), + (NULL, 1016, 1600, 2000000, 160, 1700000031, 1700000032), + (NULL, 1017, 1700, 2100000, 170, 1700000033, 1700000034), + (NULL, 1018, 1800, 2200000, 180, 1700000035, 1700000036), + (NULL, 1019, 1900, 2300000, 190, 1700000037, 1700000038), + (NULL, 1020, 2000, 2400000, 200, 1700000039, 1700000040), + (NULL, 1021, 2100, 2500000, 210, 1700000041, 1700000042), + (NULL, 1022, 
2200, 2600000, 220, 1700000043, 1700000044), + (NULL, 1023, 2300, 2700000, 230, 1700000045, 1700000046), + (NULL, 1024, 2400, 2800000, 240, 1700000047, 1700000048), + (NULL, 1025, 2500, 2900000, 250, 1700000049, 1700000050); + +drop table if exists gh_ost_test_other; +create table gh_ost_test_other ( + uid int(11) NOT NULL, + name varchar(64) NOT NULL, + amount decimal(10,2) NOT NULL, + created_at datetime NOT NULL, + PRIMARY KEY (uid), + UNIQUE KEY name_uq (name) +); + +insert into gh_ost_test_other values + (1, 'alpha', 10.50, '2024-01-01 10:00:00'), + (2, 'bravo', 20.75, '2024-01-02 11:00:00'), + (3, 'charlie', 30.00, '2024-01-03 12:00:00'), + (4, 'delta', 40.25, '2024-01-04 13:00:00'), + (5, 'echo', 50.50, '2024-01-05 14:00:00'), + (6, 'foxtrot', 60.75, '2024-01-06 15:00:00'), + (7, 'golf', 70.00, '2024-01-07 16:00:00'), + (8, 'hotel', 80.25, '2024-01-08 17:00:00'), + (9, 'india', 90.50, '2024-01-09 18:00:00'), + (10, 'juliet', 100.75, '2024-01-10 19:00:00'), + (11, 'kilo', 110.00, '2024-01-11 20:00:00'), + (12, 'lima', 120.25, '2024-01-12 21:00:00'); diff --git a/localtests/move-tables/multi/tables.txt b/localtests/move-tables/multi/tables.txt new file mode 100644 index 000000000..30fa51c70 --- /dev/null +++ b/localtests/move-tables/multi/tables.txt @@ -0,0 +1,2 @@ +gh_ost_test +gh_ost_test_other diff --git a/script/move-tables/README.md b/script/move-tables/README.md index 07d1037a7..3d3b15881 100644 --- a/script/move-tables/README.md +++ b/script/move-tables/README.md @@ -1,18 +1,20 @@ ### Setup -Setup the multi-cluster topology and seed the data +Setup the multi-cluster topology and seed the data. This seeds two tables on the +source — `gh_ost_test` and `gh_ost_test_other` (see +`localtests/move-tables/multi/create.sql`) — into the `test` database. ```bash script/move-tables/setup ``` Verify data is present in the source cluster. 
```bash -script/move-tables/mysql-source-primary -D gh_ost_test_db -e "SELECT * FROM gh_ost_test;" +script/move-tables/mysql-source-primary -D test -e "SELECT * FROM gh_ost_test; SELECT * FROM gh_ost_test_other;" ``` Verify the empty database is present in the target cluster. ```bash -script/move-tables/mysql-target-primary -D gh_ost_test_db -e "SHOW TABLES;" +script/move-tables/mysql-target-primary -D test -e "SHOW TABLES;" ``` ### Testing `gh-ost` @@ -22,19 +24,22 @@ Checkout your branch of `github/gh-ost` and build the binaries: script/build --cli ``` -Run gh-ost to move tables: +Run gh-ost to move both tables in a single atomic cutover: ```bash -./script/build --cli; ./bin/gh-ost --move-tables=gh_ost_test --host=localhost --port=3308 --user root --password opensesame --database=gh_ost_test_db --target-host=localhost --target-port=3309 --target-user root --target-password opensesame --target-database=gh_ost_test_db --postpone-cut-over-flag-file=/tmp/ghost-move-tables.postpone.flag --execute --verbose --checkpoint --checkpoint-seconds 10 --initially-drop-socket-file +./script/build --cli; ./bin/gh-ost --move-tables=gh_ost_test,gh_ost_test_other --host=localhost --port=3308 --user root --password opensesame --database=test --target-host=localhost --target-port=3309 --target-user root --target-password opensesame --target-database=test --postpone-cut-over-flag-file=/tmp/ghost-move-tables.postpone.flag --execute --verbose --checkpoint --checkpoint-seconds 10 --initially-drop-socket-file ``` -Start continuous inserts against the source. +You'll see per-table row-copy progress in the status output, with both tables +advancing concurrently. + +Start continuous inserts against the source. This writes to both tables. ```bash script/move-tables/insert-source-primary-loop ``` Check the target - it should have the initial data from the source and should be receiving the new data. 
```bash -script/move-tables/mysql-target-primary -D gh_ost_test_db -e "SELECT * FROM gh_ost_test;" +script/move-tables/mysql-target-primary -D test -e "SELECT * FROM gh_ost_test; SELECT * FROM gh_ost_test_other;" ``` Remove the cutover flag file. @@ -42,14 +47,30 @@ Remove the cutover flag file. rm /tmp/ghost-move-tables.postpone.flag ``` -You'll see the continuous inserts will stop because of the table rename. +You'll see the continuous inserts will stop because of the table rename. Both +tables are renamed together in a single atomic `RENAME TABLE`. -Check the source - table has been renamed. +Check the source - both tables have been renamed to their `_..._del` rollback handles. ```bash -script/move-tables/mysql-source-primary -D gh_ost_test_db -e "SELECT * FROM _gh_ost_test_del;" +script/move-tables/mysql-source-primary -D test -e "SELECT * FROM _gh_ost_test_del; SELECT * FROM _gh_ost_test_other_del;" ``` Check the target has the same set of data. ```bash -script/move-tables/mysql-target-primary -D gh_ost_test_db -e "SELECT * FROM gh_ost_test;" +script/move-tables/mysql-target-primary -D test -e "SELECT * FROM gh_ost_test; SELECT * FROM gh_ost_test_other;" +``` + +### Resetting between runs + +Drop and re-seed both source tables (and clean up the target tables + checkpoint +table) so you can run again without a full teardown: +```bash +script/move-tables/reset +``` + +### Teardown + +Remove the docker containers: +```bash +script/move-tables/teardown ``` \ No newline at end of file diff --git a/script/move-tables/insert-source-primary-loop b/script/move-tables/insert-source-primary-loop index 490442ec4..3b949fc9a 100755 --- a/script/move-tables/insert-source-primary-loop +++ b/script/move-tables/insert-source-primary-loop @@ -1,9 +1,11 @@ #!/usr/bin/env bash set -euo pipefail -# Continuously insert new rows into gh_ost_test on source primary. 
+# Continuously insert new rows into BOTH move-tables fixtures (gh_ost_test and +# gh_ost_test_other) on the source primary, so a multi-table move-tables run sees +# live DML on every migrated table. # Usage: -# script/move-tables/insert-source-primary-loop [start_column1] [sleep_seconds] [rows_per_batch] +# script/move-tables/insert-source-primary-loop [start_id] [sleep_seconds] [rows_per_batch] # Example: # script/move-tables/insert-source-primary-loop 100000 0.2 1 # Fast example: @@ -13,34 +15,44 @@ start_i="${1:-100000}" delay="${2:-0.2}" rows_per_batch="${3:-1}" i="$start_i" -DATABASE="${DATABASE:-gh_ost_test_db} +# Match the database created/seeded by script/move-tables/setup. +DATABASE="${DATABASE:-test}" echo "Starting continuous inserts on source primary. Press Ctrl+C to stop." -echo "start_column1=$start_i sleep_seconds=$delay rows_per_batch=$rows_per_batch" +echo "start_id=$start_i sleep_seconds=$delay rows_per_batch=$rows_per_batch database=$DATABASE" trap 'echo; echo "Stopped."; exit 0' INT TERM while true; do ts="$(date +%s)" - values="" + test_values="" + other_values="" batch_start="$i" for ((n=0; n_ghk. +PRIMARY_TABLE="${TABLES[0]}" + +# Reset source table state regardless of whether a cutover renamed the originals +# to their `_
_del` rollback handles. +source_drop="" +for t in "${TABLES[@]}"; do + source_drop+="_${t}_del, ${t}, " +done +source_drop="${source_drop%, }" +${SCRIPT_PATH}/mysql-source-primary -D "${DATABASE_NAME}" -e "DROP TABLE IF EXISTS ${source_drop};" # Recreate and seed source table data, same fixture as setup uses. -${SCRIPT_PATH}/mysql-source-primary -D "${DATABASE_NAME}" < "${GH_OST_ROOT}/localtests/move-tables/create.sql" +${SCRIPT_PATH}/mysql-source-primary -D "${DATABASE_NAME}" < "${GH_OST_ROOT}/localtests/move-tables/multi/create.sql" -${SCRIPT_PATH}/mysql-target-primary -D "${DATABASE_NAME}" -e "DROP TABLE IF EXISTS gh_ost_test, _gh_ost_test_ghk;" +# Drop the moved tables and the checkpoint table on the target cluster. +target_drop="" +for t in "${TABLES[@]}"; do + target_drop+="${t}, " +done +target_drop+="_${PRIMARY_TABLE}_ghk" +${SCRIPT_PATH}/mysql-target-primary -D "${DATABASE_NAME}" -e "DROP TABLE IF EXISTS ${target_drop};" -echo "Reset source and target tables in ${DATABASE_NAME}" \ No newline at end of file +echo "Reset source and target tables (${TABLES[*]}) in ${DATABASE_NAME}" \ No newline at end of file diff --git a/script/move-tables/setup b/script/move-tables/setup index 9bb7902cd..5dc52845c 100755 --- a/script/move-tables/setup +++ b/script/move-tables/setup @@ -155,7 +155,7 @@ setup() { echo "OK" echo -n "Seeding data in source cluster..." 
- exec-mysql-source-primary -D $DATABASE_NAME < "$GH_OST_ROOT/localtests/move-tables/single/create.sql" + exec-mysql-source-primary -D $DATABASE_NAME < "$GH_OST_ROOT/localtests/move-tables/multi/create.sql" echo "OK" } From f9ad349fefcb48ea46f7c0a12176b6276f0d3f1a Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Mon, 22 Jun 2026 19:30:38 +0000 Subject: [PATCH 03/25] tidy printMigrationStatusHint --- go/logic/migrator.go | 60 +++++++++++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 14 deletions(-) diff --git a/go/logic/migrator.go b/go/logic/migrator.go index 9bba159a1..fd5691b3f 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -2035,20 +2035,52 @@ func (mgtr *Migrator) initiateStatus() { // migration, and as response to the "status" interactive command. func (mgtr *Migrator) printMigrationStatusHint(writers ...io.Writer) { w := io.MultiWriter(writers...) - fmt.Fprintf(w, "# Migrating %s.%s; Target table is %s.%s\n", - sql.EscapeName(mgtr.migrationContext.DatabaseName), - sql.EscapeName(mgtr.migrationContext.OriginalTableName), - sql.EscapeName(mgtr.migrationContext.GetTargetDatabaseName()), - sql.EscapeName(mgtr.migrationContext.GetTargetTableName()), - ) - fmt.Fprintf(w, "# Migrating %+v; inspecting %+v; executing on %+v\n", - *mgtr.applier.connectionConfig.ImpliedKey, - *mgtr.inspector.connectionConfig.ImpliedKey, - mgtr.migrationContext.Hostname, - ) - fmt.Fprintf(w, "# Migration started at %+v\n", - mgtr.migrationContext.StartTime.Format(time.RubyDate), - ) + if mgtr.migrationContext.IsMoveTablesMode() { + // In move-tables mode there may be several migrated tables; list each + // source -> target mapping rather than a single primary table (§2.3). + // Table names match on source and target; only the database may differ. 
+ sourceDatabaseName := mgtr.migrationContext.DatabaseName + targetDatabaseName := mgtr.migrationContext.GetTargetDatabaseName() + fmt.Fprintf(w, "# Moving %d table(s) from %s to %s:\n", + len(mgtr.migrationContext.MoveTables.TableNames), + sql.EscapeName(sourceDatabaseName), + sql.EscapeName(targetDatabaseName), + ) + for _, tableName := range mgtr.migrationContext.MoveTables.TableNames { + fmt.Fprintf(w, "# - %s.%s -> %s.%s\n", + sql.EscapeName(sourceDatabaseName), sql.EscapeName(tableName), + sql.EscapeName(targetDatabaseName), sql.EscapeName(tableName), + ) + } + + // In move-tables mode the applier writes the target cluster and the + // inspector reads the source cluster, so label them as such rather than + // reusing the single-server "migrating/inspecting" phrasing. + fmt.Fprintf(w, "# Applying on target %+v; reading source %+v; executing on %+v\n", + *mgtr.applier.connectionConfig.ImpliedKey, + *mgtr.inspector.connectionConfig.ImpliedKey, + mgtr.migrationContext.Hostname, + ) + fmt.Fprintf(w, "# Move started at %+v\n", + mgtr.migrationContext.StartTime.Format(time.RubyDate), + ) + } else { + fmt.Fprintf(w, "# Migrating %s.%s; Target table is %s.%s\n", + sql.EscapeName(mgtr.migrationContext.DatabaseName), + sql.EscapeName(mgtr.migrationContext.OriginalTableName), + sql.EscapeName(mgtr.migrationContext.GetTargetDatabaseName()), + sql.EscapeName(mgtr.migrationContext.GetTargetTableName()), + ) + fmt.Fprintf(w, "# Migrating %+v; inspecting %+v; executing on %+v\n", + *mgtr.applier.connectionConfig.ImpliedKey, + *mgtr.inspector.connectionConfig.ImpliedKey, + mgtr.migrationContext.Hostname, + ) + fmt.Fprintf(w, "# Migration started at %+v\n", + mgtr.migrationContext.StartTime.Format(time.RubyDate), + ) + } + maxLoad := mgtr.migrationContext.GetMaxLoad() criticalLoad := mgtr.migrationContext.GetCriticalLoad() fmt.Fprintf(w, "# chunk-size: %+v; max-lag-millis: %+vms; dml-batch-size: %+v; max-load: %s; critical-load: %s; nice-ratio: %f\n", From 
aadb5eead198b7040bb915850d5dc601132cd5fe Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Mon, 22 Jun 2026 19:40:55 +0000 Subject: [PATCH 04/25] three tables --- localtests/move-tables/three/create.sql | 92 +++++++++++++++ localtests/move-tables/three/tables.txt | 3 + script/move-tables/README.md | 63 +++++++--- script/move-tables/insert-source-primary-loop | 109 +++++++++++++----- script/move-tables/reset | 10 +- script/move-tables/setup | 2 +- 6 files changed, 231 insertions(+), 48 deletions(-) create mode 100644 localtests/move-tables/three/create.sql create mode 100644 localtests/move-tables/three/tables.txt diff --git a/localtests/move-tables/three/create.sql b/localtests/move-tables/three/create.sql new file mode 100644 index 000000000..349b8a27c --- /dev/null +++ b/localtests/move-tables/three/create.sql @@ -0,0 +1,92 @@ +-- Three tables with distinct schemas, primary-key types, and row counts. This +-- exercises the multi-table move-tables path (§2.1-2.4) at its widest: per-table +-- runtime state, per-table query builders, interleaved row copy where the tables +-- finish at different times, and a single atomic multi-table RENAME at cutover. +-- +-- These three tables are the canonical superset used by the manual harness +-- (script/move-tables/setup, reset, insert-source-primary-loop). The `single` +-- and `multi` localtest fixtures move subsets of them. 
+ +drop table if exists gh_ost_test; +create table gh_ost_test ( + id bigint(20) NOT NULL AUTO_INCREMENT, + column1 int(11) NOT NULL, + column2 smallint(5) unsigned NOT NULL, + column3 mediumint(8) unsigned NOT NULL, + column4 tinyint(3) unsigned NOT NULL, + column5 int(11) NOT NULL, + column6 int(11) NOT NULL, + PRIMARY KEY (id), + KEY c12_ix (column1, column2) +) auto_increment=1; + +insert into gh_ost_test values + (NULL, 1001, 100, 500000, 10, 1700000001, 1700000002), + (NULL, 1002, 200, 600000, 20, 1700000003, 1700000004), + (NULL, 1003, 300, 700000, 30, 1700000005, 1700000006), + (NULL, 1004, 400, 800000, 40, 1700000007, 1700000008), + (NULL, 1005, 500, 900000, 50, 1700000009, 1700000010), + (NULL, 1006, 600, 1000000, 60, 1700000011, 1700000012), + (NULL, 1007, 700, 1100000, 70, 1700000013, 1700000014), + (NULL, 1008, 800, 1200000, 80, 1700000015, 1700000016), + (NULL, 1009, 900, 1300000, 90, 1700000017, 1700000018), + (NULL, 1010, 1000, 1400000, 100, 1700000019, 1700000020), + (NULL, 1011, 1100, 1500000, 110, 1700000021, 1700000022), + (NULL, 1012, 1200, 1600000, 120, 1700000023, 1700000024), + (NULL, 1013, 1300, 1700000, 130, 1700000025, 1700000026), + (NULL, 1014, 1400, 1800000, 140, 1700000027, 1700000028), + (NULL, 1015, 1500, 1900000, 150, 1700000029, 1700000030), + (NULL, 1016, 1600, 2000000, 160, 1700000031, 1700000032), + (NULL, 1017, 1700, 2100000, 170, 1700000033, 1700000034), + (NULL, 1018, 1800, 2200000, 180, 1700000035, 1700000036), + (NULL, 1019, 1900, 2300000, 190, 1700000037, 1700000038), + (NULL, 1020, 2000, 2400000, 200, 1700000039, 1700000040), + (NULL, 1021, 2100, 2500000, 210, 1700000041, 1700000042), + (NULL, 1022, 2200, 2600000, 220, 1700000043, 1700000044), + (NULL, 1023, 2300, 2700000, 230, 1700000045, 1700000046), + (NULL, 1024, 2400, 2800000, 240, 1700000047, 1700000048), + (NULL, 1025, 2500, 2900000, 250, 1700000049, 1700000050); + +drop table if exists gh_ost_test_other; +create table gh_ost_test_other ( + uid int(11) NOT NULL, + 
name varchar(64) NOT NULL, + amount decimal(10,2) NOT NULL, + created_at datetime NOT NULL, + PRIMARY KEY (uid), + UNIQUE KEY name_uq (name) +); + +insert into gh_ost_test_other values + (1, 'alpha', 10.50, '2024-01-01 10:00:00'), + (2, 'bravo', 20.75, '2024-01-02 11:00:00'), + (3, 'charlie', 30.00, '2024-01-03 12:00:00'), + (4, 'delta', 40.25, '2024-01-04 13:00:00'), + (5, 'echo', 50.50, '2024-01-05 14:00:00'), + (6, 'foxtrot', 60.75, '2024-01-06 15:00:00'), + (7, 'golf', 70.00, '2024-01-07 16:00:00'), + (8, 'hotel', 80.25, '2024-01-08 17:00:00'), + (9, 'india', 90.50, '2024-01-09 18:00:00'), + (10, 'juliet', 100.75, '2024-01-10 19:00:00'), + (11, 'kilo', 110.00, '2024-01-11 20:00:00'), + (12, 'lima', 120.25, '2024-01-12 21:00:00'); + +drop table if exists gh_ost_test_third; +create table gh_ost_test_third ( + code varchar(32) NOT NULL, + label varchar(128) NOT NULL, + score double NOT NULL, + updated_at timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (code), + KEY score_ix (score) +); + +insert into gh_ost_test_third (code, label, score) values + ('code_1', 'label_1', 1.5), + ('code_2', 'label_2', 2.5), + ('code_3', 'label_3', 3.5), + ('code_4', 'label_4', 4.5), + ('code_5', 'label_5', 5.5), + ('code_6', 'label_6', 6.5), + ('code_7', 'label_7', 7.5), + ('code_8', 'label_8', 8.5); diff --git a/localtests/move-tables/three/tables.txt b/localtests/move-tables/three/tables.txt new file mode 100644 index 000000000..72f7ba8f6 --- /dev/null +++ b/localtests/move-tables/three/tables.txt @@ -0,0 +1,3 @@ +gh_ost_test +gh_ost_test_other +gh_ost_test_third diff --git a/script/move-tables/README.md b/script/move-tables/README.md index 3d3b15881..66e74d3a7 100644 --- a/script/move-tables/README.md +++ b/script/move-tables/README.md @@ -1,15 +1,18 @@ ### Setup -Setup the multi-cluster topology and seed the data. This seeds two tables on the -source — `gh_ost_test` and `gh_ost_test_other` (see -`localtests/move-tables/multi/create.sql`) — into the `test` database. 
+Setup the multi-cluster topology and seed the data. This always seeds the same +canonical **three** tables on the source — `gh_ost_test`, `gh_ost_test_other`, +and `gh_ost_test_third` (see `localtests/move-tables/three/create.sql`) — into +the `test` database. You then choose how many of them to move via `--move-tables`, +so `setup`/`reset`/`teardown` behave identically regardless of which scenario you +run. ```bash script/move-tables/setup ``` Verify data is present in the source cluster. ```bash -script/move-tables/mysql-source-primary -D test -e "SELECT * FROM gh_ost_test; SELECT * FROM gh_ost_test_other;" +script/move-tables/mysql-source-primary -D test -e "SELECT * FROM gh_ost_test; SELECT * FROM gh_ost_test_other; SELECT * FROM gh_ost_test_third;" ``` Verify the empty database is present in the target cluster. @@ -24,22 +27,31 @@ Checkout your branch of `github/gh-ost` and build the binaries: script/build --cli ``` -Run gh-ost to move both tables in a single atomic cutover: +Run gh-ost to move tables. 
Pick **one**, **two**, or **three** tables by changing +the `--move-tables` list — everything else stays the same: ```bash -./script/build --cli; ./bin/gh-ost --move-tables=gh_ost_test,gh_ost_test_other --host=localhost --port=3308 --user root --password opensesame --database=test --target-host=localhost --target-port=3309 --target-user root --target-password opensesame --target-database=test --postpone-cut-over-flag-file=/tmp/ghost-move-tables.postpone.flag --execute --verbose --checkpoint --checkpoint-seconds 10 --initially-drop-socket-file +# one table +./bin/gh-ost --move-tables=gh_ost_test --host=localhost --port=3308 --user root --password opensesame --database=test --target-host=localhost --target-port=3309 --target-user root --target-password opensesame --target-database=test --postpone-cut-over-flag-file=/tmp/ghost-move-tables.postpone.flag --execute --verbose --checkpoint --checkpoint-seconds 10 --initially-drop-socket-file + +# two tables +./bin/gh-ost --move-tables=gh_ost_test,gh_ost_test_other ... (same flags) + +# three tables +./bin/gh-ost --move-tables=gh_ost_test,gh_ost_test_other,gh_ost_test_third ... (same flags) ``` -You'll see per-table row-copy progress in the status output, with both tables -advancing concurrently. +You'll see per-table row-copy progress in the status output, with all moved +tables advancing concurrently. -Start continuous inserts against the source. This writes to both tables. +Start continuous inserts against the source. No arguments required: it detects +which of the three fixtures exist and writes to all of them. ```bash script/move-tables/insert-source-primary-loop ``` Check the target - it should have the initial data from the source and should be receiving the new data. ```bash -script/move-tables/mysql-target-primary -D test -e "SELECT * FROM gh_ost_test; SELECT * FROM gh_ost_test_other;" +script/move-tables/mysql-target-primary -D test -e "SELECT * FROM gh_ost_test;" ``` Remove the cutover flag file. 
@@ -47,23 +59,25 @@ Remove the cutover flag file. rm /tmp/ghost-move-tables.postpone.flag ``` -You'll see the continuous inserts will stop because of the table rename. Both -tables are renamed together in a single atomic `RENAME TABLE`. +The continuous inserts stop because the moved tables are renamed. When you move +multiple tables, they are all renamed together in a single atomic `RENAME TABLE`. -Check the source - both tables have been renamed to their `_..._del` rollback handles. +Check the source - each moved table has been renamed to its `_
_del` +rollback handle (only the tables you moved are renamed): ```bash -script/move-tables/mysql-source-primary -D test -e "SELECT * FROM _gh_ost_test_del; SELECT * FROM _gh_ost_test_other_del;" +script/move-tables/mysql-source-primary -D test -e "SELECT * FROM _gh_ost_test_del;" ``` Check the target has the same set of data. ```bash -script/move-tables/mysql-target-primary -D test -e "SELECT * FROM gh_ost_test; SELECT * FROM gh_ost_test_other;" +script/move-tables/mysql-target-primary -D test -e "SELECT * FROM gh_ost_test;" ``` ### Resetting between runs -Drop and re-seed both source tables (and clean up the target tables + checkpoint -table) so you can run again without a full teardown: +Drop and re-seed all three source tables (and clean up the moved target tables + +checkpoint table) so you can run again without a full teardown. It works the same +no matter how many tables you just moved: ```bash script/move-tables/reset ``` @@ -73,4 +87,19 @@ script/move-tables/reset Remove the docker containers: ```bash script/move-tables/teardown +``` + +### CI integration tests + +The same fixtures back the CI integration tests, run via +`localtests/move-tables-test.sh [filter]`. 
Each test directory under +`localtests/move-tables/` is self-contained (its own `create.sql` + `tables.txt`): + +- `single` — moves 1 table (`gh_ost_test`) +- `multi` — moves 2 tables (`gh_ost_test`, `gh_ost_test_other`) +- `three` — moves 3 tables (`gh_ost_test`, `gh_ost_test_other`, `gh_ost_test_third`) + +Run a single scenario by name, e.g.: +```bash +localtests/move-tables-test.sh three ``` \ No newline at end of file diff --git a/script/move-tables/insert-source-primary-loop b/script/move-tables/insert-source-primary-loop index 3b949fc9a..eebf43e73 100755 --- a/script/move-tables/insert-source-primary-loop +++ b/script/move-tables/insert-source-primary-loop @@ -1,13 +1,19 @@ #!/usr/bin/env bash -set -euo pipefail +set -uo pipefail -# Continuously insert new rows into BOTH move-tables fixtures (gh_ost_test and -# gh_ost_test_other) on the source primary, so a multi-table move-tables run sees -# live DML on every migrated table. +# Continuously insert new rows into whichever of the canonical move-tables +# fixtures currently exist on the source primary (gh_ost_test, gh_ost_test_other, +# gh_ost_test_third), so a move-tables run sees live DML on every migrated table. +# +# No arguments are required. Existing tables are detected at startup; the loop +# only writes to the ones that are present, so it works for 1-, 2-, or 3-table +# runs without changes. When a cutover renames the tables away, the next insert +# fails and the loop stops cleanly (this is expected). +# # Usage: # script/move-tables/insert-source-primary-loop [start_id] [sleep_seconds] [rows_per_batch] # Example: -# script/move-tables/insert-source-primary-loop 100000 0.2 1 +# script/move-tables/insert-source-primary-loop # Fast example: # script/move-tables/insert-source-primary-loop 100000 0 50 @@ -18,41 +24,92 @@ i="$start_i" # Match the database created/seeded by script/move-tables/setup. 
DATABASE="${DATABASE:-test}" +GH_OST_ROOT="$(git rev-parse --show-toplevel)" +SCRIPT_PATH="${GH_OST_ROOT}/script/move-tables" + +# The canonical superset of move-tables fixtures. +ALL_TABLES=(gh_ost_test gh_ost_test_other gh_ost_test_third) + +table_exists() { + local table="$1" + local count + count="$(${SCRIPT_PATH}/mysql-source-primary -N -s -D "$DATABASE" -e \ + "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema='${DATABASE}' AND table_name='${table}'" 2>/dev/null || echo 0)" + [[ "$count" == "1" ]] +} + +# Detect which fixtures exist so we only write to tables that are present. +active_tables=() +for t in "${ALL_TABLES[@]}"; do + if table_exists "$t"; then + active_tables+=("$t") + fi +done + +if [[ ${#active_tables[@]} -eq 0 ]]; then + echo "No move-tables fixtures found in database '${DATABASE}'. Did you run script/move-tables/setup?" + exit 1 +fi + echo "Starting continuous inserts on source primary. Press Ctrl+C to stop." echo "start_id=$start_i sleep_seconds=$delay rows_per_batch=$rows_per_batch database=$DATABASE" +echo "inserting into: ${active_tables[*]}" trap 'echo; echo "Stopped."; exit 0' INT TERM while true; do ts="$(date +%s)" - test_values="" - other_values="" + declare -A values=() batch_start="$i" - for ((n=0; n_ghk. PRIMARY_TABLE="${TABLES[0]}" @@ -23,7 +25,7 @@ source_drop="${source_drop%, }" ${SCRIPT_PATH}/mysql-source-primary -D "${DATABASE_NAME}" -e "DROP TABLE IF EXISTS ${source_drop};" # Recreate and seed source table data, same fixture as setup uses. -${SCRIPT_PATH}/mysql-source-primary -D "${DATABASE_NAME}" < "${GH_OST_ROOT}/localtests/move-tables/multi/create.sql" +${SCRIPT_PATH}/mysql-source-primary -D "${DATABASE_NAME}" < "${GH_OST_ROOT}/localtests/move-tables/three/create.sql" # Drop the moved tables and the checkpoint table on the target cluster. 
target_drop="" diff --git a/script/move-tables/setup b/script/move-tables/setup index 5dc52845c..125900eb8 100755 --- a/script/move-tables/setup +++ b/script/move-tables/setup @@ -155,7 +155,7 @@ setup() { echo "OK" echo -n "Seeding data in source cluster..." - exec-mysql-source-primary -D $DATABASE_NAME < "$GH_OST_ROOT/localtests/move-tables/multi/create.sql" + exec-mysql-source-primary -D $DATABASE_NAME < "$GH_OST_ROOT/localtests/move-tables/three/create.sql" echo "OK" } From d4ab1bf0e2e021777d23a047f022a73c911c0b51 Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Mon, 22 Jun 2026 20:39:39 +0000 Subject: [PATCH 05/25] Fix TableNames[0] refs and change checkpoint to row per table. Add hash of ordered table names for consistent identifier --- go/base/context.go | 77 ++- go/cmd/gh-ost/main.go | 9 +- go/logic/applier.go | 465 ++++++++++++------ go/logic/checkpoint.go | 60 +++ go/logic/inspect.go | 113 +++-- go/logic/migrator.go | 219 +++++---- go/logic/migrator_move_tables_cleanup_test.go | 2 +- 7 files changed, 658 insertions(+), 287 deletions(-) diff --git a/go/base/context.go b/go/base/context.go index e83d11704..1b1b284d0 100644 --- a/go/base/context.go +++ b/go/base/context.go @@ -7,10 +7,13 @@ package base import ( "context" + "crypto/sha256" + "encoding/hex" "fmt" "math" "os" "regexp" + "sort" "strings" "sync" "sync/atomic" @@ -175,6 +178,39 @@ func (mt *MoveTable) RecordLastIterationRange() { } } +// GetLastIterationRange returns clones of the last successfully-copied chunk +// range for checkpointing. Either value may be nil if no chunk has completed. 
+func (mt *MoveTable) GetLastIterationRange() (minValues, maxValues *sql.ColumnValues) { + mt.rangeMutex.Lock() + defer mt.rangeMutex.Unlock() + if mt.LastIterationRangeMinValues != nil { + minValues = mt.LastIterationRangeMinValues.Clone() + } + if mt.LastIterationRangeMaxValues != nil { + maxValues = mt.LastIterationRangeMaxValues.Clone() + } + return minValues, maxValues +} + +// GetRowsCopied returns the number of rows copied for this table. +func (mt *MoveTable) GetRowsCopied() int64 { + return atomic.LoadInt64(&mt.RowsCopied) +} + +// RestoreFromCheckpoint rehydrates this table's row-copy state from a resumed +// checkpoint: the next chunk starts at the last-copied range, and the iteration +// counter and rows-copied total are restored. +func (mt *MoveTable) RestoreFromCheckpoint(rangeMin, rangeMax *sql.ColumnValues, iteration, rowsCopied int64) { + mt.rangeMutex.Lock() + mt.MigrationIterationRangeMinValues = rangeMin + mt.MigrationIterationRangeMaxValues = rangeMax + mt.LastIterationRangeMinValues = rangeMin + mt.LastIterationRangeMaxValues = rangeMax + mt.rangeMutex.Unlock() + atomic.StoreInt64(&mt.Iteration, iteration) + atomic.StoreInt64(&mt.RowsCopied, rowsCopied) +} + // MigrationContext has the general, global state of migration. It is used by // all components throughout the migration process. type MigrationContext struct { @@ -529,12 +565,10 @@ func (mctx *MigrationContext) GetGhostTableName() string { } } -// GetTargetTableName generates the name of the target table, based on original table name and -// the migration context (i.e. move-tables mode). +// GetTargetTableName generates the name of the target table. In move-tables mode +// each table keeps its own name on the target, so there is no single target +// table name; per-table code uses MoveTable.TargetTableName instead. 
func (mctx *MigrationContext) GetTargetTableName() string { - if mctx.IsMoveTablesMode() { - return mctx.MoveTables.TableNames[0] - } return mctx.GetGhostTableName() } @@ -601,9 +635,13 @@ func (mctx *MigrationContext) GetChangelogTableName() string { func (mctx *MigrationContext) GetCheckpointTableName() string { if mctx.ForceTmpTableName != "" { return getSafeTableName(mctx.ForceTmpTableName, "ghk") - } else { - return getSafeTableName(mctx.OriginalTableName, "ghk") } + if mctx.IsMoveTablesMode() { + // One checkpoint table per run, named from the set-derived run token so it + // does not depend on any single migrated table and is stable across resume. + return getSafeTableName("gho_"+mctx.MoveTablesRunToken(), "ghk") + } + return getSafeTableName(mctx.OriginalTableName, "ghk") } // GetVoluntaryLockName returns a name of a voluntary lock to be used throughout @@ -1350,15 +1388,26 @@ func (mctx *MigrationContext) OrderedMoveTables() []*MoveTable { return tables } -// MoveTablePrimaryName returns the first table in --move-tables order. Several -// run-wide artifacts (checkpoint table name, changelog/old-table naming) are -// derived from a single "primary" table to keep one set of housekeeping objects -// per run; the primary is simply the first listed table. -func (mctx *MigrationContext) MoveTablePrimaryName() string { - if len(mctx.MoveTables.TableNames) == 0 { +// MoveTablesRunToken returns a short, stable identifier for a move-tables run, +// derived from the (sorted) set of migrated table names. It is: +// - deterministic: the same table set always yields the same token, so a +// resumed run finds the same run-wide artifacts (e.g. the checkpoint table). +// - order-independent: --move-tables=a,b and --move-tables=b,a match. +// - fixed-length: independent of how many tables are moved (so it never blows +// past identifier length limits the way a concatenation of names would). 
+// +// It is used to name run-wide singular artifacts (checkpoint table, applier +// advisory lock, serve socket) so they never depend on any single migrated +// table name. Returns "" outside move-tables mode. +func (mctx *MigrationContext) MoveTablesRunToken() string { + if !mctx.IsMoveTablesMode() { return "" } - return mctx.MoveTables.TableNames[0] + names := append([]string(nil), mctx.MoveTables.TableNames...) + sort.Strings(names) + // NUL separator: table names cannot contain it, so the join is unambiguous. + sum := sha256.Sum256([]byte(strings.Join(names, "\x00"))) + return hex.EncodeToString(sum[:6]) // 12 hex chars / 48 bits } // AllMoveTablesRowCopyComplete reports whether every migrated table has finished diff --git a/go/cmd/gh-ost/main.go b/go/cmd/gh-ost/main.go index 2e054f520..6af2b7f69 100644 --- a/go/cmd/gh-ost/main.go +++ b/go/cmd/gh-ost/main.go @@ -435,7 +435,14 @@ func main() { migrationContext.Log.Fatale(err) } if migrationContext.ServeSocketFile == "" { - migrationContext.ServeSocketFile = fmt.Sprintf("/tmp/gh-ost.%s.%s.sock", migrationContext.DatabaseName, migrationContext.OriginalTableName) + if migrationContext.IsMoveTablesMode() { + // OriginalTableName is not set until MoveTables() runs and there is no + // single "primary" table, so name the socket from the set-derived run + // token (avoids an empty path component like /tmp/gh-ost.test..sock). 
+ migrationContext.ServeSocketFile = fmt.Sprintf("/tmp/gh-ost.%s.movetables-%s.sock", migrationContext.DatabaseName, migrationContext.MoveTablesRunToken()) + } else { + migrationContext.ServeSocketFile = fmt.Sprintf("/tmp/gh-ost.%s.%s.sock", migrationContext.DatabaseName, migrationContext.OriginalTableName) + } } if *askPass { fmt.Println("Password:") diff --git a/go/logic/applier.go b/go/logic/applier.go index ec1413e39..2f48bb5a2 100644 --- a/go/logic/applier.go +++ b/go/logic/applier.go @@ -92,11 +92,8 @@ type Applier struct { migrationLockStop chan struct{} migrationLockDone chan struct{} - moveTablesTargetDB *gosql.DB - moveTablesConnectionConfig *mysql.ConnectionConfig - moveTablesCopySelectFirstQueryBuilder *sql.MoveTableCopySelectQueryBuilder - moveTablesCopySelectNextQueryBuilder *sql.MoveTableCopySelectQueryBuilder - moveTablesCopyInsertQueryBuilder *sql.MoveTableCopyInsertQueryBuilder + moveTablesTargetDB *gosql.DB + moveTablesConnectionConfig *mysql.ConnectionConfig // moveTablesBuilders holds the per-table query builders, keyed by source // table name. In move-tables mode there is one entry per migrated table; DML @@ -164,9 +161,38 @@ func (apl *Applier) checkpointRangeColumnNames() (minColumnNames []string, maxCo // hence the optional table name prefix. Metacharacters in table/index names are escaped to avoid // regex syntax errors. func (apl *Applier) compileMigrationKeyWarningRegex() (*regexp.Regexp, error) { + if apl.migrationContext.IsMoveTablesMode() { + return apl.compileMoveTablesKeyWarningRegex() + } return compileKeyWarningRegex(apl.migrationContext.GetTargetTableName(), apl.migrationContext.UniqueKey.NameInGhostTable) } +// compileMoveTablesKeyWarningRegex builds one duplicate-key warning regex +// covering every migrated table's unique key. 
A duplicate on any migrated +// table's key is an expected artifact of binlog replay after bulk copy, so a +// combined alternation is sufficient and avoids singling out a representative +// table (a DML batch may interleave statements for several tables). +func (apl *Applier) compileMoveTablesKeyWarningRegex() (*regexp.Regexp, error) { + var alternatives []string + for _, mt := range apl.migrationContext.OrderedMoveTables() { + if mt.UniqueKey == nil { + continue + } + escapedTable := regexp.QuoteMeta(mt.TargetTableName) + escapedKey := regexp.QuoteMeta(mt.UniqueKey.NameInGhostTable) + alternatives = append(alternatives, fmt.Sprintf(`(%s\.)?%s`, escapedTable, escapedKey)) + } + if len(alternatives) == 0 { + return regexp.Compile(`$.^`) // matches nothing + } + pattern := fmt.Sprintf(`for key '(%s)'`, strings.Join(alternatives, "|")) + migrationKeyRegex, err := regexp.Compile(pattern) + if err != nil { + return nil, fmt.Errorf("failed to compile move-tables key pattern: %w", err) + } + return migrationKeyRegex, nil +} + // compileKeyWarningRegex compiles the duplicate-key warning regex for a specific // target table + unique key name. In move-tables mode each table has its own // unique key, so the duplicate-key filter must be compiled per table. @@ -251,7 +277,15 @@ func buildMigrationLockName(db, table string) string { // preventing two gh-ost processes from migrating the same table concurrently // on the same MySQL server. func (apl *Applier) AcquireMigrationLock(ctx context.Context) error { - lockName := buildMigrationLockName(apl.migrationContext.GetTargetDatabaseName(), apl.originalTableName()) + // One advisory lock per run. In move-tables mode it is keyed on the + // set-derived run token (not any single table) so two processes moving the + // same set of tables collide, while a single-table run keeps its table-keyed + // lock name. + lockTable := apl.originalTableName() + if apl.migrationContext.IsMoveTablesMode() { + lockTable = "movetables." 
+ apl.migrationContext.MoveTablesRunToken() + } + lockName := buildMigrationLockName(apl.migrationContext.GetTargetDatabaseName(), lockTable) // Use a dedicated *sql.DB so the pinned connection does not consume a // slot in apl.db's small pool (mysql.MaxDBPoolConnections). @@ -381,124 +415,118 @@ func (apl *Applier) releaseMigrationLock() { func (apl *Applier) prepareQueries() (err error) { targetDatabaseName := apl.migrationContext.GetTargetDatabaseName() - targetTableName := apl.migrationContext.GetTargetTableName() - if apl.dmlDeleteQueryBuilder, err = sql.NewDMLDeleteQueryBuilder( - targetDatabaseName, - targetTableName, - apl.migrationContext.OriginalTableColumns, - &apl.migrationContext.UniqueKey.Columns, - ); err != nil { - return err - } - if apl.dmlInsertQueryBuilder, err = sql.NewDMLInsertQueryBuilder( - targetDatabaseName, - targetTableName, - apl.migrationContext.OriginalTableColumns, - apl.migrationContext.SharedColumns, - apl.migrationContext.MappedSharedColumns, - ); err != nil { - return err - } - if apl.dmlUpdateQueryBuilder, err = sql.NewDMLUpdateQueryBuilder( - targetDatabaseName, - targetTableName, - apl.migrationContext.OriginalTableColumns, - apl.migrationContext.SharedColumns, - apl.migrationContext.MappedSharedColumns, - &apl.migrationContext.UniqueKey.Columns, - ); err != nil { - return err - } - if apl.migrationContext.Checkpoint { - if apl.checkpointInsertQueryBuilder, err = sql.NewCheckpointQueryBuilder( - apl.checkpointDatabaseName(), - apl.migrationContext.GetCheckpointTableName(), + if !apl.migrationContext.IsMoveTablesMode() { + targetTableName := apl.migrationContext.GetTargetTableName() + if apl.dmlDeleteQueryBuilder, err = sql.NewDMLDeleteQueryBuilder( + targetDatabaseName, + targetTableName, + apl.migrationContext.OriginalTableColumns, &apl.migrationContext.UniqueKey.Columns, - apl.migrationContext.IsMoveTablesMode(), ); err != nil { return err } - } - if apl.migrationContext.IsMoveTablesMode() { - // Build one set of query 
builders per migrated table. DML is routed to the - // right set at apply time by source table name (§2.1). The top-level DML - // builders above remain bound to the primary table for backward-compat with - // the single-table paths (checkpoint, status), but binlog DML application - // goes through moveTablesBuilders. - apl.moveTablesBuilders = make(map[string]*moveTableBuilders, len(apl.migrationContext.MoveTables.TableNames)) - for _, mt := range apl.migrationContext.OrderedMoveTables() { - if mt.UniqueKey == nil { - return fmt.Errorf("move-table %s.%s has no unique key; cannot prepare queries", mt.SourceDatabaseName, mt.SourceTableName) - } - b := &moveTableBuilders{ - uniqueKey: mt.UniqueKey, - originalTableColumns: mt.OriginalTableColumns, - } - if b.dmlDeleteQueryBuilder, err = sql.NewDMLDeleteQueryBuilder( - mt.TargetDatabaseName, - mt.TargetTableName, - mt.OriginalTableColumns, - &mt.UniqueKey.Columns, - ); err != nil { - return err - } - if b.dmlInsertQueryBuilder, err = sql.NewDMLInsertQueryBuilder( - mt.TargetDatabaseName, - mt.TargetTableName, - mt.OriginalTableColumns, - mt.SharedColumns, - mt.MappedSharedColumns, - ); err != nil { - return err - } - if b.dmlUpdateQueryBuilder, err = sql.NewDMLUpdateQueryBuilder( - mt.TargetDatabaseName, - mt.TargetTableName, - mt.OriginalTableColumns, - mt.SharedColumns, - mt.MappedSharedColumns, - &mt.UniqueKey.Columns, - ); err != nil { - return err - } - if b.copySelectFirstQueryBuilder, err = sql.NewMoveTableCopySelectQueryBuilder( - mt.SourceDatabaseName, - mt.SourceTableName, - mt.OriginalTableColumns, - mt.UniqueKey.Name, - &mt.UniqueKey.Columns, - true, // <-- include start range values for first select query - ); err != nil { - return err - } - if b.copySelectNextQueryBuilder, err = sql.NewMoveTableCopySelectQueryBuilder( - mt.SourceDatabaseName, - mt.SourceTableName, - mt.OriginalTableColumns, - mt.UniqueKey.Name, - &mt.UniqueKey.Columns, + if apl.dmlInsertQueryBuilder, err = sql.NewDMLInsertQueryBuilder( + 
targetDatabaseName, + targetTableName, + apl.migrationContext.OriginalTableColumns, + apl.migrationContext.SharedColumns, + apl.migrationContext.MappedSharedColumns, + ); err != nil { + return err + } + if apl.dmlUpdateQueryBuilder, err = sql.NewDMLUpdateQueryBuilder( + targetDatabaseName, + targetTableName, + apl.migrationContext.OriginalTableColumns, + apl.migrationContext.SharedColumns, + apl.migrationContext.MappedSharedColumns, + &apl.migrationContext.UniqueKey.Columns, + ); err != nil { + return err + } + if apl.migrationContext.Checkpoint { + if apl.checkpointInsertQueryBuilder, err = sql.NewCheckpointQueryBuilder( + apl.checkpointDatabaseName(), + apl.migrationContext.GetCheckpointTableName(), + &apl.migrationContext.UniqueKey.Columns, false, ); err != nil { return err } - if b.copyInsertQueryBuilder, err = sql.NewMoveTableCopyInsertQueryBuilder( - mt.TargetDatabaseName, - mt.TargetTableName, - mt.OriginalTableColumns, - ); err != nil { - return err - } - apl.moveTablesBuilders[mt.SourceTableName] = b } - // Keep the legacy single-table builders pointing at the primary table for - // any single-table code path that still reads them directly. - primary := apl.moveTablesBuilders[apl.migrationContext.MoveTablePrimaryName()] - if primary != nil { - apl.moveTablesCopySelectFirstQueryBuilder = primary.copySelectFirstQueryBuilder - apl.moveTablesCopySelectNextQueryBuilder = primary.copySelectNextQueryBuilder - apl.moveTablesCopyInsertQueryBuilder = primary.copyInsertQueryBuilder + return nil + } + + // Move-tables mode: build one set of query builders per migrated table. DML is + // routed to the right set at apply time by source table name (§2.1). There is + // no representative/primary table: every table is handled identically through + // its own builders, and the checkpoint uses a table-agnostic schema written by + // WriteMoveTableCheckpoints (no checkpointInsertQueryBuilder). 
+ apl.moveTablesBuilders = make(map[string]*moveTableBuilders, len(apl.migrationContext.MoveTables.TableNames)) + for _, mt := range apl.migrationContext.OrderedMoveTables() { + if mt.UniqueKey == nil { + return fmt.Errorf("move-table %s.%s has no unique key; cannot prepare queries", mt.SourceDatabaseName, mt.SourceTableName) + } + b := &moveTableBuilders{ + uniqueKey: mt.UniqueKey, + originalTableColumns: mt.OriginalTableColumns, + } + if b.dmlDeleteQueryBuilder, err = sql.NewDMLDeleteQueryBuilder( + mt.TargetDatabaseName, + mt.TargetTableName, + mt.OriginalTableColumns, + &mt.UniqueKey.Columns, + ); err != nil { + return err + } + if b.dmlInsertQueryBuilder, err = sql.NewDMLInsertQueryBuilder( + mt.TargetDatabaseName, + mt.TargetTableName, + mt.OriginalTableColumns, + mt.SharedColumns, + mt.MappedSharedColumns, + ); err != nil { + return err + } + if b.dmlUpdateQueryBuilder, err = sql.NewDMLUpdateQueryBuilder( + mt.TargetDatabaseName, + mt.TargetTableName, + mt.OriginalTableColumns, + mt.SharedColumns, + mt.MappedSharedColumns, + &mt.UniqueKey.Columns, + ); err != nil { + return err + } + if b.copySelectFirstQueryBuilder, err = sql.NewMoveTableCopySelectQueryBuilder( + mt.SourceDatabaseName, + mt.SourceTableName, + mt.OriginalTableColumns, + mt.UniqueKey.Name, + &mt.UniqueKey.Columns, + true, // <-- include start range values for first select query + ); err != nil { + return err } + if b.copySelectNextQueryBuilder, err = sql.NewMoveTableCopySelectQueryBuilder( + mt.SourceDatabaseName, + mt.SourceTableName, + mt.OriginalTableColumns, + mt.UniqueKey.Name, + &mt.UniqueKey.Columns, + false, + ); err != nil { + return err + } + if b.copyInsertQueryBuilder, err = sql.NewMoveTableCopyInsertQueryBuilder( + mt.TargetDatabaseName, + mt.TargetTableName, + mt.OriginalTableColumns, + ); err != nil { + return err + } + apl.moveTablesBuilders[mt.SourceTableName] = b } return nil } @@ -571,9 +599,6 @@ func (apl *Applier) tableExists(tableName string) (tableFound bool) { } func 
(apl *Applier) originalTableName() string { - if apl.migrationContext.IsMoveTablesMode() { - return apl.migrationContext.MoveTables.TableNames[0] - } return apl.migrationContext.OriginalTableName } @@ -887,6 +912,9 @@ func (apl *Applier) CreateCheckpointTable() error { if err := apl.DropCheckpointTable(); err != nil { return err } + if apl.migrationContext.IsMoveTablesMode() { + return apl.createMoveTablesCheckpointTable() + } colDefs := []string{ "`gh_ost_chk_id` bigint auto_increment primary key", "`gh_ost_chk_timestamp` bigint", @@ -896,12 +924,6 @@ func (apl *Applier) CreateCheckpointTable() error { "`gh_ost_dml_applied` bigint", "`gh_ost_is_cutover` tinyint(1) DEFAULT '0'", } - if apl.migrationContext.IsMoveTablesMode() { - colDefs = append(colDefs, - "`gh_ost_move_tables_cutover_started` tinyint(1) DEFAULT '0'", - "`gh_ost_move_tables_drain_gtid` text charset ascii", - ) - } for _, col := range apl.migrationContext.UniqueKey.Columns.Columns() { if col.MySQLType == "" { return fmt.Errorf("column %s has no type information. applyColumnTypes must be called", sql.EscapeName(col.Name)) @@ -929,6 +951,39 @@ func (apl *Applier) CreateCheckpointTable() error { return nil } +// createMoveTablesCheckpointTable creates the move-tables checkpoint table. It +// holds one row per migrated table, with the per-table iteration range stored +// in a table-agnostic, serialized text form (gh_ost_chk_range_min/max) so a +// single checkpoint table can serve tables with heterogeneous unique keys. The +// run-wide state (coords, totals, cutover markers, drain GTID) is replicated on +// every row, so the latest row carries the freshest run-wide state. 
+func (apl *Applier) createMoveTablesCheckpointTable() error { + colDefs := []string{ + "`gh_ost_chk_id` bigint auto_increment primary key", + "`gh_ost_chk_timestamp` bigint", + "`gh_ost_chk_table_name` varbinary(320)", + "`gh_ost_chk_coords` text charset ascii", + "`gh_ost_chk_iteration` bigint", + "`gh_ost_rows_copied` bigint", + "`gh_ost_dml_applied` bigint", + "`gh_ost_is_cutover` tinyint(1) DEFAULT '0'", + "`gh_ost_move_tables_cutover_started` tinyint(1) DEFAULT '0'", + "`gh_ost_move_tables_drain_gtid` text charset ascii", + "`gh_ost_chk_range_min` text charset ascii", + "`gh_ost_chk_range_max` text charset ascii", + } + query := fmt.Sprintf("create /* gh-ost */ table %s.%s (\n %s\n)", + sql.EscapeName(apl.checkpointDatabaseName()), + sql.EscapeName(apl.migrationContext.GetCheckpointTableName()), + strings.Join(colDefs, ",\n "), + ) + apl.migrationContext.Log.Infof("Created move-tables checkpoint table") + if _, err := sqlutils.ExecNoPrepare(apl.checkpointDB(), query); err != nil { + return err + } + return nil +} + // dropTable drops a given table on the applied host func (apl *Applier) dropTable(tableName string) error { query := fmt.Sprintf(`drop /* gh-ost */ table if exists %s.%s`, @@ -1094,7 +1149,8 @@ func (apl *Applier) WriteChangelogState(value string) (string, error) { return apl.WriteAndLogChangelog("state", value) } -// WriteCheckpoints writes a checkpoint to the _ghk table. +// WriteCheckpoint writes a standard-mode checkpoint row to the _ghk table. In +// move-tables mode use WriteMoveTableCheckpoints instead. func (apl *Applier) WriteCheckpoint(chk *Checkpoint) (int64, error) { var insertId int64 uniqueKeyArgs := sqlutils.Args(chk.IterationRangeMin.AbstractValues()...) 
@@ -1104,9 +1160,6 @@ func (apl *Applier) WriteCheckpoint(chk *Checkpoint) (int64, error) { return insertId, err } args := sqlutils.Args(chk.LastTrxCoords.String(), chk.Iteration, chk.RowsCopied, chk.DMLApplied, chk.IsCutover) - if apl.migrationContext.IsMoveTablesMode() { - args = append(args, chk.MoveTablesCutOverStarted, apl.checkpointDrainGTIDString(chk)) - } args = append(args, uniqueKeyArgs...) res, err := apl.checkpointDB().Exec(query, args...) if err != nil { @@ -1115,6 +1168,135 @@ func (apl *Applier) WriteCheckpoint(chk *Checkpoint) (int64, error) { return res.LastInsertId() } +// moveTablesCheckpointColumns lists the columns of the move-tables checkpoint +// table, in insert order. Run-wide columns are replicated on every per-table row. +var moveTablesCheckpointColumns = []string{ + "gh_ost_chk_timestamp", + "gh_ost_chk_table_name", + "gh_ost_chk_coords", + "gh_ost_chk_iteration", + "gh_ost_rows_copied", + "gh_ost_dml_applied", + "gh_ost_is_cutover", + "gh_ost_move_tables_cutover_started", + "gh_ost_move_tables_drain_gtid", + "gh_ost_chk_range_min", + "gh_ost_chk_range_max", +} + +// WriteMoveTableCheckpoints writes one checkpoint row per migrated table. All +// rows of a single call share the run-wide state (coords, totals, cutover +// markers, drain GTID); each row carries its own table name, iteration, +// rows-copied, and serialized iteration range. The latest row therefore always +// reflects the freshest run-wide state. 
+func (apl *Applier) WriteMoveTableCheckpoints(rows []*Checkpoint) error { + if len(rows) == 0 { + return nil + } + escaped := make([]string, len(moveTablesCheckpointColumns)) + for i, c := range moveTablesCheckpointColumns { + escaped[i] = sql.EscapeName(c) + } + placeholders := "(" + strings.TrimSuffix(strings.Repeat("?, ", len(moveTablesCheckpointColumns)), ", ") + ")" + query := fmt.Sprintf("insert /* gh-ost */ into %s.%s (%s) values %s", + sql.EscapeName(apl.checkpointDatabaseName()), + sql.EscapeName(apl.migrationContext.GetCheckpointTableName()), + strings.Join(escaped, ", "), + placeholders, + ) + now := time.Now().Unix() + for _, chk := range rows { + coordStr := "" + if chk.LastTrxCoords != nil { + coordStr = chk.LastTrxCoords.String() + } + args := sqlutils.Args( + now, + chk.TableName, + coordStr, + chk.Iteration, + chk.RowsCopied, + chk.DMLApplied, + chk.IsCutover, + chk.MoveTablesCutOverStarted, + apl.checkpointDrainGTIDString(chk), + serializeRangeValues(chk.IterationRangeMin), + serializeRangeValues(chk.IterationRangeMax), + ) + if _, err := apl.checkpointDB().Exec(query, args...); err != nil { + return err + } + } + return nil +} + +// ReadMoveTableCheckpoints returns the latest checkpoint row per migrated table, +// keyed by table name. The per-table iteration range is deserialized using each +// table's unique-key arity (taken from its container), so the move-table +// containers must be populated before calling this. 
+func (apl *Applier) ReadMoveTableCheckpoints() (map[string]*Checkpoint, error) { + dbName := sql.EscapeName(apl.checkpointDatabaseName()) + tableName := sql.EscapeName(apl.migrationContext.GetCheckpointTableName()) + query := fmt.Sprintf(`select /* gh-ost */ c.gh_ost_chk_id, c.gh_ost_chk_timestamp, c.gh_ost_chk_table_name, c.gh_ost_chk_coords, c.gh_ost_chk_iteration, c.gh_ost_rows_copied, c.gh_ost_dml_applied, c.gh_ost_is_cutover, c.gh_ost_move_tables_cutover_started, c.gh_ost_move_tables_drain_gtid, c.gh_ost_chk_range_min, c.gh_ost_chk_range_max from %s.%s c inner join (select gh_ost_chk_table_name, max(gh_ost_chk_id) as max_id from %s.%s group by gh_ost_chk_table_name) latest on c.gh_ost_chk_table_name = latest.gh_ost_chk_table_name and c.gh_ost_chk_id = latest.max_id`, + dbName, tableName, dbName, tableName) + rows, err := apl.checkpointDB().Query(query) + if err != nil { + return nil, err + } + defer rows.Close() + result := make(map[string]*Checkpoint) + for rows.Next() { + chk := &Checkpoint{} + var tableNameBytes []byte + var coordStr, drainGTIDStr, rangeMinStr, rangeMaxStr string + var timestamp int64 + if err := rows.Scan(&chk.Id, &timestamp, &tableNameBytes, &coordStr, &chk.Iteration, &chk.RowsCopied, &chk.DMLApplied, &chk.IsCutover, &chk.MoveTablesCutOverStarted, &drainGTIDStr, &rangeMinStr, &rangeMaxStr); err != nil { + return nil, err + } + chk.TableName = string(tableNameBytes) + chk.Timestamp = time.Unix(timestamp, 0) + if coordStr != "" { + coords, err := apl.parseCheckpointCoordinates(coordStr) + if err != nil { + return nil, err + } + chk.LastTrxCoords = coords + } + if drainGTIDStr != "" { + drainGTID, err := mysql.NewGTIDBinlogCoordinates(drainGTIDStr) + if err != nil { + return nil, err + } + chk.MoveTablesCutOverDrainGTID = drainGTID + } + arity := 0 + if mt := apl.migrationContext.GetMoveTable(chk.TableName); mt != nil && mt.UniqueKey != nil { + arity = mt.UniqueKey.Columns.Len() + } + chk.IterationRangeMin = deserializeRangeValues(rangeMinStr, 
arity) + chk.IterationRangeMax = deserializeRangeValues(rangeMaxStr, arity) + result[chk.TableName] = chk + } + if err := rows.Err(); err != nil { + return nil, err + } + if len(result) == 0 { + return nil, ErrNoCheckpointFound + } + return result, nil +} + +// parseCheckpointCoordinates parses a stored coordinate string into the binlog +// coordinate family configured for this migration. +func (apl *Applier) parseCheckpointCoordinates(coordStr string) (mysql.BinlogCoordinates, error) { + if apl.migrationContext.UseGTIDs { + return mysql.NewGTIDBinlogCoordinates(coordStr) + } + return mysql.ParseFileBinlogCoordinates(coordStr) +} + +// ReadLastCheckpoint reads the most recent standard-mode checkpoint row. In +// move-tables mode use ReadMoveTableCheckpoints instead. func (apl *Applier) ReadLastCheckpoint() (*Checkpoint, error) { minColumnNames, maxColumnNames := apl.checkpointRangeColumnNames() selectColumns := []string{ @@ -1126,9 +1308,6 @@ func (apl *Applier) ReadLastCheckpoint() (*Checkpoint, error) { "gh_ost_dml_applied", "gh_ost_is_cutover", } - if apl.migrationContext.IsMoveTablesMode() { - selectColumns = append(selectColumns, "gh_ost_move_tables_cutover_started", "gh_ost_move_tables_drain_gtid") - } selectColumns = append(selectColumns, minColumnNames...) selectColumns = append(selectColumns, maxColumnNames...) @@ -1143,12 +1322,9 @@ func (apl *Applier) ReadLastCheckpoint() (*Checkpoint, error) { IterationRangeMax: sql.NewColumnValues(apl.migrationContext.UniqueKey.Columns.Len()), } - var coordStr, drainGTIDStr string + var coordStr string var timestamp int64 ptrs := []interface{}{&chk.Id, &timestamp, &coordStr, &chk.Iteration, &chk.RowsCopied, &chk.DMLApplied, &chk.IsCutover} - if apl.migrationContext.IsMoveTablesMode() { - ptrs = append(ptrs, &chk.MoveTablesCutOverStarted, &drainGTIDStr) - } ptrs = append(ptrs, chk.IterationRangeMin.ValuesPointers...) ptrs = append(ptrs, chk.IterationRangeMax.ValuesPointers...) err := row.Scan(ptrs...) 
@@ -1159,26 +1335,11 @@ func (apl *Applier) ReadLastCheckpoint() (*Checkpoint, error) { return nil, err } chk.Timestamp = time.Unix(timestamp, 0) - if apl.migrationContext.UseGTIDs { - gtidCoords, err := mysql.NewGTIDBinlogCoordinates(coordStr) - if err != nil { - return nil, err - } - chk.LastTrxCoords = gtidCoords - } else { - fileCoords, err := mysql.ParseFileBinlogCoordinates(coordStr) - if err != nil { - return nil, err - } - chk.LastTrxCoords = fileCoords - } - if apl.migrationContext.IsMoveTablesMode() && drainGTIDStr != "" { - drainGTID, err := mysql.NewGTIDBinlogCoordinates(drainGTIDStr) - if err != nil { - return nil, err - } - chk.MoveTablesCutOverDrainGTID = drainGTID + coords, err := apl.parseCheckpointCoordinates(coordStr) + if err != nil { + return nil, err } + chk.LastTrxCoords = coords return chk, nil } diff --git a/go/logic/checkpoint.go b/go/logic/checkpoint.go index f81a2bb16..e8af08686 100644 --- a/go/logic/checkpoint.go +++ b/go/logic/checkpoint.go @@ -6,6 +6,9 @@ package logic import ( + "encoding/hex" + "fmt" + "strings" "time" "github.com/github/gh-ost/go/mysql" @@ -16,6 +19,10 @@ import ( type Checkpoint struct { Id int64 Timestamp time.Time + // TableName is the migrated table this checkpoint row belongs to. Empty in + // standard (single-table) mode; set per table in move-tables mode, where the + // checkpoint table holds one row per migrated table. + TableName string // LastTrxCoords are coordinates of a transaction // that has been applied on ghost table. LastTrxCoords mysql.BinlogCoordinates @@ -32,3 +39,56 @@ type Checkpoint struct { MoveTablesCutOverStarted bool MoveTablesCutOverDrainGTID mysql.BinlogCoordinates } + +// moveTableCheckpointNullToken marks a NULL value in a serialized range. Hex +// encoding never produces "~", so it is unambiguous. 
+const moveTableCheckpointNullToken = "~" + +// serializeRangeValues encodes a unique-key range (one or more column values) +// into a portable, table-agnostic text form: each value hex-encoded, comma- +// joined. This lets the single move-tables checkpoint table store ranges for +// tables with heterogeneous unique keys without per-key typed columns. +func serializeRangeValues(cv *sql.ColumnValues) string { + if cv == nil { + return "" + } + vals := cv.AbstractValues() + parts := make([]string, len(vals)) + for i, v := range vals { + if v == nil { + parts[i] = moveTableCheckpointNullToken + continue + } + var b []byte + switch t := v.(type) { + case []byte: + b = t + case string: + b = []byte(t) + default: + b = []byte(fmt.Sprintf("%v", t)) + } + parts[i] = hex.EncodeToString(b) + } + return strings.Join(parts, ",") +} + +// deserializeRangeValues reverses serializeRangeValues for a key of arity n. The +// values come back as []byte (or nil), which are accepted as prepared-statement +// args and coerced by MySQL to the target column type for comparison. 
+func deserializeRangeValues(s string, n int) *sql.ColumnValues { + abstract := make([]interface{}, n) + if s != "" { + parts := strings.Split(s, ",") + for i := 0; i < n && i < len(parts); i++ { + p := parts[i] + if p == "" || p == moveTableCheckpointNullToken { + continue // leave nil + } + if b, err := hex.DecodeString(p); err == nil { + abstract[i] = b + } + } + } + return sql.ToColumnValues(abstract) +} diff --git a/go/logic/inspect.go b/go/logic/inspect.go index 52d380348..a534a59a4 100644 --- a/go/logic/inspect.go +++ b/go/logic/inspect.go @@ -130,9 +130,6 @@ func (isp *Inspector) InspectOriginalTable() (err error) { } func (isp *Inspector) originalTableName() string { - if isp.migrationContext.IsMoveTablesMode() { - return isp.migrationContext.MoveTables.TableNames[0] - } return isp.migrationContext.OriginalTableName } @@ -482,28 +479,43 @@ func (isp *Inspector) validateLogSlaveUpdates() error { // validateTable makes sure the table we need to operate on actually exists func (isp *Inspector) validateTable() error { + if err := isp.validateTableExistsAndNotView(isp.originalTableName()); err != nil { + return err + } query := fmt.Sprintf(`show /* gh-ost */ table status from %s like '%s'`, sql.EscapeName(isp.migrationContext.DatabaseName), isp.originalTableName()) - - tableFound := false err := sqlutils.QueryRowsMap(isp.db, query, func(rowMap sqlutils.RowMap) error { isp.migrationContext.TableEngine = rowMap.GetString("Engine") isp.migrationContext.RowsEstimate = rowMap.GetInt64("Rows") isp.migrationContext.UsedRowsEstimateMethod = base.TableStatusRowsEstimate + return nil + }) + if err != nil { + return err + } + isp.migrationContext.Log.Infof("Table found. Engine=%s", isp.migrationContext.TableEngine) + isp.migrationContext.Log.Debugf("Estimated number of rows via STATUS: %d", isp.migrationContext.RowsEstimate) + return nil +} + +// validateTableExistsAndNotView verifies the named table exists and is a real +// table (not a view). 
Unlike validateTable it does not mutate shared migration +// state, so it is safe to call per table in move-tables mode. +func (isp *Inspector) validateTableExistsAndNotView(tableName string) error { + query := fmt.Sprintf(`show /* gh-ost */ table status from %s like '%s'`, sql.EscapeName(isp.migrationContext.DatabaseName), tableName) + tableFound := false + err := sqlutils.QueryRowsMap(isp.db, query, func(rowMap sqlutils.RowMap) error { if rowMap.GetString("Comment") == "VIEW" { - return fmt.Errorf("%s.%s is a VIEW, not a real table. Bailing out", sql.EscapeName(isp.migrationContext.DatabaseName), sql.EscapeName(isp.originalTableName())) + return fmt.Errorf("%s.%s is a VIEW, not a real table. Bailing out", sql.EscapeName(isp.migrationContext.DatabaseName), sql.EscapeName(tableName)) } tableFound = true - return nil }) if err != nil { return err } if !tableFound { - return isp.migrationContext.Log.Errorf("cannot find table %s.%s!", sql.EscapeName(isp.migrationContext.DatabaseName), sql.EscapeName(isp.originalTableName())) + return isp.migrationContext.Log.Errorf("cannot find table %s.%s!", sql.EscapeName(isp.migrationContext.DatabaseName), sql.EscapeName(tableName)) } - isp.migrationContext.Log.Infof("Table found. Engine=%s", isp.migrationContext.TableEngine) - isp.migrationContext.Log.Debugf("Estimated number of rows via STATUS: %d", isp.migrationContext.RowsEstimate) return nil } @@ -565,6 +577,12 @@ func (isp *Inspector) validateTableForeignKeysFor(tableName string, allowChildFo // validateTableTriggers makes sure no triggers exist on the migrated table. if --include_triggers is used then it fetches the triggers func (isp *Inspector) validateTableTriggers() error { + return isp.validateTableTriggersFor(isp.originalTableName()) +} + +// validateTableTriggersFor performs the trigger validation for a specific table, +// so it can be applied per table in move-tables mode. 
+func (isp *Inspector) validateTableTriggersFor(tableName string) error { query := ` SELECT /* gh-ost */ COUNT(*) AS num_triggers FROM @@ -579,15 +597,15 @@ func (isp *Inspector) validateTableTriggers() error { return nil }, isp.migrationContext.DatabaseName, - isp.originalTableName(), + tableName, ) if err != nil { return err } if numTriggers > 0 { if isp.migrationContext.IncludeTriggers { - isp.migrationContext.Log.Infof("Found %d triggers on %s.%s.", numTriggers, sql.EscapeName(isp.migrationContext.DatabaseName), sql.EscapeName(isp.originalTableName())) - isp.migrationContext.Triggers, err = mysql.GetTriggers(isp.db, isp.migrationContext.DatabaseName, isp.originalTableName()) + isp.migrationContext.Log.Infof("Found %d triggers on %s.%s.", numTriggers, sql.EscapeName(isp.migrationContext.DatabaseName), sql.EscapeName(tableName)) + isp.migrationContext.Triggers, err = mysql.GetTriggers(isp.db, isp.migrationContext.DatabaseName, tableName) if err != nil { return err } @@ -599,7 +617,7 @@ func (isp *Inspector) validateTableTriggers() error { } return nil } - return isp.migrationContext.Log.Errorf("found triggers on %s.%s. Tables with triggers are supported only when using \"include-triggers\" flag. Bailing out", sql.EscapeName(isp.migrationContext.DatabaseName), sql.EscapeName(isp.originalTableName())) + return isp.migrationContext.Log.Errorf("found triggers on %s.%s. Tables with triggers are supported only when using \"include-triggers\" flag. Bailing out", sql.EscapeName(isp.migrationContext.DatabaseName), sql.EscapeName(tableName)) } isp.migrationContext.Log.Debugf("Validated no triggers exist on table") return nil @@ -725,37 +743,72 @@ func (isp *Inspector) CountTableRows(ctx context.Context) error { isp.migrationContext.Log.Infof("As instructed, I'm issuing a SELECT COUNT(*) on the table. 
This may take a while") - conn, err := isp.db.Conn(ctx) + rowsEstimate, err := isp.countTableRowsFor(ctx, isp.originalTableName()) if err != nil { return err } + + // row count query finished. nil out the cancel func, so the main migration thread + // doesn't bother calling it after row copy is done. + isp.migrationContext.SetCountTableRowsCancelFunc(nil) + + atomic.StoreInt64(&isp.migrationContext.RowsEstimate, rowsEstimate) + isp.migrationContext.UsedRowsEstimateMethod = base.CountRowsEstimate + + isp.migrationContext.Log.Infof("Exact number of rows via COUNT: %d", rowsEstimate) + + return nil +} + +// CountMoveTablesRows counts exact rows across every migrated table, recording +// each table's count in its container and the sum as the run-wide estimate. It +// is the move-tables equivalent of CountTableRows, with no representative table. +func (isp *Inspector) CountMoveTablesRows(ctx context.Context) error { + atomic.StoreInt64(&isp.migrationContext.CountingRowsFlag, 1) + defer atomic.StoreInt64(&isp.migrationContext.CountingRowsFlag, 0) + + isp.migrationContext.Log.Infof("As instructed, counting exact rows across all migrated tables. This may take a while") + var total int64 + for _, mt := range isp.migrationContext.OrderedMoveTables() { + count, err := isp.countTableRowsFor(ctx, mt.SourceTableName) + if err != nil { + return err + } + atomic.StoreInt64(&mt.RowsEstimate, count) + total += count + } + + isp.migrationContext.SetCountTableRowsCancelFunc(nil) + atomic.StoreInt64(&isp.migrationContext.RowsEstimate, total) + isp.migrationContext.UsedRowsEstimateMethod = base.CountRowsEstimate + isp.migrationContext.Log.Infof("Exact number of rows via COUNT across %d table(s): %d", len(isp.migrationContext.MoveTables.TableNames), total) + return nil +} + +// countTableRowsFor issues a blocking SELECT COUNT(*) for a single table and +// returns the exact count. A cancelled context kills the running query. 
+func (isp *Inspector) countTableRowsFor(ctx context.Context, tableName string) (int64, error) { + conn, err := isp.db.Conn(ctx) + if err != nil { + return 0, err + } defer conn.Close() var connectionID string if err := conn.QueryRowContext(ctx, `SELECT /* gh-ost */ CONNECTION_ID()`).Scan(&connectionID); err != nil { - return err + return 0, err } - query := fmt.Sprintf(`select /* gh-ost */ count(*) as count_rows from %s.%s`, sql.EscapeName(isp.migrationContext.DatabaseName), sql.EscapeName(isp.originalTableName())) + query := fmt.Sprintf(`select /* gh-ost */ count(*) as count_rows from %s.%s`, sql.EscapeName(isp.migrationContext.DatabaseName), sql.EscapeName(tableName)) var rowsEstimate int64 if err := conn.QueryRowContext(ctx, query).Scan(&rowsEstimate); err != nil { if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { isp.migrationContext.Log.Infof("exact row count cancelled (%s), likely because I'm about to cut over. I'm going to kill that query.", ctx.Err()) - return mysql.Kill(isp.db, connectionID) + return 0, mysql.Kill(isp.db, connectionID) } - return err + return 0, err } - - // row count query finished. nil out the cancel func, so the main migration thread - // doesn't bother calling it after row copy is done. 
- isp.migrationContext.SetCountTableRowsCancelFunc(nil) - - atomic.StoreInt64(&isp.migrationContext.RowsEstimate, rowsEstimate) - isp.migrationContext.UsedRowsEstimateMethod = base.CountRowsEstimate - - isp.migrationContext.Log.Infof("Exact number of rows via COUNT: %d", rowsEstimate) - - return nil + return rowsEstimate, nil } // applyColumnTypes diff --git a/go/logic/migrator.go b/go/logic/migrator.go index b01f48346..f0fdd26d9 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -421,7 +421,11 @@ func (mgtr *Migrator) countTableRows() (err error) { } countRowsFunc := func(ctx context.Context) error { - if err := mgtr.inspector.CountTableRows(ctx); err != nil { + if mgtr.migrationContext.IsMoveTablesMode() { + if err := mgtr.inspector.CountMoveTablesRows(ctx); err != nil { + return err + } + } else if err := mgtr.inspector.CountTableRows(ctx); err != nil { return err } if err := mgtr.hooksExecutor.OnRowCountComplete(); err != nil { @@ -804,26 +808,35 @@ func (mgtr *Migrator) Revert() error { } // prepareMoveTablesCopyState initializes per-table runtime state for row copy in -// move-tables mode (§2.1). Each migrated table is inspected independently into -// its own container (schema, unique key, row estimate, CREATE statement). The -// top-level migration-context fields stay bound to the primary table so the -// single-table code paths (checkpoint schema, status hint, naming) keep working; -// a single-entry --move-tables therefore behaves exactly as before. +// move-tables mode (§2.1). Each migrated table is inspected and validated +// independently into its own container (schema, unique key, row estimate, CREATE +// statement). There is no representative table: a single-entry --move-tables is +// simply an array of one, handled by the same per-table loop. 
func (mgtr *Migrator) prepareMoveTablesCopyState() error { mgtr.migrationContext.InitMoveTableContainers() var totalRowsEstimate int64 for _, mt := range mgtr.migrationContext.OrderedMoveTables() { + // Validate each entry like a standard single-table run: it must exist, be a + // real table (not a view), have no unsupported foreign keys, and no triggers + // (unless --include-triggers). + if err := mgtr.inspector.validateTableExistsAndNotView(mt.SourceTableName); err != nil { + return fmt.Errorf("failed to validate move-table %s.%s: %w", + sql.EscapeName(mt.SourceDatabaseName), sql.EscapeName(mt.SourceTableName), err) + } columns, virtualColumns, uniqueKeys, uniqueKey, rowsEstimate, err := mgtr.inspector.InspectMoveTable(mt.SourceTableName) if err != nil { return fmt.Errorf("failed to inspect move-table %s.%s: %w", sql.EscapeName(mt.SourceDatabaseName), sql.EscapeName(mt.SourceTableName), err) } - // Validate each entry like a standard single-table run. if err := mgtr.inspector.validateTableForeignKeysFor(mt.SourceTableName, mgtr.migrationContext.DiscardForeignKeys); err != nil { return fmt.Errorf("failed to validate foreign keys on move-table %s.%s: %w", sql.EscapeName(mt.SourceDatabaseName), sql.EscapeName(mt.SourceTableName), err) } + if err := mgtr.inspector.validateTableTriggersFor(mt.SourceTableName); err != nil { + return fmt.Errorf("failed to validate triggers on move-table %s.%s: %w", + sql.EscapeName(mt.SourceDatabaseName), sql.EscapeName(mt.SourceTableName), err) + } createStatement, err := mgtr.inspector.showCreateTable(mt.SourceTableName) if err != nil { return fmt.Errorf("failed to fetch create table statement for %s.%s: %w", @@ -842,16 +855,6 @@ func (mgtr *Migrator) prepareMoveTablesCopyState() error { totalRowsEstimate += rowsEstimate } - // Keep top-level fields bound to the primary table for backward-compat with - // single-table code paths (checkpoint schema, status hint, naming). 
- if primary := mgtr.migrationContext.GetMoveTable(mgtr.migrationContext.MoveTablePrimaryName()); primary != nil { - mgtr.migrationContext.OriginalTableColumns = primary.OriginalTableColumns - mgtr.migrationContext.OriginalTableVirtualColumns = primary.OriginalTableVirtualColumns - mgtr.migrationContext.OriginalTableUniqueKeys = primary.OriginalTableUniqueKeys - mgtr.migrationContext.UniqueKey = primary.UniqueKey - mgtr.migrationContext.SharedColumns = primary.SharedColumns - mgtr.migrationContext.MappedSharedColumns = primary.MappedSharedColumns - } // Aggregate the row estimate across all tables for overall progress reporting. atomic.StoreInt64(&mgtr.migrationContext.RowsEstimate, totalRowsEstimate) return nil @@ -880,15 +883,6 @@ func (mgtr *Migrator) hydrateMoveTablesStateFromTarget() error { mt.SharedColumns = columns mt.MappedSharedColumns = columns } - - if primary := mgtr.migrationContext.GetMoveTable(mgtr.migrationContext.MoveTablePrimaryName()); primary != nil { - mgtr.migrationContext.OriginalTableColumns = primary.OriginalTableColumns - mgtr.migrationContext.OriginalTableVirtualColumns = primary.OriginalTableVirtualColumns - mgtr.migrationContext.OriginalTableUniqueKeys = primary.OriginalTableUniqueKeys - mgtr.migrationContext.UniqueKey = primary.UniqueKey - mgtr.migrationContext.SharedColumns = primary.SharedColumns - mgtr.migrationContext.MappedSharedColumns = primary.MappedSharedColumns - } return nil } @@ -910,28 +904,8 @@ func (mgtr *Migrator) persistMoveTablesCutOverCheckpoint(drainGTID mysql.BinlogC } safeCoords = safeCoords.Clone() - chk := &Checkpoint{ - LastTrxCoords: safeCoords, - IterationRangeMin: sql.NewColumnValues(mgtr.migrationContext.UniqueKey.Len()), - IterationRangeMax: sql.NewColumnValues(mgtr.migrationContext.UniqueKey.Len()), - Iteration: mgtr.migrationContext.GetIteration(), - RowsCopied: atomic.LoadInt64(&mgtr.migrationContext.TotalRowsCopied), - DMLApplied: atomic.LoadInt64(&mgtr.migrationContext.TotalDMLEventsApplied), - 
IsCutover: isCutover, - MoveTablesCutOverStarted: true, - MoveTablesCutOverDrainGTID: drainGTID, - } - mgtr.applier.LastIterationRangeMutex.Lock() - if mgtr.applier.LastIterationRangeMinValues != nil { - chk.IterationRangeMin = mgtr.applier.LastIterationRangeMinValues.Clone() - } - if mgtr.applier.LastIterationRangeMaxValues != nil { - chk.IterationRangeMax = mgtr.applier.LastIterationRangeMaxValues.Clone() - } - mgtr.applier.LastIterationRangeMutex.Unlock() - id, err := mgtr.applier.WriteCheckpoint(chk) - chk.Id = id - return err + rows := mgtr.buildMoveTableCheckpointRows(safeCoords, isCutover, true, drainGTID) + return mgtr.applier.WriteMoveTableCheckpoints(rows) } // moveTablesDrainCoordinateReached returns true when current is at-or-ahead of @@ -1071,16 +1045,13 @@ func (mgtr *Migrator) resumeMoveTablesCutOverFromCheckpoint(chk *Checkpoint) err } func (mgtr *Migrator) MoveTables() (err error) { - mgtr.migrationContext.Log.Infof("Moving tables %v from %s to %s (%s)", + mgtr.migrationContext.Log.Infof("Moving tables %v (run %s) from %s to %s (%s)", mgtr.migrationContext.MoveTables.TableNames, + mgtr.migrationContext.MoveTablesRunToken(), sql.EscapeName(mgtr.migrationContext.DatabaseName), sql.EscapeName(mgtr.migrationContext.GetTargetDatabaseName()), mgtr.migrationContext.MoveTables.TargetHost) mgtr.migrationContext.StartTime = time.Now() - if mgtr.migrationContext.OriginalTableName == "" { - mgtr.migrationContext.OriginalTableName = mgtr.migrationContext.MoveTables.TableNames[0] - } - // Ensure context is cancelled on exit (cleanup) defer mgtr.migrationContext.CancelContext() @@ -1200,19 +1171,39 @@ func (mgtr *Migrator) MoveTables() (err error) { return err } if mgtr.migrationContext.Checkpoint && mgtr.migrationContext.Resume { - lastCheckpoint, err := mgtr.applier.ReadLastCheckpoint() + checkpoints, err := mgtr.applier.ReadMoveTableCheckpoints() if err != nil { return mgtr.migrationContext.Log.Errorf("no checkpoint found, unable to resume: %+v", err) } - 
mgtr.migrationContext.Log.Infof("Resuming move-tables from checkpoint coords=%+v range_min=%+v range_max=%+v iteration=%d", - lastCheckpoint.LastTrxCoords, lastCheckpoint.IterationRangeMin.String(), lastCheckpoint.IterationRangeMax.String(), lastCheckpoint.Iteration) - - mgtr.migrationContext.MigrationIterationRangeMinValues = lastCheckpoint.IterationRangeMin - mgtr.migrationContext.MigrationIterationRangeMaxValues = lastCheckpoint.IterationRangeMax - mgtr.migrationContext.Iteration = lastCheckpoint.Iteration - atomic.StoreInt64(&mgtr.migrationContext.TotalRowsCopied, lastCheckpoint.RowsCopied) - atomic.StoreInt64(&mgtr.migrationContext.TotalDMLEventsApplied, lastCheckpoint.DMLApplied) - mgtr.migrationContext.InitialStreamerCoords = lastCheckpoint.LastTrxCoords + var resumeCoords mysql.BinlogCoordinates + var totalRowsCopied, totalDMLApplied int64 + for _, mt := range mgtr.migrationContext.OrderedMoveTables() { + chk, ok := checkpoints[mt.SourceTableName] + if !ok { + // No checkpoint row for this table yet; it resumes from scratch. + continue + } + mt.RestoreFromCheckpoint(chk.IterationRangeMin, chk.IterationRangeMax, chk.Iteration, chk.RowsCopied) + totalRowsCopied += chk.RowsCopied + if chk.DMLApplied > totalDMLApplied { + totalDMLApplied = chk.DMLApplied + } + // Resume the single applied stream from the earliest per-table frontier + // so no table misses events; re-applied row-copy/DML is idempotent. 
+ if chk.LastTrxCoords != nil && !chk.LastTrxCoords.IsEmpty() { + if resumeCoords == nil || chk.LastTrxCoords.SmallerThan(resumeCoords) { + resumeCoords = chk.LastTrxCoords + } + } + mgtr.migrationContext.Log.Infof("Resuming move-table %s from checkpoint range_min=%+v range_max=%+v iteration=%d", + mt.SourceTableName, chk.IterationRangeMin.String(), chk.IterationRangeMax.String(), chk.Iteration) + } + atomic.StoreInt64(&mgtr.migrationContext.TotalRowsCopied, totalRowsCopied) + atomic.StoreInt64(&mgtr.migrationContext.TotalDMLEventsApplied, totalDMLApplied) + if resumeCoords != nil { + mgtr.migrationContext.InitialStreamerCoords = resumeCoords + } + mgtr.migrationContext.Log.Infof("Resuming move-tables from checkpoint coords=%+v", resumeCoords) } if err := mgtr.createFlagFiles(); err != nil { return err @@ -1941,11 +1932,16 @@ func (mgtr *Migrator) initiateInspector() (err error) { if err := mgtr.inspector.InitDBConnections(); err != nil { return err } - if err := mgtr.inspector.ValidateOriginalTable(); err != nil { - return fmt.Errorf("failed to validate original table: %w", err) - } - if err := mgtr.inspector.InspectOriginalTable(); err != nil { - return fmt.Errorf("failed to inspect original table: %w", err) + // Move-tables mode validates and inspects each table independently in + // prepareMoveTablesCopyState; there is no representative single table to run + // the standard single-table validation/inspection pass against. + if !mgtr.migrationContext.IsMoveTablesMode() { + if err := mgtr.inspector.ValidateOriginalTable(); err != nil { + return fmt.Errorf("failed to validate original table: %w", err) + } + if err := mgtr.inspector.InspectOriginalTable(); err != nil { + return fmt.Errorf("failed to inspect original table: %w", err) + } } // So far so good, table is accessible and valid. 
// Let's get master connection config @@ -2688,8 +2684,6 @@ func (mgtr *Migrator) iterateChunksMoveTables() error { } } - primaryName := mgtr.migrationContext.MoveTablePrimaryName() - // enqueueChunk builds and enqueues a single chunk-copy task bound to mt. enqueueChunk := func(mt *base.MoveTable) error { copyRowsFunc := func() error { @@ -2735,16 +2729,6 @@ func (mgtr *Migrator) iterateChunksMoveTables() error { } // Record this table's last successfully-copied range for checkpointing. mt.RecordLastIterationRange() - // Keep the applier-level last range in sync for the primary table so the - // existing single-table checkpoint path keeps working unchanged. - if mt.SourceTableName == primaryName { - mgtr.applier.LastIterationRangeMutex.Lock() - if mt.LastIterationRangeMinValues != nil && mt.LastIterationRangeMaxValues != nil { - mgtr.applier.LastIterationRangeMinValues = mt.LastIterationRangeMinValues.Clone() - mgtr.applier.LastIterationRangeMaxValues = mt.LastIterationRangeMaxValues.Clone() - } - mgtr.applier.LastIterationRangeMutex.Unlock() - } return nil } return base.SendWithContext(mgtr.migrationContext.GetContext(), mgtr.copyRowsQueue, copyRowsFunc) @@ -2845,6 +2829,9 @@ func (mgtr *Migrator) onApplyEventStruct(eventStruct *applyEventStruct) error { // applier reaches that trx. At that point it's safe to resume from these coordinates. 
func (mgtr *Migrator) Checkpoint(ctx context.Context) (*Checkpoint, error) { coords := mgtr.eventsStreamer.GetCurrentBinlogCoordinates() + if mgtr.migrationContext.IsMoveTablesMode() { + return mgtr.checkpointMoveTables(ctx, coords) + } mgtr.applier.LastIterationRangeMutex.Lock() if mgtr.applier.LastIterationRangeMaxValues == nil || mgtr.applier.LastIterationRangeMinValues == nil { mgtr.applier.LastIterationRangeMutex.Unlock() @@ -2871,16 +2858,70 @@ func (mgtr *Migrator) Checkpoint(ctx context.Context) (*Checkpoint, error) { mgtr.applier.CurrentCoordinatesMutex.Unlock() return chk, err } - // In move-tables mode we do not emit heartbeat rows into _ghc, so - // CurrentCoordinates may not advance while the system is otherwise idle. - // If there is no backlog in either queue, it is safe to treat the current - // streamer coordinates as applied for checkpointing purposes. - if mgtr.migrationContext.IsMoveTablesMode() && len(mgtr.applyEventsQueue) == 0 && (mgtr.eventsStreamer == nil || len(mgtr.eventsStreamer.eventsChannel) == 0) { - mgtr.applier.CurrentCoordinates = coords.Clone() - id, err := mgtr.applier.WriteCheckpoint(chk) - chk.Id = id + mgtr.applier.CurrentCoordinatesMutex.Unlock() + time.Sleep(500 * time.Millisecond) + } +} + +// buildMoveTableCheckpointRows builds one checkpoint row per migrated table. The +// run-wide fields (coords, total DML, cutover markers, drain GTID) are shared by +// every row; the per-table fields (iteration range, iteration, rows-copied) come +// from each table's own container. There is no representative table. 
+func (mgtr *Migrator) buildMoveTableCheckpointRows(coords mysql.BinlogCoordinates, isCutover, cutoverStarted bool, drainGTID mysql.BinlogCoordinates) []*Checkpoint { + totalDML := atomic.LoadInt64(&mgtr.migrationContext.TotalDMLEventsApplied) + tables := mgtr.migrationContext.OrderedMoveTables() + rows := make([]*Checkpoint, 0, len(tables)) + for _, mt := range tables { + rangeMin, rangeMax := mt.GetLastIterationRange() + rows = append(rows, &Checkpoint{ + TableName: mt.SourceTableName, + LastTrxCoords: coords, + IterationRangeMin: rangeMin, + IterationRangeMax: rangeMax, + Iteration: mt.GetIteration(), + RowsCopied: mt.GetRowsCopied(), + DMLApplied: totalDML, + IsCutover: isCutover, + MoveTablesCutOverStarted: cutoverStarted, + MoveTablesCutOverDrainGTID: drainGTID, + }) + } + return rows +} + +// moveTablesCheckpointSummary returns a representative-free Checkpoint used only +// for logging a single checkpoint event. Its (empty) range serializes to "". +func (mgtr *Migrator) moveTablesCheckpointSummary(coords mysql.BinlogCoordinates) *Checkpoint { + return &Checkpoint{ + LastTrxCoords: coords, + IterationRangeMin: sql.NewColumnValues(0), + IterationRangeMax: sql.NewColumnValues(0), + RowsCopied: atomic.LoadInt64(&mgtr.migrationContext.TotalRowsCopied), + DMLApplied: atomic.LoadInt64(&mgtr.migrationContext.TotalDMLEventsApplied), + } +} + +// checkpointMoveTables writes one checkpoint row per migrated table once the +// streamer frontier is known to be applied (or, on a quiet source with no +// backlog, treats the frontier as applied since move-tables emits no heartbeat). 
+func (mgtr *Migrator) checkpointMoveTables(ctx context.Context, coords mysql.BinlogCoordinates) (*Checkpoint, error) { + for { + if err := ctx.Err(); err != nil { + return nil, err + } + mgtr.applier.CurrentCoordinatesMutex.Lock() + applied := coords.SmallerThanOrEquals(mgtr.applier.CurrentCoordinates) + idle := len(mgtr.applyEventsQueue) == 0 && (mgtr.eventsStreamer == nil || len(mgtr.eventsStreamer.eventsChannel) == 0) + if applied || idle { + if !applied { + mgtr.applier.CurrentCoordinates = coords.Clone() + } mgtr.applier.CurrentCoordinatesMutex.Unlock() - return chk, err + rows := mgtr.buildMoveTableCheckpointRows(coords, false, false, nil) + if err := mgtr.applier.WriteMoveTableCheckpoints(rows); err != nil { + return nil, err + } + return mgtr.moveTablesCheckpointSummary(coords), nil } mgtr.applier.CurrentCoordinatesMutex.Unlock() time.Sleep(500 * time.Millisecond) diff --git a/go/logic/migrator_move_tables_cleanup_test.go b/go/logic/migrator_move_tables_cleanup_test.go index eccfe96b9..2e13e9e0e 100644 --- a/go/logic/migrator_move_tables_cleanup_test.go +++ b/go/logic/migrator_move_tables_cleanup_test.go @@ -55,7 +55,7 @@ func TestMoveTablesFinalCleanup_EmitsOperatorCommands(t *testing.T) { require.True(t, logger.has("-- drop table `source_db`.`_t_del`"), "must emit the command to drop the source rollback handle") - require.True(t, logger.has("-- drop table `target_db`.`_t_ghk`"), + require.True(t, logger.has(fmt.Sprintf("-- drop table `target_db`.`%s`", m.migrationContext.GetCheckpointTableName())), "must emit the command to drop the target checkpoint table") } From a85e5f62f52b311b17a21f79a92a8db3242e976c Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Mon, 22 Jun 2026 20:40:24 +0000 Subject: [PATCH 06/25] update GH_OST_TABLE_NAME --- go/logic/hooks.go | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/go/logic/hooks.go b/go/logic/hooks.go index 466f311b2..be3a1805c 100644 --- a/go/logic/hooks.go +++ 
b/go/logic/hooks.go @@ -221,7 +221,13 @@ func NewHooksExecutor(migrationContext *base.MigrationContext) *HooksExecutor { func (he *HooksExecutor) applyEnvironmentVariables(extraVariables ...string) []string { env := os.Environ() env = append(env, fmt.Sprintf("GH_OST_DATABASE_NAME=%s", he.migrationContext.DatabaseName)) - env = append(env, fmt.Sprintf("GH_OST_TABLE_NAME=%s", he.migrationContext.OriginalTableName)) + tableNameEnv := he.migrationContext.OriginalTableName + if he.migrationContext.IsMoveTablesMode() { + // No representative table: report the full migrated set (target names equal + // source names in move-tables mode). + tableNameEnv = strings.Join(he.migrationContext.MoveTables.TableNames, ",") + } + env = append(env, fmt.Sprintf("GH_OST_TABLE_NAME=%s", tableNameEnv)) env = append(env, fmt.Sprintf("GH_OST_GHOST_TABLE_NAME=%s", he.migrationContext.GetGhostTableName())) env = append(env, fmt.Sprintf("GH_OST_OLD_TABLE_NAME=%s", he.migrationContext.GetOldTableName())) env = append(env, fmt.Sprintf("GH_OST_DDL=%s", he.migrationContext.AlterStatement)) @@ -256,12 +262,16 @@ func (he *HooksExecutor) applyEnvironmentVariables(extraVariables ...string) []s env = append(env, fmt.Sprintf("GH_OST_REVERT=%t", he.migrationContext.Revert)) env = append(env, fmt.Sprintf("GH_OST_MOVE_TABLES=%t", he.migrationContext.IsMoveTablesMode())) if he.migrationContext.IsMoveTablesMode() { - // Comma-joined list of all migrated tables (§2.4). GH_OST_TABLE_NAME stays - // the primary table for backward compatibility. + // Comma-joined list of all migrated tables (§2.4). 
env = append(env, fmt.Sprintf("GH_OST_TABLES=%s", strings.Join(he.migrationContext.MoveTables.TableNames, ","))) } env = append(env, fmt.Sprintf("GH_OST_TARGET_DATABASE_NAME=%s", he.migrationContext.GetTargetDatabaseName())) - env = append(env, fmt.Sprintf("GH_OST_TARGET_TABLE_NAME=%s", he.migrationContext.GetTargetTableName())) + targetTableNameEnv := he.migrationContext.GetTargetTableName() + if he.migrationContext.IsMoveTablesMode() { + // Target tables keep their source names; there is no single ghost table. + targetTableNameEnv = strings.Join(he.migrationContext.MoveTables.TableNames, ",") + } + env = append(env, fmt.Sprintf("GH_OST_TARGET_TABLE_NAME=%s", targetTableNameEnv)) env = append(env, extraVariables...) return env From 79394c22c68ffdc5bf639b7314ac66ee8f52dea2 Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Tue, 23 Jun 2026 14:38:53 +0000 Subject: [PATCH 07/25] misc --- go/logic/applier.go | 2 +- go/logic/checkpoint.go | 20 ++++++++++++++++++++ go/logic/migrator.go | 9 ++++++++- script/move-tables/reset | 15 +++++++++++---- 4 files changed, 40 insertions(+), 6 deletions(-) diff --git a/go/logic/applier.go b/go/logic/applier.go index 2f48bb5a2..880906853 100644 --- a/go/logic/applier.go +++ b/go/logic/applier.go @@ -961,7 +961,7 @@ func (apl *Applier) createMoveTablesCheckpointTable() error { colDefs := []string{ "`gh_ost_chk_id` bigint auto_increment primary key", "`gh_ost_chk_timestamp` bigint", - "`gh_ost_chk_table_name` varbinary(320)", + "`gh_ost_chk_table_name` varchar(320) charset utf8mb4 collate utf8mb4_bin", "`gh_ost_chk_coords` text charset ascii", "`gh_ost_chk_iteration` bigint", "`gh_ost_rows_copied` bigint", diff --git a/go/logic/checkpoint.go b/go/logic/checkpoint.go index e8af08686..079cd69fa 100644 --- a/go/logic/checkpoint.go +++ b/go/logic/checkpoint.go @@ -92,3 +92,23 @@ func deserializeRangeValues(s string, n int) *sql.ColumnValues { } return sql.ToColumnValues(abstract) } + +// isEmptyRange reports whether a deserialized range 
carries no usable boundary +// (zero columns, or every column value nil). Such a range means the table had no +// completed chunk when the checkpoint was written, so on resume it must start +// from the table minimum rather than from this empty boundary. +func isEmptyRange(cv *sql.ColumnValues) bool { + if cv == nil { + return true + } + vals := cv.AbstractValues() + if len(vals) == 0 { + return true + } + for _, v := range vals { + if v != nil { + return false + } + } + return true +} diff --git a/go/logic/migrator.go b/go/logic/migrator.go index f0fdd26d9..96a5a582f 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -1183,7 +1183,8 @@ func (mgtr *Migrator) MoveTables() (err error) { // No checkpoint row for this table yet; it resumes from scratch. continue } - mt.RestoreFromCheckpoint(chk.IterationRangeMin, chk.IterationRangeMax, chk.Iteration, chk.RowsCopied) + // Run-wide state is replicated on every row; capture it regardless of + // whether this table had completed a chunk. totalRowsCopied += chk.RowsCopied if chk.DMLApplied > totalDMLApplied { totalDMLApplied = chk.DMLApplied @@ -1195,6 +1196,12 @@ func (mgtr *Migrator) MoveTables() (err error) { resumeCoords = chk.LastTrxCoords } } + // Only restore the per-table iteration window if a chunk actually + // completed; an empty range means this table must start from its minimum. 
+ if isEmptyRange(chk.IterationRangeMin) || isEmptyRange(chk.IterationRangeMax) { + continue + } + mt.RestoreFromCheckpoint(chk.IterationRangeMin, chk.IterationRangeMax, chk.Iteration, chk.RowsCopied) mgtr.migrationContext.Log.Infof("Resuming move-table %s from checkpoint range_min=%+v range_max=%+v iteration=%d", mt.SourceTableName, chk.IterationRangeMin.String(), chk.IterationRangeMax.String(), chk.Iteration) } diff --git a/script/move-tables/reset b/script/move-tables/reset index 76a2f76ac..0555c112d 100755 --- a/script/move-tables/reset +++ b/script/move-tables/reset @@ -12,8 +12,6 @@ DATABASE_NAME="${GH_OST_TEST_DB:-test}" # sets up / cleans up all of them, regardless of how many you actually move, # so reset works the same no matter which scenario you just ran. TABLES=(gh_ost_test gh_ost_test_other gh_ost_test_third) -# The checkpoint table is named after the first (primary) migrated table: _
_ghk. -PRIMARY_TABLE="${TABLES[0]}" # Reset source table state regardless of whether a cutover renamed the originals # to their `_
_del` rollback handles. @@ -27,12 +25,21 @@ ${SCRIPT_PATH}/mysql-source-primary -D "${DATABASE_NAME}" -e "DROP TABLE IF EXIS # Recreate and seed source table data, same fixture as setup uses. ${SCRIPT_PATH}/mysql-source-primary -D "${DATABASE_NAME}" < "${GH_OST_ROOT}/localtests/move-tables/three/create.sql" -# Drop the moved tables and the checkpoint table on the target cluster. +# Drop the moved tables on the target cluster. target_drop="" for t in "${TABLES[@]}"; do target_drop+="${t}, " done -target_drop+="_${PRIMARY_TABLE}_ghk" +target_drop="${target_drop%, }" ${SCRIPT_PATH}/mysql-target-primary -D "${DATABASE_NAME}" -e "DROP TABLE IF EXISTS ${target_drop};" +# The checkpoint table is named from the run token (_gho__ghk), which +# depends on the exact set of moved tables, so we can't name it statically. Drop +# any checkpoint tables that exist for this database. +checkpoint_tables=$(${SCRIPT_PATH}/mysql-target-primary -N -B -D "${DATABASE_NAME}" -e \ + "SELECT GROUP_CONCAT(CONCAT('\`', table_name, '\`')) FROM information_schema.tables WHERE table_schema='${DATABASE_NAME}' AND table_name LIKE '\\_gho\\_%\\_ghk';") +if [[ -n "${checkpoint_tables}" && "${checkpoint_tables}" != "NULL" ]]; then + ${SCRIPT_PATH}/mysql-target-primary -D "${DATABASE_NAME}" -e "DROP TABLE IF EXISTS ${checkpoint_tables};" +fi + echo "Reset source and target tables (${TABLES[*]}) in ${DATABASE_NAME}" \ No newline at end of file From dd8b5815d0e0e91233cd4d2cbb03dc8162486747 Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Tue, 23 Jun 2026 15:26:41 +0000 Subject: [PATCH 08/25] strong guards (some panics) against any calls to .OriginalTableName or things like "GetTargetTableName" in move-tables mode --- go/base/context.go | 21 +++++++++++++------- go/logic/applier.go | 24 ++++++++++++++++------ go/logic/applier_test.go | 24 ++++++++++++++++------ go/logic/hooks.go | 34 +++++++++++++++++++++++-------- go/logic/inspect.go | 3 +++ go/logic/server.go | 43 
++++++++++++++++++++++++++++++++-------- go/logic/throttler.go | 15 ++++++++------ 7 files changed, 123 insertions(+), 41 deletions(-) diff --git a/go/base/context.go b/go/base/context.go index 1b1b284d0..1358c9b96 100644 --- a/go/base/context.go +++ b/go/base/context.go @@ -554,6 +554,9 @@ func getSafeTableName(baseName string, suffix string) string { // GetGhostTableName generates the name of ghost table, based on original table name // or a given table name func (mctx *MigrationContext) GetGhostTableName() string { + if mctx.IsMoveTablesMode() { + panic("GetGhostTableName() must not be called in move-tables mode; there is no ghost table (the target keeps each migrated table's name)") + } if mctx.Revert { // When reverting the "ghost" table is the _del table from the original migration. return mctx.OldTableName @@ -567,8 +570,12 @@ func (mctx *MigrationContext) GetGhostTableName() string { // GetTargetTableName generates the name of the target table. In move-tables mode // each table keeps its own name on the target, so there is no single target -// table name; per-table code uses MoveTable.TargetTableName instead. +// table name; per-table code uses MoveTable.TargetTableName instead, and calling +// this is a programmer error that panics to fail fast. func (mctx *MigrationContext) GetTargetTableName() string { + if mctx.IsMoveTablesMode() { + panic("GetTargetTableName() must not be called in move-tables mode; use MoveTable.TargetTableName") + } return mctx.GetGhostTableName() } @@ -583,6 +590,9 @@ func (mctx *MigrationContext) GetTargetDatabaseName() string { // GetOldTableName generates the name of the "old" table, into which the original table is renamed. func (mctx *MigrationContext) GetOldTableName() string { + if mctx.IsMoveTablesMode() { + panic("GetOldTableName() must not be called in move-tables mode; use MoveTableDelName(tableName) for each migrated table's `_
_del` rollback handle") + } var tableName string if mctx.ForceTmpTableName != "" { tableName = mctx.ForceTmpTableName @@ -624,6 +634,9 @@ func (mctx *MigrationContext) MoveTableDelName(tableName string) string { // GetChangelogTableName generates the name of changelog table, based on original table name // or a given table name. func (mctx *MigrationContext) GetChangelogTableName() string { + if mctx.IsMoveTablesMode() { + panic("GetChangelogTableName() must not be called in move-tables mode; there is no changelog table (§1.2)") + } if mctx.ForceTmpTableName != "" { return getSafeTableName(mctx.ForceTmpTableName, "ghc") } else { @@ -644,12 +657,6 @@ func (mctx *MigrationContext) GetCheckpointTableName() string { return getSafeTableName(mctx.OriginalTableName, "ghk") } -// GetVoluntaryLockName returns a name of a voluntary lock to be used throughout -// the swap-tables process. -func (mctx *MigrationContext) GetVoluntaryLockName() string { - return fmt.Sprintf("%s.%s.lock", mctx.DatabaseName, mctx.OriginalTableName) -} - // RequiresBinlogFormatChange is `true` when the original binlog format isn't `ROW` func (mctx *MigrationContext) RequiresBinlogFormatChange() bool { return mctx.OriginalBinlogFormat != "ROW" diff --git a/go/logic/applier.go b/go/logic/applier.go index 880906853..0f06cb873 100644 --- a/go/logic/applier.go +++ b/go/logic/applier.go @@ -280,10 +280,15 @@ func (apl *Applier) AcquireMigrationLock(ctx context.Context) error { // One advisory lock per run. In move-tables mode it is keyed on the // set-derived run token (not any single table) so two processes moving the // same set of tables collide, while a single-table run keeps its table-keyed - // lock name. - lockTable := apl.originalTableName() + // lock name. lockSubject is a human-readable description used in contention + // errors; neither branch consults the representative table accessor. + var lockTable, lockSubject string if apl.migrationContext.IsMoveTablesMode() { lockTable = "movetables." 
+ apl.migrationContext.MoveTablesRunToken() + lockSubject = fmt.Sprintf("tables %v", apl.migrationContext.MoveTables.TableNames) + } else { + lockTable = apl.originalTableName() + lockSubject = fmt.Sprintf("`%s`.`%s`", apl.migrationContext.DatabaseName, apl.originalTableName()) } lockName := buildMigrationLockName(apl.migrationContext.GetTargetDatabaseName(), lockTable) @@ -322,11 +327,11 @@ func (apl *Applier) AcquireMigrationLock(ctx context.Context) error { conn.Close() lockDB.Close() if holderID.Valid { - return fmt.Errorf("another gh-ost process is already migrating `%s`.`%s`: migration lock %s held by connection id %d", - apl.migrationContext.DatabaseName, apl.originalTableName(), lockName, holderID.Int64) + return fmt.Errorf("another gh-ost process is already migrating %s: migration lock %s held by connection id %d", + lockSubject, lockName, holderID.Int64) } - return fmt.Errorf("another gh-ost process is already migrating `%s`.`%s`: migration lock %s is held", - apl.migrationContext.DatabaseName, apl.originalTableName(), lockName) + return fmt.Errorf("another gh-ost process is already migrating %s: migration lock %s is held", + lockSubject, lockName) } apl.migrationLockConn = conn @@ -598,7 +603,14 @@ func (apl *Applier) tableExists(tableName string) (tableFound bool) { return (m != nil) } +// originalTableName returns the single migrated table. It is a representative +// accessor that has no meaning in move-tables mode (every table is handled +// through its own MoveTable container), so calling it there is a programmer +// error and panics to fail fast rather than silently operate on the wrong table. 
func (apl *Applier) originalTableName() string { + if apl.migrationContext.IsMoveTablesMode() { + panic("applier.originalTableName() must not be called in move-tables mode; use the per-table MoveTable container instead") + } return apl.migrationContext.OriginalTableName } diff --git a/go/logic/applier_test.go b/go/logic/applier_test.go index ebc4db08c..117c1155d 100644 --- a/go/logic/applier_test.go +++ b/go/logic/applier_test.go @@ -481,19 +481,31 @@ func (suite *ApplierTestSuite) TestFinalCleanupMoveTablesMode_SkipsDrops() { } // initiateStreaming() requires a binlog-capable MySQL connection to call directly. -// This test verifies IsMoveTablesMode() and that GetChangelogTableName() returns -// a derivable name. A new streamer always starts with zero listeners; the real -// proof that no changelog listener is registered comes from the full run not -// failing on a nonexistent _ghc table. +// This test verifies IsMoveTablesMode() and that no changelog table is referenced +// in move-tables mode (§1.2): no `_ghc` table exists on the source or target +// database. A new streamer always starts with zero listeners; the real proof that +// no changelog listener is registered comes from the full run not failing on a +// nonexistent _ghc table. func (suite *ApplierTestSuite) TestInitiateStreamingMoveTablesMode_NoChangelogListener() { + ctx := context.Background() migrationContext := newTestMigrationContext() migrationContext.MoveTables.TableNames = []string{testMysqlTableName} migrationContext.MoveTables.TargetDatabase = testMysqlDatabaseOther suite.Require().True(migrationContext.IsMoveTablesMode()) - changelogTableName := migrationContext.GetChangelogTableName() - suite.Require().NotEmpty(changelogTableName, "changelog table name should be derivable") + // In move-tables mode there is no changelog table. Verify none exists on + // either the source or target database (LIKE '%\_ghc' matches a literal + // trailing "_ghc"). 
+ for _, schema := range []string{testMysqlDatabase, testMysqlDatabaseOther} { + var count int + err := suite.db.QueryRowContext(ctx, + `SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = ? AND table_name LIKE '%\_ghc'`, + schema, + ).Scan(&count) + suite.Require().NoError(err) + suite.Require().Equal(0, count, "no changelog (_ghc) table should exist in move-tables mode in schema %s", schema) + } streamer := NewEventsStreamer(migrationContext) suite.Require().Empty(streamer.listeners, "new streamer should have no listeners") diff --git a/go/logic/hooks.go b/go/logic/hooks.go index be3a1805c..8a0f086ee 100644 --- a/go/logic/hooks.go +++ b/go/logic/hooks.go @@ -221,15 +221,32 @@ func NewHooksExecutor(migrationContext *base.MigrationContext) *HooksExecutor { func (he *HooksExecutor) applyEnvironmentVariables(extraVariables ...string) []string { env := os.Environ() env = append(env, fmt.Sprintf("GH_OST_DATABASE_NAME=%s", he.migrationContext.DatabaseName)) - tableNameEnv := he.migrationContext.OriginalTableName + + var tableNameEnv string if he.migrationContext.IsMoveTablesMode() { - // No representative table: report the full migrated set (target names equal - // source names in move-tables mode). tableNameEnv = strings.Join(he.migrationContext.MoveTables.TableNames, ",") + } else { + tableNameEnv = he.migrationContext.OriginalTableName } env = append(env, fmt.Sprintf("GH_OST_TABLE_NAME=%s", tableNameEnv)) - env = append(env, fmt.Sprintf("GH_OST_GHOST_TABLE_NAME=%s", he.migrationContext.GetGhostTableName())) - env = append(env, fmt.Sprintf("GH_OST_OLD_TABLE_NAME=%s", he.migrationContext.GetOldTableName())) + var ghostTableNameEnv string + var oldTableNameEnv string + if he.migrationContext.IsMoveTablesMode() { + // No ghost or old tables in move-tables mode: the destination keeps each + // source table's name, and the rollback handles are the per-table + // `_
_del` tables produced by the atomic cutover RENAME. + ghostTableNameEnv = strings.Join(he.migrationContext.MoveTables.TableNames, ",") + delNames := make([]string, 0, len(he.migrationContext.MoveTables.TableNames)) + for _, tableName := range he.migrationContext.MoveTables.TableNames { + delNames = append(delNames, he.migrationContext.MoveTableDelName(tableName)) + } + oldTableNameEnv = strings.Join(delNames, ",") + } else { + ghostTableNameEnv = he.migrationContext.GetGhostTableName() + oldTableNameEnv = he.migrationContext.GetOldTableName() + } + env = append(env, fmt.Sprintf("GH_OST_GHOST_TABLE_NAME=%s", ghostTableNameEnv)) + env = append(env, fmt.Sprintf("GH_OST_OLD_TABLE_NAME=%s", oldTableNameEnv)) env = append(env, fmt.Sprintf("GH_OST_DDL=%s", he.migrationContext.AlterStatement)) env = append(env, fmt.Sprintf("GH_OST_ELAPSED_SECONDS=%f", he.migrationContext.ElapsedTime().Seconds())) env = append(env, fmt.Sprintf("GH_OST_ELAPSED_COPY_SECONDS=%f", he.migrationContext.ElapsedRowCopyTime().Seconds())) @@ -266,13 +283,14 @@ func (he *HooksExecutor) applyEnvironmentVariables(extraVariables ...string) []s env = append(env, fmt.Sprintf("GH_OST_TABLES=%s", strings.Join(he.migrationContext.MoveTables.TableNames, ","))) } env = append(env, fmt.Sprintf("GH_OST_TARGET_DATABASE_NAME=%s", he.migrationContext.GetTargetDatabaseName())) - targetTableNameEnv := he.migrationContext.GetTargetTableName() + + var targetTableNameEnv string if he.migrationContext.IsMoveTablesMode() { - // Target tables keep their source names; there is no single ghost table. targetTableNameEnv = strings.Join(he.migrationContext.MoveTables.TableNames, ",") + } else { + targetTableNameEnv = he.migrationContext.GetTargetTableName() } env = append(env, fmt.Sprintf("GH_OST_TARGET_TABLE_NAME=%s", targetTableNameEnv)) - env = append(env, extraVariables...) 
return env } diff --git a/go/logic/inspect.go b/go/logic/inspect.go index a534a59a4..dea1c86eb 100644 --- a/go/logic/inspect.go +++ b/go/logic/inspect.go @@ -130,6 +130,9 @@ func (isp *Inspector) InspectOriginalTable() (err error) { } func (isp *Inspector) originalTableName() string { + if isp.migrationContext.IsMoveTablesMode() { + panic("inspector.originalTableName() must not be called in move-tables mode; inspect each table via its name (e.g. validateTableFor/InspectMoveTable)") + } return isp.migrationContext.OriginalTableName } diff --git a/go/logic/server.go b/go/logic/server.go index 4705ba9b9..ecdf75522 100644 --- a/go/logic/server.go +++ b/go/logic/server.go @@ -192,6 +192,33 @@ func (srv *Server) onServerCommand(command string, writer *bufio.Writer) (err er return srv.migrationContext.Log.Errore(err) } +// commandArgMatchesMigration reports whether a table-name argument supplied with +// an interactive command refers to this migration. The argument is optional and +// acts as a courtesy safety check, so an operator who is connected to the wrong +// gh-ost socket is rejected. In standard mode it must equal the single migrated +// table; in move-tables mode it may be any one of the migrated tables. +func (srv *Server) commandArgMatchesMigration(arg string) bool { + if srv.migrationContext.IsMoveTablesMode() { + for _, tableName := range srv.migrationContext.MoveTables.TableNames { + if arg == tableName { + return true + } + } + return false + } + return arg == srv.migrationContext.OriginalTableName +} + +// migrationTargetDescription returns a human-readable description of the migrated +// table(s), used in interactive-command messages. In move-tables mode it is the +// comma-joined list of migrated tables; otherwise the single table name. 
+func (srv *Server) migrationTargetDescription() string { + if srv.migrationContext.IsMoveTablesMode() { + return strings.Join(srv.migrationContext.MoveTables.TableNames, ",") + } + return srv.migrationContext.OriginalTableName +} + // applyServerCommand parses and executes commands by user func (srv *Server) applyServerCommand(command string, writer *bufio.Writer) (printStatusRule PrintStatusRule, err error) { tokens := strings.SplitN(command, "=", 2) @@ -387,9 +414,9 @@ help # This message } case "throttle", "pause", "suspend": { - if arg != "" && arg != srv.migrationContext.OriginalTableName { + if arg != "" && !srv.commandArgMatchesMigration(arg) { // User explicitly provided table name. This is a courtesy protection mechanism - err := fmt.Errorf("user commanded 'throttle' on %s, but migrated table is %s; ignoring request", arg, srv.migrationContext.OriginalTableName) + err := fmt.Errorf("user commanded 'throttle' on %s, but migrated table is %s; ignoring request", arg, srv.migrationTargetDescription()) return NoPrintStatusRule, err } atomic.StoreInt64(&srv.migrationContext.ThrottleCommandedByUser, 1) @@ -398,9 +425,9 @@ help # This message } case "no-throttle", "unthrottle", "resume", "continue": { - if arg != "" && arg != srv.migrationContext.OriginalTableName { + if arg != "" && !srv.commandArgMatchesMigration(arg) { // User explicitly provided table name. 
This is a courtesy protection mechanism - err := fmt.Errorf("user commanded 'no-throttle' on %s, but migrated table is %s; ignoring request", arg, srv.migrationContext.OriginalTableName) + err := fmt.Errorf("user commanded 'no-throttle' on %s, but migrated table is %s; ignoring request", arg, srv.migrationTargetDescription()) return NoPrintStatusRule, err } atomic.StoreInt64(&srv.migrationContext.ThrottleCommandedByUser, 0) @@ -425,9 +452,9 @@ help # This message err := fmt.Errorf("user commanded 'unpostpone' without specifying table name, but --force-named-cut-over is set") return NoPrintStatusRule, err } - if arg != "" && arg != srv.migrationContext.OriginalTableName { + if arg != "" && !srv.commandArgMatchesMigration(arg) { // User explicitly provided table name. This is a courtesy protection mechanism - err := fmt.Errorf("user commanded 'unpostpone' on %s, but migrated table is %s; ignoring request", arg, srv.migrationContext.OriginalTableName) + err := fmt.Errorf("user commanded 'unpostpone' on %s, but migrated table is %s; ignoring request", arg, srv.migrationTargetDescription()) return NoPrintStatusRule, err } if atomic.LoadInt64(&srv.migrationContext.IsPostponingCutOver) > 0 { @@ -444,9 +471,9 @@ help # This message err := fmt.Errorf("user commanded 'panic' without specifying table name, but --force-named-panic is set") return NoPrintStatusRule, err } - if arg != "" && arg != srv.migrationContext.OriginalTableName { + if arg != "" && !srv.commandArgMatchesMigration(arg) { // User explicitly provided table name. This is a courtesy protection mechanism - err := fmt.Errorf("user commanded 'panic' on %s, but migrated table is %s; ignoring request", arg, srv.migrationContext.OriginalTableName) + err := fmt.Errorf("user commanded 'panic' on %s, but migrated table is %s; ignoring request", arg, srv.migrationTargetDescription()) return NoPrintStatusRule, err } err := fmt.Errorf("user commanded 'panic'. The migration will be aborted without cleanup. 
Please drop the gh-ost tables before trying again") diff --git a/go/logic/throttler.go b/go/logic/throttler.go index e5413ab28..777cbf564 100644 --- a/go/logic/throttler.go +++ b/go/logic/throttler.go @@ -193,12 +193,15 @@ func (thlr *Throttler) collectControlReplicasLag() { return } - replicationLagQuery := fmt.Sprintf(` - select value from %s.%s where hint = 'heartbeat' and id <= 255 - `, - sql.EscapeName(thlr.migrationContext.DatabaseName), - sql.EscapeName(thlr.migrationContext.GetChangelogTableName()), - ) + var replicationLagQuery string + if !thlr.migrationContext.IsMoveTablesMode() { + replicationLagQuery = fmt.Sprintf(` + select value from %s.%s where hint = 'heartbeat' and id <= 255 + `, + sql.EscapeName(thlr.migrationContext.DatabaseName), + sql.EscapeName(thlr.migrationContext.GetChangelogTableName()), + ) + } readReplicaLag := func(connectionConfig *mysql.ConnectionConfig) (lag time.Duration, err error) { dbUri := connectionConfig.GetDBUri("information_schema") From a4fe2d2db4f9af811525918989605f1c0187e5ea Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Tue, 23 Jun 2026 16:10:56 +0000 Subject: [PATCH 09/25] remove dead code --- go/logic/applier.go | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/go/logic/applier.go b/go/logic/applier.go index 0f06cb873..c45658b10 100644 --- a/go/logic/applier.go +++ b/go/logic/applier.go @@ -789,18 +789,6 @@ func (apl *Applier) CreateGhostTable() error { return apl.createTargetTable(apl.migrationContext.GetGhostTableName()) } -// CreateTargetTable creates the target table on the target host (for move-tables). -// It aborts with an error if the target table already exists on the target cluster, -// to prevent silently writing into a table that has unrelated data or a different -// schema (move_table_mode.md §1.3: "Don't use IF NOT EXISTS for the target table. -// An existing table is an error condition, not a no-op."). 
-func (apl *Applier) CreateTargetTable(createStatement string) error { - if !apl.migrationContext.IsMoveTablesMode() { - return errors.New("CreateTargetTable is only available in MoveTables mode") - } - return apl.CreateTargetTableForName(apl.originalTableName(), createStatement) -} - // CreateTargetTableForName creates the named target table on the target host // from the given CREATE statement. In multi-table move-tables mode it is called // once per migrated table. From 43db172b979493fd1999802c56837b05bd0430ef Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Tue, 23 Jun 2026 16:23:30 +0000 Subject: [PATCH 10/25] guard read/write checkpoint funcs --- go/logic/applier.go | 12 ++++++++-- go/logic/applier_test.go | 51 ++++++++++++++++++++++++++-------------- 2 files changed, 44 insertions(+), 19 deletions(-) diff --git a/go/logic/applier.go b/go/logic/applier.go index c45658b10..50d6f4548 100644 --- a/go/logic/applier.go +++ b/go/logic/applier.go @@ -1150,8 +1150,12 @@ func (apl *Applier) WriteChangelogState(value string) (string, error) { } // WriteCheckpoint writes a standard-mode checkpoint row to the _ghk table. In -// move-tables mode use WriteMoveTableCheckpoints instead. +// move-tables mode use WriteMoveTableCheckpoints instead; calling this there is a +// programmer error (the checkpoint schema and query builder are standard-only). func (apl *Applier) WriteCheckpoint(chk *Checkpoint) (int64, error) { + if apl.migrationContext.IsMoveTablesMode() { + panic("WriteCheckpoint() must not be called in move-tables mode; use WriteMoveTableCheckpoints") + } var insertId int64 uniqueKeyArgs := sqlutils.Args(chk.IterationRangeMin.AbstractValues()...) uniqueKeyArgs = append(uniqueKeyArgs, chk.IterationRangeMax.AbstractValues()...) @@ -1296,8 +1300,12 @@ func (apl *Applier) parseCheckpointCoordinates(coordStr string) (mysql.BinlogCoo } // ReadLastCheckpoint reads the most recent standard-mode checkpoint row. 
In -// move-tables mode use ReadMoveTableCheckpoints instead. +// move-tables mode use ReadMoveTableCheckpoints instead; calling this there is a +// programmer error (the checkpoint schema is standard-only). func (apl *Applier) ReadLastCheckpoint() (*Checkpoint, error) { + if apl.migrationContext.IsMoveTablesMode() { + panic("ReadLastCheckpoint() must not be called in move-tables mode; use ReadMoveTableCheckpoints") + } minColumnNames, maxColumnNames := apl.checkpointRangeColumnNames() selectColumns := []string{ "gh_ost_chk_id", diff --git a/go/logic/applier_test.go b/go/logic/applier_test.go index 117c1155d..e6b13b554 100644 --- a/go/logic/applier_test.go +++ b/go/logic/applier_test.go @@ -824,7 +824,7 @@ func (suite *ApplierTestSuite) TestCreateTargetTable_HappyPath() { suite.Require().NoError(err) suite.Require().Equal(0, count, "precondition: target table must not exist before CreateTargetTable") - err = applier.CreateTargetTable(sourceCreateDDL) + err = applier.CreateTargetTableForName(testMysqlTableName, sourceCreateDDL) suite.Require().NoError(err) var targetTableName, targetCreateDDL string @@ -884,7 +884,7 @@ func (suite *ApplierTestSuite) TestCreateTargetTable_AbortsIfExists() { err = suite.db.QueryRow(fmt.Sprintf("SHOW CREATE TABLE %s", getTestTableName())).Scan(&dummy, &sourceCreateDDL) suite.Require().NoError(err) - err = applier.CreateTargetTable(sourceCreateDDL) + err = applier.CreateTargetTableForName(testMysqlTableName, sourceCreateDDL) suite.Require().Error(err, "CreateTargetTable must return an error when target table already exists") suite.Require().Contains(err.Error(), "already exists", "error message must mention 'already exists'") suite.Require().Contains(err.Error(), testMysqlTableName, "error message must name the table") @@ -1162,12 +1162,18 @@ func (suite *ApplierTestSuite) TestWriteCheckpointMoveTables() { Columns: *sql.NewColumnList([]string{"id", "id2"}), } + // Populate the per-table container the move-tables checkpoint path operates 
on. + migrationContext.InitMoveTableContainers() + mt := migrationContext.GetMoveTable(testMysqlTableName) + suite.Require().NotNil(mt) + mt.OriginalTableColumns = migrationContext.OriginalTableColumns + mt.SharedColumns = migrationContext.SharedColumns + mt.MappedSharedColumns = migrationContext.MappedSharedColumns + mt.UniqueKey = migrationContext.UniqueKey + inspector := NewInspector(migrationContext) suite.Require().NoError(inspector.InitDBConnections()) - err = inspector.applyColumnTypes(testMysqlDatabase, testMysqlTableName, &migrationContext.UniqueKey.Columns) - suite.Require().NoError(err) - applier := NewApplier(migrationContext) err = applier.InitDBConnections() @@ -1179,7 +1185,7 @@ func (suite *ApplierTestSuite) TestWriteCheckpointMoveTables() { err = applier.prepareQueries() suite.Require().NoError(err) - err = applier.ReadMigrationRangeValues(inspector.db) + err = applier.ReadMoveTableMigrationRangeValues(inspector.db, mt) suite.Require().NoError(err) coords, err := mysql.NewGTIDBinlogCoordinates("00000000-0000-0000-0000-000000000001:1-10") @@ -1188,9 +1194,10 @@ func (suite *ApplierTestSuite) TestWriteCheckpointMoveTables() { suite.Require().NoError(err) chk := &Checkpoint{ + TableName: testMysqlTableName, LastTrxCoords: coords, - IterationRangeMin: applier.migrationContext.MigrationRangeMinValues, - IterationRangeMax: applier.migrationContext.MigrationRangeMaxValues, + IterationRangeMin: mt.MigrationRangeMinValues, + IterationRangeMax: mt.MigrationRangeMaxValues, Iteration: 3, RowsCopied: 1000, DMLApplied: 2000, @@ -1198,12 +1205,13 @@ func (suite *ApplierTestSuite) TestWriteCheckpointMoveTables() { MoveTablesCutOverStarted: true, MoveTablesCutOverDrainGTID: drainGTID, } - id, err := applier.WriteCheckpoint(chk) + err = applier.WriteMoveTableCheckpoints([]*Checkpoint{chk}) suite.Require().NoError(err) - suite.Require().Equal(int64(1), id) - gotChk, err := applier.ReadLastCheckpoint() + gotCheckpoints, err := applier.ReadMoveTableCheckpoints() 
suite.Require().NoError(err) + gotChk := gotCheckpoints[testMysqlTableName] + suite.Require().NotNil(gotChk) suite.Require().Equal(chk.Iteration, gotChk.Iteration) suite.Require().Equal(chk.LastTrxCoords.String(), gotChk.LastTrxCoords.String()) @@ -1248,27 +1256,36 @@ func (suite *ApplierTestSuite) TestReadMoveTablesCutOverCheckpointIgnoresRowCopy Columns: *sql.NewColumnList([]string{"id"}), } + migrationContext.InitMoveTableContainers() + mt := migrationContext.GetMoveTable(testMysqlTableName) + suite.Require().NotNil(mt) + mt.OriginalTableColumns = migrationContext.OriginalTableColumns + mt.SharedColumns = migrationContext.SharedColumns + mt.MappedSharedColumns = migrationContext.MappedSharedColumns + mt.UniqueKey = migrationContext.UniqueKey + inspector := NewInspector(migrationContext) suite.Require().NoError(inspector.InitDBConnections()) - err = inspector.applyColumnTypes(testMysqlDatabase, testMysqlTableName, &migrationContext.UniqueKey.Columns) - suite.Require().NoError(err) applier := NewApplier(migrationContext) suite.Require().NoError(applier.InitDBConnections()) suite.Require().NoError(applier.CreateCheckpointTable()) suite.Require().NoError(applier.prepareQueries()) - suite.Require().NoError(applier.ReadMigrationRangeValues(inspector.db)) + suite.Require().NoError(applier.ReadMoveTableMigrationRangeValues(inspector.db, mt)) coords := mysql.NewFileBinlogCoordinates("mysql-bin.000003", int64(1234)) + // A row-copy checkpoint: cutover has not started, so the cutover-resume read + // must ignore it. 
chk := &Checkpoint{ + TableName: testMysqlTableName, LastTrxCoords: coords, - IterationRangeMin: applier.migrationContext.MigrationRangeMinValues, - IterationRangeMax: applier.migrationContext.MigrationRangeMaxValues, + IterationRangeMin: mt.MigrationRangeMinValues, + IterationRangeMax: mt.MigrationRangeMaxValues, Iteration: 1, RowsCopied: 3, DMLApplied: 0, } - _, err = applier.WriteCheckpoint(chk) + err = applier.WriteMoveTableCheckpoints([]*Checkpoint{chk}) suite.Require().NoError(err) _, err = applier.ReadMoveTablesCutOverCheckpoint() From 42ce923e8132a4e6da70a7ae251749884f8124f3 Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Tue, 23 Jun 2026 18:04:53 +0000 Subject: [PATCH 11/25] fix row closure lint --- go/logic/applier.go | 57 +++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 31 deletions(-) diff --git a/go/logic/applier.go b/go/logic/applier.go index 50d6f4548..3d6ba3800 100644 --- a/go/logic/applier.go +++ b/go/logic/applier.go @@ -1626,49 +1626,46 @@ func (apl *Applier) ReadMoveTableMigrationRangeValues(db *gosql.DB, mt *base.Mov if err != nil { return err } - minRows, err := tx.Query(minQuery) - if err != nil { + if mt.MigrationRangeMinValues, err = apl.scanMoveTableRangeBoundary(tx, minQuery, mt.UniqueKey.Len()); err != nil { return err } - for minRows.Next() { - mt.MigrationRangeMinValues = sql.NewColumnValues(mt.UniqueKey.Len()) - if err = minRows.Scan(mt.MigrationRangeMinValues.ValuesPointers...); err != nil { - minRows.Close() - return err - } - } - if err = minRows.Err(); err != nil { - minRows.Close() - return err - } - minRows.Close() maxQuery, err := sql.BuildUniqueKeyMaxValuesPreparedQuery(mt.SourceDatabaseName, mt.SourceTableName, mt.UniqueKey) if err != nil { return err } - maxRows, err := tx.Query(maxQuery) - if err != nil { - return err - } - for maxRows.Next() { - mt.MigrationRangeMaxValues = sql.NewColumnValues(mt.UniqueKey.Len()) - if err = maxRows.Scan(mt.MigrationRangeMaxValues.ValuesPointers...); err 
!= nil { - maxRows.Close() - return err - } - } - if err = maxRows.Err(); err != nil { - maxRows.Close() + if mt.MigrationRangeMaxValues, err = apl.scanMoveTableRangeBoundary(tx, maxQuery, mt.UniqueKey.Len()); err != nil { return err } - maxRows.Close() apl.migrationContext.Log.Infof("Move-table %s.%s migration range: [%s]..[%s]", mt.SourceDatabaseName, mt.SourceTableName, mt.MigrationRangeMinValues, mt.MigrationRangeMaxValues) return tx.Commit() } +// scanMoveTableRangeBoundary runs a single min/max unique-key boundary query and +// returns the scanned values (nil if the table is empty). The result set is +// closed via defer, so each boundary query is fully closed before the next one +// runs on the same transaction. +func (apl *Applier) scanMoveTableRangeBoundary(tx *gosql.Tx, query string, keyLen int) (*sql.ColumnValues, error) { + rows, err := tx.Query(query) + if err != nil { + return nil, err + } + defer rows.Close() + var values *sql.ColumnValues + for rows.Next() { + values = sql.NewColumnValues(keyLen) + if err := rows.Scan(values.ValuesPointers...); err != nil { + return nil, err + } + } + if err := rows.Err(); err != nil { + return nil, err + } + return values, nil +} + // CalculateMoveTableNextIterationRangeEndValues computes the next chunk's // range-end for a single migrated table, storing it in the table's container. 
// It returns false when the table has no further range to iterate (row copy @@ -1701,19 +1698,17 @@ func (apl *Applier) CalculateMoveTableNextIterationRangeEndValues(db *gosql.DB, if err != nil { return hasFurtherRange, err } + defer rows.Close() iterationRangeMaxValues := sql.NewColumnValues(mt.UniqueKey.Len()) for rows.Next() { if err = rows.Scan(iterationRangeMaxValues.ValuesPointers...); err != nil { - rows.Close() return hasFurtherRange, err } hasFurtherRange = true } if err = rows.Err(); err != nil { - rows.Close() return hasFurtherRange, err } - rows.Close() if hasFurtherRange { mt.MigrationIterationRangeMaxValues = iterationRangeMaxValues return hasFurtherRange, nil From b13302920590ff24606df255a3b49af4ed883e44 Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Tue, 23 Jun 2026 18:20:24 +0000 Subject: [PATCH 12/25] more guards on non-move-table specific funcs. fix up front table existence checks --- go/logic/applier.go | 49 +++++++++++++++++++++++++++++++++++++------- go/logic/inspect.go | 12 +++++++++++ go/logic/migrator.go | 6 ++++++ 3 files changed, 60 insertions(+), 7 deletions(-) diff --git a/go/logic/applier.go b/go/logic/applier.go index 3d6ba3800..ac89c1013 100644 --- a/go/logic/applier.go +++ b/go/logic/applier.go @@ -789,6 +789,19 @@ func (apl *Applier) CreateGhostTable() error { return apl.createTargetTable(apl.migrationContext.GetGhostTableName()) } +// targetTableExists reports whether the named table already exists on the +// move-tables target database. +func (apl *Applier) targetTableExists(targetTableName string) (bool, error) { + var count int + if err := apl.moveTablesTargetDB.QueryRow( + "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema=? 
AND table_name=?", + apl.migrationContext.GetTargetDatabaseName(), targetTableName, + ).Scan(&count); err != nil { + return false, fmt.Errorf("failed to check for existing target table %s: %w", sql.EscapeName(targetTableName), err) + } + return count > 0, nil +} + // CreateTargetTableForName creates the named target table on the target host // from the given CREATE statement. In multi-table move-tables mode it is called // once per migrated table. @@ -801,15 +814,11 @@ func (apl *Applier) CreateTargetTableForName(targetTableName, createStatement st // Explicit pre-check: abort before any data is copied if the target table // already exists. The CREATE TABLE would also fail (MySQL ERROR 1050), but // this gives operators a clear gh-ost error message explaining what to do. - var count int - err := apl.moveTablesTargetDB.QueryRow( - "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema=? AND table_name=?", - targetDatabase, targetTableName, - ).Scan(&count) + exists, err := apl.targetTableExists(targetTableName) if err != nil { - return fmt.Errorf("failed to check for existing target table: %w", err) + return err } - if count > 0 { + if exists { return fmt.Errorf("target table %s.%s already exists on the target cluster. Aborting to prevent writing into a table with unrelated data. Drop the table manually if this is intentional", sql.EscapeName(targetDatabase), sql.EscapeName(targetTableName)) } @@ -817,6 +826,32 @@ func (apl *Applier) CreateTargetTableForName(targetTableName, createStatement st return apl.createTargetTableFromStatement(targetTableName, createStatement) } +// ValidateMoveTablesTargetsAbsent verifies that none of the migrated tables +// already exist on the target cluster, before any of them are created. This +// makes a collision abort cleanly up front rather than after partially creating +// the earlier tables in the set. 
+func (apl *Applier) ValidateMoveTablesTargetsAbsent() error { + if !apl.migrationContext.IsMoveTablesMode() { + return errors.New("ValidateMoveTablesTargetsAbsent is only available in MoveTables mode") + } + targetDatabase := apl.migrationContext.GetTargetDatabaseName() + var existing []string + for _, mt := range apl.migrationContext.OrderedMoveTables() { + exists, err := apl.targetTableExists(mt.TargetTableName) + if err != nil { + return err + } + if exists { + existing = append(existing, fmt.Sprintf("%s.%s", sql.EscapeName(targetDatabase), sql.EscapeName(mt.TargetTableName))) + } + } + if len(existing) > 0 { + return fmt.Errorf("the following target table(s) already exist on the target cluster: %s. Aborting before creating any tables to avoid leaving partial state; drop them manually if this is intentional", + strings.Join(existing, ", ")) + } + return nil +} + // AlterGhost applies `alter` statement on ghost table func (apl *Applier) AlterGhost() error { query := fmt.Sprintf(`alter /* gh-ost */ table %s.%s %s`, diff --git a/go/logic/inspect.go b/go/logic/inspect.go index dea1c86eb..3d5dbd3ad 100644 --- a/go/logic/inspect.go +++ b/go/logic/inspect.go @@ -86,6 +86,9 @@ func (isp *Inspector) InitDBConnections() (err error) { } func (isp *Inspector) ValidateOriginalTable() (err error) { + if isp.migrationContext.IsMoveTablesMode() { + return errors.New("ValidateOriginalTable is not available in move-tables mode; each migrated table is validated individually via validateTableExistsAndNotView / validateTableForeignKeysFor / validateTableTriggersFor") + } if err := isp.validateTable(); err != nil { return err } @@ -118,6 +121,9 @@ func (isp *Inspector) InspectTableColumnsAndUniqueKeys(tableName string) (column } func (isp *Inspector) InspectOriginalTable() (err error) { + if isp.migrationContext.IsMoveTablesMode() { + return errors.New("InspectOriginalTable is not available in move-tables mode; use InspectMoveTable per table") + } 
isp.migrationContext.OriginalTableColumns, isp.migrationContext.OriginalTableVirtualColumns, isp.migrationContext.OriginalTableUniqueKeys, err = isp.InspectTableColumnsAndUniqueKeys(isp.originalTableName()) if err != nil { return err @@ -741,6 +747,9 @@ func (isp *Inspector) InspectMoveTable(tableName string) (columns *sql.ColumnLis // CountTableRows counts exact number of rows on the original table func (isp *Inspector) CountTableRows(ctx context.Context) error { + if isp.migrationContext.IsMoveTablesMode() { + return errors.New("CountTableRows is not available in move-tables mode; use CountMoveTablesRows") + } atomic.StoreInt64(&isp.migrationContext.CountingRowsFlag, 1) defer atomic.StoreInt64(&isp.migrationContext.CountingRowsFlag, 0) @@ -767,6 +776,9 @@ func (isp *Inspector) CountTableRows(ctx context.Context) error { // each table's count in its container and the sum as the run-wide estimate. It // is the move-tables equivalent of CountTableRows, with no representative table. func (isp *Inspector) CountMoveTablesRows(ctx context.Context) error { + if !isp.migrationContext.IsMoveTablesMode() { + return errors.New("CountMoveTablesRows is only available in move-tables mode; use CountTableRows") + } atomic.StoreInt64(&isp.migrationContext.CountingRowsFlag, 1) defer atomic.StoreInt64(&isp.migrationContext.CountingRowsFlag, 0) diff --git a/go/logic/migrator.go b/go/logic/migrator.go index 96a5a582f..44eb46ed2 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -2478,6 +2478,12 @@ func (mgtr *Migrator) initiateApplier() error { if mgtr.migrationContext.IsMoveTablesMode() { if !mgtr.migrationContext.Resume { + // Fail early and cleanly: if any target table already exists, abort + // before creating any of them so we never leave a partially-created set + // on the target. + if err := mgtr.applier.ValidateMoveTablesTargetsAbsent(); err != nil { + return err + } // Create every migrated table on the target from its captured CREATE // statement (§2.1). 
Containers were populated by prepareMoveTablesCopyState. for _, mt := range mgtr.migrationContext.OrderedMoveTables() { From a52ebf76239cbb845aeb544c90426b92ee805176 Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Tue, 23 Jun 2026 18:46:24 +0000 Subject: [PATCH 13/25] fix existing test case that queried the checkpoint table --- .../move-tables/resume-panic-on-row-copy/test.sh | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/localtests/move-tables/resume-panic-on-row-copy/test.sh b/localtests/move-tables/resume-panic-on-row-copy/test.sh index d683eee72..47ab0dee5 100644 --- a/localtests/move-tables/resume-panic-on-row-copy/test.sh +++ b/localtests/move-tables/resume-panic-on-row-copy/test.sh @@ -39,8 +39,15 @@ echo -e "\n\n\n\n\n" echo "⚙️ Validating checkpointed state on unexpected exit..." -# checkpoint file exists on target and is non-empty -mysql-exec target primary $database -sNe "SELECT 1 FROM _${table_name}_ghk LIMIT 1;" +# checkpoint table exists on target. In move-tables mode the checkpoint table is +# named from the run token (_gho_<token>_ghk), not from any single migrated +# table, so we look it up by pattern rather than a static per-table name. +checkpoint_table=$(mysql-exec target primary $database -sNe "SELECT table_name FROM information_schema.tables WHERE table_schema='${database}' AND table_name LIKE '\\_gho\\_%\\_ghk' LIMIT 1;") +if [ -z "$checkpoint_table" ]; then + echo "ERROR: Checkpoint table does not exist." + return 1 +fi +mysql-exec target primary $database -sNe "SELECT 1 FROM \`${checkpoint_table}\` LIMIT 1;" if [ $? -gt 0 ]; then echo "ERROR: Checkpoint file is empty or does not exist." 
return 1 From 19002c0405f9583eec43be99cb1f0cd32206ca45 Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Tue, 23 Jun 2026 18:51:42 +0000 Subject: [PATCH 14/25] merge conflict --- go/logic/migrator.go | 1 + 1 file changed, 1 insertion(+) diff --git a/go/logic/migrator.go b/go/logic/migrator.go index 83db32db5..967c3d754 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -1038,6 +1038,7 @@ func (mgtr *Migrator) resumeMoveTablesCutOverFromCheckpoint(chk *Checkpoint) err } atomic.StoreInt64(&mgtr.migrationContext.CutOverCompleteFlag, 1) mgtr.migrationContext.Log.Debugf("T4: CutOverCompleteFlag set") + mgtr.migrationContext.MoveTables.DrainGTID = chk.MoveTablesCutOverDrainGTID if err := mgtr.hooksExecutor.OnSuccess(false); err != nil { return fmt.Errorf("on-success hook failed: %w", err) } From dc1d94381a52c6bc7135fba43b8eabac5ef4d931 Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Tue, 23 Jun 2026 19:02:44 +0000 Subject: [PATCH 15/25] rename func to be more clear for move-tables multi table drop --- go/logic/migrator.go | 17 ++++++++++------- go/logic/migrator_move_tables_cleanup_test.go | 6 +++--- go/logic/migrator_move_tables_cutover_test.go | 15 ++++++++++----- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/go/logic/migrator.go b/go/logic/migrator.go index 967c3d754..10cb81106 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -1904,12 +1904,15 @@ func (mgtr *Migrator) validateMoveTablesSourceReadHost() error { return fmt.Errorf("move-tables source --host %+v is the cluster primary; reading the full table copy from the primary is the load move-tables is meant to avoid. Point --host at a replica so reads come off the primary, or pass --allow-on-source-primary to proceed against the primary anyway", spc.Key) } -// dropSourceOldTable drops the source `_<table>_del` rollback handle(s) on the -// source primary. The inspector/streamer source connections may be a read -// replica, so the drop cannot go through them; it must use the writable -// source-primary handle. In multi-table mode every renamed source table's `_del` -// handle is dropped. -func (mgtr *Migrator) dropSourceOldTable() error { +// dropMoveTablesSourceOldTables drops every source `_<table>_del` rollback +// handle on the source primary. Move-tables only: each migrated table leaves a +// `_del` handle behind after the atomic cutover RENAME, and there may be several. +// The inspector/streamer source connections may be a read replica, so the drop +// cannot go through them; it must use the writable source-primary handle. +func (mgtr *Migrator) dropMoveTablesSourceOldTables() error { + if !mgtr.migrationContext.IsMoveTablesMode() { + return errors.New("dropMoveTablesSourceOldTables is only available in move-tables mode") + } if mgtr.sourcePrimaryDB == nil { return errors.New("source primary connection not initialized; cannot drop source __del table") } @@ -3164,7 +3167,7 @@ func (mgtr *Migrator) moveTablesFinalCleanup() error { // inspector/streamer source connections may point at a read replica, so the // drop goes through the dedicated source-primary handle. if !mgtr.migrationContext.Noop { - if err := mgtr.retryOperation(mgtr.dropSourceOldTable); err != nil { + if err := mgtr.retryOperation(mgtr.dropMoveTablesSourceOldTables); err != nil { return err } } diff --git a/go/logic/migrator_move_tables_cleanup_test.go b/go/logic/migrator_move_tables_cleanup_test.go index 2e13e9e0e..48b95b0ce 100644 --- a/go/logic/migrator_move_tables_cleanup_test.go +++ b/go/logic/migrator_move_tables_cleanup_test.go @@ -70,13 +70,13 @@ func TestLogMoveTablesRollbackHint_EmitsRenameCommand(t *testing.T) { "must emit the rename command to roll the source table back") } -// TestMoveTablesDropSourceOldTable_NilSourcePrimaryErrors verifies the source +// TestDropMoveTablesSourceOldTables_NilSourcePrimaryErrors verifies the source // `__del` drop fails cleanly (rather than panicking) when the source-primary // connection was never initialized. The drop must never silently no-op. 
-func TestMoveTablesDropSourceOldTable_NilSourcePrimaryErrors(t *testing.T) { +func TestDropMoveTablesSourceOldTables_NilSourcePrimaryErrors(t *testing.T) { m, _ := newCleanupTestMigrator() - err := m.dropSourceOldTable() + err := m.dropMoveTablesSourceOldTables() require.Error(t, err) require.Contains(t, err.Error(), "source primary connection not initialized") diff --git a/go/logic/migrator_move_tables_cutover_test.go b/go/logic/migrator_move_tables_cutover_test.go index 578e68645..d22c1d7ce 100644 --- a/go/logic/migrator_move_tables_cutover_test.go +++ b/go/logic/migrator_move_tables_cutover_test.go @@ -178,6 +178,10 @@ func TestResumeMoveTablesCutOverFromCheckpointAlreadyDrained(t *testing.T) { require.Equal(t, int64(1), atomic.LoadInt64(&ctx.CutOverCompleteFlag), "post-state: resume path must set CutOverCompleteFlag before exiting") + require.NotNil(t, ctx.MoveTables.DrainGTID, + "post-state: resume must set MoveTables.DrainGTID so the on-success hook gets GH_OST_DRAIN_GTID") + require.Equal(t, drainGTID.String(), ctx.MoveTables.DrainGTID.String(), + "post-state: MoveTables.DrainGTID must equal the checkpoint drain GTID") require.Equal(t, []string{"fake:OnSuccess"}, calls, "post-state: resume path should jump directly to T5 without rerunning T0/T1") if m.applier.CurrentCoordinates != nil { @@ -345,20 +349,21 @@ func (s *MoveTablesCutOverSuite) buildMigrator(fakeHooks base.Hooks, initialCoor return m, mc } -// TestDropSourceOldTableUsesSourcePrimary verifies the source `__del` rollback -// handle is dropped through the dedicated source-primary connection. In +// TestDropMoveTablesSourceOldTablesUsesSourcePrimary verifies the source `__del` +// rollback handle is dropped through the dedicated source-primary connection. In // production the inspector/streamer source connections may be a read replica, so // the drop must not route through them. 
-func (s *MoveTablesCutOverSuite) TestDropSourceOldTableUsesSourcePrimary() { +func (s *MoveTablesCutOverSuite) TestDropMoveTablesSourceOldTablesUsesSourcePrimary() { ctx := context.Background() _, err := s.db.ExecContext(ctx, fmt.Sprintf("CREATE TABLE %s (id INT PRIMARY KEY)", getTestOldTableName())) s.Require().NoError(err) var calls []string fakeHooks := &recordingHooks{name: "fake", calls: &calls} - m, _ := s.buildMigrator(fakeHooks, s.containingDrainGTID()) + m, mc := s.buildMigrator(fakeHooks, s.containingDrainGTID()) + mc.MoveTables.TableNames = []string{testMysqlTableName} - s.Require().NoError(m.dropSourceOldTable()) + s.Require().NoError(m.dropMoveTablesSourceOldTables()) var name string err = s.db.QueryRow(fmt.Sprintf("SHOW TABLES IN %s LIKE '_%s_del'", From d38bf67ee908a1d3a318ffebe2647431da785012 Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Tue, 23 Jun 2026 19:15:46 +0000 Subject: [PATCH 16/25] applier_test fixes --- go/logic/applier_test.go | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/go/logic/applier_test.go b/go/logic/applier_test.go index e6b13b554..f22c7d257 100644 --- a/go/logic/applier_test.go +++ b/go/logic/applier_test.go @@ -1215,8 +1215,12 @@ func (suite *ApplierTestSuite) TestWriteCheckpointMoveTables() { suite.Require().Equal(chk.Iteration, gotChk.Iteration) suite.Require().Equal(chk.LastTrxCoords.String(), gotChk.LastTrxCoords.String()) - suite.Require().Equal(chk.IterationRangeMin.String(), gotChk.IterationRangeMin.String()) - suite.Require().Equal(chk.IterationRangeMax.String(), gotChk.IterationRangeMax.String()) + // The fresh read yields typed values (e.g. int -> "212") while the checkpoint + // round-trips them as []byte (-> hex "323132"). Both serialize identically and + // are used identically as prepared-statement args on resume, so compare the + // serialized (resumable) form rather than the typed String() rendering. 
+ suite.Require().Equal(serializeRangeValues(chk.IterationRangeMin), serializeRangeValues(gotChk.IterationRangeMin)) + suite.Require().Equal(serializeRangeValues(chk.IterationRangeMax), serializeRangeValues(gotChk.IterationRangeMax)) suite.Require().Equal(chk.RowsCopied, gotChk.RowsCopied) suite.Require().Equal(chk.DMLApplied, gotChk.DMLApplied) suite.Require().Equal(chk.IsCutover, gotChk.IsCutover) @@ -2074,6 +2078,16 @@ func (suite *ApplierTestSuite) TestApplyDMLEventQueriesMoveTablesMode() { migrationContext.MoveTables.TableNames = []string{testMysqlTableName} migrationContext.MoveTables.TargetDatabase = testMysqlDatabaseOther + // Populate the per-table container that prepareQueries/ApplyDMLEventQueries + // route DML through (there is no representative table in move-tables mode). + migrationContext.InitMoveTableContainers() + mt := migrationContext.GetMoveTable(testMysqlTableName) + suite.Require().NotNil(mt) + mt.OriginalTableColumns = migrationContext.OriginalTableColumns + mt.SharedColumns = migrationContext.SharedColumns + mt.MappedSharedColumns = migrationContext.MappedSharedColumns + mt.UniqueKey = migrationContext.UniqueKey + applier := NewApplier(migrationContext) suite.Require().NoError(applier.prepareQueries()) defer applier.Teardown() @@ -2157,9 +2171,6 @@ func (suite *ApplierTestSuite) TestApplyIterationMoveTableCopyQueries() { err = applier.InitDBConnections() suite.Require().NoError(err) - err = applier.CreateChangelogTable() - suite.Require().NoError(err) - err = applier.ReadMoveTableMigrationRangeValues(nil, mt) suite.Require().NoError(err) From 56a6c4a992e16dedaab2179574216c2f8d8d4e4a Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Tue, 23 Jun 2026 19:22:00 +0000 Subject: [PATCH 17/25] test coverage fix, missing movetables.tableNames --- go/logic/migrator_move_tables_cutover_test.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/go/logic/migrator_move_tables_cutover_test.go 
b/go/logic/migrator_move_tables_cutover_test.go index d22c1d7ce..e7c213ba1 100644 --- a/go/logic/migrator_move_tables_cutover_test.go +++ b/go/logic/migrator_move_tables_cutover_test.go @@ -330,6 +330,11 @@ func (s *MoveTablesCutOverSuite) buildMigrator(fakeHooks base.Hooks, initialCoor mc.ApplierConnectionConfig = connectionConfig mc.InspectorConnectionConfig = connectionConfig mc.MoveTables.SourcePrimaryConnectionConfig = connectionConfig + // Every consumer of buildMigrator is a move-tables cutover test, so put the + // migrator in move-tables mode with the canonical single table. The cutover + // path builds its atomic RENAME from MoveTables.TableNames; leaving it empty + // produces `rename table ;` (Error 1064). + mc.MoveTables.TableNames = []string{testMysqlTableName} mc.SetConnectionConfig("innodb") mc.Hooks = fakeHooks @@ -360,8 +365,7 @@ func (s *MoveTablesCutOverSuite) TestDropMoveTablesSourceOldTablesUsesSourcePrim var calls []string fakeHooks := &recordingHooks{name: "fake", calls: &calls} - m, mc := s.buildMigrator(fakeHooks, s.containingDrainGTID()) - mc.MoveTables.TableNames = []string{testMysqlTableName} + m, _ := s.buildMigrator(fakeHooks, s.containingDrainGTID()) s.Require().NoError(m.dropMoveTablesSourceOldTables()) From 6a3055abb53ccde691051bc6f9428c444a4bae3c Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Tue, 23 Jun 2026 19:33:12 +0000 Subject: [PATCH 18/25] failpoint merge conflict --- go/logic/migrator.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/go/logic/migrator.go b/go/logic/migrator.go index 10cb81106..d68491f9c 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -2775,6 +2775,10 @@ func (mgtr *Migrator) iterateChunksMoveTables() error { } return terminateRowIteration(err) } + // Mirrors the standard iterateChunks failpoint: fires after a chunk is + // enqueued so resume tests can crash mid-copy (move-tables uses this + // loop, not iterateChunks, so the failpoint must live here too). 
+ mgtr.migrationContext.NewFailPoint("panic-after-row-copy", base.WithFailPointWait(2*time.Second)) if atomic.LoadInt64(&mgtr.rowCopyCompleteFlag) == 1 { return nil } From cdebc6c14e3c407d8248da4378e3264e49670450 Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Tue, 23 Jun 2026 19:35:20 +0000 Subject: [PATCH 19/25] fixed callsite of move-tables-panic-after-row-copy --- go/logic/migrator.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/go/logic/migrator.go b/go/logic/migrator.go index d68491f9c..25bc8153b 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -1460,7 +1460,7 @@ func (mgtr *Migrator) moveTablesCutOver() (err error) { } } - mgtr.migrationContext.NewFailPoint("panic-before-drain-completion", base.WithFailPointWait(2*time.Second)) + mgtr.migrationContext.NewFailPoint("move-tables-panic-before-drain-completion", base.WithFailPointWait(2*time.Second)) // ------ T3: draining applier to drain GTID ----------- if err := mgtr.drainMoveTablesCutOver(drainGTID); err != nil { @@ -1480,7 +1480,7 @@ func (mgtr *Migrator) moveTablesCutOver() (err error) { atomic.StoreInt64(&mgtr.migrationContext.CutOverCompleteFlag, 1) mgtr.migrationContext.Log.Debugf("T4: CutOverCompleteFlag set") - mgtr.migrationContext.NewFailPoint("panic-before-on-success-hook", base.WithFailPointWait(2*time.Second)) + mgtr.migrationContext.NewFailPoint("move-tables-panic-before-on-success-hook", base.WithFailPointWait(2*time.Second)) // ----- T5: on-success hook ----- // Hook unlocks user_rw@target via db-user-management and flips the @@ -2670,8 +2670,6 @@ func (mgtr *Migrator) iterateChunks() error { } return terminateRowIteration(err) } - - mgtr.migrationContext.NewFailPoint("panic-after-row-copy", base.WithFailPointWait(2*time.Second)) } } @@ -2778,7 +2776,7 @@ func (mgtr *Migrator) iterateChunksMoveTables() error { // Mirrors the standard iterateChunks failpoint: fires after a chunk is // enqueued so resume tests can crash mid-copy (move-tables 
uses this // loop, not iterateChunks, so the failpoint must live here too). - mgtr.migrationContext.NewFailPoint("panic-after-row-copy", base.WithFailPointWait(2*time.Second)) + mgtr.migrationContext.NewFailPoint("move-tables-panic-after-row-copy", base.WithFailPointWait(2*time.Second)) if atomic.LoadInt64(&mgtr.rowCopyCompleteFlag) == 1 { return nil } From dd6d32ad18fb3ffaedb5b08493eb104cdfe36826 Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Tue, 23 Jun 2026 19:41:47 +0000 Subject: [PATCH 20/25] naming things --- localtests/move-tables/{three => multiple-three}/create.sql | 2 +- localtests/move-tables/{three => multiple-three}/tables.txt | 0 localtests/move-tables/{multi => multiple-two}/create.sql | 0 localtests/move-tables/{multi => multiple-two}/tables.txt | 0 script/move-tables/README.md | 2 +- script/move-tables/reset | 4 ++-- script/move-tables/setup | 2 +- 7 files changed, 5 insertions(+), 5 deletions(-) rename localtests/move-tables/{three => multiple-three}/create.sql (98%) rename localtests/move-tables/{three => multiple-three}/tables.txt (100%) rename localtests/move-tables/{multi => multiple-two}/create.sql (100%) rename localtests/move-tables/{multi => multiple-two}/tables.txt (100%) diff --git a/localtests/move-tables/three/create.sql b/localtests/move-tables/multiple-three/create.sql similarity index 98% rename from localtests/move-tables/three/create.sql rename to localtests/move-tables/multiple-three/create.sql index 349b8a27c..631653315 100644 --- a/localtests/move-tables/three/create.sql +++ b/localtests/move-tables/multiple-three/create.sql @@ -5,7 +5,7 @@ -- -- These three tables are the canonical superset used by the manual harness -- (script/move-tables/setup, reset, insert-source-primary-loop). The `single` --- and `multi` localtest fixtures move subsets of them. +-- and `multiple-two` localtest fixtures move subsets of them. 
drop table if exists gh_ost_test; create table gh_ost_test ( diff --git a/localtests/move-tables/three/tables.txt b/localtests/move-tables/multiple-three/tables.txt similarity index 100% rename from localtests/move-tables/three/tables.txt rename to localtests/move-tables/multiple-three/tables.txt diff --git a/localtests/move-tables/multi/create.sql b/localtests/move-tables/multiple-two/create.sql similarity index 100% rename from localtests/move-tables/multi/create.sql rename to localtests/move-tables/multiple-two/create.sql diff --git a/localtests/move-tables/multi/tables.txt b/localtests/move-tables/multiple-two/tables.txt similarity index 100% rename from localtests/move-tables/multi/tables.txt rename to localtests/move-tables/multiple-two/tables.txt diff --git a/script/move-tables/README.md b/script/move-tables/README.md index 66e74d3a7..38fa4279f 100644 --- a/script/move-tables/README.md +++ b/script/move-tables/README.md @@ -2,7 +2,7 @@ Setup the multi-cluster topology and seed the data. This always seeds the same canonical **three** tables on the source — `gh_ost_test`, `gh_ost_test_other`, -and `gh_ost_test_third` (see `localtests/move-tables/three/create.sql`) — into +and `gh_ost_test_third` (see `localtests/move-tables/multiple-three/create.sql`) — into the `test` database. You then choose how many of them to move via `--move-tables`, so `setup`/`reset`/`teardown` behave identically regardless of which scenario you run. diff --git a/script/move-tables/reset b/script/move-tables/reset index 0555c112d..b5835a145 100755 --- a/script/move-tables/reset +++ b/script/move-tables/reset @@ -8,7 +8,7 @@ SCRIPT_PATH="${GH_OST_ROOT}/script/move-tables" DATABASE_NAME="${GH_OST_TEST_DB:-test}" # The canonical superset of tables seeded by setup (and listed in -# localtests/move-tables/three/tables.txt). The manual harness always +# localtests/move-tables/multiple-three/tables.txt). 
The manual harness always # sets up / cleans up all of them, regardless of how many you actually move, # so reset works the same no matter which scenario you just ran. TABLES=(gh_ost_test gh_ost_test_other gh_ost_test_third) @@ -23,7 +23,7 @@ source_drop="${source_drop%, }" ${SCRIPT_PATH}/mysql-source-primary -D "${DATABASE_NAME}" -e "DROP TABLE IF EXISTS ${source_drop};" # Recreate and seed source table data, same fixture as setup uses. -${SCRIPT_PATH}/mysql-source-primary -D "${DATABASE_NAME}" < "${GH_OST_ROOT}/localtests/move-tables/three/create.sql" +${SCRIPT_PATH}/mysql-source-primary -D "${DATABASE_NAME}" < "${GH_OST_ROOT}/localtests/move-tables/multiple-three/create.sql" # Drop the moved tables on the target cluster. target_drop="" diff --git a/script/move-tables/setup b/script/move-tables/setup index 125900eb8..df7644c3c 100755 --- a/script/move-tables/setup +++ b/script/move-tables/setup @@ -155,7 +155,7 @@ setup() { echo "OK" echo -n "Seeding data in source cluster..." - exec-mysql-source-primary -D $DATABASE_NAME < "$GH_OST_ROOT/localtests/move-tables/three/create.sql" + exec-mysql-source-primary -D $DATABASE_NAME < "$GH_OST_ROOT/localtests/move-tables/multiple-three/create.sql" echo "OK" } From b2035576bf7a2af46c7f4eaa10b111a7091787bc Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Tue, 23 Jun 2026 19:45:05 +0000 Subject: [PATCH 21/25] dynamic lookup _ghk --- .../resume-panic-before-drain-complete/test.sh | 14 +++++++++++--- .../move-tables/resume-panic-on-row-copy/test.sh | 4 ++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/localtests/move-tables/resume-panic-before-drain-complete/test.sh b/localtests/move-tables/resume-panic-before-drain-complete/test.sh index 39392d53a..d73436f3d 100644 --- a/localtests/move-tables/resume-panic-before-drain-complete/test.sh +++ b/localtests/move-tables/resume-panic-before-drain-complete/test.sh @@ -76,14 +76,22 @@ if [ $? 
-gt 0 ]; then return 1 fi -# validate last checkpoint (cutover started and drain GTID are set) -cutover_started=$(mysql-exec target primary $database -Ne "SELECT gh_ost_move_tables_cutover_started FROM _${table_name}_ghk ORDER BY gh_ost_chk_id DESC LIMIT 1;") +# validate last checkpoint (cutover started and drain GTID are set). The +# checkpoint table is named from the run token (_gho_<token>_ghk), so look it up +# by pattern rather than a static per-table name. +checkpoint_table=$(mysql-exec target primary $database -sNe "SELECT table_name FROM information_schema.tables WHERE table_schema='${database}' AND table_name LIKE '\\_gho\\_%\\_ghk' LIMIT 1;") +if [ -z "$checkpoint_table" ]; then + echo "ERROR: Checkpoint table does not exist." + return 1 +fi + +cutover_started=$(mysql-exec target primary $database -Ne "SELECT gh_ost_move_tables_cutover_started FROM \`${checkpoint_table}\` ORDER BY gh_ost_chk_id DESC LIMIT 1;") if [ "$cutover_started" != 1 ]; then echo "ERROR: Expected cutover started to be set in last checkpoint." return 1 fi -drain_gtid=$(mysql-exec target primary $database -Ne "SELECT gh_ost_move_tables_drain_gtid FROM _${table_name}_ghk ORDER BY gh_ost_chk_id DESC LIMIT 1;") +drain_gtid=$(mysql-exec target primary $database -Ne "SELECT gh_ost_move_tables_drain_gtid FROM \`${checkpoint_table}\` ORDER BY gh_ost_chk_id DESC LIMIT 1;") if [ "$drain_gtid" == "" ]; then echo "ERROR: Expected drain GTID to be set in last checkpoint." return 1 diff --git a/localtests/move-tables/resume-panic-on-row-copy/test.sh b/localtests/move-tables/resume-panic-on-row-copy/test.sh index 3b94b0293..12835c64b 100644 --- a/localtests/move-tables/resume-panic-on-row-copy/test.sh +++ b/localtests/move-tables/resume-panic-on-row-copy/test.sh @@ -66,7 +66,7 @@ if [ $? 
-gt 0 ]; then fi # validate we processed a single row-copy chunk (10 rows) and there are 20 total to process -rows_copied=$(mysql-exec target primary $database -Ne "SELECT gh_ost_rows_copied FROM _${table_name}_ghk ORDER BY gh_ost_chk_id DESC LIMIT 1;") +rows_copied=$(mysql-exec target primary $database -Ne "SELECT gh_ost_rows_copied FROM \`${checkpoint_table}\` ORDER BY gh_ost_chk_id DESC LIMIT 1;") if [ $rows_copied -ne 10 ]; then echo "ERROR: Expected last checkpoint to show 10 rows copied." return 1 @@ -110,7 +110,7 @@ echo -e "\n\n\n\n\n" echo "⚙️ Validating checkpointed state after resumed migration..." # validate we processed a single row-copy chunk (10 rows) and there are 20 total to process -rows_copied=$(mysql-exec target primary $database -Ne "SELECT gh_ost_rows_copied FROM _${table_name}_ghk ORDER BY gh_ost_chk_id DESC LIMIT 1;") +rows_copied=$(mysql-exec target primary $database -Ne "SELECT gh_ost_rows_copied FROM \`${checkpoint_table}\` ORDER BY gh_ost_chk_id DESC LIMIT 1;") if [ $rows_copied -ne 20 ]; then echo "ERROR: Expected last checkpoint to show 20 rows copied." return 1 From b68a807ad574300d2e7cf66ff7861fa113500213 Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Tue, 23 Jun 2026 20:16:13 +0000 Subject: [PATCH 22/25] couple straggling issues from e2e testing --- go/logic/migrator.go | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/go/logic/migrator.go b/go/logic/migrator.go index 25bc8153b..ea7d075c6 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -2750,7 +2750,13 @@ func (mgtr *Migrator) iterateChunksMoveTables() error { return err } // Record this table's last successfully-copied range for checkpointing. 
- mt.RecordLastIterationRange() + // Skip the final completion-detection pass: it advanced the iteration min + // to the previous max without copying anything (and set rowCopyComplete), + // so recording here would overwrite the real [min..max] span of the last + // actual chunk with a degenerate [max..max]. + if !mt.IsRowCopyComplete() { + mt.RecordLastIterationRange() + } return nil } return base.SendWithContext(mgtr.migrationContext.GetContext(), mgtr.copyRowsQueue, copyRowsFunc) @@ -3006,6 +3012,13 @@ func (mgtr *Migrator) checkpointLoop() { } else { mgtr.migrationContext.Log.Errorf("error attempting checkpoint: %+v", err) } + } else if mgtr.migrationContext.IsMoveTablesMode() { + // Move-tables writes one checkpoint row per table; the per-table range + // and iteration live in those rows (and the status output). The single + // run-wide summary line has no representative range, so report the + // aggregate progress instead of the (empty) single-table range fields. + mgtr.migrationContext.Log.Infof("checkpoint success at coords=%+v tables=%d rows_copied=%d dml_applied=%d", + chk.LastTrxCoords.DisplayString(), len(mgtr.migrationContext.MoveTables.TableNames), chk.RowsCopied, chk.DMLApplied) } else { mgtr.migrationContext.Log.Infof("checkpoint success at coords=%+v range_min=%+v range_max=%+v iteration=%d", chk.LastTrxCoords.DisplayString(), chk.IterationRangeMin.String(), chk.IterationRangeMax.String(), chk.Iteration) From 9e752b84b9bdef5a797ccc1c1ab8d4ee1449fdcd Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Wed, 24 Jun 2026 15:57:32 +0000 Subject: [PATCH 23/25] added some unit tests for checkpoint --- go/base/context_test.go | 38 +++++++++++++++++++ go/logic/checkpoint_test.go | 75 +++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 go/logic/checkpoint_test.go diff --git a/go/base/context_test.go b/go/base/context_test.go index 35b1b4304..b0331d075 100644 --- a/go/base/context_test.go +++ b/go/base/context_test.go 
@@ -61,6 +61,44 @@ func TestGetTableNames(t *testing.T) { } } +func TestMoveTableDelName(t *testing.T) { + context := NewMigrationContext() + // Per-table `_<table>_del` rollback handle, independent of any other table.
+ require.Equal(t, "_some_table_del", context.MoveTableDelName("some_table")) + require.Equal(t, "_other_del", context.MoveTableDelName("other")) + + // Honors --timestamp-old-table like the single-table GetOldTableName does. + context.TimestampOldTable = true + longForm := "Jan 2, 2006 at 3:04pm (MST)" + context.StartTime, _ = time.Parse(longForm, "Feb 3, 2013 at 7:54pm (PST)") + require.Equal(t, "_some_table_20130203195400_del", context.MoveTableDelName("some_table")) +} + +func TestMoveTablesRunToken(t *testing.T) { + // Empty outside move-tables mode. + require.Equal(t, "", NewMigrationContext().MoveTablesRunToken()) + + context := NewMigrationContext() + context.MoveTables.TableNames = []string{"a", "b", "c"} + token := context.MoveTablesRunToken() + // Fixed length, lowercase hex (12 chars / 48 bits). + require.Len(t, token, 12) + require.Regexp(t, "^[0-9a-f]{12}$", token) + // Deterministic: the same set always yields the same token (so a resumed run + // finds the same run-wide artifacts). + require.Equal(t, token, context.MoveTablesRunToken()) + + // Order-independent: --move-tables=a,b,c and =c,b,a match. + reordered := NewMigrationContext() + reordered.MoveTables.TableNames = []string{"c", "b", "a"} + require.Equal(t, token, reordered.MoveTablesRunToken()) + + // A different set yields a different token. + different := NewMigrationContext() + different.MoveTables.TableNames = []string{"a", "b", "d"} + require.NotEqual(t, token, different.MoveTablesRunToken()) +} + func TestGetTriggerNames(t *testing.T) { { context := NewMigrationContext() diff --git a/go/logic/checkpoint_test.go b/go/logic/checkpoint_test.go new file mode 100644 index 000000000..63efca3c1 --- /dev/null +++ b/go/logic/checkpoint_test.go @@ -0,0 +1,75 @@ +/* + Copyright 2025 GitHub Inc.
+ See https://github.com/github/gh-ost/blob/master/LICENSE +*/ + +package logic + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/github/gh-ost/go/sql" +) + +// TestSerializeRangeValues covers the table-agnostic, hex-per-value encoding used +// to store a move-table's unique-key range in the single checkpoint table. +func TestSerializeRangeValues(t *testing.T) { + // nil ColumnValues serializes to the empty string. + require.Equal(t, "", serializeRangeValues(nil)) + + // A single integer key: hex of its decimal text ("172" -> 31 37 32). + require.Equal(t, "313732", serializeRangeValues(sql.ToColumnValues([]interface{}{172}))) + + // A varchar key: hex of the UTF-8 bytes ("code_8" -> 63 6f 64 65 5f 38). + require.Equal(t, "636f64655f38", serializeRangeValues(sql.ToColumnValues([]interface{}{"code_8"}))) + + // A compound key of heterogeneous types is comma-joined. + require.Equal(t, "3235,636f64655f38", + serializeRangeValues(sql.ToColumnValues([]interface{}{25, "code_8"}))) + + // Raw bytes are hex-encoded as-is. + require.Equal(t, "e590", + serializeRangeValues(sql.ToColumnValues([]interface{}{[]byte{0xe5, 0x90}}))) + + // A nil column value is encoded with the unambiguous NULL token. + require.Equal(t, moveTableCheckpointNullToken, + serializeRangeValues(sql.ToColumnValues([]interface{}{nil}))) +} + +// TestDeserializeRangeValuesRoundTrip verifies the encode->store->decode cycle. +// Values come back as []byte (accepted directly as prepared-statement args), so +// the round trip is checked on the serialized (canonical) form, which is what a +// resumed run actually compares. 
+func TestDeserializeRangeValuesRoundTrip(t *testing.T) { + orig := sql.ToColumnValues([]interface{}{172, "code_8"}) + s := serializeRangeValues(orig) + + got := deserializeRangeValues(s, 2) + require.Equal(t, s, serializeRangeValues(got), "re-serializing the decoded range must reproduce the stored text") + + vals := got.AbstractValues() + require.Len(t, vals, 2) + require.Equal(t, []byte("172"), vals[0]) + require.Equal(t, []byte("code_8"), vals[1]) +} + +// TestDeserializeRangeValuesNullToken verifies the NULL marker decodes back to a +// nil column value while other columns decode normally. +func TestDeserializeRangeValuesNullToken(t *testing.T) { + got := deserializeRangeValues(moveTableCheckpointNullToken+",3235", 2) + vals := got.AbstractValues() + require.Len(t, vals, 2) + require.Nil(t, vals[0]) + require.Equal(t, []byte("25"), vals[1]) +} + +// TestIsEmptyRange verifies the predicate that tells a resumed run a table had no +// completed chunk yet (so it must restart from the table minimum). 
+func TestIsEmptyRange(t *testing.T) { + require.True(t, isEmptyRange(nil), "nil range is empty") + require.True(t, isEmptyRange(sql.NewColumnValues(0)), "zero-column range is empty") + require.True(t, isEmptyRange(deserializeRangeValues(moveTableCheckpointNullToken, 1)), "all-nil range is empty") + require.False(t, isEmptyRange(sql.ToColumnValues([]interface{}{1})), "a range with a value is not empty") +} From 69e1110b359cd23d20e7ad6213c8b03212afc7fc Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Wed, 24 Jun 2026 16:43:08 +0000 Subject: [PATCH 24/25] integration tests --- .../atomic-multi-table-cutover/create.sql | 33 ++++++ .../atomic-multi-table-cutover/tables.txt | 2 + .../atomic-multi-table-cutover/test.sh | 108 ++++++++++++++++++ .../create.sql | 92 +++++++++++++++ .../on_test.sh | 11 ++ .../tables.txt | 3 + script/move-tables/README.md | 23 +++- 7 files changed, 266 insertions(+), 6 deletions(-) create mode 100644 localtests/move-tables/atomic-multi-table-cutover/create.sql create mode 100644 localtests/move-tables/atomic-multi-table-cutover/tables.txt create mode 100755 localtests/move-tables/atomic-multi-table-cutover/test.sh create mode 100644 localtests/move-tables/multiple-three-concurrent-writes/create.sql create mode 100755 localtests/move-tables/multiple-three-concurrent-writes/on_test.sh create mode 100644 localtests/move-tables/multiple-three-concurrent-writes/tables.txt diff --git a/localtests/move-tables/atomic-multi-table-cutover/create.sql b/localtests/move-tables/atomic-multi-table-cutover/create.sql new file mode 100644 index 000000000..0209fb320 --- /dev/null +++ b/localtests/move-tables/atomic-multi-table-cutover/create.sql @@ -0,0 +1,33 @@ +-- Atomic multi-table cutover test. +-- +-- Two tables of identical shape with a correlation column `txn_id`. The test +-- workload (see test.sh) commits transactions that write the SAME txn_id into +-- BOTH tables. 
Because all migrated tables are renamed in a single atomic +-- `RENAME TABLE t1 TO ..., t2 TO ...` at cutover, every such transaction lands on +-- the target entirely or not at all -- so the target tables must hold exactly the +-- same set of txn_ids. A regression to per-table sequential RENAME would split a +-- boundary transaction and leave an orphan. + +drop table if exists gh_ost_test; +create table gh_ost_test ( + id int(11) NOT NULL AUTO_INCREMENT, + txn_id int(11) NOT NULL, + payload varchar(32) NOT NULL, + PRIMARY KEY (id), + KEY txn_ix (txn_id) +); + +insert into gh_ost_test (txn_id, payload) values + (0, 'seed'), (0, 'seed'), (0, 'seed'), (0, 'seed'), (0, 'seed'); + +drop table if exists gh_ost_test_other; +create table gh_ost_test_other ( + id int(11) NOT NULL AUTO_INCREMENT, + txn_id int(11) NOT NULL, + payload varchar(32) NOT NULL, + PRIMARY KEY (id), + KEY txn_ix (txn_id) +); + +insert into gh_ost_test_other (txn_id, payload) values + (0, 'seed'), (0, 'seed'), (0, 'seed'), (0, 'seed'), (0, 'seed'); diff --git a/localtests/move-tables/atomic-multi-table-cutover/tables.txt b/localtests/move-tables/atomic-multi-table-cutover/tables.txt new file mode 100644 index 000000000..30fa51c70 --- /dev/null +++ b/localtests/move-tables/atomic-multi-table-cutover/tables.txt @@ -0,0 +1,2 @@ +gh_ost_test +gh_ost_test_other diff --git a/localtests/move-tables/atomic-multi-table-cutover/test.sh b/localtests/move-tables/atomic-multi-table-cutover/test.sh new file mode 100755 index 000000000..ae5cf4fd3 --- /dev/null +++ b/localtests/move-tables/atomic-multi-table-cutover/test.sh @@ -0,0 +1,108 @@ +#!/bin/bash +# Atomic multi-table cutover test. +# +# A workload commits transactions that each write the SAME txn_id into BOTH +# migrated tables, in a tight loop, right up to the cutover. Because gh-ost +# renames every migrated table in ONE atomic `RENAME TABLE t1 TO ..., t2 TO ...`, +# each cross-table transaction lands on the target entirely or not at all. 
+# +# Verification is deterministic (final-state set + checksum comparison, no timing +# assertions): the set of txn_ids in target gh_ost_test must exactly equal the set +# in target gh_ost_test_other. A regression from the atomic multi-table RENAME to +# a per-table sequential RENAME splits a boundary transaction across the two +# tables and leaves an orphan txn_id, failing this test. + +database=test + +build_binary +build_ghost_command + +###################################################################################################### +### Drive cross-table transactions, then cut over while they are still committing +###################################################################################################### + +echo "⚙️ Starting cross-table transaction workload..." + +# Each iteration commits one transaction touching BOTH tables with a shared +# txn_id. The loop runs with no delay until the tables are renamed at cutover +# (the INSERT then errors and the loop exits), so cross-table transactions are +# committing continuously while the cutover happens. Note: we do NOT (and cannot, +# from a shell) control whether a transaction is literally mid-commit at the +# RENAME instant -- that is timing-dependent. Correctness is asserted +# deterministically on the final state below (no orphaned txn_id across the pair +# + per-table checksums), not on hitting that instant. +( + n=1 + while true; do + mysql-exec source primary $database -e \ + "START TRANSACTION; \ + INSERT INTO gh_ost_test (txn_id, payload) VALUES ($n, 'a'); \ + INSERT INTO gh_ost_test_other (txn_id, payload) VALUES ($n, 'b'); \ + COMMIT;" 2>/dev/null || break + n=$((n + 1)) + done +) & +workload_pid=$! + +# Remove the postpone flag so cutover proceeds while the workload is still +# committing cross-table transactions. 
This is a best-effort time-based overlap, +# not a guarantee that a transaction is mid-commit at the exact RENAME; the +# atomicity guarantee is verified on the final target state below. +( + sleep 4 + echo "⏩ Sending unpostpone cutover" + rm $postpone_cutover_flag_file &> /dev/null +) & + +echo > $test_logfile +bash -c "$cmd" >> $test_logfile 2>&1 +ghost_result=$? + +kill $workload_pid &> /dev/null + +if [ $ghost_result -ne 0 ]; then + echo "ERROR: gh-ost should have succeeded but did not. ($ghost_result)" + return 1 +fi + +echo -e "\n\n\n\n\n" + +###################################################################################################### +### Validate atomicity + data integrity (read primaries to avoid replication lag) +###################################################################################################### + +echo "⚙️ Validating atomic multi-table cutover..." + +# Sanity: the workload must have landed cross-table rows on the target, otherwise +# the atomicity assertion below would be vacuously true. +paired=$(mysql-exec target primary $database -sNe "SELECT COUNT(*) FROM gh_ost_test WHERE txn_id > 0;") +if [ -z "$paired" ] || [ "$paired" -lt 1 ]; then + echo "ERROR: workload produced no cross-table rows on target; test would be vacuous." + return 1 +fi + +# Atomicity invariant: every cross-table transaction landed entirely or not at +# all, i.e. the txn_id sets match across the two target tables (no orphans). 
+orphans_a=$(mysql-exec target primary $database -sNe \ + "SELECT COUNT(*) FROM gh_ost_test t1 WHERE t1.txn_id > 0 \ + AND NOT EXISTS (SELECT 1 FROM gh_ost_test_other t2 WHERE t2.txn_id = t1.txn_id);") +orphans_b=$(mysql-exec target primary $database -sNe \ + "SELECT COUNT(*) FROM gh_ost_test_other t2 WHERE t2.txn_id > 0 \ + AND NOT EXISTS (SELECT 1 FROM gh_ost_test t1 WHERE t1.txn_id = t2.txn_id);") + +if [ "$orphans_a" != "0" ] || [ "$orphans_b" != "0" ]; then + echo "ERROR: non-atomic cutover: ${orphans_a} txn_id(s) in gh_ost_test missing from gh_ost_test_other; ${orphans_b} the other way." + return 1 +fi + +# Full data integrity: each migrated table matches its source rollback handle. +for table_name in gh_ost_test gh_ost_test_other; do + src_checksum=$(mysql-exec source primary $database -ss -e "SELECT * FROM _${table_name}_del ORDER BY id" | md5sum) + dst_checksum=$(mysql-exec target primary $database -ss -e "SELECT * FROM ${table_name} ORDER BY id" | md5sum) + if [ "$src_checksum" != "$dst_checksum" ]; then + echo "ERROR: checksum mismatch on ${table_name} between source _del and target." + return 1 + fi +done + +echo "✅ Atomic multi-table cutover validated: ${paired} cross-table transactions, no orphans, checksums match." diff --git a/localtests/move-tables/multiple-three-concurrent-writes/create.sql b/localtests/move-tables/multiple-three-concurrent-writes/create.sql new file mode 100644 index 000000000..f92cef32e --- /dev/null +++ b/localtests/move-tables/multiple-three-concurrent-writes/create.sql @@ -0,0 +1,92 @@ +-- Three tables with distinct schemas, primary-key types, and row counts. This +-- exercises the multi-table move-tables path at its widest: per-table +-- runtime state, per-table query builders, interleaved row copy where the tables +-- finish at different times, and a single atomic multi-table RENAME at cutover. 
+-- +-- These three tables are the canonical superset used by the manual harness +-- (script/move-tables/setup, reset, insert-source-primary-loop). The `single` +-- and `multiple-two` localtest fixtures move subsets of them. + +drop table if exists gh_ost_test; +create table gh_ost_test ( + id bigint(20) NOT NULL AUTO_INCREMENT, + column1 int(11) NOT NULL, + column2 smallint(5) unsigned NOT NULL, + column3 mediumint(8) unsigned NOT NULL, + column4 tinyint(3) unsigned NOT NULL, + column5 int(11) NOT NULL, + column6 int(11) NOT NULL, + PRIMARY KEY (id), + KEY c12_ix (column1, column2) +) auto_increment=1; + +insert into gh_ost_test values + (NULL, 1001, 100, 500000, 10, 1700000001, 1700000002), + (NULL, 1002, 200, 600000, 20, 1700000003, 1700000004), + (NULL, 1003, 300, 700000, 30, 1700000005, 1700000006), + (NULL, 1004, 400, 800000, 40, 1700000007, 1700000008), + (NULL, 1005, 500, 900000, 50, 1700000009, 1700000010), + (NULL, 1006, 600, 1000000, 60, 1700000011, 1700000012), + (NULL, 1007, 700, 1100000, 70, 1700000013, 1700000014), + (NULL, 1008, 800, 1200000, 80, 1700000015, 1700000016), + (NULL, 1009, 900, 1300000, 90, 1700000017, 1700000018), + (NULL, 1010, 1000, 1400000, 100, 1700000019, 1700000020), + (NULL, 1011, 1100, 1500000, 110, 1700000021, 1700000022), + (NULL, 1012, 1200, 1600000, 120, 1700000023, 1700000024), + (NULL, 1013, 1300, 1700000, 130, 1700000025, 1700000026), + (NULL, 1014, 1400, 1800000, 140, 1700000027, 1700000028), + (NULL, 1015, 1500, 1900000, 150, 1700000029, 1700000030), + (NULL, 1016, 1600, 2000000, 160, 1700000031, 1700000032), + (NULL, 1017, 1700, 2100000, 170, 1700000033, 1700000034), + (NULL, 1018, 1800, 2200000, 180, 1700000035, 1700000036), + (NULL, 1019, 1900, 2300000, 190, 1700000037, 1700000038), + (NULL, 1020, 2000, 2400000, 200, 1700000039, 1700000040), + (NULL, 1021, 2100, 2500000, 210, 1700000041, 1700000042), + (NULL, 1022, 2200, 2600000, 220, 1700000043, 1700000044), + (NULL, 1023, 2300, 2700000, 230, 1700000045, 
1700000046), + (NULL, 1024, 2400, 2800000, 240, 1700000047, 1700000048), + (NULL, 1025, 2500, 2900000, 250, 1700000049, 1700000050); + +drop table if exists gh_ost_test_other; +create table gh_ost_test_other ( + uid int(11) NOT NULL, + name varchar(64) NOT NULL, + amount decimal(10,2) NOT NULL, + created_at datetime NOT NULL, + PRIMARY KEY (uid), + UNIQUE KEY name_uq (name) +); + +insert into gh_ost_test_other values + (1, 'alpha', 10.50, '2024-01-01 10:00:00'), + (2, 'bravo', 20.75, '2024-01-02 11:00:00'), + (3, 'charlie', 30.00, '2024-01-03 12:00:00'), + (4, 'delta', 40.25, '2024-01-04 13:00:00'), + (5, 'echo', 50.50, '2024-01-05 14:00:00'), + (6, 'foxtrot', 60.75, '2024-01-06 15:00:00'), + (7, 'golf', 70.00, '2024-01-07 16:00:00'), + (8, 'hotel', 80.25, '2024-01-08 17:00:00'), + (9, 'india', 90.50, '2024-01-09 18:00:00'), + (10, 'juliet', 100.75, '2024-01-10 19:00:00'), + (11, 'kilo', 110.00, '2024-01-11 20:00:00'), + (12, 'lima', 120.25, '2024-01-12 21:00:00'); + +drop table if exists gh_ost_test_third; +create table gh_ost_test_third ( + code varchar(32) NOT NULL, + label varchar(128) NOT NULL, + score double NOT NULL, + updated_at timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (code), + KEY score_ix (score) +); + +insert into gh_ost_test_third (code, label, score) values + ('code_1', 'label_1', 1.5), + ('code_2', 'label_2', 2.5), + ('code_3', 'label_3', 3.5), + ('code_4', 'label_4', 4.5), + ('code_5', 'label_5', 5.5), + ('code_6', 'label_6', 6.5), + ('code_7', 'label_7', 7.5), + ('code_8', 'label_8', 8.5); diff --git a/localtests/move-tables/multiple-three-concurrent-writes/on_test.sh b/localtests/move-tables/multiple-three-concurrent-writes/on_test.sh new file mode 100755 index 000000000..06d703361 --- /dev/null +++ b/localtests/move-tables/multiple-three-concurrent-writes/on_test.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# Three-table move with sustained DML on all three tables during the +# copy. 
insert-source-primary-loop auto-detects every seeded fixture +# (gh_ost_test, gh_ost_test_other, gh_ost_test_third) and writes to all of them, +# so each migrated table sees concurrent inserts while gh-ost copies and drains. +# The harness then validates per-table structure + content checksums (source +# `_<table>_del` vs target), which deterministically proves every concurrent
+# write was captured on the target. +DATABASE=test script/move-tables/insert-source-primary-loop 100 0.01 100 & +sleep 5 && kill $! diff --git a/localtests/move-tables/multiple-three-concurrent-writes/tables.txt b/localtests/move-tables/multiple-three-concurrent-writes/tables.txt new file mode 100644 index 000000000..72f7ba8f6 --- /dev/null +++ b/localtests/move-tables/multiple-three-concurrent-writes/tables.txt @@ -0,0 +1,3 @@ +gh_ost_test +gh_ost_test_other +gh_ost_test_third diff --git a/script/move-tables/README.md b/script/move-tables/README.md index 38fa4279f..398957821 100644 --- a/script/move-tables/README.md +++ b/script/move-tables/README.md @@ -93,13 +93,24 @@ script/move-tables/teardown The same fixtures back the CI integration tests, run via `localtests/move-tables-test.sh [filter]`. Each test directory under -`localtests/move-tables/` is self-contained (its own `create.sql` + `tables.txt`): - -- `single` — moves 1 table (`gh_ost_test`) -- `multi` — moves 2 tables (`gh_ost_test`, `gh_ost_test_other`) -- `three` — moves 3 tables (`gh_ost_test`, `gh_ost_test_other`, `gh_ost_test_third`) +`localtests/move-tables/` is self-contained (its own `create.sql` + `tables.txt`, +plus an optional `on_test.sh` for concurrent workload or `test.sh` for a fully +custom scenario): + +- `single` — moves 1 table, idle source +- `single-concurrent-writes` — moves 1 table with sustained DML during copy +- `single-with-hooks` — moves 1 table and asserts the hook env vars +- `multiple-two` — moves 2 tables, idle source +- `multiple-three` — moves 3 tables, idle source +- `multiple-three-concurrent-writes` — moves 3 tables with sustained DML on all three +- `atomic-multi-table-cutover` — moves 2 tables while committing cross-table + transactions up to cutover; asserts the atomic multi-table RENAME leaves no + orphaned rows across the pair +- `resume-panic-on-row-copy`, `resume-panic-before-drain-complete`, +
`resume-panic-before-on-success-hook` — crash mid-run via a failpoint, then + `--resume` to completion Run a single scenario by name, e.g.: ```bash -localtests/move-tables-test.sh three +localtests/move-tables-test.sh multiple-three ``` \ No newline at end of file From 87385247fda7121685a20f905c0dc6b847c3fa1b Mon Sep 17 00:00:00 2001 From: Zach Sierakowski Date: Fri, 26 Jun 2026 13:41:47 +0000 Subject: [PATCH 25/25] get rid of GetTargetTableName --- go/base/context.go | 11 ----------- go/logic/applier.go | 4 ++-- go/logic/hooks.go | 2 +- go/logic/migrator.go | 6 +++--- 4 files changed, 6 insertions(+), 17 deletions(-) diff --git a/go/base/context.go b/go/base/context.go index 47719985a..305769381 100644 --- a/go/base/context.go +++ b/go/base/context.go @@ -571,17 +571,6 @@ func (mctx *MigrationContext) GetGhostTableName() string { } } -// GetTargetTableName generates the name of the target table. In move-tables mode -// each table keeps its own name on the target, so there is no single target -// table name; per-table code uses MoveTable.TargetTableName instead, and calling -// this is a programmer error that panics to fail fast. -func (mctx *MigrationContext) GetTargetTableName() string { - if mctx.IsMoveTablesMode() { - panic("GetTargetTableName() must not be called in move-tables mode; use MoveTable.TargetTableName") - } - return mctx.GetGhostTableName() -} - // GetTargetDatabaseName fetches the name of the target database, which defaults to the original // database name unless we're in move-tables mode. 
func (mctx *MigrationContext) GetTargetDatabaseName() string { diff --git a/go/logic/applier.go b/go/logic/applier.go index ac89c1013..0a99919ce 100644 --- a/go/logic/applier.go +++ b/go/logic/applier.go @@ -164,7 +164,7 @@ func (apl *Applier) compileMigrationKeyWarningRegex() (*regexp.Regexp, error) { if apl.migrationContext.IsMoveTablesMode() { return apl.compileMoveTablesKeyWarningRegex() } - return compileKeyWarningRegex(apl.migrationContext.GetTargetTableName(), apl.migrationContext.UniqueKey.NameInGhostTable) + return compileKeyWarningRegex(apl.migrationContext.GetGhostTableName(), apl.migrationContext.UniqueKey.NameInGhostTable) } // compileMoveTablesKeyWarningRegex builds one duplicate-key warning regex @@ -422,7 +422,7 @@ func (apl *Applier) prepareQueries() (err error) { targetDatabaseName := apl.migrationContext.GetTargetDatabaseName() if !apl.migrationContext.IsMoveTablesMode() { - targetTableName := apl.migrationContext.GetTargetTableName() + targetTableName := apl.migrationContext.GetGhostTableName() if apl.dmlDeleteQueryBuilder, err = sql.NewDMLDeleteQueryBuilder( targetDatabaseName, targetTableName, diff --git a/go/logic/hooks.go b/go/logic/hooks.go index 8a0f086ee..28cde0ca6 100644 --- a/go/logic/hooks.go +++ b/go/logic/hooks.go @@ -288,7 +288,7 @@ func (he *HooksExecutor) applyEnvironmentVariables(extraVariables ...string) []s if he.migrationContext.IsMoveTablesMode() { targetTableNameEnv = strings.Join(he.migrationContext.MoveTables.TableNames, ",") } else { - targetTableNameEnv = he.migrationContext.GetTargetTableName() + targetTableNameEnv = he.migrationContext.GetGhostTableName() } env = append(env, fmt.Sprintf("GH_OST_TARGET_TABLE_NAME=%s", targetTableNameEnv)) env = append(env, extraVariables...) 
diff --git a/go/logic/migrator.go b/go/logic/migrator.go index ea7d075c6..d0d6c39c8 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -2078,11 +2078,11 @@ func (mgtr *Migrator) printMigrationStatusHint(writers ...io.Writer) { mgtr.migrationContext.StartTime.Format(time.RubyDate), ) } else { - fmt.Fprintf(w, "# Migrating %s.%s; Target table is %s.%s\n", + fmt.Fprintf(w, "# Migrating %s.%s; Ghost table is %s.%s\n", sql.EscapeName(mgtr.migrationContext.DatabaseName), sql.EscapeName(mgtr.migrationContext.OriginalTableName), - sql.EscapeName(mgtr.migrationContext.GetTargetDatabaseName()), - sql.EscapeName(mgtr.migrationContext.GetTargetTableName()), + sql.EscapeName(mgtr.migrationContext.DatabaseName), + sql.EscapeName(mgtr.migrationContext.GetGhostTableName()), ) fmt.Fprintf(w, "# Migrating %+v; inspecting %+v; executing on %+v\n", *mgtr.applier.connectionConfig.ImpliedKey,